diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json deleted file mode 100644 index ee687fff025db235d1a0c5e3e7e4d5ea37b70fa5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.6896095301125083 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json deleted file mode 100644 index 09913ae524e676cae982ea0bf63fafafde804167..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.8378557246856386 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json deleted file mode 100644 index 357752fcef715dd0e9bb3e53f19541a6e4677762..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.5956320317670417 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json deleted file mode 100644 index 8efe6a29767e7402a35636542d83d0ca0dcc99aa..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.8213103904698875 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json deleted file mode 100644 index ab27c96b2527d678ce13c15cfdbde2a31adebef5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.8219722038385175 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json deleted file mode 100644 index e752bca41c96f3e30dfe70484266525ef56f78d6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.7683653209794837 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json deleted file mode 100644 index fca6014811f0daaa82752ddaaa993561ce108c44..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.886168100595632 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json deleted file mode 100644 index f3777d75e7650424308f106fd6d3a5df7f787d90..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.6724023825281271 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json deleted file mode 100644 index 53b09d0e87a347427a568a9a833c4fd900355f1b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.8676373262739907 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json deleted file mode 100644 index f9518581ecda9f5cf8b5b17defb0740e70dd9640..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.8769027134348114 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json deleted file mode 100644 index a6bf025a2d3ea4baeb360ae84d4b8b69e15a024c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.6082064857710126 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json deleted file mode 100644 index 0632573e4226ca0469eb8baa954d9a633b95c867..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.7266710787557908 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json deleted file mode 100644 index 2c7e2be9cbc4fc0796d38b369755eb1270b944d8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.5552614162806089 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json deleted file mode 100644 index 386f9bca23da50f8812d5a65c99a454e3bfd3a84..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.700198544010589 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json deleted file mode 100644 index a706ebc93e3866716af502ede850131876542e30..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.7107875579086698 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json deleted file mode 100644 index 36c5bb8484b6d505c4afb870897acae7619e6958..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.6366644606221046 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json deleted file mode 100644 index 5ed2515ae17faae85ed0bc6a96c0d985ed4bddb5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.7882197220383852 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json deleted file mode 100644 index d807e21b29de04446f4cfe611ab4a9f1c1aac6f3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.5982792852415619 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json deleted file mode 100644 index b43a6d1cd13493fd76b1f429cf6b268d8fb3eae9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.7485109199205824 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json deleted file mode 100644 index 5d3b8d42e9a6c3e33a1933b8157ddcb0e1e3d5b3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.7683653209794837 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json deleted file mode 100644 index 42973d245d09708e0cb10381312f81c1a009fd37..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.7385837193911317 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json deleted file mode 100644 index 2083ffe7418714a31fc494282fb2a5d5d913dd16..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.8332230311052283 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json deleted file mode 100644 index 0553cf8d1be7424ae76ed745bacef1655f0af56c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.6293845135671741 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json deleted file mode 100644 index 6884f7ddd6987951ac927728b6032991cbdd40d1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.7816015883520847 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json deleted file mode 100644 index 623362eb25af762a34a6adced1ec42cb97ec4b41..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.8226340172071476 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json deleted file mode 100644 index 9197b8cb23453249367d3b46eff4c0c6ec7f5291..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.7498345466578424 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json deleted file mode 100644 index 672e6984b4f37403144ee2bfabd0f09d2c190db3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.8583719391131701 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json deleted file mode 100644 index 21438162a277b6d07ae6068a23aca83bc6388c96..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.6227663798808736 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json deleted file mode 100644 index f7d3e0fa656be6fcab419b168f75281f7934d963..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.8405029781601588 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json deleted file mode 100644 index 8e57f15ca55b9773a5f03692f5e8a03d17676d04..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.8385175380542687 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json deleted file mode 100644 index 7cca016bfa305e04743256d35dde6e1dda3b1b4f..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "en", - "template_name": "Replace", - "evaluation": { - "accuracy": 0.6576344086021505 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json deleted file mode 100644 index c438a5bc83423e5519bd90ba7ecff0dc88d0cc5e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "en", - "template_name": "True or False", - "evaluation": { - "accuracy": 0.5187096774193548 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json deleted file mode 100644 index 0406ac2b9a66283e0e273eb8136dfd7014e870df..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "en", - "template_name": "does underscore refer to", - "evaluation": { - "accuracy": 0.5931182795698925 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json deleted file mode 100644 index 6d55f5f2091fe97a7f784f08252944c43ec04ec5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "en", - "template_name": "stand for", - "evaluation": { - "accuracy": 0.5070967741935484 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json deleted file mode 100644 index cbbd7c31c32708d98dbbeb064a418844ed5a841f..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "en", - "template_name": "underscore refer to", - "evaluation": { - "accuracy": 0.6210752688172043 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json deleted file mode 100644 index 8a5b58afaeaf436677903e5cebd1e0d95398c2e1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "Replace", - "evaluation": { - "accuracy": 0.5180722891566265 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json deleted file mode 100644 index 5fcca5729178fa86a2db2771c52b208d5300ce29..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "True or False", - "evaluation": { - "accuracy": 0.5301204819277109 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json deleted file mode 100644 index e633ec7e691facaa5d58953c2c1dda2ff8d7244d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "does underscore refer to", - "evaluation": { - "accuracy": 0.5542168674698795 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json deleted file mode 100644 index 84c78ab68f45d54a9bc57fa5bd2db69a8a3eee1c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "stand for", - "evaluation": { - "accuracy": 0.5180722891566265 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json deleted file mode 100644 index 1c8671c7e91fcfa90f5c712c4cd45bd21bce75b6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "underscore refer to", - "evaluation": { - "accuracy": 0.5421686746987951 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json deleted file mode 100644 index 40eb613c0db3e719a7c14f420c93feaca8db71b9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "Replace", - "evaluation": { - "accuracy": 0.5741444866920152 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json deleted file mode 100644 index 56359460ad660a81be9fb14f1b24a16bc423c553..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "True or False", - "evaluation": { - "accuracy": 0.4790874524714829 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json deleted file mode 100644 index 72c0b538422b56ee2f446ae55334d7f16aa1d5a3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "does underscore refer to", - "evaluation": { - "accuracy": 0.55893536121673 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json deleted file mode 100644 index 3480298da420f74bcc47be59736b849b7b1d16fd..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "stand for", - "evaluation": { - "accuracy": 0.5209125475285171 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json deleted file mode 100644 index e80e9293322a6d429f21f5f14f68c9303eafe211..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "underscore refer to", - "evaluation": { - "accuracy": 0.5437262357414449 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json deleted file mode 100644 index 3ca8099b5daa44ef38338971f64730faf00e56bf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "Replace", - "evaluation": { - "accuracy": 0.626984126984127 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json deleted file mode 100644 index 4d7ecc65170e29e452aed31e5668815572903efd..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "True or False", - "evaluation": { - "accuracy": 0.503968253968254 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json deleted file mode 100644 index fbdd04009638a4026303cb221180e18c7179dc8b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "does underscore refer to", - "evaluation": { - "accuracy": 0.5436507936507936 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json deleted file mode 100644 index 6a6da375fe704c9bb9eeac0c8304879e1fc26865..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "stand for", - "evaluation": { - "accuracy": 0.49007936507936506 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json deleted file mode 100644 index 3e946b644795bdac06366e70df599e5c70ef095d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "underscore refer to", - "evaluation": { - "accuracy": 0.5535714285714286 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json deleted file mode 100644 index 7d797684c48813fb93ee7fcee75012e9220216bd..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r1", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.426 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json deleted file mode 100644 index 20374d1d4070ba04732b4c4875762233e3b51afa..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r1", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.402 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json deleted file mode 100644 index 6a28787134fdaee4350e53df323e24eb4336c101..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r1", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.401 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json deleted file mode 100644 index b12be3ab4bc1fe7034c070a3cd0a856abcdaff89..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r1", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.314 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json deleted file mode 100644 index 6893460fc25c185b0f863c0771d50a331a924574..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r1/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r1", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.387 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json deleted file mode 100644 index 222b602340107db392e64549acd17451eef3b73a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r2", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.383 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json deleted file mode 100644 index 5c7dd6e91ff9d27c32c5412ac78786cf7d2d7c02..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r2", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.374 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json deleted file mode 100644 index af61a800ad5c00e1df7087f25d7189bcd54ed72e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r2", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.394 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json deleted file mode 100644 index 05a872ed6fea16bda1aa0c8cdb6ab02e80e036ca..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r2", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.302 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json deleted file mode 100644 index 2672da6c326b4a26b10b029420fc95e9947f5608..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r2/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r2", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.376 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json deleted file mode 100644 index f151cc8d2e07b9e0b3ef1edf849dd69a8c748873..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r3", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.42 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json deleted file mode 100644 index 82148467711e65c52e039e9346eaecb5d7f3ee84..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r3", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.4116666666666667 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json deleted file mode 100644 index 21fdd815d985648d60398c318c503b30dc209554..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r3", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.38916666666666666 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json deleted file mode 100644 index 8c95797516b28c82b8d3598a8a5dda398e84e5f0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r3", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.2966666666666667 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json deleted file mode 100644 index ec7cea823be814a3a1c396d7e2e6121f6e6cb1ee..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/anli/dev_r3/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "anli", - "dataset_config_name": "dev_r3", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.35833333333333334 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/merged.csv b/evaluation_bloommz-7b1/evaluation_l1/merged.csv deleted file mode 100644 index 7786678ad3e7adbcb02bec034f8fa4362a1bc340..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/merged.csv +++ /dev/null @@ -1,194 +0,0 @@ -dataset,prompt,metric,value -anli_dev_r1,GPT-3 style,accuracy,0.426 -anli_dev_r1,MNLI crowdsource,accuracy,0.402 -anli_dev_r1,can we infer,accuracy,0.401 -anli_dev_r1,guaranteed/possible/impossible,accuracy,0.314 -anli_dev_r1,justified in saying,accuracy,0.387 -anli_dev_r1,median,accuracy,0.401 -anli_dev_r2,GPT-3 style,accuracy,0.383 -anli_dev_r2,MNLI crowdsource,accuracy,0.374 -anli_dev_r2,can we infer,accuracy,0.394 -anli_dev_r2,guaranteed/possible/impossible,accuracy,0.302 -anli_dev_r2,justified in saying,accuracy,0.376 -anli_dev_r2,median,accuracy,0.376 -anli_dev_r3,GPT-3 style,accuracy,0.42 -anli_dev_r3,MNLI crowdsource,accuracy,0.4116666666666667 -anli_dev_r3,can we infer,accuracy,0.38916666666666666 -anli_dev_r3,guaranteed/possible/impossible,accuracy,0.2966666666666667 -anli_dev_r3,justified in saying,accuracy,0.35833333333333334 -anli_dev_r3,median,accuracy,0.38916666666666666 -story_cloze_2016,Answer Given options,accuracy,0.8524853019775521 -story_cloze_2016,Choose Story Ending,accuracy,0.8957776590058792 -story_cloze_2016,Generate Ending,accuracy,0.709246392303581 -story_cloze_2016,Novel Correct Ending,accuracy,0.8888295029396045 -story_cloze_2016,Story Continuation and Options,accuracy,0.8850881881346874 -story_cloze_2016,median,accuracy,0.8850881881346874 -super_glue_cb,GPT-3 style,accuracy,0.8392857142857143 -super_glue_cb,MNLI crowdsource,accuracy,0.35714285714285715 -super_glue_cb,can we infer,accuracy,0.7857142857142857 -super_glue_cb,guaranteed/possible/impossible,accuracy,0.5535714285714286 -super_glue_cb,justified in saying,accuracy,0.7142857142857143 -super_glue_cb,median,accuracy,0.7142857142857143 -super_glue_copa,"C1 or C2? premise, so/because…",accuracy,0.66 -super_glue_copa,best_option,accuracy,0.77 -super_glue_copa,cause_effect,accuracy,0.8 -super_glue_copa,i_am_hesitating,accuracy,0.81 -super_glue_copa,plausible_alternatives,accuracy,0.84 -super_glue_copa,median,accuracy,0.8 -super_glue_rte,GPT-3 style,accuracy,0.7906137184115524 -super_glue_rte,MNLI crowdsource,accuracy,0.8267148014440433 -super_glue_rte,does it follow that,accuracy,0.7942238267148014 -super_glue_rte,guaranteed true,accuracy,0.776173285198556 -super_glue_rte,should assume,accuracy,0.7617328519855595 -super_glue_rte,median,accuracy,0.7906137184115524 -winogrande_winogrande_xl,Replace,accuracy,0.5588003157063931 -winogrande_winogrande_xl,True or False,accuracy,0.5280189423835833 -winogrande_winogrande_xl,does underscore refer to,accuracy,0.5651144435674822 -winogrande_winogrande_xl,stand for,accuracy,0.5082872928176796 -winogrande_winogrande_xl,underscore refer to,accuracy,0.5651144435674822 -winogrande_winogrande_xl,median,accuracy,0.5588003157063931 -xcopa_id,"C1 or C2? premise, so/because…",accuracy,0.46 -xcopa_id,best_option,accuracy,0.7 -xcopa_id,cause_effect,accuracy,0.73 -xcopa_id,i_am_hesitating,accuracy,0.72 -xcopa_id,plausible_alternatives,accuracy,0.67 -xcopa_id,median,accuracy,0.7 -xcopa_sw,"C1 or C2? premise, so/because…",accuracy,0.6 -xcopa_sw,best_option,accuracy,0.55 -xcopa_sw,cause_effect,accuracy,0.54 -xcopa_sw,i_am_hesitating,accuracy,0.51 -xcopa_sw,plausible_alternatives,accuracy,0.52 -xcopa_sw,median,accuracy,0.54 -xcopa_ta,"C1 or C2? premise, so/because…",accuracy,0.59 -xcopa_ta,best_option,accuracy,0.56 -xcopa_ta,cause_effect,accuracy,0.6 -xcopa_ta,i_am_hesitating,accuracy,0.57 -xcopa_ta,plausible_alternatives,accuracy,0.62 -xcopa_ta,median,accuracy,0.59 -xcopa_vi,"C1 or C2? premise, so/because…",accuracy,0.53 -xcopa_vi,best_option,accuracy,0.72 -xcopa_vi,cause_effect,accuracy,0.72 -xcopa_vi,i_am_hesitating,accuracy,0.7 -xcopa_vi,plausible_alternatives,accuracy,0.71 -xcopa_vi,median,accuracy,0.71 -xcopa_zh,"C1 or C2? premise, so/because…",accuracy,0.67 -xcopa_zh,best_option,accuracy,0.7 -xcopa_zh,cause_effect,accuracy,0.8 -xcopa_zh,i_am_hesitating,accuracy,0.77 -xcopa_zh,plausible_alternatives,accuracy,0.79 -xcopa_zh,median,accuracy,0.77 -xnli_ar,GPT-3 style,accuracy,0.5558232931726907 -xnli_ar,MNLI crowdsource,accuracy,0.42128514056224897 -xnli_ar,can we infer,accuracy,0.5148594377510041 -xnli_ar,guaranteed/possible/impossible,accuracy,0.40562248995983935 -xnli_ar,justified in saying,accuracy,0.4927710843373494 -xnli_ar,median,accuracy,0.4927710843373494 -xnli_en,GPT-3 style,accuracy,0.5891566265060241 -xnli_en,MNLI crowdsource,accuracy,0.42610441767068274 -xnli_en,can we infer,accuracy,0.5662650602409639 -xnli_en,guaranteed/possible/impossible,accuracy,0.4614457831325301 -xnli_en,justified in saying,accuracy,0.5437751004016064 -xnli_en,median,accuracy,0.5437751004016064 -xnli_es,GPT-3 style,accuracy,0.5734939759036145 -xnli_es,MNLI crowdsource,accuracy,0.40923694779116465 -xnli_es,can we infer,accuracy,0.5148594377510041 -xnli_es,guaranteed/possible/impossible,accuracy,0.43132530120481927 -xnli_es,justified in saying,accuracy,0.4610441767068273 -xnli_es,median,accuracy,0.4610441767068273 -xnli_fr,GPT-3 style,accuracy,0.5666666666666667 -xnli_fr,MNLI crowdsource,accuracy,0.42208835341365464 -xnli_fr,can we infer,accuracy,0.5385542168674698 -xnli_fr,guaranteed/possible/impossible,accuracy,0.39076305220883534 -xnli_fr,justified in saying,accuracy,0.5100401606425703 -xnli_fr,median,accuracy,0.5100401606425703 -xnli_hi,GPT-3 style,accuracy,0.5345381526104418 -xnli_hi,MNLI crowdsource,accuracy,0.41124497991967873 -xnli_hi,can we infer,accuracy,0.4751004016064257 -xnli_hi,guaranteed/possible/impossible,accuracy,0.40923694779116465 -xnli_hi,justified in saying,accuracy,0.4469879518072289 -xnli_hi,median,accuracy,0.4469879518072289 -xnli_sw,GPT-3 style,accuracy,0.4827309236947791 -xnli_sw,MNLI crowdsource,accuracy,0.40562248995983935 -xnli_sw,can we infer,accuracy,0.44497991967871486 -xnli_sw,guaranteed/possible/impossible,accuracy,0.42289156626506025 -xnli_sw,justified in saying,accuracy,0.41124497991967873 -xnli_sw,median,accuracy,0.42289156626506025 -xnli_ur,GPT-3 style,accuracy,0.4947791164658635 -xnli_ur,MNLI crowdsource,accuracy,0.39759036144578314 -xnli_ur,can we infer,accuracy,0.4502008032128514 -xnli_ur,guaranteed/possible/impossible,accuracy,0.39036144578313253 -xnli_ur,justified in saying,accuracy,0.40843373493975904 -xnli_ur,median,accuracy,0.40843373493975904 -xnli_vi,GPT-3 style,accuracy,0.5449799196787148 -xnli_vi,MNLI crowdsource,accuracy,0.40401606425702813 -xnli_vi,can we infer,accuracy,0.5 -xnli_vi,guaranteed/possible/impossible,accuracy,0.44779116465863456 -xnli_vi,justified in saying,accuracy,0.4650602409638554 -xnli_vi,median,accuracy,0.4650602409638554 -xnli_zh,GPT-3 style,accuracy,0.5429718875502008 -xnli_zh,MNLI crowdsource,accuracy,0.3891566265060241 -xnli_zh,can we infer,accuracy,0.5032128514056224 -xnli_zh,guaranteed/possible/impossible,accuracy,0.38072289156626504 -xnli_zh,justified in saying,accuracy,0.4706827309236948 -xnli_zh,median,accuracy,0.4706827309236948 -xstory_cloze_ar,Answer Given options,accuracy,0.6896095301125083 -xstory_cloze_ar,Choose Story Ending,accuracy,0.8378557246856386 -xstory_cloze_ar,Generate Ending,accuracy,0.5956320317670417 -xstory_cloze_ar,Novel Correct Ending,accuracy,0.8213103904698875 -xstory_cloze_ar,Story Continuation and Options,accuracy,0.8219722038385175 -xstory_cloze_ar,median,accuracy,0.8213103904698875 -xstory_cloze_es,Answer Given options,accuracy,0.7683653209794837 -xstory_cloze_es,Choose Story Ending,accuracy,0.886168100595632 -xstory_cloze_es,Generate Ending,accuracy,0.6724023825281271 -xstory_cloze_es,Novel Correct Ending,accuracy,0.8676373262739907 -xstory_cloze_es,Story Continuation and Options,accuracy,0.8769027134348114 -xstory_cloze_es,median,accuracy,0.8676373262739907 -xstory_cloze_eu,Answer Given options,accuracy,0.6082064857710126 -xstory_cloze_eu,Choose Story Ending,accuracy,0.7266710787557908 -xstory_cloze_eu,Generate Ending,accuracy,0.5552614162806089 -xstory_cloze_eu,Novel Correct Ending,accuracy,0.700198544010589 -xstory_cloze_eu,Story Continuation and Options,accuracy,0.7107875579086698 -xstory_cloze_eu,median,accuracy,0.700198544010589 -xstory_cloze_hi,Answer Given options,accuracy,0.6366644606221046 -xstory_cloze_hi,Choose Story Ending,accuracy,0.7882197220383852 -xstory_cloze_hi,Generate Ending,accuracy,0.5982792852415619 -xstory_cloze_hi,Novel Correct Ending,accuracy,0.7485109199205824 -xstory_cloze_hi,Story Continuation and Options,accuracy,0.7683653209794837 -xstory_cloze_hi,median,accuracy,0.7485109199205824 -xstory_cloze_id,Answer Given options,accuracy,0.7385837193911317 -xstory_cloze_id,Choose Story Ending,accuracy,0.8332230311052283 -xstory_cloze_id,Generate Ending,accuracy,0.6293845135671741 -xstory_cloze_id,Novel Correct Ending,accuracy,0.7816015883520847 -xstory_cloze_id,Story Continuation and Options,accuracy,0.8226340172071476 -xstory_cloze_id,median,accuracy,0.7816015883520847 -xstory_cloze_zh,Answer Given options,accuracy,0.7498345466578424 -xstory_cloze_zh,Choose Story Ending,accuracy,0.8583719391131701 -xstory_cloze_zh,Generate Ending,accuracy,0.6227663798808736 -xstory_cloze_zh,Novel Correct Ending,accuracy,0.8405029781601588 -xstory_cloze_zh,Story Continuation and Options,accuracy,0.8385175380542687 -xstory_cloze_zh,median,accuracy,0.8385175380542687 -xwinograd_en,Replace,accuracy,0.6576344086021505 -xwinograd_en,True or False,accuracy,0.5187096774193548 -xwinograd_en,does underscore refer to,accuracy,0.5931182795698925 -xwinograd_en,stand for,accuracy,0.5070967741935484 -xwinograd_en,underscore refer to,accuracy,0.6210752688172043 -xwinograd_en,median,accuracy,0.5931182795698925 -xwinograd_fr,Replace,accuracy,0.5180722891566265 -xwinograd_fr,True or False,accuracy,0.5301204819277109 -xwinograd_fr,does underscore refer to,accuracy,0.5542168674698795 -xwinograd_fr,stand for,accuracy,0.5180722891566265 -xwinograd_fr,underscore refer to,accuracy,0.5421686746987951 -xwinograd_fr,median,accuracy,0.5301204819277109 -xwinograd_pt,Replace,accuracy,0.5741444866920152 -xwinograd_pt,True or False,accuracy,0.4790874524714829 -xwinograd_pt,does underscore refer to,accuracy,0.55893536121673 -xwinograd_pt,stand for,accuracy,0.5209125475285171 -xwinograd_pt,underscore refer to,accuracy,0.5437262357414449 -xwinograd_pt,median,accuracy,0.5437262357414449 -xwinograd_zh,Replace,accuracy,0.626984126984127 -xwinograd_zh,True or False,accuracy,0.503968253968254 -xwinograd_zh,does underscore refer to,accuracy,0.5436507936507936 -xwinograd_zh,stand for,accuracy,0.49007936507936506 -xwinograd_zh,underscore refer to,accuracy,0.5535714285714286 -xwinograd_zh,median,accuracy,0.5436507936507936 -multiple,average,multiple,0.6067197952551315 diff --git a/evaluation_bloommz-7b1/evaluation_l1/merged.json b/evaluation_bloommz-7b1/evaluation_l1/merged.json deleted file mode 100644 index c58141c131fb8e3ab2c2f7a6c9c1f0a021862123..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/merged.json +++ /dev/null @@ -1 +0,0 @@ -{"Muennighoff/xstory_cloze_ar": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6896095301125083}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8378557246856386}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5956320317670417}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8213103904698875}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8219722038385175}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_es": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7683653209794837}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.886168100595632}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6724023825281271}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8676373262739907}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8769027134348114}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_eu": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6082064857710126}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7266710787557908}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5552614162806089}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.700198544010589}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7107875579086698}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_hi": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6366644606221046}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7882197220383852}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5982792852415619}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7485109199205824}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7683653209794837}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_id": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7385837193911317}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8332230311052283}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6293845135671741}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7816015883520847}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8226340172071476}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_zh": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7498345466578424}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8583719391131701}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6227663798808736}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8405029781601588}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8385175380542687}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xwinograd_en": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6576344086021505}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5187096774193548}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5931182795698925}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5070967741935484}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6210752688172043}, "template_name": "underscore refer to"}}, "Muennighoff/xwinograd_fr": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5180722891566265}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5301204819277109}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5542168674698795}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5180722891566265}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5421686746987951}, "template_name": "underscore refer to"}}, "Muennighoff/xwinograd_pt": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5741444866920152}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.4790874524714829}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.55893536121673}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5209125475285171}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5437262357414449}, "template_name": "underscore refer to"}}, "Muennighoff/xwinograd_zh": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.626984126984127}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.503968253968254}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5436507936507936}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.49007936507936506}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5535714285714286}, "template_name": "underscore refer to"}}, "anli_dev_r1": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.426}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.402}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.401}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.314}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.387}, "template_name": "justified in saying"}}, "anli_dev_r2": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.383}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.374}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.394}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.302}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.376}, "template_name": "justified in saying"}}, "anli_dev_r3": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.42}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.4116666666666667}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.38916666666666666}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.2966666666666667}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.35833333333333334}, "template_name": "justified in saying"}}, "story_cloze_2016": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.8524853019775521}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.8957776590058792}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.709246392303581}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.8888295029396045}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.8850881881346874}, "template_name": "Story Continuation and Options"}}, "super_glue_cb": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8392857142857143}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.35714285714285715}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7857142857142857}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.5535714285714286}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7142857142857143}, "template_name": "justified in saying"}}, "super_glue_copa": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.66}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.77}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.81}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.84}, "template_name": "plausible_alternatives"}}, "super_glue_rte": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7906137184115524}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8267148014440433}, "template_name": "MNLI crowdsource"}, "does it follow that": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7942238267148014}, "template_name": "does it follow that"}, "guaranteed true": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.776173285198556}, "template_name": "guaranteed true"}, "should assume": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7617328519855595}, "template_name": "should assume"}}, "winogrande_winogrande_xl": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5588003157063931}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5280189423835833}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5651144435674822}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5082872928176796}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5651144435674822}, "template_name": "underscore refer to"}}, "xcopa_id": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.46}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.7}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.73}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.72}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.67}, "template_name": "plausible_alternatives"}}, "xcopa_sw": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.55}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.54}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.51}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.52}, "template_name": "plausible_alternatives"}}, "xcopa_ta": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.59}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.56}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.57}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "plausible_alternatives"}}, "xcopa_vi": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.53}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.72}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.72}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.7}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.71}, "template_name": "plausible_alternatives"}}, "xcopa_zh": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.67}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.7}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.8}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.77}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.79}, "template_name": "plausible_alternatives"}}, "xnli_ar": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5558232931726907}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.42128514056224897}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5148594377510041}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40562248995983935}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4927710843373494}, "template_name": "justified in saying"}}, "xnli_en": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5891566265060241}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.42610441767068274}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5662650602409639}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4614457831325301}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5437751004016064}, "template_name": "justified in saying"}}, "xnli_es": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5734939759036145}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40923694779116465}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5148594377510041}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43132530120481927}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4610441767068273}, "template_name": "justified in saying"}}, "xnli_fr": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5666666666666667}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.42208835341365464}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5385542168674698}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39076305220883534}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5100401606425703}, "template_name": "justified in saying"}}, "xnli_hi": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5345381526104418}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.41124497991967873}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4751004016064257}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40923694779116465}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4469879518072289}, "template_name": "justified in saying"}}, "xnli_sw": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4827309236947791}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40562248995983935}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44497991967871486}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.42289156626506025}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.41124497991967873}, "template_name": "justified in saying"}}, "xnli_ur": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4947791164658635}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39759036144578314}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4502008032128514}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39036144578313253}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40843373493975904}, "template_name": "justified in saying"}}, "xnli_vi": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5449799196787148}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40401606425702813}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44779116465863456}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4650602409638554}, "template_name": "justified in saying"}}, "xnli_zh": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5429718875502008}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3891566265060241}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5032128514056224}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.38072289156626504}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4706827309236948}, "template_name": "justified in saying"}}} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json b/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json deleted file mode 100644 index bc12fbe77a80a053c5eed4af6d4f94b14a58fac8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "story_cloze", - "dataset_config_name": "2016", - "template_name": "Answer Given options", - "evaluation": { - "accuracy": 0.8524853019775521 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json deleted file mode 100644 index 2a0035cde5af1d05aa6218652e04dd7af635558e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "story_cloze", - "dataset_config_name": "2016", - "template_name": "Choose Story Ending", - "evaluation": { - "accuracy": 0.8957776590058792 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json deleted file mode 100644 index e806bc7ee1a263558fc203b0ac5385ec77a68abe..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Generate_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "story_cloze", - "dataset_config_name": "2016", - "template_name": "Generate Ending", - "evaluation": { - "accuracy": 0.709246392303581 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json b/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json deleted file mode 100644 index ad80f193b34315890ca3d493f219209e25720bf0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "story_cloze", - "dataset_config_name": "2016", - "template_name": "Novel Correct Ending", - "evaluation": { - "accuracy": 0.8888295029396045 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json b/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json deleted file mode 100644 index 6b10c247f5761114c564f5bf9b8ca6151706c5d7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "story_cloze", - "dataset_config_name": "2016", - "template_name": "Story Continuation and Options", - "evaluation": { - "accuracy": 0.8850881881346874 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json deleted file mode 100644 index e88caf3c7b226a207a9912791ce8586649b6a2e8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "cb", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.8392857142857143 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json deleted file mode 100644 index 019653782e1cd06049e5b706b25af7d186617d43..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "cb", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.35714285714285715 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json deleted file mode 100644 index 5b4601cfe5ea3cb7ab2c7fb549deb030b057a8ea..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "cb", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.7857142857142857 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json deleted file mode 100644 index 3c083f8ad9fc8581c84080c566637c1ef6a574b3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "cb", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.5535714285714286 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json deleted file mode 100644 index 7260dd8971cd4a67d651fa81a27919f3f1e5fe07..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/cb/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "cb", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.7142857142857143 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json deleted file mode 100644 index 4c3b5850c841659be2f1079fa7b5c632e04c5eaa..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "copa", - "template_name": "C1 or C2? premise, so/because\u2026", - "evaluation": { - "accuracy": 0.66 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/best_option/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/best_option/results.json deleted file mode 100644 index cbba802897e2718d94eaca1ecd34843a2ddcd1d5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/best_option/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "copa", - "template_name": "best_option", - "evaluation": { - "accuracy": 0.77 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json deleted file mode 100644 index 9430d172ee9dcd88e334286ec0fc4121ec0114d9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/cause_effect/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "copa", - "template_name": "cause_effect", - "evaluation": { - "accuracy": 0.8 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json deleted file mode 100644 index 6f89a66124cbcc5a49ef97ffe0e73dcc262f2b2e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/i_am_hesitating/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "copa", - "template_name": "i_am_hesitating", - "evaluation": { - "accuracy": 0.81 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json deleted file mode 100644 index a5aad2abd3c3fe329d7167231d1f0d7a00bab759..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/copa/plausible_alternatives/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "copa", - "template_name": "plausible_alternatives", - "evaluation": { - "accuracy": 0.84 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json deleted file mode 100644 index e6f0f21d99a6515d49923aa44a3555649a5a1c70..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "rte", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.7906137184115524 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json deleted file mode 100644 index 19415d9be79a38f6043de7d04791de5cc4bd6b1b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "rte", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.8267148014440433 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json deleted file mode 100644 index d372e9fb43eba25aac1c7aebf4a8174f2ec79495..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/does_it_follow_that/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "rte", - "template_name": "does it follow that", - "evaluation": { - "accuracy": 0.7942238267148014 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json deleted file mode 100644 index 644170197819bd48259a1cf22f12340c27cb66e4..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/guaranteed_true/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "rte", - "template_name": "guaranteed true", - "evaluation": { - "accuracy": 0.776173285198556 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json b/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json deleted file mode 100644 index af31cd812073aff28baff4f147a77e4f11c96ae6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/super_glue/rte/should_assume/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "super_glue", - "dataset_config_name": "rte", - "template_name": "should assume", - "evaluation": { - "accuracy": 0.7617328519855595 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json b/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json deleted file mode 100644 index d49b83bf5e085e24d6dc105eaba9f0f021af4049..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/Replace/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "winogrande", - "dataset_config_name": "winogrande_xl", - "template_name": "Replace", - "evaluation": { - "accuracy": 0.5588003157063931 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json b/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json deleted file mode 100644 index 64986025f0e3d5fee2f07385485137e563059134..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "winogrande", - "dataset_config_name": "winogrande_xl", - "template_name": "True or False", - "evaluation": { - "accuracy": 0.5280189423835833 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json deleted file mode 100644 index df5c11afcdc5dba943fc3d5ac0d5a0eb0da5442b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "winogrande", - "dataset_config_name": "winogrande_xl", - "template_name": "does underscore refer to", - "evaluation": { - "accuracy": 0.5651144435674822 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json b/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json deleted file mode 100644 index 647e2857c0218f770c23fff801cace541e8fdf83..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "winogrande", - "dataset_config_name": "winogrande_xl", - "template_name": "stand for", - "evaluation": { - "accuracy": 0.5082872928176796 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json b/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json deleted file mode 100644 index 826e581f8690184e6cc5502e619730ee5927a9f5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "winogrande", - "dataset_config_name": "winogrande_xl", - "template_name": "underscore refer to", - "evaluation": { - "accuracy": 0.5651144435674822 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json deleted file mode 100644 index 293fdb26f92984e0268401b1f1a14b028be2162f..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "C1 or C2? premise, so/because\u2026", - "evaluation": { - "accuracy": 0.46 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/best_option/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/best_option/results.json deleted file mode 100644 index 376f92d4d78eeb841b35b92e71a0a764063c2184..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/best_option/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "best_option", - "evaluation": { - "accuracy": 0.7 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json deleted file mode 100644 index eb6e7d90f444d9b8c2b91f303b89e1fcd30e9022..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/cause_effect/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "cause_effect", - "evaluation": { - "accuracy": 0.73 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json deleted file mode 100644 index 3d8d8315697a9d8b396bdcf751d5a1125676ff60..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/i_am_hesitating/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "i_am_hesitating", - "evaluation": { - "accuracy": 0.72 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json deleted file mode 100644 index 32f9c69ce80a887a576c10e9ff705584cbc7c06b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/id/plausible_alternatives/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "plausible_alternatives", - "evaluation": { - "accuracy": 0.67 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json deleted file mode 100644 index 96f7c8b279f70afb01baa8eef9e50ca71882b6cc..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "C1 or C2? premise, so/because\u2026", - "evaluation": { - "accuracy": 0.6 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/best_option/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/best_option/results.json deleted file mode 100644 index 5111b0ad2d757cbdce85a1168f2fbe34d3c5e38d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/best_option/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "best_option", - "evaluation": { - "accuracy": 0.55 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json deleted file mode 100644 index 0ad7e70a308353cf9d37764620d23de296526cc2..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/cause_effect/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "cause_effect", - "evaluation": { - "accuracy": 0.54 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json deleted file mode 100644 index 3d49f6c7b070a1968668977bf88c48e43f1323f7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/i_am_hesitating/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "i_am_hesitating", - "evaluation": { - "accuracy": 0.51 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json deleted file mode 100644 index b536b7058230d3d08e87e8f6b91aeb84a91dbbf3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/sw/plausible_alternatives/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "plausible_alternatives", - "evaluation": { - "accuracy": 0.52 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json deleted file mode 100644 index 37b0615e662498d1b2d51ccf76f87ed84baf4d4d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "C1 or C2? premise, so/because\u2026", - "evaluation": { - "accuracy": 0.59 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/best_option/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/best_option/results.json deleted file mode 100644 index ad0d3fb1e2334d8f5a58ca45fdbc52234a155c5a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/best_option/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "best_option", - "evaluation": { - "accuracy": 0.56 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json deleted file mode 100644 index d0767df2f080cde3ed0b36bb73af7a735d30e1e1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/cause_effect/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "cause_effect", - "evaluation": { - "accuracy": 0.6 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json deleted file mode 100644 index 11ad208825e71b0e4b8c55821fb1c06bb0d2b8f8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/i_am_hesitating/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "i_am_hesitating", - "evaluation": { - "accuracy": 0.57 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json deleted file mode 100644 index 650cd2908644df0288ff02176a27ba6ccdd8430d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/ta/plausible_alternatives/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "plausible_alternatives", - "evaluation": { - "accuracy": 0.62 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json deleted file mode 100644 index 4822d0163e81a6188e592916c789a7edf82fc895..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "C1 or C2? premise, so/because\u2026", - "evaluation": { - "accuracy": 0.53 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/best_option/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/best_option/results.json deleted file mode 100644 index 205c5e683fbb3fc4634f9fbfd28097c65d60620b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/best_option/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "best_option", - "evaluation": { - "accuracy": 0.72 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json deleted file mode 100644 index b8675fb5e6b88fd2daee764a82897ff98fc80d33..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/cause_effect/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "cause_effect", - "evaluation": { - "accuracy": 0.72 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json deleted file mode 100644 index 7a1e8a30a979e9b007efbf5ea69c4308305c6979..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/i_am_hesitating/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "i_am_hesitating", - "evaluation": { - "accuracy": 0.7 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json deleted file mode 100644 index a754e5ef467a38023545605ed176de98e200240c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/vi/plausible_alternatives/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "plausible_alternatives", - "evaluation": { - "accuracy": 0.71 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json deleted file mode 100644 index 8785356a62493902dc90f963cf0c4e512544d145..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "C1 or C2? premise, so/because\u2026", - "evaluation": { - "accuracy": 0.67 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/best_option/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/best_option/results.json deleted file mode 100644 index 66cc8b6be554597540a596a1e28b9e7b0322a4f6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/best_option/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "best_option", - "evaluation": { - "accuracy": 0.7 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json deleted file mode 100644 index 6d6d9852b695c1d3c8725ca07fe44727f9072ce6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/cause_effect/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "cause_effect", - "evaluation": { - "accuracy": 0.8 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json deleted file mode 100644 index fdd91c8fe51bf2cd88cecc831c3e10fb23956bb9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/i_am_hesitating/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "i_am_hesitating", - "evaluation": { - "accuracy": 0.77 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json deleted file mode 100644 index 2c703eb186efceb2023af78cdd7bb346f3c129c0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xcopa/zh/plausible_alternatives/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "plausible_alternatives", - "evaluation": { - "accuracy": 0.79 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json deleted file mode 100644 index 546639690a91af2f7657d5e3d11086607d70d92e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5558232931726907 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json deleted file mode 100644 index c5ca5a5de45223fdf04ffb89b42bd5210a6b05f4..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.42128514056224897 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json deleted file mode 100644 index b4e668e7b7ac4839f822b30191a815ac8c574928..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.5148594377510041 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json deleted file mode 100644 index e912b2ece40d36999fd229f24fc8ef3b329cc1a6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.40562248995983935 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json deleted file mode 100644 index fefe3744fd1e94bc92a8643385c9ca4150c0ddf6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ar/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.4927710843373494 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json deleted file mode 100644 index ce728dd89ed583d89ff66c427bc1e26e4c94b7c2..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "en", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5891566265060241 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json deleted file mode 100644 index b501c0c19d450e9ee0010114f311cb4265b37242..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "en", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.42610441767068274 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json deleted file mode 100644 index 606887a2517b3583f762bbe473a0b0edebabbb4a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "en", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.5662650602409639 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json deleted file mode 100644 index 8bb4647db40a2b53df96ab55502c1b458a16ba63..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "en", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.4614457831325301 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json deleted file mode 100644 index fdf68b1b3c2db3ef8c9dd6d81fcb17a89e736e5a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/en/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "en", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.5437751004016064 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json deleted file mode 100644 index fbb0fbe72ce9a2366a5fbdfd9da6eb0f9d9fd040..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5734939759036145 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json deleted file mode 100644 index 9ad337da8b03428488d1855d4bb5512047f8b8b2..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.40923694779116465 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json deleted file mode 100644 index 7c5b9dc83a39b27ee74ebc89a068c2f2257f6bb7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.5148594377510041 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json deleted file mode 100644 index 6ad61caa2724825a2fe37d749ad9a5a0049b367c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.43132530120481927 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json deleted file mode 100644 index 88a666927b00d9871cca5682f0ed58f889592173..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/es/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.4610441767068273 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json deleted file mode 100644 index b7406b5bc3ed6a427267f31abae653fa9308c394..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5666666666666667 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json deleted file mode 100644 index d4d16731dcbfbb4413466d1be513e13815368d38..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.42208835341365464 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json deleted file mode 100644 index 42b889b3a9d7148ac2c31a64cdc3eff07565c38a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.5385542168674698 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json deleted file mode 100644 index 1fb16a2540011994a7c5dec4cf07dad0c3ff536b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.39076305220883534 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json deleted file mode 100644 index 4f42e8570f20fcf1c468a2695865355610b7e971..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/fr/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.5100401606425703 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json deleted file mode 100644 index 3ff8a878683ceacf02f36c301dd43ce7bee51134..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5345381526104418 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json deleted file mode 100644 index 46fd2c4c63efd9ffb52ca3cfcd385a92f730b432..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.41124497991967873 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json deleted file mode 100644 index ef0c2d34b4062ad7db36cc80415d5e6b588e4577..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.4751004016064257 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json deleted file mode 100644 index 1637ac7ae1350312fe7e9291d2416cd3e50a9c79..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.40923694779116465 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json deleted file mode 100644 index 951e5f90e621992983980077a06098e556e294af..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/hi/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.4469879518072289 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json deleted file mode 100644 index c819b498e7f52c050ecde967d689ef49c5ead831..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.4827309236947791 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json deleted file mode 100644 index 65398c49b780d2e20b7069e487311b6b981d4623..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.40562248995983935 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json deleted file mode 100644 index 39921e787dd0094bbc08607bf47d2937abbfd42a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.44497991967871486 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json deleted file mode 100644 index 318bbebe5b8253865063239ce33e84307900a27c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.42289156626506025 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json deleted file mode 100644 index cfff418d842a01be4a3654a50c02f6289a1fc703..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/sw/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.41124497991967873 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json deleted file mode 100644 index 24b3b4d9b3e429b610fb0a95c7722f51f1d0550a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.4947791164658635 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json deleted file mode 100644 index 67148069721a4c14b3c11b50b2aa8a4c09fe355b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.39759036144578314 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json deleted file mode 100644 index 22e46c6c09a04bf78053dd339ab28489f7704108..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.4502008032128514 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json deleted file mode 100644 index b810a40e3e235790207376b32af2a299d4f6fe24..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.39036144578313253 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json deleted file mode 100644 index 20245923546de8f7e85e8dd7e2bc1db0847212c3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/ur/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.40843373493975904 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json deleted file mode 100644 index a13ae51ee9460f9666c435cf5fa50778f44d72bb..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5449799196787148 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json deleted file mode 100644 index ccbec33b9e9b2ed4137fee347c4b91c0d2da5e34..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.40401606425702813 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json deleted file mode 100644 index 92a1a38226419e2f25f841111418028bc08cc0cf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.5 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json deleted file mode 100644 index afde16840d9ce35ba02f3c2fcd7460a447b59b6a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.44779116465863456 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json deleted file mode 100644 index 1afbfdbf7c938a67dd496eb79cde783df539b46a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/vi/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.4650602409638554 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json deleted file mode 100644 index 2bb9b56add737900ef37773f0080e24295befdd1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/GPT-3_style/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "GPT-3 style", - "evaluation": { - "accuracy": 0.5429718875502008 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json deleted file mode 100644 index 7dcfbc8f34fbd0614d389653502b05cc134b2cd7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "MNLI crowdsource", - "evaluation": { - "accuracy": 0.3891566265060241 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json deleted file mode 100644 index ed0c5d7b69442e3a1dc012e67cc6dc92f926cb83..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/can_we_infer/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "can we infer", - "evaluation": { - "accuracy": 0.5032128514056224 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json deleted file mode 100644 index ab2e4c3507538fdc53545cdaa4b371ce69faba4f..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "guaranteed/possible/impossible", - "evaluation": { - "accuracy": 0.38072289156626504 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json b/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json deleted file mode 100644 index 3e5f64a5ee3df4e2035281194d7f24398a2cb0fe..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_l1/xnli/zh/justified_in_saying/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "justified in saying", - "evaluation": { - "accuracy": 0.4706827309236948 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json deleted file mode 100644 index 89e43bafb8376a00e9550fe6e6650f8fa643b203..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/GPT-3_style_arht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "GPT-3 style_arht", - "evaluation": { - "accuracy": 0.4610441767068273 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json deleted file mode 100644 index 9f61804d9d11f714cbd249fc74308c8fbc0699c3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/MNLI_crowdsource_arht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "MNLI crowdsource_arht", - "evaluation": { - "accuracy": 0.3899598393574297 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json deleted file mode 100644 index 390cd9f97ace2cb2f965cf64641250954c6e21fb..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/can_we_infer_arht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "can we infer_arht", - "evaluation": { - "accuracy": 0.3550200803212851 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json deleted file mode 100644 index b6e2d83aadea0aca86a997c8bf99a61c2ab7063d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/guaranteed_possible_impossible_arht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "guaranteed/possible/impossible_arht", - "evaluation": { - "accuracy": 0.45461847389558235 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json deleted file mode 100644 index 82e207df2d935a573bad6b7501d6aaf42dcc1cb3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ar/justified_in_saying_arht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "justified in saying_arht", - "evaluation": { - "accuracy": 0.3538152610441767 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json deleted file mode 100644 index cffee59d56f1ebbb02db7d7721ae08ba1a4cddc8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/GPT-3_style_esht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "GPT-3 style_esht", - "evaluation": { - "accuracy": 0.5313253012048192 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json deleted file mode 100644 index 49638017ffc7ea741445cda68c9f802078b835d9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/MNLI_crowdsource_esht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "MNLI crowdsource_esht", - "evaluation": { - "accuracy": 0.334136546184739 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json deleted file mode 100644 index de94c4c56f39e025ea75d755f6e27e9137a4bee7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/can_we_infer_esht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "can we infer_esht", - "evaluation": { - "accuracy": 0.36987951807228914 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json deleted file mode 100644 index 4ab106672cdff63d80d3a3f17628c4df8299825c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/guaranteed_possible_impossible_esht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "guaranteed/possible/impossible_esht", - "evaluation": { - "accuracy": 0.4686746987951807 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json deleted file mode 100644 index 0e1a78271a48bb847416de3e695e9ce8b9b0313e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/es/justified_in_saying_esht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "justified in saying_esht", - "evaluation": { - "accuracy": 0.37630522088353413 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json deleted file mode 100644 index ee71295e8d994debecfdb9ef2012121716cfbcae..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/GPT-3_style_frht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "GPT-3 style_frht", - "evaluation": { - "accuracy": 0.5345381526104418 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json deleted file mode 100644 index 7a42eef9058f181bac68d4804d7eb2e9f70fe4d6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/MNLI_crowdsource_frht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "MNLI crowdsource_frht", - "evaluation": { - "accuracy": 0.3357429718875502 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json deleted file mode 100644 index 90a19abe8e6077dacc39c17c9bc153c75706f052..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/can_we_infer_frht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "can we infer_frht", - "evaluation": { - "accuracy": 0.5224899598393574 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json deleted file mode 100644 index 1c3df1fa5dac930fd4024d0cf9f26589a2ec3188..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/guaranteed_possible_impossible_frht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "guaranteed/possible/impossible_frht", - "evaluation": { - "accuracy": 0.46586345381526106 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json deleted file mode 100644 index 2bfb1f45fb8d49e5c2b459fdd5d3d8faefd65832..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/fr/justified_in_saying_frht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "justified in saying_frht", - "evaluation": { - "accuracy": 0.4891566265060241 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json deleted file mode 100644 index 81126b8423ccfb2a338316e6d739e62f5b2ad908..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/GPT-3_style_hiht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "GPT-3 style_hiht", - "evaluation": { - "accuracy": 0.3325301204819277 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json deleted file mode 100644 index 72cc6afcf061bd27160824802dc13eea9fc91e3e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/MNLI_crowdsource_hiht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "MNLI crowdsource_hiht", - "evaluation": { - "accuracy": 0.470281124497992 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json deleted file mode 100644 index d7bac61e418793f962192a821514feaa7e84b689..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/can_we_infer_hiht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "can we infer_hiht", - "evaluation": { - "accuracy": 0.37309236947791163 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json deleted file mode 100644 index edc1bf25e960593d049d046709a7306a683453d1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/guaranteed_possible_impossible_hiht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "guaranteed/possible/impossible_hiht", - "evaluation": { - "accuracy": 0.3514056224899598 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json deleted file mode 100644 index 8c7b4bf0fd416f731a886fbef444cfd580229a52..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/hi/justified_in_saying_hiht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "justified in saying_hiht", - "evaluation": { - "accuracy": 0.3746987951807229 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/merged.csv b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/merged.csv deleted file mode 100644 index 63d9419c23b657c9221fbcd5ed16fbbc8b1ca096..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/merged.csv +++ /dev/null @@ -1,50 +0,0 @@ -dataset,prompt,metric,value -xnli_ar,GPT-3 style_arht,accuracy,0.4610441767068273 -xnli_ar,MNLI crowdsource_arht,accuracy,0.3899598393574297 -xnli_ar,can we infer_arht,accuracy,0.3550200803212851 -xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.45461847389558235 -xnli_ar,justified in saying_arht,accuracy,0.3538152610441767 -xnli_ar,median,accuracy,0.3899598393574297 -xnli_es,GPT-3 style_esht,accuracy,0.5313253012048192 -xnli_es,MNLI crowdsource_esht,accuracy,0.334136546184739 -xnli_es,can we infer_esht,accuracy,0.36987951807228914 -xnli_es,guaranteed/possible/impossible_esht,accuracy,0.4686746987951807 -xnli_es,justified in saying_esht,accuracy,0.37630522088353413 -xnli_es,median,accuracy,0.37630522088353413 -xnli_fr,GPT-3 style_frht,accuracy,0.5345381526104418 -xnli_fr,MNLI crowdsource_frht,accuracy,0.3357429718875502 -xnli_fr,can we infer_frht,accuracy,0.5224899598393574 -xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.46586345381526106 -xnli_fr,justified in saying_frht,accuracy,0.4891566265060241 -xnli_fr,median,accuracy,0.4891566265060241 -xnli_hi,GPT-3 style_hiht,accuracy,0.3325301204819277 -xnli_hi,MNLI crowdsource_hiht,accuracy,0.470281124497992 -xnli_hi,can we infer_hiht,accuracy,0.37309236947791163 -xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.3514056224899598 -xnli_hi,justified in saying_hiht,accuracy,0.3746987951807229 -xnli_hi,median,accuracy,0.37309236947791163 -xnli_sw,GPT-3 style_swht,accuracy,0.336144578313253 -xnli_sw,MNLI crowdsource_swht,accuracy,0.3333333333333333 -xnli_sw,can we infer_swht,accuracy,0.3453815261044177 -xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.35582329317269074 -xnli_sw,justified in saying_swht,accuracy,0.3269076305220884 -xnli_sw,median,accuracy,0.336144578313253 -xnli_ur,GPT-3 style_urht,accuracy,0.4 -xnli_ur,MNLI crowdsource_urht,accuracy,0.3562248995983936 -xnli_ur,can we infer_urht,accuracy,0.3349397590361446 -xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.37630522088353413 -xnli_ur,justified in saying_urht,accuracy,0.3405622489959839 -xnli_ur,median,accuracy,0.3562248995983936 -xnli_vi,GPT-3 style_viht,accuracy,0.5265060240963856 -xnli_vi,MNLI crowdsource_viht,accuracy,0.37710843373493974 -xnli_vi,can we infer_viht,accuracy,0.5116465863453815 -xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.3578313253012048 -xnli_vi,justified in saying_viht,accuracy,0.5028112449799197 -xnli_vi,median,accuracy,0.5028112449799197 -xnli_zh,GPT-3 style_zhht,accuracy,0.3196787148594378 -xnli_zh,MNLI crowdsource_zhht,accuracy,0.38112449799196785 -xnli_zh,can we infer_zhht,accuracy,0.40642570281124496 -xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.344578313253012 -xnli_zh,justified in saying_zhht,accuracy,0.3369477911646586 -xnli_zh,median,accuracy,0.344578313253012 -multiple,average,multiple,0.39603413654618475 diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/merged.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/merged.json deleted file mode 100644 index efc985e13b0d05e58fa83cc086ba7da085ef4275..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/merged.json +++ /dev/null @@ -1 +0,0 @@ -{"xnli_ar": {"GPT-3 style_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4610441767068273}, "template_name": "GPT-3 style_arht"}, "MNLI crowdsource_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3899598393574297}, "template_name": "MNLI crowdsource_arht"}, "can we infer_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3550200803212851}, "template_name": "can we infer_arht"}, "guaranteed/possible/impossible_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.45461847389558235}, "template_name": "guaranteed/possible/impossible_arht"}, "justified in saying_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3538152610441767}, "template_name": "justified in saying_arht"}}, "xnli_es": {"GPT-3 style_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5313253012048192}, "template_name": "GPT-3 style_esht"}, "MNLI crowdsource_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.334136546184739}, "template_name": "MNLI crowdsource_esht"}, "can we infer_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.36987951807228914}, "template_name": "can we infer_esht"}, "guaranteed/possible/impossible_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4686746987951807}, "template_name": "guaranteed/possible/impossible_esht"}, "justified in saying_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37630522088353413}, "template_name": "justified in saying_esht"}}, "xnli_fr": {"GPT-3 style_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5345381526104418}, "template_name": "GPT-3 style_frht"}, "MNLI crowdsource_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3357429718875502}, "template_name": "MNLI crowdsource_frht"}, "can we infer_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5224899598393574}, "template_name": "can we infer_frht"}, "guaranteed/possible/impossible_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.46586345381526106}, "template_name": "guaranteed/possible/impossible_frht"}, "justified in saying_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4891566265060241}, "template_name": "justified in saying_frht"}}, "xnli_hi": {"GPT-3 style_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3325301204819277}, "template_name": "GPT-3 style_hiht"}, "MNLI crowdsource_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.470281124497992}, "template_name": "MNLI crowdsource_hiht"}, "can we infer_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37309236947791163}, "template_name": "can we infer_hiht"}, "guaranteed/possible/impossible_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3514056224899598}, "template_name": "guaranteed/possible/impossible_hiht"}, "justified in saying_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3746987951807229}, "template_name": "justified in saying_hiht"}}, "xnli_sw": {"GPT-3 style_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.336144578313253}, "template_name": "GPT-3 style_swht"}, "MNLI crowdsource_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_swht"}, "can we infer_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3453815261044177}, "template_name": "can we infer_swht"}, "guaranteed/possible/impossible_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.35582329317269074}, "template_name": "guaranteed/possible/impossible_swht"}, "justified in saying_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3269076305220884}, "template_name": "justified in saying_swht"}}, "xnli_ur": {"GPT-3 style_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4}, "template_name": "GPT-3 style_urht"}, "MNLI crowdsource_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3562248995983936}, "template_name": "MNLI crowdsource_urht"}, "can we infer_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3349397590361446}, "template_name": "can we infer_urht"}, "guaranteed/possible/impossible_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37630522088353413}, "template_name": "guaranteed/possible/impossible_urht"}, "justified in saying_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3405622489959839}, "template_name": "justified in saying_urht"}}, "xnli_vi": {"GPT-3 style_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5265060240963856}, "template_name": "GPT-3 style_viht"}, "MNLI crowdsource_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37710843373493974}, "template_name": "MNLI crowdsource_viht"}, "can we infer_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5116465863453815}, "template_name": "can we infer_viht"}, "guaranteed/possible/impossible_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3578313253012048}, "template_name": "guaranteed/possible/impossible_viht"}, "justified in saying_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5028112449799197}, "template_name": "justified in saying_viht"}}, "xnli_zh": {"GPT-3 style_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3196787148594378}, "template_name": "GPT-3 style_zhht"}, "MNLI crowdsource_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.38112449799196785}, "template_name": "MNLI crowdsource_zhht"}, "can we infer_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40642570281124496}, "template_name": "can we infer_zhht"}, "guaranteed/possible/impossible_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.344578313253012}, "template_name": "guaranteed/possible/impossible_zhht"}, "justified in saying_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3369477911646586}, "template_name": "justified in saying_zhht"}}} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json deleted file mode 100644 index de666e20d45de89d7d2e0b87d2df2e5e636806a2..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/GPT-3_style_swht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "GPT-3 style_swht", - "evaluation": { - "accuracy": 0.336144578313253 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json deleted file mode 100644 index 0b299c5ccdc242196add1443520436e5e01b82db..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/MNLI_crowdsource_swht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "MNLI crowdsource_swht", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json deleted file mode 100644 index b02921e07618dddbe9b0c9baa1bcf83488ffd879..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/can_we_infer_swht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "can we infer_swht", - "evaluation": { - "accuracy": 0.3453815261044177 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json deleted file mode 100644 index 0d0c7de87ad44d55d779ab418e11dc04eff4163a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/guaranteed_possible_impossible_swht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "guaranteed/possible/impossible_swht", - "evaluation": { - "accuracy": 0.35582329317269074 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json deleted file mode 100644 index 09f98868f8a3f64df83efbfe2d61dc4507e3622e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/sw/justified_in_saying_swht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "justified in saying_swht", - "evaluation": { - "accuracy": 0.3269076305220884 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json deleted file mode 100644 index cde9a7dc932d7fd11b347b05243ec4274928b418..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/GPT-3_style_urht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "GPT-3 style_urht", - "evaluation": { - "accuracy": 0.4 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json deleted file mode 100644 index a9c37fed7c6eaa7f42cae33a5ba7e0d82355619e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/MNLI_crowdsource_urht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "MNLI crowdsource_urht", - "evaluation": { - "accuracy": 0.3562248995983936 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json deleted file mode 100644 index 0275da97b071d9797bb41c979be0490b20c1a378..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/can_we_infer_urht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "can we infer_urht", - "evaluation": { - "accuracy": 0.3349397590361446 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json deleted file mode 100644 index 854f22010ac359d6fb9fd0586a760bdcfd66de48..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/guaranteed_possible_impossible_urht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "guaranteed/possible/impossible_urht", - "evaluation": { - "accuracy": 0.37630522088353413 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json deleted file mode 100644 index 1b761bd5ca49c65720cd1d210425f00c303cb2ad..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/ur/justified_in_saying_urht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "justified in saying_urht", - "evaluation": { - "accuracy": 0.3405622489959839 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json deleted file mode 100644 index 911fb94874ba7bd2ea8692a7c8021ac66b5153c2..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/GPT-3_style_viht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "GPT-3 style_viht", - "evaluation": { - "accuracy": 0.5265060240963856 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json deleted file mode 100644 index 2a896f3f9b9c16c00619928134371c5d8a06c8cf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/MNLI_crowdsource_viht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "MNLI crowdsource_viht", - "evaluation": { - "accuracy": 0.37710843373493974 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json deleted file mode 100644 index 5592cf57ddff8a5624c29225b518325819692f3a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/can_we_infer_viht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "can we infer_viht", - "evaluation": { - "accuracy": 0.5116465863453815 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json deleted file mode 100644 index 243d01383b0e661431abaf57e9da32a578e9728b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/guaranteed_possible_impossible_viht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "guaranteed/possible/impossible_viht", - "evaluation": { - "accuracy": 0.3578313253012048 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json deleted file mode 100644 index d4eadded27b8fe4d524b10ca3bb45f007dfeca90..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/vi/justified_in_saying_viht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "justified in saying_viht", - "evaluation": { - "accuracy": 0.5028112449799197 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json deleted file mode 100644 index d4254e656fdcba212a82643bc0e1902dc64567ff..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/GPT-3_style_zhht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "GPT-3 style_zhht", - "evaluation": { - "accuracy": 0.3196787148594378 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json deleted file mode 100644 index 10dbab5a4fe31be7999b7a95f3eee8e15829d6fb..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/MNLI_crowdsource_zhht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "MNLI crowdsource_zhht", - "evaluation": { - "accuracy": 0.38112449799196785 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json deleted file mode 100644 index 620f357300bfe4c9817e823269ac9f4e1f4f44a5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/can_we_infer_zhht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "can we infer_zhht", - "evaluation": { - "accuracy": 0.40642570281124496 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json deleted file mode 100644 index 65f9e6a084a0d11b193fa456270487f87f7d9b80..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/guaranteed_possible_impossible_zhht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "guaranteed/possible/impossible_zhht", - "evaluation": { - "accuracy": 0.344578313253012 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json b/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json deleted file mode 100644 index 1e999809f3f18ddd914a16705b305ea1a99d9038..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnliht/xnli/zh/justified_in_saying_zhht/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "justified in saying_zhht", - "evaluation": { - "accuracy": 0.3369477911646586 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json deleted file mode 100644 index 77cfdf21c92824d790481216193934744f3571dd..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/GPT-3_style_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "GPT-3 style_armt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json deleted file mode 100644 index 50233f2f7b48a56410b663a1e53062b48cb34970..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/MNLI_crowdsource_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "MNLI crowdsource_armt", - "evaluation": { - "accuracy": 0.4855421686746988 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json deleted file mode 100644 index 3c38307364767eeff17f0b7017a55ec6d59caccf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/can_we_infer_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "can we infer_armt", - "evaluation": { - "accuracy": 0.3413654618473896 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json deleted file mode 100644 index 62e5b9267ca8b55ec7427d8d404b4dc69e57c9ff..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/guaranteed_possible_impossible_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "guaranteed/possible/impossible_armt", - "evaluation": { - "accuracy": 0.35542168674698793 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json deleted file mode 100644 index 1f1f936f73b6370418470a32df93bc6ecc389d39..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ar/justified_in_saying_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ar", - "template_name": "justified in saying_armt", - "evaluation": { - "accuracy": 0.3465863453815261 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json deleted file mode 100644 index e1647e62c5110a1aa782d3f6bd3584976d02e177..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/GPT-3_style_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "GPT-3 style_esmt", - "evaluation": { - "accuracy": 0.5385542168674698 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json deleted file mode 100644 index 90b3f6eff9418f33f8581c2425f1323cbac2198a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/MNLI_crowdsource_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "MNLI crowdsource_esmt", - "evaluation": { - "accuracy": 0.42690763052208835 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json deleted file mode 100644 index 0e88590482c46b2a0f9c50c71bbba04503a2c8c8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/can_we_infer_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "can we infer_esmt", - "evaluation": { - "accuracy": 0.3895582329317269 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json deleted file mode 100644 index 7c45c42fc502a194e4734d1f527f10eff0e86e04..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/guaranteed_possible_impossible_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "guaranteed/possible/impossible_esmt", - "evaluation": { - "accuracy": 0.3477911646586345 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json deleted file mode 100644 index 6ab59224bbf868b322f353466f443457107cc276..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/es/justified_in_saying_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "es", - "template_name": "justified in saying_esmt", - "evaluation": { - "accuracy": 0.39799196787148594 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json deleted file mode 100644 index d588894667d032fd6e29b6ba7e9bcd4024fcd59c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/GPT-3_style_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "GPT-3 style_frmt", - "evaluation": { - "accuracy": 0.5220883534136547 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json deleted file mode 100644 index dd229693dc3267fffb9ffef13310ce4b41f16259..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/MNLI_crowdsource_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "MNLI crowdsource_frmt", - "evaluation": { - "accuracy": 0.3192771084337349 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json deleted file mode 100644 index a40ee36a56db7f5039b164cc5f93fc9bf0d4c10e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/can_we_infer_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "can we infer_frmt", - "evaluation": { - "accuracy": 0.5240963855421686 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json deleted file mode 100644 index d3c99fff4f20ce371d4abf4534e0a3de1fbfd1f0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/guaranteed_possible_impossible_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "guaranteed/possible/impossible_frmt", - "evaluation": { - "accuracy": 0.3819277108433735 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json deleted file mode 100644 index 3c556932c2368ab5dad3ae7ea52e812891e569b1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/fr/justified_in_saying_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "fr", - "template_name": "justified in saying_frmt", - "evaluation": { - "accuracy": 0.472289156626506 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json deleted file mode 100644 index 64d2695e1222b8e2ef70a6be0f451c25b35dc558..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/GPT-3_style_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "GPT-3 style_himt", - "evaluation": { - "accuracy": 0.3317269076305221 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json deleted file mode 100644 index 73ae164a98d33cdf0f9767e574f0b858a4f5c78b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/MNLI_crowdsource_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "MNLI crowdsource_himt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json deleted file mode 100644 index d20367db7addbcc1a429ed512438bca992387d0d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/can_we_infer_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "can we infer_himt", - "evaluation": { - "accuracy": 0.35943775100401604 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json deleted file mode 100644 index 34b08178a14f9f6812d8c27edcbeba951547d81e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/guaranteed_possible_impossible_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "guaranteed/possible/impossible_himt", - "evaluation": { - "accuracy": 0.3449799196787149 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json deleted file mode 100644 index 1aaa5f37e224219682c46b7e450a46500addd5f4..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/hi/justified_in_saying_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "hi", - "template_name": "justified in saying_himt", - "evaluation": { - "accuracy": 0.3654618473895582 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/merged.csv b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/merged.csv deleted file mode 100644 index f0ea0a8121ebbea29225f2ac4c3308b146e7b28c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/merged.csv +++ /dev/null @@ -1,50 +0,0 @@ -dataset,prompt,metric,value -xnli_ar,GPT-3 style_armt,accuracy,0.3333333333333333 -xnli_ar,MNLI crowdsource_armt,accuracy,0.4855421686746988 -xnli_ar,can we infer_armt,accuracy,0.3413654618473896 -xnli_ar,guaranteed/possible/impossible_armt,accuracy,0.35542168674698793 -xnli_ar,justified in saying_armt,accuracy,0.3465863453815261 -xnli_ar,median,accuracy,0.3465863453815261 -xnli_es,GPT-3 style_esmt,accuracy,0.5385542168674698 -xnli_es,MNLI crowdsource_esmt,accuracy,0.42690763052208835 -xnli_es,can we infer_esmt,accuracy,0.3895582329317269 -xnli_es,guaranteed/possible/impossible_esmt,accuracy,0.3477911646586345 -xnli_es,justified in saying_esmt,accuracy,0.39799196787148594 -xnli_es,median,accuracy,0.39799196787148594 -xnli_fr,GPT-3 style_frmt,accuracy,0.5220883534136547 -xnli_fr,MNLI crowdsource_frmt,accuracy,0.3192771084337349 -xnli_fr,can we infer_frmt,accuracy,0.5240963855421686 -xnli_fr,guaranteed/possible/impossible_frmt,accuracy,0.3819277108433735 -xnli_fr,justified in saying_frmt,accuracy,0.472289156626506 -xnli_fr,median,accuracy,0.472289156626506 -xnli_hi,GPT-3 style_himt,accuracy,0.3317269076305221 -xnli_hi,MNLI crowdsource_himt,accuracy,0.3333333333333333 -xnli_hi,can we infer_himt,accuracy,0.35943775100401604 -xnli_hi,guaranteed/possible/impossible_himt,accuracy,0.3449799196787149 -xnli_hi,justified in saying_himt,accuracy,0.3654618473895582 -xnli_hi,median,accuracy,0.3449799196787149 -xnli_sw,GPT-3 style_swmt,accuracy,0.334136546184739 -xnli_sw,MNLI crowdsource_swmt,accuracy,0.3333333333333333 -xnli_sw,can we infer_swmt,accuracy,0.3337349397590361 -xnli_sw,guaranteed/possible/impossible_swmt,accuracy,0.3261044176706827 -xnli_sw,justified in saying_swmt,accuracy,0.334136546184739 -xnli_sw,median,accuracy,0.3337349397590361 -xnli_ur,GPT-3 style_urmt,accuracy,0.3377510040160643 -xnli_ur,MNLI crowdsource_urmt,accuracy,0.3337349397590361 -xnli_ur,can we infer_urmt,accuracy,0.3333333333333333 -xnli_ur,guaranteed/possible/impossible_urmt,accuracy,0.3333333333333333 -xnli_ur,justified in saying_urmt,accuracy,0.3337349397590361 -xnli_ur,median,accuracy,0.3337349397590361 -xnli_vi,GPT-3 style_vimt,accuracy,0.3333333333333333 -xnli_vi,MNLI crowdsource_vimt,accuracy,0.3887550200803213 -xnli_vi,can we infer_vimt,accuracy,0.3333333333333333 -xnli_vi,guaranteed/possible/impossible_vimt,accuracy,0.3321285140562249 -xnli_vi,justified in saying_vimt,accuracy,0.3333333333333333 -xnli_vi,median,accuracy,0.3333333333333333 -xnli_zh,GPT-3 style_zhmt,accuracy,0.4634538152610442 -xnli_zh,MNLI crowdsource_zhmt,accuracy,0.3345381526104418 -xnli_zh,can we infer_zhmt,accuracy,0.4891566265060241 -xnli_zh,guaranteed/possible/impossible_zhmt,accuracy,0.3393574297188755 -xnli_zh,justified in saying_zhmt,accuracy,0.48032128514056227 -xnli_zh,median,accuracy,0.4634538152610442 -multiple,average,multiple,0.3782630522088353 diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/merged.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/merged.json deleted file mode 100644 index 12399b33f08c836ec1b9d142e3e493fe304cdf20..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/merged.json +++ /dev/null @@ -1 +0,0 @@ -{"xnli_ar": {"GPT-3 style_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_armt"}, "MNLI crowdsource_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4855421686746988}, "template_name": "MNLI crowdsource_armt"}, "can we infer_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3413654618473896}, "template_name": "can we infer_armt"}, "guaranteed/possible/impossible_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.35542168674698793}, "template_name": "guaranteed/possible/impossible_armt"}, "justified in saying_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3465863453815261}, "template_name": "justified in saying_armt"}}, "xnli_es": {"GPT-3 style_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5385542168674698}, "template_name": "GPT-3 style_esmt"}, "MNLI crowdsource_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.42690763052208835}, "template_name": "MNLI crowdsource_esmt"}, "can we infer_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3895582329317269}, "template_name": "can we infer_esmt"}, "guaranteed/possible/impossible_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3477911646586345}, "template_name": "guaranteed/possible/impossible_esmt"}, "justified in saying_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39799196787148594}, "template_name": "justified in saying_esmt"}}, "xnli_fr": {"GPT-3 style_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5220883534136547}, "template_name": "GPT-3 style_frmt"}, "MNLI crowdsource_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3192771084337349}, "template_name": "MNLI crowdsource_frmt"}, "can we infer_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5240963855421686}, "template_name": "can we infer_frmt"}, "guaranteed/possible/impossible_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3819277108433735}, "template_name": "guaranteed/possible/impossible_frmt"}, "justified in saying_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.472289156626506}, "template_name": "justified in saying_frmt"}}, "xnli_hi": {"GPT-3 style_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3317269076305221}, "template_name": "GPT-3 style_himt"}, "MNLI crowdsource_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_himt"}, "can we infer_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.35943775100401604}, "template_name": "can we infer_himt"}, "guaranteed/possible/impossible_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3449799196787149}, "template_name": "guaranteed/possible/impossible_himt"}, "justified in saying_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3654618473895582}, "template_name": "justified in saying_himt"}}, "xnli_sw": {"GPT-3 style_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.334136546184739}, "template_name": "GPT-3 style_swmt"}, "MNLI crowdsource_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_swmt"}, "can we infer_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3337349397590361}, "template_name": "can we infer_swmt"}, "guaranteed/possible/impossible_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3261044176706827}, "template_name": "guaranteed/possible/impossible_swmt"}, "justified in saying_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.334136546184739}, "template_name": "justified in saying_swmt"}}, "xnli_ur": {"GPT-3 style_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3377510040160643}, "template_name": "GPT-3 style_urmt"}, "MNLI crowdsource_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3337349397590361}, "template_name": "MNLI crowdsource_urmt"}, "can we infer_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "can we infer_urmt"}, "guaranteed/possible/impossible_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "guaranteed/possible/impossible_urmt"}, "justified in saying_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3337349397590361}, "template_name": "justified in saying_urmt"}}, "xnli_vi": {"GPT-3 style_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_vimt"}, "MNLI crowdsource_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3887550200803213}, "template_name": "MNLI crowdsource_vimt"}, "can we infer_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "can we infer_vimt"}, "guaranteed/possible/impossible_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3321285140562249}, "template_name": "guaranteed/possible/impossible_vimt"}, "justified in saying_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "justified in saying_vimt"}}, "xnli_zh": {"GPT-3 style_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4634538152610442}, "template_name": "GPT-3 style_zhmt"}, "MNLI crowdsource_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3345381526104418}, "template_name": "MNLI crowdsource_zhmt"}, "can we infer_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4891566265060241}, "template_name": "can we infer_zhmt"}, "guaranteed/possible/impossible_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3393574297188755}, "template_name": "guaranteed/possible/impossible_zhmt"}, "justified in saying_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.48032128514056227}, "template_name": "justified in saying_zhmt"}}} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json deleted file mode 100644 index 5242f09c1270f3f83829ac98159f36869eb2490d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/GPT-3_style_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "GPT-3 style_swmt", - "evaluation": { - "accuracy": 0.334136546184739 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json deleted file mode 100644 index 86775e878263814a2b8403e981105a19842c035d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/MNLI_crowdsource_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "MNLI crowdsource_swmt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json deleted file mode 100644 index 0b2fb84df09912986eea008200711b1d278d5042..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/can_we_infer_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "can we infer_swmt", - "evaluation": { - "accuracy": 0.3337349397590361 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json deleted file mode 100644 index 219047a34626d95888a5b5f9099345086a00fa27..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/guaranteed_possible_impossible_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "guaranteed/possible/impossible_swmt", - "evaluation": { - "accuracy": 0.3261044176706827 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json deleted file mode 100644 index d506bf07cfb0240b57bcac472cae7b35158da95b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/sw/justified_in_saying_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "sw", - "template_name": "justified in saying_swmt", - "evaluation": { - "accuracy": 0.334136546184739 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json deleted file mode 100644 index 28cce4c610b89834c6313c33d9972021040e95a4..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/GPT-3_style_urmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "GPT-3 style_urmt", - "evaluation": { - "accuracy": 0.3377510040160643 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json deleted file mode 100644 index 8f961654aeec2e7a890b280917a887603c772bc9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/MNLI_crowdsource_urmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "MNLI crowdsource_urmt", - "evaluation": { - "accuracy": 0.3337349397590361 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json deleted file mode 100644 index 99675a52d98c51024e91224b14cf355c1cae1161..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/can_we_infer_urmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "can we infer_urmt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json deleted file mode 100644 index 0e5a632d847e31bf188b7a6924e8385f1af5dbd0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/guaranteed_possible_impossible_urmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "guaranteed/possible/impossible_urmt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json deleted file mode 100644 index 12d7fd8973ceeba90f0b3973829fd6297a4bf5e4..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/ur/justified_in_saying_urmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "ur", - "template_name": "justified in saying_urmt", - "evaluation": { - "accuracy": 0.3337349397590361 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json deleted file mode 100644 index dfce0056ada3bdb2448acc5af08e6d58192bd49a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/GPT-3_style_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "GPT-3 style_vimt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json deleted file mode 100644 index b68db45df1b97750e4ca83fbcebe1c2ba51204b0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/MNLI_crowdsource_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "MNLI crowdsource_vimt", - "evaluation": { - "accuracy": 0.3887550200803213 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json deleted file mode 100644 index 399f1e12e2e5e1a960f554afebda1db41b5e54cf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/can_we_infer_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "can we infer_vimt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json deleted file mode 100644 index 8555f1f55886cabfadd5a156a2e887a5606cca06..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/guaranteed_possible_impossible_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "guaranteed/possible/impossible_vimt", - "evaluation": { - "accuracy": 0.3321285140562249 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json deleted file mode 100644 index e1d3100447b06c579e62865f80bac3260b900fb0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/vi/justified_in_saying_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "vi", - "template_name": "justified in saying_vimt", - "evaluation": { - "accuracy": 0.3333333333333333 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json deleted file mode 100644 index a9e431c729c8176051311252cb6df006feb918b8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/GPT-3_style_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "GPT-3 style_zhmt", - "evaluation": { - "accuracy": 0.4634538152610442 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json deleted file mode 100644 index c8548b52fac45ccbd571c2f3c5ddf1de6d342e04..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/MNLI_crowdsource_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "MNLI crowdsource_zhmt", - "evaluation": { - "accuracy": 0.3345381526104418 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json deleted file mode 100644 index 469b0a1ae48467123059fdd73ca7a3aa38150589..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/can_we_infer_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "can we infer_zhmt", - "evaluation": { - "accuracy": 0.4891566265060241 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json deleted file mode 100644 index 85c0552653669d8b485159da28454b6b60e4cb91..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/guaranteed_possible_impossible_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "guaranteed/possible/impossible_zhmt", - "evaluation": { - "accuracy": 0.3393574297188755 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json deleted file mode 100644 index f90bef80a421b6c9c19603659c8c32d751877973..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xnlimt/xnli/zh/justified_in_saying_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xnli", - "dataset_config_name": "zh", - "template_name": "justified in saying_zhmt", - "evaluation": { - "accuracy": 0.48032128514056227 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json deleted file mode 100644 index 00b0919de5c779c5aded5f7884023a3e2b61f7ad..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Answer Given options_armt", - "evaluation": { - "accuracy": 0.6664460622104567 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json deleted file mode 100644 index a18875179d57b9179bf7e77c0044a588a4da04a6..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Choose Story Ending_armt", - "evaluation": { - "accuracy": 0.8385175380542687 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json deleted file mode 100644 index 417a9cacc699edaa2406c8aed141e11bf6f02342..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Generate Ending_armt", - "evaluation": { - "accuracy": 0.5843812045003309 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json deleted file mode 100644 index 3dddd8a167406b0f6a4f128561262f51a6da0a0d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Novel Correct Ending_armt", - "evaluation": { - "accuracy": 0.827928524156188 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json deleted file mode 100644 index 3dc021bd784256e5b7ad89410662d4bbe23814e3..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "ar", - "template_name": "Story Continuation and Options_armt", - "evaluation": { - "accuracy": 0.8246194573130378 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json deleted file mode 100644 index 2f80fdef48d02a647d1a396f1a1184b9de98f54d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Answer Given options_esmt", - "evaluation": { - "accuracy": 0.8325612177365983 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json deleted file mode 100644 index ac33228a2c44794760c0541a9deca5bca51ddc3f..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Choose Story Ending_esmt", - "evaluation": { - "accuracy": 0.8881535407015222 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json deleted file mode 100644 index ef96b59e5aaf8917c3133bbbfdcda66635648262..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Generate Ending_esmt", - "evaluation": { - "accuracy": 0.6776968894771674 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json deleted file mode 100644 index 592e9750b5c5381a60ad2a9b0e637323d4c28a2d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Novel Correct Ending_esmt", - "evaluation": { - "accuracy": 0.8656518861681006 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json deleted file mode 100644 index 472c5c2061df541500d7e2e9e6eef586123bcea7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "es", - "template_name": "Story Continuation and Options_esmt", - "evaluation": { - "accuracy": 0.886168100595632 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json deleted file mode 100644 index c5f4b5d3ddd16a50e5746e8a401ebfef6af241af..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Answer Given options_eumt", - "evaluation": { - "accuracy": 0.5678358702845797 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json deleted file mode 100644 index 8da88a477ea78793b373952fb1be96f481e158fe..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Choose Story Ending_eumt", - "evaluation": { - "accuracy": 0.7326273990734613 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json deleted file mode 100644 index be084bbe7b32af5b5443df6df39bfdb1d06222b9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Generate Ending_eumt", - "evaluation": { - "accuracy": 0.5095962938451357 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json deleted file mode 100644 index ffd920c5fe19f4120e2f7b8cb9b03e75b4922a30..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Novel Correct Ending_eumt", - "evaluation": { - "accuracy": 0.6558570483123759 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json deleted file mode 100644 index 28733e61a32f89b95d611c458ff107eeaf7fa921..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "eu", - "template_name": "Story Continuation and Options_eumt", - "evaluation": { - "accuracy": 0.7193911317008603 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json deleted file mode 100644 index 7c0aab623963b0e207dfa345e07433e8f4b2a462..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Answer Given options_himt", - "evaluation": { - "accuracy": 0.7054930509596293 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json deleted file mode 100644 index 9a75b90868fbec40451f3664a316ab82d5ba43e7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Choose Story Ending_himt", - "evaluation": { - "accuracy": 0.8041032428855063 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json deleted file mode 100644 index 911d012a6b28cd79a12173b18896de370e018fd8..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Generate Ending_himt", - "evaluation": { - "accuracy": 0.614824619457313 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json deleted file mode 100644 index bdfdab2b619c12c446422d1160f1eaa296b71b61..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Novel Correct Ending_himt", - "evaluation": { - "accuracy": 0.7584381204500331 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json deleted file mode 100644 index 1f4e6dd2de684bd0e4bcb456a7484fa808f7339c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "hi", - "template_name": "Story Continuation and Options_himt", - "evaluation": { - "accuracy": 0.7981469225678358 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json deleted file mode 100644 index 6618121e00b79e2467033caee48503ab2b22c2cf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Answer Given options_idmt", - "evaluation": { - "accuracy": 0.7326273990734613 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json deleted file mode 100644 index a3aa3e18e51c6b62cbb7cc2a97700041334c7e25..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Choose Story Ending_idmt", - "evaluation": { - "accuracy": 0.8457974851091992 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json deleted file mode 100644 index c7b80d028191f714bf9413a37c734491b302092d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Generate Ending_idmt", - "evaluation": { - "accuracy": 0.5678358702845797 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json deleted file mode 100644 index 3ea18beca2d084a68d50e0be3f03c41be93f2262..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Novel Correct Ending_idmt", - "evaluation": { - "accuracy": 0.8226340172071476 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json deleted file mode 100644 index e0984dea347a6d4a481eb42b9dc0c80f26693c89..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "id", - "template_name": "Story Continuation and Options_idmt", - "evaluation": { - "accuracy": 0.8246194573130378 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json deleted file mode 100644 index e6ecf52678671b31c2c202352c3f9ee45d2103ff..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Answer Given options_zhmt", - "evaluation": { - "accuracy": 0.7935142289874255 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json deleted file mode 100644 index 8254a6ef94f7708021f814ae35ccb144ccf766b2..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Choose Story Ending_zhmt", - "evaluation": { - "accuracy": 0.8590337524818001 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json deleted file mode 100644 index 86be5b81897365d846fdbc1c0212f1ca30b86bff..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Generate Ending_zhmt", - "evaluation": { - "accuracy": 0.6307081403044341 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json deleted file mode 100644 index a022479727e3f95ddd43e7725b85deff75157f3a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Novel Correct Ending_zhmt", - "evaluation": { - "accuracy": 0.8590337524818001 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json deleted file mode 100644 index 8a07d07dfc971f75f05a047a59fa74a581aae831..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xstory_cloze", - "dataset_config_name": "zh", - "template_name": "Story Continuation and Options_zhmt", - "evaluation": { - "accuracy": 0.8464592984778293 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json deleted file mode 100644 index 7c1c9dbf4c2515bb812f95d4e251685bc13e5b98..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "Replace_frmt", - "evaluation": { - "accuracy": 0.5542168674698795 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json deleted file mode 100644 index 67bf5b6066b7e1974c71a7431bc80f60f6a36a99..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "True or False_frmt", - "evaluation": { - "accuracy": 0.46987951807228917 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='True or False_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json deleted file mode 100644 index 5be30d949e856fd08b7d57ec7e75260ecce3b1d4..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "does underscore refer to_frmt", - "evaluation": { - "accuracy": 0.5301204819277109 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='does underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json deleted file mode 100644 index aec05834b002a9c8c05edfa69aa43246be7035fd..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "stand for_frmt", - "evaluation": { - "accuracy": 0.5662650602409639 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json deleted file mode 100644 index 3be3c52fa7e754148451902755693f0495f4e710..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "fr", - "template_name": "underscore refer to_frmt", - "evaluation": { - "accuracy": 0.5783132530120482 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json deleted file mode 100644 index 949c31b60675cdcef7fd55b20c28040f16d3882b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "Replace_ptmt", - "evaluation": { - "accuracy": 0.5551330798479087 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='Replace_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json deleted file mode 100644 index ca312b24a35e6eee10689ce9b36562e3525a6146..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "True or False_ptmt", - "evaluation": { - "accuracy": 0.4600760456273764 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='True or False_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json deleted file mode 100644 index 6bc1ed74c203359004d055de6ccfb7029966c693..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "does underscore refer to_ptmt", - "evaluation": { - "accuracy": 0.5513307984790875 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='does underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json deleted file mode 100644 index 7aaa161d306ac5cf9ac2dbc18dfa356c209a237f..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "stand for_ptmt", - "evaluation": { - "accuracy": 0.532319391634981 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='stand for_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json deleted file mode 100644 index 96182d824f2bb313ce572d25990c7d647e6d7177..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "pt", - "template_name": "underscore refer to_ptmt", - "evaluation": { - "accuracy": 0.5361216730038023 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json deleted file mode 100644 index 3f844872631efc4a4bdae4466528d694216e5834..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "Replace_zhmt", - "evaluation": { - "accuracy": 0.6130952380952381 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json deleted file mode 100644 index 3936a48ccce8997a4f8b4667cc4ba8bfbc3c0b49..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "True or False_zhmt", - "evaluation": { - "accuracy": 0.5416666666666666 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json deleted file mode 100644 index 3aa2985b0967db2c47e54ddaa98b4f757e85a5b5..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "does underscore refer to_zhmt", - "evaluation": { - "accuracy": 0.5793650793650794 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json deleted file mode 100644 index 0037df9b42fa6ab40eb5106ae130594838d95548..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "stand for_zhmt", - "evaluation": { - "accuracy": 0.5158730158730159 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json deleted file mode 100644 index 703bbb0e5938c8a1219a5452fb1692557ed43b4d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "Muennighoff/xwinograd", - "dataset_config_name": "zh", - "template_name": "underscore refer to_zhmt", - "evaluation": { - "accuracy": 0.625 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/merged.csv b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/merged.csv deleted file mode 100644 index 4a9402f0137311e94081fe18b349de0b71ea3071..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/merged.csv +++ /dev/null @@ -1,86 +0,0 @@ -dataset,prompt,metric,value -xcopa_id,C1 or C2? premise_idmt,accuracy,0.52 -xcopa_id,best_option_idmt,accuracy,0.63 -xcopa_id,cause_effect_idmt,accuracy,0.64 -xcopa_id,i_am_hesitating_idmt,accuracy,0.66 -xcopa_id,plausible_alternatives_idmt,accuracy,0.71 -xcopa_id,median,accuracy,0.64 -xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6 -xcopa_sw,best_option_swmt,accuracy,0.59 -xcopa_sw,cause_effect_swmt,accuracy,0.57 -xcopa_sw,i_am_hesitating_swmt,accuracy,0.61 -xcopa_sw,plausible_alternatives_swmt,accuracy,0.59 -xcopa_sw,median,accuracy,0.59 -xcopa_ta,C1 or C2? premise_tamt,accuracy,0.6 -xcopa_ta,best_option_tamt,accuracy,0.56 -xcopa_ta,cause_effect_tamt,accuracy,0.58 -xcopa_ta,i_am_hesitating_tamt,accuracy,0.54 -xcopa_ta,plausible_alternatives_tamt,accuracy,0.54 -xcopa_ta,median,accuracy,0.56 -xcopa_vi,C1 or C2? premise_vimt,accuracy,0.63 -xcopa_vi,best_option_vimt,accuracy,0.73 -xcopa_vi,cause_effect_vimt,accuracy,0.72 -xcopa_vi,i_am_hesitating_vimt,accuracy,0.71 -xcopa_vi,plausible_alternatives_vimt,accuracy,0.77 -xcopa_vi,median,accuracy,0.72 -xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.61 -xcopa_zh,best_option_zhmt,accuracy,0.69 -xcopa_zh,cause_effect_zhmt,accuracy,0.8 -xcopa_zh,i_am_hesitating_zhmt,accuracy,0.74 -xcopa_zh,plausible_alternatives_zhmt,accuracy,0.76 -xcopa_zh,median,accuracy,0.74 -xstory_cloze_ar,Answer Given options_armt,accuracy,0.6664460622104567 -xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.8385175380542687 -xstory_cloze_ar,Generate Ending_armt,accuracy,0.5843812045003309 -xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.827928524156188 -xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.8246194573130378 -xstory_cloze_ar,median,accuracy,0.8246194573130378 -xstory_cloze_es,Answer Given options_esmt,accuracy,0.8325612177365983 -xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.8881535407015222 -xstory_cloze_es,Generate Ending_esmt,accuracy,0.6776968894771674 -xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.8656518861681006 -xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.886168100595632 -xstory_cloze_es,median,accuracy,0.8656518861681006 -xstory_cloze_eu,Answer Given options_eumt,accuracy,0.5678358702845797 -xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.7326273990734613 -xstory_cloze_eu,Generate Ending_eumt,accuracy,0.5095962938451357 -xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.6558570483123759 -xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.7193911317008603 -xstory_cloze_eu,median,accuracy,0.6558570483123759 -xstory_cloze_hi,Answer Given options_himt,accuracy,0.7054930509596293 -xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.8041032428855063 -xstory_cloze_hi,Generate Ending_himt,accuracy,0.614824619457313 -xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.7584381204500331 -xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.7981469225678358 -xstory_cloze_hi,median,accuracy,0.7584381204500331 -xstory_cloze_id,Answer Given options_idmt,accuracy,0.7326273990734613 -xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.8457974851091992 -xstory_cloze_id,Generate Ending_idmt,accuracy,0.5678358702845797 -xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.8226340172071476 -xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.8246194573130378 -xstory_cloze_id,median,accuracy,0.8226340172071476 -xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.7935142289874255 -xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.8590337524818001 -xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6307081403044341 -xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.8590337524818001 -xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.8464592984778293 -xstory_cloze_zh,median,accuracy,0.8464592984778293 -xwinograd_fr,Replace_frmt,accuracy,0.5542168674698795 -xwinograd_fr,True or False_frmt,accuracy,0.46987951807228917 -xwinograd_fr,does underscore refer to_frmt,accuracy,0.5301204819277109 -xwinograd_fr,stand for_frmt,accuracy,0.5662650602409639 -xwinograd_fr,underscore refer to_frmt,accuracy,0.5783132530120482 -xwinograd_fr,median,accuracy,0.5542168674698795 -xwinograd_pt,Replace_ptmt,accuracy,0.5551330798479087 -xwinograd_pt,True or False_ptmt,accuracy,0.4600760456273764 -xwinograd_pt,does underscore refer to_ptmt,accuracy,0.5513307984790875 -xwinograd_pt,stand for_ptmt,accuracy,0.532319391634981 -xwinograd_pt,underscore refer to_ptmt,accuracy,0.5361216730038023 -xwinograd_pt,median,accuracy,0.5361216730038023 -xwinograd_zh,Replace_zhmt,accuracy,0.6130952380952381 -xwinograd_zh,True or False_zhmt,accuracy,0.5416666666666666 -xwinograd_zh,does underscore refer to_zhmt,accuracy,0.5793650793650794 -xwinograd_zh,stand for_zhmt,accuracy,0.5158730158730159 -xwinograd_zh,underscore refer to_zhmt,accuracy,0.625 -xwinograd_zh,median,accuracy,0.5793650793650794 -multiple,average,multiple,0.692383103411949 diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/merged.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/merged.json deleted file mode 100644 index 623985f31ca9ea9cd805f20932eab97e9204faa9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/merged.json +++ /dev/null @@ -1 +0,0 @@ -{"Muennighoff/xstory_cloze_ar": {"Answer Given options_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6664460622104567}, "template_name": "Answer Given options_armt"}, "Choose Story Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8385175380542687}, "template_name": "Choose Story Ending_armt"}, "Generate Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5843812045003309}, "template_name": "Generate Ending_armt"}, "Novel Correct Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.827928524156188}, "template_name": "Novel Correct Ending_armt"}, "Story Continuation and Options_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8246194573130378}, "template_name": "Story Continuation and Options_armt"}}, "Muennighoff/xstory_cloze_es": {"Answer Given options_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8325612177365983}, "template_name": "Answer Given options_esmt"}, "Choose Story Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8881535407015222}, "template_name": "Choose Story Ending_esmt"}, "Generate Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6776968894771674}, "template_name": "Generate Ending_esmt"}, "Novel Correct Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8656518861681006}, "template_name": "Novel Correct Ending_esmt"}, "Story Continuation and Options_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.886168100595632}, "template_name": "Story Continuation and Options_esmt"}}, "Muennighoff/xstory_cloze_eu": {"Answer Given options_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5678358702845797}, "template_name": "Answer Given options_eumt"}, "Choose Story Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7326273990734613}, "template_name": "Choose Story Ending_eumt"}, "Generate Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5095962938451357}, "template_name": "Generate Ending_eumt"}, "Novel Correct Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6558570483123759}, "template_name": "Novel Correct Ending_eumt"}, "Story Continuation and Options_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7193911317008603}, "template_name": "Story Continuation and Options_eumt"}}, "Muennighoff/xstory_cloze_hi": {"Answer Given options_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7054930509596293}, "template_name": "Answer Given options_himt"}, "Choose Story Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8041032428855063}, "template_name": "Choose Story Ending_himt"}, "Generate Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.614824619457313}, "template_name": "Generate Ending_himt"}, "Novel Correct Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7584381204500331}, "template_name": "Novel Correct Ending_himt"}, "Story Continuation and Options_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7981469225678358}, "template_name": "Story Continuation and Options_himt"}}, "Muennighoff/xstory_cloze_id": {"Answer Given options_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7326273990734613}, "template_name": "Answer Given options_idmt"}, "Choose Story Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8457974851091992}, "template_name": "Choose Story Ending_idmt"}, "Generate Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.5678358702845797}, "template_name": "Generate Ending_idmt"}, "Novel Correct Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8226340172071476}, "template_name": "Novel Correct Ending_idmt"}, "Story Continuation and Options_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8246194573130378}, "template_name": "Story Continuation and Options_idmt"}}, "Muennighoff/xstory_cloze_zh": {"Answer Given options_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7935142289874255}, "template_name": "Answer Given options_zhmt"}, "Choose Story Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8590337524818001}, "template_name": "Choose Story Ending_zhmt"}, "Generate Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6307081403044341}, "template_name": "Generate Ending_zhmt"}, "Novel Correct Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8590337524818001}, "template_name": "Novel Correct Ending_zhmt"}, "Story Continuation and Options_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8464592984778293}, "template_name": "Story Continuation and Options_zhmt"}}, "Muennighoff/xwinograd_fr": {"Replace_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5542168674698795}, "template_name": "Replace_frmt"}, "True or False_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='True or False_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.46987951807228917}, "template_name": "True or False_frmt"}, "does underscore refer to_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='does underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5301204819277109}, "template_name": "does underscore refer to_frmt"}, "stand for_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5662650602409639}, "template_name": "stand for_frmt"}, "underscore refer to_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5783132530120482}, "template_name": "underscore refer to_frmt"}}, "Muennighoff/xwinograd_pt": {"Replace_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='Replace_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5551330798479087}, "template_name": "Replace_ptmt"}, "True or False_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='True or False_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.4600760456273764}, "template_name": "True or False_ptmt"}, "does underscore refer to_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='does underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5513307984790875}, "template_name": "does underscore refer to_ptmt"}, "stand for_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='stand for_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.532319391634981}, "template_name": "stand for_ptmt"}, "underscore refer to_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5361216730038023}, "template_name": "underscore refer to_ptmt"}}, "Muennighoff/xwinograd_zh": {"Replace_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6130952380952381}, "template_name": "Replace_zhmt"}, "True or False_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5416666666666666}, "template_name": "True or False_zhmt"}, "does underscore refer to_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5793650793650794}, "template_name": "does underscore refer to_zhmt"}, "stand for_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5158730158730159}, "template_name": "stand for_zhmt"}, "underscore refer to_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.625}, "template_name": "underscore refer to_zhmt"}}, "xcopa_id": {"C1 or C2? premise_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='C1 or C2? premise_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.52}, "template_name": "C1 or C2? premise_idmt"}, "best_option_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='best_option_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "best_option_idmt"}, "cause_effect_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='cause_effect_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "cause_effect_idmt"}, "i_am_hesitating_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='i_am_hesitating_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.66}, "template_name": "i_am_hesitating_idmt"}, "plausible_alternatives_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='plausible_alternatives_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.71}, "template_name": "plausible_alternatives_idmt"}}, "xcopa_sw": {"C1 or C2? premise_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='C1 or C2? premise_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "C1 or C2? premise_swmt"}, "best_option_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='best_option_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.59}, "template_name": "best_option_swmt"}, "cause_effect_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='cause_effect_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.57}, "template_name": "cause_effect_swmt"}, "i_am_hesitating_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='i_am_hesitating_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.61}, "template_name": "i_am_hesitating_swmt"}, "plausible_alternatives_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='plausible_alternatives_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.59}, "template_name": "plausible_alternatives_swmt"}}, "xcopa_ta": {"C1 or C2? premise_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='C1 or C2? premise_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "C1 or C2? premise_tamt"}, "best_option_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='best_option_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.56}, "template_name": "best_option_tamt"}, "cause_effect_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='cause_effect_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.58}, "template_name": "cause_effect_tamt"}, "i_am_hesitating_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='i_am_hesitating_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.54}, "template_name": "i_am_hesitating_tamt"}, "plausible_alternatives_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='plausible_alternatives_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.54}, "template_name": "plausible_alternatives_tamt"}}, "xcopa_vi": {"C1 or C2? premise_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='C1 or C2? premise_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "C1 or C2? premise_vimt"}, "best_option_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='best_option_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.73}, "template_name": "best_option_vimt"}, "cause_effect_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='cause_effect_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.72}, "template_name": "cause_effect_vimt"}, "i_am_hesitating_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='i_am_hesitating_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.71}, "template_name": "i_am_hesitating_vimt"}, "plausible_alternatives_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='plausible_alternatives_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.77}, "template_name": "plausible_alternatives_vimt"}}, "xcopa_zh": {"C1 or C2? premise_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.61}, "template_name": "C1 or C2? premise_zhmt"}, "best_option_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.69}, "template_name": "best_option_zhmt"}, "cause_effect_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.8}, "template_name": "cause_effect_zhmt"}, "i_am_hesitating_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.74}, "template_name": "i_am_hesitating_zhmt"}, "plausible_alternatives_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.76}, "template_name": "plausible_alternatives_zhmt"}}} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json deleted file mode 100644 index 770aecf4ada66a83385145616e7007789a7cb8d9..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "C1 or C2? premise_idmt", - "evaluation": { - "accuracy": 0.52 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='C1 or C2? premise_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/best_option_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/best_option_idmt/results.json deleted file mode 100644 index 423b21a96fd8c6769ab5ebcaac6e25e336722eaf..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/best_option_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "best_option_idmt", - "evaluation": { - "accuracy": 0.63 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='best_option_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/cause_effect_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/cause_effect_idmt/results.json deleted file mode 100644 index b4e8f8d7a3504693c80597ba0e247f27744daf0c..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/cause_effect_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "cause_effect_idmt", - "evaluation": { - "accuracy": 0.64 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='cause_effect_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/i_am_hesitating_idmt/results.json deleted file mode 100644 index aff6b8404ecd49a65dcf440e4a77ed71facf41cd..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/i_am_hesitating_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "i_am_hesitating_idmt", - "evaluation": { - "accuracy": 0.66 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='i_am_hesitating_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/plausible_alternatives_idmt/results.json deleted file mode 100644 index 093ee47bb082a31a917c4f4e1bdffbfcff027208..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/id/plausible_alternatives_idmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "id", - "template_name": "plausible_alternatives_idmt", - "evaluation": { - "accuracy": 0.71 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='plausible_alternatives_idmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json deleted file mode 100644 index 0d0b7e0c2986140a20ac0007be6a9a7d9c7f3e06..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "C1 or C2? premise_swmt", - "evaluation": { - "accuracy": 0.6 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='C1 or C2? premise_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/best_option_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/best_option_swmt/results.json deleted file mode 100644 index 30ec8563658088b618c110d1449b85a22476eb41..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/best_option_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "best_option_swmt", - "evaluation": { - "accuracy": 0.59 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='best_option_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/cause_effect_swmt/results.json deleted file mode 100644 index 8695c2a6f0d6784b1b6df9924a65ba837756f004..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/cause_effect_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "cause_effect_swmt", - "evaluation": { - "accuracy": 0.57 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='cause_effect_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json deleted file mode 100644 index 1c30790a87601adf49bb4b88c549419044c85c1a..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "i_am_hesitating_swmt", - "evaluation": { - "accuracy": 0.61 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='i_am_hesitating_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json deleted file mode 100644 index cbb5a24fee689670d04bddd69bcae9af62589b0d..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "sw", - "template_name": "plausible_alternatives_swmt", - "evaluation": { - "accuracy": 0.59 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='plausible_alternatives_swmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json deleted file mode 100644 index 28dc514e513e8365cf469b4c6bcb29b7a578e100..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "C1 or C2? premise_tamt", - "evaluation": { - "accuracy": 0.6 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='C1 or C2? premise_tamt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/best_option_tamt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/best_option_tamt/results.json deleted file mode 100644 index 78542a8227f63dcf1c8e7a433b7bb8027d262573..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/best_option_tamt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "best_option_tamt", - "evaluation": { - "accuracy": 0.56 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='best_option_tamt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/cause_effect_tamt/results.json deleted file mode 100644 index 0b63cfc7d5d6f767e20bed692b47acbf6644acda..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/cause_effect_tamt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "cause_effect_tamt", - "evaluation": { - "accuracy": 0.58 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='cause_effect_tamt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json deleted file mode 100644 index addeff34489644602e10a13a32136a853d11a027..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "i_am_hesitating_tamt", - "evaluation": { - "accuracy": 0.54 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='i_am_hesitating_tamt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json deleted file mode 100644 index 956fbb245f1ce4cb95cabc8bb699a728716e7a49..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "ta", - "template_name": "plausible_alternatives_tamt", - "evaluation": { - "accuracy": 0.54 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='plausible_alternatives_tamt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json deleted file mode 100644 index 2028137a9042824411e47cae1ea23f520716e0c0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "C1 or C2? premise_vimt", - "evaluation": { - "accuracy": 0.63 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='C1 or C2? premise_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/best_option_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/best_option_vimt/results.json deleted file mode 100644 index 67fb890f45a0d2e5054c512361c5454687ba31e1..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/best_option_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "best_option_vimt", - "evaluation": { - "accuracy": 0.73 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='best_option_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/cause_effect_vimt/results.json deleted file mode 100644 index 33fcb6b0693c93cd8908bca9e278290442c11cf0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/cause_effect_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "cause_effect_vimt", - "evaluation": { - "accuracy": 0.72 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='cause_effect_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json deleted file mode 100644 index 93cc0bc854520f7c93ebeb07b7d3d73ecc93cf19..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "i_am_hesitating_vimt", - "evaluation": { - "accuracy": 0.71 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='i_am_hesitating_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json deleted file mode 100644 index bdd4d1a4aa56e09ec45ee9b8f071edae2dd7450b..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "vi", - "template_name": "plausible_alternatives_vimt", - "evaluation": { - "accuracy": 0.77 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='plausible_alternatives_vimt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json deleted file mode 100644 index 2970afcaeb8d36e3a35214b3c4c778ac18752cf0..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "C1 or C2? premise_zhmt", - "evaluation": { - "accuracy": 0.61 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/best_option_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/best_option_zhmt/results.json deleted file mode 100644 index 4cd6431f41ee122238b455e34f0bf4c95e383c5e..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/best_option_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "best_option_zhmt", - "evaluation": { - "accuracy": 0.69 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/cause_effect_zhmt/results.json deleted file mode 100644 index aa0aa530ab5f84188622f842b58fd663198c7bb7..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/cause_effect_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "cause_effect_zhmt", - "evaluation": { - "accuracy": 0.8 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json deleted file mode 100644 index b61d725b211203c13659dc312de7ca007207fa17..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "i_am_hesitating_zhmt", - "evaluation": { - "accuracy": 0.74 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file diff --git a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json deleted file mode 100644 index 049f76fe863b55ba2f0881cb3b146bba011f82cc..0000000000000000000000000000000000000000 --- a/evaluation_bloommz-7b1/evaluation_xwinstorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "dataset_name": "xcopa", - "dataset_config_name": "zh", - "template_name": "plausible_alternatives_zhmt", - "evaluation": { - "accuracy": 0.76 - }, - "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" -} \ No newline at end of file