diff --git a/.gitattributes b/.gitattributes
index 637fa167e56685c01bc97f08a420ea76330cf6df..1fd4826e6f1aafb9303f7a6f9709083bd5723fc3 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -30,3 +30,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4a9ee4255fb8b0dced6c7760bb7778d90502f337
--- /dev/null
+++ b/config.json
@@ -0,0 +1,25 @@
+{
+  "apply_residual_connection_post_layernorm": false,
+  "attention_dropout": 0.0,
+  "architectures": [
+    "BloomModel"
+  ],
+  "attention_softmax_in_fp32": true,
+  "pad_token_id": 3,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_dropout": 0.0,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "masked_softmax_fusion": true,
+  "model_type": "bloom",
+  "n_embed": 14336,
+  "n_layer": 70,
+  "num_attention_heads": 112,
+  "pretraining_tp": 4,
+  "slow_but_exact": false,
+  "transformers_version": "4.21.0",
+  "use_cache": true,
+  "vocab_size": 250880
+}
+
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa14ca12f337280b15a3bbfa258ffa590c0dbfe3
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/ar/Answer_Given_options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ar",
+  "template_name": "Answer Given options",
+  "evaluation": {
+    "accuracy": 0.7835870284579749
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..622b0178d3dc61824ffc4b9d954e29bbd0185009
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/ar/Choose_Story_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "ar",
+  "template_name": "Choose Story Ending",
+  "evaluation": {
+    "accuracy": 0.9291859695565851
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation',
target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f3e3d5afe9f834ed904e4b4e7eda2b9f6baf767e --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/ar/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.6624751819986764 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4210ba765770eac93980a029b2bc352c8972c508 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.9252150893448048 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f560c7f8871d2369ce47abd12e53e7667e83db89 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.9159497021839841 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4a8c7a2f686d8f7e86828e16fbfd066edfdf6cd0 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/es/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.870946393117141 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cdfab5115b88fdcc96feeb85dd4f93ee938b65d7 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/es/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.9523494374586366 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..49cbe5f2b61f12b9305cf9fb741f260ad3487a6f --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/es/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.7319655857048313 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', 
dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..84abc297253d8c49058803f80b2439bbec58dc0c --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/es/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.9477167438782264 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..133194939f44992d70f7b27f95d3d923898d3a47 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.9516876240900066 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9f90e311862c4d4f544147953fedfe8b2e09bf27 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/eu/Answer_Given_options/results.json @@ 
-0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.6982131039046989 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac3a09567cd16940b268699baf757d683c11d3e2 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/eu/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.85704831237591 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..537bd7aca6cf5b4d60179d3e77429be8d2472ffa --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/eu/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.614162806088683 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json new file mode 100644 index 
0000000000000000000000000000000000000000..7b613d66b0fa70d7ede65259912c94c972606b72 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.8590337524818001 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b78868605b4fc2d566a9cf12cfbd26fecb04615f --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.8504301786896096 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dfcddff9122832e4023de739a08b165405f4bf97 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.7683653209794837 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0249f476c61770d371aa612ab6f723bec18d3c63 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.8742554599602912 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..15715bbec8f0ca5626c8896695ab1ec869daef82 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.657180675049636 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2825f4a6c0a15ea38107bcf743d17899fc10205c --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.886829913964262 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9c3a3be4b72ddd989d75644db452c9a43117588c --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.8762409000661814 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5f961fd1a7f2e5d4fcc0cf3b9b24c3d5a3636fb --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/id/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.8332230311052283 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..08e1da3e2e4a3c184f8a9616f90c0c499646325e --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/id/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.913964262078094 + }, + "arguments": 
"Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c19d4576266454a6162862c53b416bebe0d6b03 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/id/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.700198544010589 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..61259b0ab613799ec9d0e717e6b66e3e5676e6de --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/id/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.9205823957643945 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0599dff562b06459a0b386388649b3fdb2d795d3 --- /dev/null +++ 
b/evaluation_l1/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.9086697551290536 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d4fea73deeac81c8a3056ea7163c644193143f38 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.870946393117141 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..50a8a9f7bcc5d647795ef2452673fb071e542ef9 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.9265387160820648 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git 
a/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..927c5f705c7b4716a9e99d27d3dde0bd724ba42c --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.6823295830575777 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b1a9390ed70408e37d3597045e7b9ec029eee5b4 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.928524156187955 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c912504a65bd932e2813c6dfbec4dfa31c750e9f --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.9232296492389146 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a5a22b9790053662904d0e281b3325e8779d96 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.6933333333333334 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json b/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb72eae6a032d146ba4bda652b3c2831cb267690 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.5212903225806451 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..66aa24f4760f073f17cd55f9e6a510c174ad2875 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/does_underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "does underscore refer to", + "evaluation": { + "accuracy": 0.6563440860215054 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json b/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..96dd97d1f840cfe0c4253ab2c2c375afba165614 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.5156989247311828 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ea1d115d892e712d5a15507f7c53b35973f58e6 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.6473118279569893 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e44039abfed86c270ecfd42407b4a632f5beb6a --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/fr/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.6024096385542169 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json b/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..676ff78442f70e7604c9d4e4a162104fba700bc5 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/fr/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.46987951807228917 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..57fa5bc71b075b999289da2b81ab4cbd13217d92 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/fr/does_underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "does underscore refer to", + "evaluation": { + "accuracy": 0.5903614457831325 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json b/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..aca34b8ede42cbef1f40485a79e370482f93eb7f --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/fr/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.4939759036144578 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', 
dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e52bc4b92c7834bfbc21e02c80d8ec944c6eb26 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/fr/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.6867469879518072 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3a6b4ece370951cdb9b44db9ac165944227af945 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/pt/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.6463878326996197 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..14851a8471b73c07a28c1b00f9b88a4aa6722984 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.5285171102661597 + }, + "arguments": 
"Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..091ffea556f32c5e4e5869762a4c921446a22f69 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/pt/does_underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "does underscore refer to", + "evaluation": { + "accuracy": 0.6007604562737643 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..310ef9e24ba6109c7c26de82e901349762c9471a --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/pt/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.49809885931558934 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ef31c9a05fdae650875c5ab14ba65ad812396606 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/pt/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + 
"template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.6083650190114068 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1fbc6d83b73966e85bc5e1da6f2cd012adba195a --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.7063492063492064 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..51ed558709378889fed0678b936ed71050d4f2db --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.5515873015873016 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f0f243e26825b9625fdde9c5dd2af458fce410ac --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/does_underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + 
"dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "does underscore refer to", + "evaluation": { + "accuracy": 0.621031746031746 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8ee2b197de90b9471d5898fed01f8d44b6ffbe7f --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.5158730158730159 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bb725b17616be084da49f9d036df24a26aa8fc3e --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.6765873015873016 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r1/GPT-3_style/results.json b/evaluation_l1/anli/dev_r1/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..624ef5ca1c606f97954ef9489cd9b411b4aacdd9 --- /dev/null +++ 
b/evaluation_l1/anli/dev_r1/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r1", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.497 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json b/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..17bea05f2e6e2b5dd592b8a4f29bb60008648757 --- /dev/null +++ b/evaluation_l1/anli/dev_r1/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r1", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.442 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r1/can_we_infer/results.json b/evaluation_l1/anli/dev_r1/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..724d47c0dc6f6ef4d49e9caf8b4b2b63b9b58ac7 --- /dev/null +++ b/evaluation_l1/anli/dev_r1/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r1", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.456 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json b/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5309d4d5e0a389c68d33eac1f803108aec5560b6 --- /dev/null +++ b/evaluation_l1/anli/dev_r1/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": 
"dev_r1", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.328 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r1/justified_in_saying/results.json b/evaluation_l1/anli/dev_r1/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c116ebcd4d24a3000e5a7370b54433dece9b944e --- /dev/null +++ b/evaluation_l1/anli/dev_r1/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r1", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.46 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r2/GPT-3_style/results.json b/evaluation_l1/anli/dev_r2/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..eccf8c1551bd97f197744e7516bcf00470f66e65 --- /dev/null +++ b/evaluation_l1/anli/dev_r2/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r2", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.45 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json b/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..42e7d317c933d0b4944e0282146dc394124acb68 --- /dev/null +++ b/evaluation_l1/anli/dev_r2/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r2", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.382 + }, + "arguments": 
"Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r2/can_we_infer/results.json b/evaluation_l1/anli/dev_r2/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bcf2167c91a3a87a89a39b4ba636a92c1c0499ee --- /dev/null +++ b/evaluation_l1/anli/dev_r2/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r2", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.419 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json b/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b5e80243b6a064cff22a8ae1358d3a1065f5cc6 --- /dev/null +++ b/evaluation_l1/anli/dev_r2/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r2", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.345 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r2/justified_in_saying/results.json b/evaluation_l1/anli/dev_r2/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..104e8bc71bf21d47b3390778a2bae08084763b39 --- /dev/null +++ b/evaluation_l1/anli/dev_r2/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r2", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.41 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', 
dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r3/GPT-3_style/results.json b/evaluation_l1/anli/dev_r3/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ceae21ce359e3db0adf8516d4199124caa0e5a81 --- /dev/null +++ b/evaluation_l1/anli/dev_r3/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r3", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.4558333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json b/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5e133c1acdb994634d864698c93a5014e3f47019 --- /dev/null +++ b/evaluation_l1/anli/dev_r3/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r3", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.41333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r3/can_we_infer/results.json b/evaluation_l1/anli/dev_r3/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dc8705fa2dffaf0d5ab82b34ab8e28b6c4c9ae1e --- /dev/null +++ b/evaluation_l1/anli/dev_r3/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r3", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4225 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json b/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dc1c9c2cbed6db1dba1a6b120cd962090e1c94b9 --- /dev/null +++ b/evaluation_l1/anli/dev_r3/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r3", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.305 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/anli/dev_r3/justified_in_saying/results.json b/evaluation_l1/anli/dev_r3/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..81a1ad51681d7dba238377f77944974385162d26 --- /dev/null +++ b/evaluation_l1/anli/dev_r3/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "anli", + "dataset_config_name": "dev_r3", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4083333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/merged.csv b/evaluation_l1/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..e22f6c3d3ab721a945ee943a9fe18ad51e742d92 --- /dev/null +++ b/evaluation_l1/merged.csv @@ -0,0 +1,194 @@ +dataset,prompt,metric,value +anli_dev_r1,GPT-3 style,accuracy,0.497 +anli_dev_r1,MNLI crowdsource,accuracy,0.442 +anli_dev_r1,can we infer,accuracy,0.456 +anli_dev_r1,guaranteed/possible/impossible,accuracy,0.328 +anli_dev_r1,justified in saying,accuracy,0.46 +anli_dev_r1,median,accuracy,0.456 +anli_dev_r2,GPT-3 style,accuracy,0.45 +anli_dev_r2,MNLI crowdsource,accuracy,0.382 +anli_dev_r2,can we infer,accuracy,0.419 
+anli_dev_r2,guaranteed/possible/impossible,accuracy,0.345 +anli_dev_r2,justified in saying,accuracy,0.41 +anli_dev_r2,median,accuracy,0.41 +anli_dev_r3,GPT-3 style,accuracy,0.4558333333333333 +anli_dev_r3,MNLI crowdsource,accuracy,0.41333333333333333 +anli_dev_r3,can we infer,accuracy,0.4225 +anli_dev_r3,guaranteed/possible/impossible,accuracy,0.305 +anli_dev_r3,justified in saying,accuracy,0.4083333333333333 +anli_dev_r3,median,accuracy,0.41333333333333333 +story_cloze_2016,Answer Given options,accuracy,0.9524318546231961 +story_cloze_2016,Choose Story Ending,accuracy,0.9668626402993051 +story_cloze_2016,Generate Ending,accuracy,0.7760555852485302 +story_cloze_2016,Novel Correct Ending,accuracy,0.9583110636023516 +story_cloze_2016,Story Continuation and Options,accuracy,0.9593800106894709 +story_cloze_2016,median,accuracy,0.9583110636023516 +super_glue_cb,GPT-3 style,accuracy,0.875 +super_glue_cb,MNLI crowdsource,accuracy,0.35714285714285715 +super_glue_cb,can we infer,accuracy,0.75 +super_glue_cb,guaranteed/possible/impossible,accuracy,0.7678571428571429 +super_glue_cb,justified in saying,accuracy,0.8035714285714286 +super_glue_cb,median,accuracy,0.7678571428571429 +super_glue_copa,"C1 or C2? premise, so/because…",accuracy,0.75 +super_glue_copa,best_option,accuracy,0.87 +super_glue_copa,cause_effect,accuracy,0.9 +super_glue_copa,i_am_hesitating,accuracy,0.91 +super_glue_copa,plausible_alternatives,accuracy,0.91 +super_glue_copa,median,accuracy,0.9 +super_glue_rte,GPT-3 style,accuracy,0.7870036101083032 +super_glue_rte,MNLI crowdsource,accuracy,0.8592057761732852 +super_glue_rte,does it follow that,accuracy,0.8194945848375451 +super_glue_rte,guaranteed true,accuracy,0.7942238267148014 +super_glue_rte,should assume,accuracy,0.8122743682310469 +super_glue_rte,median,accuracy,0.8122743682310469 +winogrande_winogrande_xl,Replace,accuracy,0.5998421468034728 +winogrande_winogrande_xl,True or False,accuracy,0.5359116022099447 +winogrande_winogrande_xl,does underscore refer to,accuracy,0.5864246250986582 +winogrande_winogrande_xl,stand for,accuracy,0.5201262825572218 +winogrande_winogrande_xl,underscore refer to,accuracy,0.5880031570639306 +winogrande_winogrande_xl,median,accuracy,0.5864246250986582 +xcopa_id,"C1 or C2? premise, so/because…",accuracy,0.56 +xcopa_id,best_option,accuracy,0.81 +xcopa_id,cause_effect,accuracy,0.87 +xcopa_id,i_am_hesitating,accuracy,0.83 +xcopa_id,plausible_alternatives,accuracy,0.87 +xcopa_id,median,accuracy,0.83 +xcopa_sw,"C1 or C2? premise, so/because…",accuracy,0.6 +xcopa_sw,best_option,accuracy,0.62 +xcopa_sw,cause_effect,accuracy,0.64 +xcopa_sw,i_am_hesitating,accuracy,0.66 +xcopa_sw,plausible_alternatives,accuracy,0.64 +xcopa_sw,median,accuracy,0.64 +xcopa_ta,"C1 or C2? premise, so/because…",accuracy,0.59 +xcopa_ta,best_option,accuracy,0.66 +xcopa_ta,cause_effect,accuracy,0.7 +xcopa_ta,i_am_hesitating,accuracy,0.69 +xcopa_ta,plausible_alternatives,accuracy,0.64 +xcopa_ta,median,accuracy,0.66 +xcopa_vi,"C1 or C2? premise, so/because…",accuracy,0.58 +xcopa_vi,best_option,accuracy,0.81 +xcopa_vi,cause_effect,accuracy,0.91 +xcopa_vi,i_am_hesitating,accuracy,0.85 +xcopa_vi,plausible_alternatives,accuracy,0.84 +xcopa_vi,median,accuracy,0.84 +xcopa_zh,"C1 or C2? 
premise, so/because…",accuracy,0.57 +xcopa_zh,best_option,accuracy,0.84 +xcopa_zh,cause_effect,accuracy,0.86 +xcopa_zh,i_am_hesitating,accuracy,0.86 +xcopa_zh,plausible_alternatives,accuracy,0.81 +xcopa_zh,median,accuracy,0.84 +xnli_ar,GPT-3 style,accuracy,0.5578313253012048 +xnli_ar,MNLI crowdsource,accuracy,0.41164658634538154 +xnli_ar,can we infer,accuracy,0.5152610441767068 +xnli_ar,guaranteed/possible/impossible,accuracy,0.5803212851405622 +xnli_ar,justified in saying,accuracy,0.5184738955823294 +xnli_ar,median,accuracy,0.5184738955823294 +xnli_en,GPT-3 style,accuracy,0.6176706827309237 +xnli_en,MNLI crowdsource,accuracy,0.4606425702811245 +xnli_en,can we infer,accuracy,0.5714859437751004 +xnli_en,guaranteed/possible/impossible,accuracy,0.6180722891566265 +xnli_en,justified in saying,accuracy,0.5746987951807229 +xnli_en,median,accuracy,0.5746987951807229 +xnli_es,GPT-3 style,accuracy,0.5911646586345382 +xnli_es,MNLI crowdsource,accuracy,0.43052208835341366 +xnli_es,can we infer,accuracy,0.4397590361445783 +xnli_es,guaranteed/possible/impossible,accuracy,0.5208835341365462 +xnli_es,justified in saying,accuracy,0.41726907630522087 +xnli_es,median,accuracy,0.4397590361445783 +xnli_fr,GPT-3 style,accuracy,0.5911646586345382 +xnli_fr,MNLI crowdsource,accuracy,0.4321285140562249 +xnli_fr,can we infer,accuracy,0.5369477911646586 +xnli_fr,guaranteed/possible/impossible,accuracy,0.5176706827309236 +xnli_fr,justified in saying,accuracy,0.5385542168674698 +xnli_fr,median,accuracy,0.5369477911646586 +xnli_hi,GPT-3 style,accuracy,0.5208835341365462 +xnli_hi,MNLI crowdsource,accuracy,0.3819277108433735 +xnli_hi,can we infer,accuracy,0.44176706827309237 +xnli_hi,guaranteed/possible/impossible,accuracy,0.5253012048192771 +xnli_hi,justified in saying,accuracy,0.44377510040160645 +xnli_hi,median,accuracy,0.44377510040160645 +xnli_sw,GPT-3 style,accuracy,0.5036144578313253 +xnli_sw,MNLI crowdsource,accuracy,0.3887550200803213 +xnli_sw,can we infer,accuracy,0.44216867469879517 +xnli_sw,guaranteed/possible/impossible,accuracy,0.38795180722891565 +xnli_sw,justified in saying,accuracy,0.4397590361445783 +xnli_sw,median,accuracy,0.4397590361445783 +xnli_ur,GPT-3 style,accuracy,0.4907630522088353 +xnli_ur,MNLI crowdsource,accuracy,0.37309236947791163 +xnli_ur,can we infer,accuracy,0.45863453815261046 +xnli_ur,guaranteed/possible/impossible,accuracy,0.5124497991967871 +xnli_ur,justified in saying,accuracy,0.45582329317269077 +xnli_ur,median,accuracy,0.45863453815261046 +xnli_vi,GPT-3 style,accuracy,0.5582329317269076 +xnli_vi,MNLI crowdsource,accuracy,0.42690763052208835 +xnli_vi,can we infer,accuracy,0.4759036144578313 +xnli_vi,guaranteed/possible/impossible,accuracy,0.5008032128514056 +xnli_vi,justified in saying,accuracy,0.4827309236947791 +xnli_vi,median,accuracy,0.4827309236947791 +xnli_zh,GPT-3 style,accuracy,0.5550200803212851 +xnli_zh,MNLI crowdsource,accuracy,0.4248995983935743 +xnli_zh,can we infer,accuracy,0.43052208835341366 +xnli_zh,guaranteed/possible/impossible,accuracy,0.5526104417670683 +xnli_zh,justified in saying,accuracy,0.44016064257028115 +xnli_zh,median,accuracy,0.44016064257028115 +xstory_cloze_ar,Answer Given options,accuracy,0.7835870284579749 +xstory_cloze_ar,Choose Story Ending,accuracy,0.9291859695565851 +xstory_cloze_ar,Generate Ending,accuracy,0.6624751819986764 +xstory_cloze_ar,Novel Correct Ending,accuracy,0.9252150893448048 +xstory_cloze_ar,Story Continuation and Options,accuracy,0.9159497021839841 +xstory_cloze_ar,median,accuracy,0.9159497021839841 +xstory_cloze_es,Answer 
Given options,accuracy,0.870946393117141 +xstory_cloze_es,Choose Story Ending,accuracy,0.9523494374586366 +xstory_cloze_es,Generate Ending,accuracy,0.7319655857048313 +xstory_cloze_es,Novel Correct Ending,accuracy,0.9477167438782264 +xstory_cloze_es,Story Continuation and Options,accuracy,0.9516876240900066 +xstory_cloze_es,median,accuracy,0.9477167438782264 +xstory_cloze_eu,Answer Given options,accuracy,0.6982131039046989 +xstory_cloze_eu,Choose Story Ending,accuracy,0.85704831237591 +xstory_cloze_eu,Generate Ending,accuracy,0.614162806088683 +xstory_cloze_eu,Novel Correct Ending,accuracy,0.8590337524818001 +xstory_cloze_eu,Story Continuation and Options,accuracy,0.8504301786896096 +xstory_cloze_eu,median,accuracy,0.8504301786896096 +xstory_cloze_hi,Answer Given options,accuracy,0.7683653209794837 +xstory_cloze_hi,Choose Story Ending,accuracy,0.8742554599602912 +xstory_cloze_hi,Generate Ending,accuracy,0.657180675049636 +xstory_cloze_hi,Novel Correct Ending,accuracy,0.886829913964262 +xstory_cloze_hi,Story Continuation and Options,accuracy,0.8762409000661814 +xstory_cloze_hi,median,accuracy,0.8742554599602912 +xstory_cloze_id,Answer Given options,accuracy,0.8332230311052283 +xstory_cloze_id,Choose Story Ending,accuracy,0.913964262078094 +xstory_cloze_id,Generate Ending,accuracy,0.700198544010589 +xstory_cloze_id,Novel Correct Ending,accuracy,0.9205823957643945 +xstory_cloze_id,Story Continuation and Options,accuracy,0.9086697551290536 +xstory_cloze_id,median,accuracy,0.9086697551290536 +xstory_cloze_zh,Answer Given options,accuracy,0.870946393117141 +xstory_cloze_zh,Choose Story Ending,accuracy,0.9265387160820648 +xstory_cloze_zh,Generate Ending,accuracy,0.6823295830575777 +xstory_cloze_zh,Novel Correct Ending,accuracy,0.928524156187955 +xstory_cloze_zh,Story Continuation and Options,accuracy,0.9232296492389146 +xstory_cloze_zh,median,accuracy,0.9232296492389146 +xwinograd_en,Replace,accuracy,0.6933333333333334 +xwinograd_en,True or False,accuracy,0.5212903225806451 +xwinograd_en,does underscore refer to,accuracy,0.6563440860215054 +xwinograd_en,stand for,accuracy,0.5156989247311828 +xwinograd_en,underscore refer to,accuracy,0.6473118279569893 +xwinograd_en,median,accuracy,0.6473118279569893 +xwinograd_fr,Replace,accuracy,0.6024096385542169 +xwinograd_fr,True or False,accuracy,0.46987951807228917 +xwinograd_fr,does underscore refer to,accuracy,0.5903614457831325 +xwinograd_fr,stand for,accuracy,0.4939759036144578 +xwinograd_fr,underscore refer to,accuracy,0.6867469879518072 +xwinograd_fr,median,accuracy,0.5903614457831325 +xwinograd_pt,Replace,accuracy,0.6463878326996197 +xwinograd_pt,True or False,accuracy,0.5285171102661597 +xwinograd_pt,does underscore refer to,accuracy,0.6007604562737643 +xwinograd_pt,stand for,accuracy,0.49809885931558934 +xwinograd_pt,underscore refer to,accuracy,0.6083650190114068 +xwinograd_pt,median,accuracy,0.6007604562737643 +xwinograd_zh,Replace,accuracy,0.7063492063492064 +xwinograd_zh,True or False,accuracy,0.5515873015873016 +xwinograd_zh,does underscore refer to,accuracy,0.621031746031746 +xwinograd_zh,stand for,accuracy,0.5158730158730159 +xwinograd_zh,underscore refer to,accuracy,0.6765873015873016 +xwinograd_zh,median,accuracy,0.621031746031746 +multiple,average,multiple,0.6665267892901372 diff --git a/evaluation_l1/merged.json b/evaluation_l1/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..501d6ffa6ac3a5c6e3c21ad460331ab55203d9b0 --- /dev/null +++ b/evaluation_l1/merged.json @@ -0,0 +1 @@ 
+{"Muennighoff/xstory_cloze_ar": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7835870284579749}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9291859695565851}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6624751819986764}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9252150893448048}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, 
dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9159497021839841}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_es": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.870946393117141}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9523494374586366}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7319655857048313}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9477167438782264}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9516876240900066}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_eu": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6982131039046989}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.85704831237591}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.614162806088683}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8590337524818001}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8504301786896096}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_hi": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7683653209794837}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8742554599602912}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.657180675049636}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.886829913964262}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8762409000661814}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_id": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8332230311052283}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.913964262078094}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.700198544010589}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9205823957643945}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9086697551290536}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xstory_cloze_zh": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.870946393117141}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9265387160820648}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6823295830575777}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct 
Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.928524156187955}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9232296492389146}, "template_name": "Story Continuation and Options"}}, "Muennighoff/xwinograd_en": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6933333333333334}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5212903225806451}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6563440860215054}, "template_name": "does 
underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5156989247311828}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6473118279569893}, "template_name": "underscore refer to"}}, "Muennighoff/xwinograd_fr": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6024096385542169}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.46987951807228917}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5903614457831325}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.4939759036144578}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6867469879518072}, "template_name": "underscore refer to"}}, "Muennighoff/xwinograd_pt": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6463878326996197}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', 
pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5285171102661597}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6007604562737643}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.49809885931558934}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6083650190114068}, "template_name": "underscore refer to"}}, "Muennighoff/xwinograd_zh": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", 
"dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.7063492063492064}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5515873015873016}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.621031746031746}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5158730158730159}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6765873015873016}, "template_name": "underscore refer to"}}, "anli_dev_r1": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, 
dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.497}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.442}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.456}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.328}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', 
target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r1", "dataset_name": "anli", "evaluation": {"accuracy": 0.46}, "template_name": "justified in saying"}}, "anli_dev_r2": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.45}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.382}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.419}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.345}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', 
debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r2", "dataset_name": "anli", "evaluation": {"accuracy": 0.41}, "template_name": "justified in saying"}}, "anli_dev_r3": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.4558333333333333}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.41333333333333333}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.4225}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, 
prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.305}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "dev_r3", "dataset_name": "anli", "evaluation": {"accuracy": 0.4083333333333333}, "template_name": "justified in saying"}}, "story_cloze_2016": {"Answer Given options": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.9524318546231961}, "template_name": "Answer Given options"}, "Choose Story Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.9668626402993051}, "template_name": "Choose Story Ending"}, "Generate Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 
0.7760555852485302}, "template_name": "Generate Ending"}, "Novel Correct Ending": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.9583110636023516}, "template_name": "Novel Correct Ending"}, "Story Continuation and Options": {"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "2016", "dataset_name": "story_cloze", "evaluation": {"accuracy": 0.9593800106894709}, "template_name": "Story Continuation and Options"}}, "super_glue_cb": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.875}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.35714285714285715}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.75}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7678571428571429}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "cb", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8035714285714286}, "template_name": "justified in saying"}}, "super_glue_copa": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.75}, "template_name": "C1 or C2? 
premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.87}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.9}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.91}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "copa", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.91}, "template_name": "plausible_alternatives"}}, "super_glue_rte": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7870036101083032}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8592057761732852}, "template_name": "MNLI crowdsource"}, "does it follow that": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8194945848375451}, "template_name": "does it follow that"}, "guaranteed true": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.7942238267148014}, "template_name": "guaranteed true"}, "should assume": {"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, 
use_slow_tokenizer=False)", "dataset_config_name": "rte", "dataset_name": "super_glue", "evaluation": {"accuracy": 0.8122743682310469}, "template_name": "should assume"}}, "winogrande_winogrande_xl": {"Replace": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5998421468034728}, "template_name": "Replace"}, "True or False": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5359116022099447}, "template_name": "True or False"}, "does underscore refer to": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5864246250986582}, "template_name": "does underscore refer to"}, "stand for": {"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5201262825572218}, "template_name": "stand for"}, "underscore refer to": {"arguments": "Namespace(config_name=None, 
dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "winogrande_xl", "dataset_name": "winogrande", "evaluation": {"accuracy": 0.5880031570639306}, "template_name": "underscore refer to"}}, "xcopa_id": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.56}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.81}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.87}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.83}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.87}, "template_name": "plausible_alternatives"}}, "xcopa_sw": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "C1 or C2? 
premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.66}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "plausible_alternatives"}}, "xcopa_ta": {"C1 or C2? 
premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.59}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.66}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.7}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.69}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "plausible_alternatives"}}, "xcopa_vi": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.58}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.81}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.91}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", 
"dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.85}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "plausible_alternatives"}}, "xcopa_zh": {"C1 or C2? premise, so/because\u2026": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.57}, "template_name": "C1 or C2? premise, so/because\u2026"}, "best_option": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "best_option"}, "cause_effect": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.86}, "template_name": "cause_effect"}, "i_am_hesitating": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.86}, "template_name": "i_am_hesitating"}, "plausible_alternatives": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.81}, "template_name": "plausible_alternatives"}}, "xnli_ar": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5578313253012048}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.41164658634538154}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, 
template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5152610441767068}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5803212851405622}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5184738955823294}, "template_name": "justified in saying"}}, "xnli_en": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.6176706827309237}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4606425702811245}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='en', 
dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5714859437751004}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.6180722891566265}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "en", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5746987951807229}, "template_name": "justified in saying"}}, "xnli_es": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5911646586345382}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', 
pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43052208835341366}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4397590361445783}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5208835341365462}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.41726907630522087}, "template_name": "justified in saying"}}, "xnli_fr": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5911646586345382}, "template_name": 
"GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4321285140562249}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5369477911646586}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5176706827309236}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5385542168674698}, "template_name": "justified in saying"}}, "xnli_hi": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5208835341365462}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3819277108433735}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44176706827309237}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5253012048192771}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, 
use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44377510040160645}, "template_name": "justified in saying"}}, "xnli_sw": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5036144578313253}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3887550200803213}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44216867469879517}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.38795180722891565}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4397590361445783}, "template_name": "justified in saying"}}, "xnli_ur": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4907630522088353}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37309236947791163}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.45863453815261046}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', 
target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5124497991967871}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.45582329317269077}, "template_name": "justified in saying"}}, "xnli_vi": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5582329317269076}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.42690763052208835}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4759036144578313}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, 
dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5008032128514056}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4827309236947791}, "template_name": "justified in saying"}}, "xnli_zh": {"GPT-3 style": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5550200803212851}, "template_name": "GPT-3 style"}, "MNLI crowdsource": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4248995983935743}, "template_name": "MNLI crowdsource"}, "can we infer": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43052208835341366}, "template_name": "can we infer"}, "guaranteed/possible/impossible": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5526104417670683}, "template_name": "guaranteed/possible/impossible"}, "justified in saying": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44016064257028115}, "template_name": "justified in saying"}}} \ No newline at end of file diff --git a/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json b/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c17d76a62d827c060ee745ccf9611eac8ff3cdac --- /dev/null +++ b/evaluation_l1/story_cloze/2016/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "story_cloze", + "dataset_config_name": "2016", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.9524318546231961 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json b/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json new file mode 100644 index 
0000000000000000000000000000000000000000..b402b37266757783bf6291bcee1c9cfa46d216f1 --- /dev/null +++ b/evaluation_l1/story_cloze/2016/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "story_cloze", + "dataset_config_name": "2016", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.9668626402993051 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/story_cloze/2016/Generate_Ending/results.json b/evaluation_l1/story_cloze/2016/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb9415efb88e94a410bb8a2473f70078b3feffb3 --- /dev/null +++ b/evaluation_l1/story_cloze/2016/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "story_cloze", + "dataset_config_name": "2016", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.7760555852485302 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json b/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c6a1159adea4b46e6d68e29b5ccf22f7d3eeded --- /dev/null +++ b/evaluation_l1/story_cloze/2016/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "story_cloze", + "dataset_config_name": "2016", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.9583110636023516 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json 
b/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2a5a5c76a3c4b4526a73cf505aa142660bf95af7 --- /dev/null +++ b/evaluation_l1/story_cloze/2016/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "story_cloze", + "dataset_config_name": "2016", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.9593800106894709 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/cb/GPT-3_style/results.json b/evaluation_l1/super_glue/cb/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..150551119a34190b84b09a536f5bd1058b09bf1c --- /dev/null +++ b/evaluation_l1/super_glue/cb/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "cb", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.875 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json b/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f504eb8fa769b4398bc5bfed5fd0032fbb5b979e --- /dev/null +++ b/evaluation_l1/super_glue/cb/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "cb", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.35714285714285715 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/cb/can_we_infer/results.json 
b/evaluation_l1/super_glue/cb/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5e4c7c789ae4d0104e023f693f89f383f2d7765c --- /dev/null +++ b/evaluation_l1/super_glue/cb/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "cb", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.75 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json b/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..52ea214f6c6b2ebd759e937adf6da63abfc8cc43 --- /dev/null +++ b/evaluation_l1/super_glue/cb/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "cb", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.7678571428571429 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/cb/justified_in_saying/results.json b/evaluation_l1/super_glue/cb/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d1911dc1d0b22bc55f044765a8435c1f0ca95c2 --- /dev/null +++ b/evaluation_l1/super_glue/cb/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "cb", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.8035714285714286 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git 
a/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json b/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd07711fac65df94ecc461abbb54e79c4c29e1b1 --- /dev/null +++ b/evaluation_l1/super_glue/copa/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "copa", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.75 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/copa/best_option/results.json b/evaluation_l1/super_glue/copa/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5bbfcd2b3a8c506efc556c73a0a3177eef86355c --- /dev/null +++ b/evaluation_l1/super_glue/copa/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "copa", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.87 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/copa/cause_effect/results.json b/evaluation_l1/super_glue/copa/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..92a200f978839cc7833bf68d793994f49f880572 --- /dev/null +++ b/evaluation_l1/super_glue/copa/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "copa", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.9 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/copa/i_am_hesitating/results.json 
b/evaluation_l1/super_glue/copa/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..103509431a1ccfac1b771cd1d235bce0d3b70c4b --- /dev/null +++ b/evaluation_l1/super_glue/copa/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "copa", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.91 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/copa/plausible_alternatives/results.json b/evaluation_l1/super_glue/copa/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0db9f991a03dbdd962d70f3b63f0ccc795887f73 --- /dev/null +++ b/evaluation_l1/super_glue/copa/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "copa", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.91 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/rte/GPT-3_style/results.json b/evaluation_l1/super_glue/rte/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..074738d3a743bb6e89779674116678b7551cfb54 --- /dev/null +++ b/evaluation_l1/super_glue/rte/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "rte", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.7870036101083032 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json 
b/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..306946726d625ff5536f78fcee8d6be028f7a901 --- /dev/null +++ b/evaluation_l1/super_glue/rte/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "rte", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.8592057761732852 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/rte/does_it_follow_that/results.json b/evaluation_l1/super_glue/rte/does_it_follow_that/results.json new file mode 100644 index 0000000000000000000000000000000000000000..92225b08e90d636f57f4ac20ac58b082a7774712 --- /dev/null +++ b/evaluation_l1/super_glue/rte/does_it_follow_that/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "rte", + "template_name": "does it follow that", + "evaluation": { + "accuracy": 0.8194945848375451 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/rte/guaranteed_true/results.json b/evaluation_l1/super_glue/rte/guaranteed_true/results.json new file mode 100644 index 0000000000000000000000000000000000000000..13e0a08ce6250b6f6e5a37faa695793aa0bbecb0 --- /dev/null +++ b/evaluation_l1/super_glue/rte/guaranteed_true/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "rte", + "template_name": "guaranteed true", + "evaluation": { + "accuracy": 0.7942238267148014 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/super_glue/rte/should_assume/results.json 
b/evaluation_l1/super_glue/rte/should_assume/results.json new file mode 100644 index 0000000000000000000000000000000000000000..787186a80cc2515fe6118deece5a17d189a47d25 --- /dev/null +++ b/evaluation_l1/super_glue/rte/should_assume/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "super_glue", + "dataset_config_name": "rte", + "template_name": "should assume", + "evaluation": { + "accuracy": 0.8122743682310469 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/winogrande/winogrande_xl/Replace/results.json b/evaluation_l1/winogrande/winogrande_xl/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..42019f2d1257574dd2ec471f125ef67a3dd6f761 --- /dev/null +++ b/evaluation_l1/winogrande/winogrande_xl/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "winogrande", + "dataset_config_name": "winogrande_xl", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.5998421468034728 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json b/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7570b5a289024d5d31ca1e682a269567a07df00f --- /dev/null +++ b/evaluation_l1/winogrande/winogrande_xl/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "winogrande", + "dataset_config_name": "winogrande_xl", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.5359116022099447 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git 
a/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json b/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe231ea5c0a34aea5c9f156b5736f80199ac088 --- /dev/null +++ b/evaluation_l1/winogrande/winogrande_xl/does_underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "winogrande", + "dataset_config_name": "winogrande_xl", + "template_name": "does underscore refer to", + "evaluation": { + "accuracy": 0.5864246250986582 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json b/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6819315d5fa6359d1ebf74890878512463babfd8 --- /dev/null +++ b/evaluation_l1/winogrande/winogrande_xl/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "winogrande", + "dataset_config_name": "winogrande_xl", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.5201262825572218 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json b/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e5cf04d02a842555ba0d5dd39f2b3298ffba8250 --- /dev/null +++ b/evaluation_l1/winogrande/winogrande_xl/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "winogrande", + "dataset_config_name": "winogrande_xl", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.5880031570639306 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, 
template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json b/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a1088c9040872e3ae150c45ed74645051dbe1144 --- /dev/null +++ b/evaluation_l1/xcopa/id/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.56 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/id/best_option/results.json b/evaluation_l1/xcopa/id/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a20d27e3166b1f2c426acae2733282ee425f394d --- /dev/null +++ b/evaluation_l1/xcopa/id/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.81 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_l1/xcopa/id/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3c5e9acce7d4e1f6ae99e36d6c9ca92de41d7e30 --- /dev/null +++ b/evaluation_l1/xcopa/id/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.87 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git 
a/evaluation_l1/xcopa/id/i_am_hesitating/results.json b/evaluation_l1/xcopa/id/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bd8d6933b8ef7729962ea2c239587e9cbd2311ec --- /dev/null +++ b/evaluation_l1/xcopa/id/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.83 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_l1/xcopa/id/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7a367c68f6181bbe1d1a9acb9f57b9f7228d2561 --- /dev/null +++ b/evaluation_l1/xcopa/id/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.87 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json b/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..923cf175a2ea8cee40930c011a9e6277f7ec24fb --- /dev/null +++ b/evaluation_l1/xcopa/sw/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.6 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? 
premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/sw/best_option/results.json b/evaluation_l1/xcopa/sw/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a23f938d1d48be3de69b0ba28625fc4bf2b5e7f3 --- /dev/null +++ b/evaluation_l1/xcopa/sw/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.62 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/sw/cause_effect/results.json b/evaluation_l1/xcopa/sw/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..72aaf4a87fbe31b7d4b265f0700c6b3d7e1d85e2 --- /dev/null +++ b/evaluation_l1/xcopa/sw/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.64 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/sw/i_am_hesitating/results.json b/evaluation_l1/xcopa/sw/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e408b253e1bb3a3ea64e06a37a48df3244969442 --- /dev/null +++ b/evaluation_l1/xcopa/sw/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.66 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/sw/plausible_alternatives/results.json b/evaluation_l1/xcopa/sw/plausible_alternatives/results.json 
new file mode 100644 index 0000000000000000000000000000000000000000..1df4cfe999e3301fb63258105a6bd22d6d8a87d0 --- /dev/null +++ b/evaluation_l1/xcopa/sw/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.64 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json b/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..30417333f2d8e1c2f584a1d61725aea60b65c6f1 --- /dev/null +++ b/evaluation_l1/xcopa/ta/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.59 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? 
premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/ta/best_option/results.json b/evaluation_l1/xcopa/ta/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e2654eb7c7fdcc3225e2b671e77820a685159580 --- /dev/null +++ b/evaluation_l1/xcopa/ta/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.66 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/ta/cause_effect/results.json b/evaluation_l1/xcopa/ta/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..06ed968210883b1677420df46920ae8767ae10d4 --- /dev/null +++ b/evaluation_l1/xcopa/ta/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.7 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/ta/i_am_hesitating/results.json b/evaluation_l1/xcopa/ta/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7f856249983a23f49cc7c0ff874576f3165782d5 --- /dev/null +++ b/evaluation_l1/xcopa/ta/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.69 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/ta/plausible_alternatives/results.json b/evaluation_l1/xcopa/ta/plausible_alternatives/results.json 
new file mode 100644 index 0000000000000000000000000000000000000000..57306bd7fddc09ae459ae50228a5d0e5845181e0 --- /dev/null +++ b/evaluation_l1/xcopa/ta/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.64 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json b/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ae38a9d01d5d6923042592213e5294c326c81bf7 --- /dev/null +++ b/evaluation_l1/xcopa/vi/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.58 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? 
premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/vi/best_option/results.json b/evaluation_l1/xcopa/vi/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b8dc7ac5d1e291253d6c136a15338d56638ad7ea --- /dev/null +++ b/evaluation_l1/xcopa/vi/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.81 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/vi/cause_effect/results.json b/evaluation_l1/xcopa/vi/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e7406329c9039078fb7ae9d2ef72155ae008417 --- /dev/null +++ b/evaluation_l1/xcopa/vi/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.91 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/vi/i_am_hesitating/results.json b/evaluation_l1/xcopa/vi/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f41710a911538208550e160eed17539d59b24bf1 --- /dev/null +++ b/evaluation_l1/xcopa/vi/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.85 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/vi/plausible_alternatives/results.json b/evaluation_l1/xcopa/vi/plausible_alternatives/results.json 
new file mode 100644 index 0000000000000000000000000000000000000000..cf1bea02a25840d8ecb41d39a44242cef297b981 --- /dev/null +++ b/evaluation_l1/xcopa/vi/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.84 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json b/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a9a4b3b430590a5347eaef9a5d4e361726881045 --- /dev/null +++ b/evaluation_l1/xcopa/zh/C1_or_C2?_premise/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "C1 or C2? premise, so/because\u2026", + "evaluation": { + "accuracy": 0.57 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? 
premise', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/zh/best_option/results.json b/evaluation_l1/xcopa/zh/best_option/results.json new file mode 100644 index 0000000000000000000000000000000000000000..aed5c9b07cc6b2a3d46478aea301f538d1b3775a --- /dev/null +++ b/evaluation_l1/xcopa/zh/best_option/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "best_option", + "evaluation": { + "accuracy": 0.84 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/zh/cause_effect/results.json b/evaluation_l1/xcopa/zh/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7e9010b77cae2b075cdc0513155deb6d81aa382 --- /dev/null +++ b/evaluation_l1/xcopa/zh/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.86 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/zh/i_am_hesitating/results.json b/evaluation_l1/xcopa/zh/i_am_hesitating/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8f11039c838151ff3b5a0a2e27634e683ec56a1f --- /dev/null +++ b/evaluation_l1/xcopa/zh/i_am_hesitating/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "i_am_hesitating", + "evaluation": { + "accuracy": 0.86 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_l1/xcopa/zh/plausible_alternatives/results.json 
new file mode 100644 index 0000000000000000000000000000000000000000..150f66521abd7106df9da22a6386cb009e460e36 --- /dev/null +++ b/evaluation_l1/xcopa/zh/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.81 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ar/GPT-3_style/results.json b/evaluation_l1/xnli/ar/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ff01b7379c993414d086d9db17854a7205ac6cc6 --- /dev/null +++ b/evaluation_l1/xnli/ar/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5578313253012048 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json b/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac2ea9421148ad0ae4106e400778c01975b51947 --- /dev/null +++ b/evaluation_l1/xnli/ar/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.41164658634538154 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ar/can_we_infer/results.json b/evaluation_l1/xnli/ar/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dfcac9ad9cb913582ab0487da22cea8e0d7e8e23 --- /dev/null +++ 
b/evaluation_l1/xnli/ar/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.5152610441767068 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5b11969b0e13f92bc6a6161eaf64e997249061de --- /dev/null +++ b/evaluation_l1/xnli/ar/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5803212851405622 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ar/justified_in_saying/results.json b/evaluation_l1/xnli/ar/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..efe506af9c6d3273a1318b96a97687fac279971e --- /dev/null +++ b/evaluation_l1/xnli/ar/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.5184738955823294 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/en/GPT-3_style/results.json b/evaluation_l1/xnli/en/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f210a5ed2192d1d6a9b76886e07ad2d5e0b43f00 --- /dev/null +++ b/evaluation_l1/xnli/en/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + 
"dataset_name": "xnli", + "dataset_config_name": "en", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.6176706827309237 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/en/MNLI_crowdsource/results.json b/evaluation_l1/xnli/en/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..aab3affbd4f922e20a2e7ff49377b5a9db287063 --- /dev/null +++ b/evaluation_l1/xnli/en/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "en", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.4606425702811245 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_l1/xnli/en/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c81e210fafab735c745e71971539d6067357f32d --- /dev/null +++ b/evaluation_l1/xnli/en/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "en", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.5714859437751004 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f6783548974144b51902dcc85cea22068f7b94a --- /dev/null +++ b/evaluation_l1/xnli/en/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "en", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + 
"accuracy": 0.6180722891566265 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/en/justified_in_saying/results.json b/evaluation_l1/xnli/en/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c9543f0f76df9a579da6abc88f92b6bdd7e2675 --- /dev/null +++ b/evaluation_l1/xnli/en/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "en", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.5746987951807229 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_l1/xnli/es/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1d004908cc470c9648fe66b283ad455e1b834ef0 --- /dev/null +++ b/evaluation_l1/xnli/es/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5911646586345382 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_l1/xnli/es/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..121a0c50ffb27924c389db1d5d51037b5d6f5491 --- /dev/null +++ b/evaluation_l1/xnli/es/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.43052208835341366 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, 
dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/can_we_infer/results.json b/evaluation_l1/xnli/es/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..20d1691995a8a5433ecb240cb1c3d03746c2a51f --- /dev/null +++ b/evaluation_l1/xnli/es/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4397590361445783 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..49b23b559b781c83bad56c90e5174f1e0b28f1f8 --- /dev/null +++ b/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5208835341365462 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_l1/xnli/es/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..afeca9dbd848afc14b8b19d223b64f7d5efaaeaa --- /dev/null +++ b/evaluation_l1/xnli/es/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.41726907630522087 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/fr/GPT-3_style/results.json b/evaluation_l1/xnli/fr/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..98dfab12ef834b10246c6965b6193ecddadd61e6 --- /dev/null +++ b/evaluation_l1/xnli/fr/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5911646586345382 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json b/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..45c5d22e3bf400249196b61eeecaa7da720505e5 --- /dev/null +++ b/evaluation_l1/xnli/fr/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.4321285140562249 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/fr/can_we_infer/results.json b/evaluation_l1/xnli/fr/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..77d6f700093348996e1d24295849682c1ee5ac64 --- /dev/null +++ b/evaluation_l1/xnli/fr/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.5369477911646586 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..88f476e6a408c2d37aa3e4e651313e71bc7f1b3f --- /dev/null +++ b/evaluation_l1/xnli/fr/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5176706827309236 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/fr/justified_in_saying/results.json b/evaluation_l1/xnli/fr/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e4bd8c269d5762e1069423690ccdde27e4e56e3b --- /dev/null +++ b/evaluation_l1/xnli/fr/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.5385542168674698 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/hi/GPT-3_style/results.json b/evaluation_l1/xnli/hi/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..90d22bda903ba93a0e0324665ec828fc885d8156 --- /dev/null +++ b/evaluation_l1/xnli/hi/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5208835341365462 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json b/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0b58e1ba7eec12668ad871080febcfaf9860649f --- /dev/null +++ b/evaluation_l1/xnli/hi/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3819277108433735 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/hi/can_we_infer/results.json b/evaluation_l1/xnli/hi/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b58a33fd1887c977c1b06221f564dbe217c2e539 --- /dev/null +++ b/evaluation_l1/xnli/hi/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.44176706827309237 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c768d703f49038fc3b9b146f5f22fe421746912a --- /dev/null +++ b/evaluation_l1/xnli/hi/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5253012048192771 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/hi/justified_in_saying/results.json b/evaluation_l1/xnli/hi/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..176c133af7bf8b355f7e2b4214c435c7b241de40 --- /dev/null +++ b/evaluation_l1/xnli/hi/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.44377510040160645 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/sw/GPT-3_style/results.json b/evaluation_l1/xnli/sw/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..745c75053efd47e267b816d7fc9ef9059b9af430 --- /dev/null +++ b/evaluation_l1/xnli/sw/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5036144578313253 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json b/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1fbf12e7a9737e4355d54e58ae5db9d89f701b93 --- /dev/null +++ b/evaluation_l1/xnli/sw/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3887550200803213 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', 
pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/sw/can_we_infer/results.json b/evaluation_l1/xnli/sw/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c04f874fe0f58a56b90ffb038bee36b10fbb2b8c --- /dev/null +++ b/evaluation_l1/xnli/sw/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.44216867469879517 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a1a771df194005c2f9a30786e341de8e9a310609 --- /dev/null +++ b/evaluation_l1/xnli/sw/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.38795180722891565 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/sw/justified_in_saying/results.json b/evaluation_l1/xnli/sw/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b06b5ac8c564d4da30284d6f8d545022a192b420 --- /dev/null +++ b/evaluation_l1/xnli/sw/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4397590361445783 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, 
split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ur/GPT-3_style/results.json b/evaluation_l1/xnli/ur/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..361ed448d271cdef8b6876d50d3e9500e535bcc8 --- /dev/null +++ b/evaluation_l1/xnli/ur/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.4907630522088353 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json b/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d09364d03f5460b06357537390716479b588a719 --- /dev/null +++ b/evaluation_l1/xnli/ur/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.37309236947791163 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ur/can_we_infer/results.json b/evaluation_l1/xnli/ur/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..55616a19ddbd889cc33255b73fdc8fa6b91e68ec --- /dev/null +++ b/evaluation_l1/xnli/ur/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.45863453815261046 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file 
diff --git a/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e4d0ae8991d72c90be89ca5f763e817b03ac05d5 --- /dev/null +++ b/evaluation_l1/xnli/ur/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5124497991967871 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/ur/justified_in_saying/results.json b/evaluation_l1/xnli/ur/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..eb743b1f0c2c178dec6881a3be1d5336589cbfa1 --- /dev/null +++ b/evaluation_l1/xnli/ur/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.45582329317269077 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/vi/GPT-3_style/results.json b/evaluation_l1/xnli/vi/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e77b5f3004afba26184222f7a45fadcaabd63989 --- /dev/null +++ b/evaluation_l1/xnli/vi/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5582329317269076 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json 
b/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f84b726698ca47e601a1aa4e3896d0ee7185e510 --- /dev/null +++ b/evaluation_l1/xnli/vi/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.42690763052208835 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/vi/can_we_infer/results.json b/evaluation_l1/xnli/vi/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4fd391f132bfedf7cd39efd2b6656b564d343458 --- /dev/null +++ b/evaluation_l1/xnli/vi/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4759036144578313 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ab70dc2fca42dab93474d9a5f5432d1bc74968ff --- /dev/null +++ b/evaluation_l1/xnli/vi/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5008032128514056 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/vi/justified_in_saying/results.json b/evaluation_l1/xnli/vi/justified_in_saying/results.json new file mode 100644 index 
0000000000000000000000000000000000000000..5186d53e6ea41cde8f8a201fa2699a49eb570ba5 --- /dev/null +++ b/evaluation_l1/xnli/vi/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.4827309236947791 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/zh/GPT-3_style/results.json b/evaluation_l1/xnli/zh/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5fbaef1325c16e12a77c3c23abd166c6fa411e1c --- /dev/null +++ b/evaluation_l1/xnli/zh/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.5550200803212851 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json b/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1b2ff42f6dc33b2fb66c0047bacea591578bf91e --- /dev/null +++ b/evaluation_l1/xnli/zh/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.4248995983935743 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/zh/can_we_infer/results.json b/evaluation_l1/xnli/zh/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..82d063dec3853aa0ea836bb0c48c95984a99f234 --- /dev/null +++ b/evaluation_l1/xnli/zh/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ 
+ "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.43052208835341366 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..18da408e415812cc705f7f27ba28ee3069c8b85a --- /dev/null +++ b/evaluation_l1/xnli/zh/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.5526104417670683 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/zh/justified_in_saying/results.json b/evaluation_l1/xnli/zh/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f668b2453fefbc4459efecb5e12ee1831705740c --- /dev/null +++ b/evaluation_l1/xnli/zh/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.44016064257028115 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..08eaf8d55649a67760962798f16115077603ea06 --- /dev/null +++ 
b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Answer Given options_armt", + "evaluation": { + "accuracy": 0.8941098610191925 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..773f18b6f44483c2b276c74fa8dba3a73173f336 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Choose Story Ending_armt", + "evaluation": { + "accuracy": 0.9404367968232958 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1019f9b10c16be41066096a733d486afaec79fe9 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Generate Ending_armt", + "evaluation": { + "accuracy": 0.6598279285241562 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, 
use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2ef0ca9ebf98a076846d9f2cfc5e299323efb0ce --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Novel Correct Ending_armt", + "evaluation": { + "accuracy": 0.9272005294506949 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8a8a2cfc67d927bf9dd38579780168915877b79f --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "ar", + "template_name": "Story Continuation and Options_armt", + "evaluation": { + "accuracy": 0.9172733289212442 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9346b5f9fe2b425df3ba7ae90573ef5c7e7fb3d1 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Answer Given options_esmt", + "evaluation": { + "accuracy": 0.9311714096624751 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0ecff6e31deb89478ba84a5dc3a59031dc4c7704 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Choose Story Ending_esmt", + "evaluation": { + "accuracy": 0.9549966909331569 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..42e4c5650dd6e7becbb01e1846ed5287011574a3 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Generate Ending_esmt", + "evaluation": { + "accuracy": 0.7405691594970218 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d175315a4a49d7ffbc1c794488f2c4170c72aa3 --- /dev/null +++ 
b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Novel Correct Ending_esmt", + "evaluation": { + "accuracy": 0.9490403706154864 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a92049818a18be1bd1dc53949dd5f5dab08bb5b6 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "es", + "template_name": "Story Continuation and Options_esmt", + "evaluation": { + "accuracy": 0.9523494374586366 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..82077be9a713eba8286c85e8f7a54f405f67493c --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Answer Given options_eumt", + "evaluation": { + "accuracy": 0.7326273990734613 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, 
template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ffff53ee20de1422ddc411de36952d95f5c26df --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Choose Story Ending_eumt", + "evaluation": { + "accuracy": 0.8682991396426207 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..11f291c388061a9a54cebf327d928b0e57b3b414 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Generate Ending_eumt", + "evaluation": { + "accuracy": 0.6293845135671741 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8a03f86ea1eb3fed92e88b02a7d03e38c1a0a2 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Novel Correct Ending_eumt", + "evaluation": { + "accuracy": 0.8305757776307081 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2e3bb84f6e97d3686dfadd1dd1241be67e255127 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "eu", + "template_name": "Story Continuation and Options_eumt", + "evaluation": { + "accuracy": 0.8259430840502978 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9f600887dc434b8634d25cb214fceaeb6670e60e --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Answer Given options_himt", + "evaluation": { + "accuracy": 0.8530774321641297 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d10e8076e863899177eb48671f8f8510760cabc --- 
/dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Choose Story Ending_himt", + "evaluation": { + "accuracy": 0.8914626075446724 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..748d79306b080fe587762245d865188c38858bdb --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Generate Ending_himt", + "evaluation": { + "accuracy": 0.6644606221045665 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..12f20fab7fb8dfe1bed8c7d32adaf44c74a8ea79 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Novel Correct Ending_himt", + "evaluation": { + "accuracy": 0.8821972203838517 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..57a270e97d7e8a4d2a480b22824e9bd101b5c200 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Story Continuation and Options_himt", + "evaluation": { + "accuracy": 0.8735936465916612 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7455cbf4b0e4932b109dabdb981ceca022d78048 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Answer Given options_idmt", + "evaluation": { + "accuracy": 0.8682991396426207 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7ac777f4ceac3494e1523f45ddbe7948803f95df --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Choose Story Ending_idmt", + "evaluation": { + "accuracy": 0.927862342819325 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', 
max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d23d62b87eebcf899b1a5e9822834ce0ab500ae3 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Generate Ending_idmt", + "evaluation": { + "accuracy": 0.6929185969556585 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..349fb09ae9669f153cee1b2fafd8a3454cfcee50 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Novel Correct Ending_idmt", + "evaluation": { + "accuracy": 0.9086697551290536 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..07b42403a63a6e9a0d9cd305f9d626b80f2cb0dd --- /dev/null +++ 
b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "id", + "template_name": "Story Continuation and Options_idmt", + "evaluation": { + "accuracy": 0.9159497021839841 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..622a2b5c90190978f50caa3771721da393dc5e48 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Answer Given options_zhmt", + "evaluation": { + "accuracy": 0.913964262078094 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..74d7933a087ca4d3b33932b27692f9a149c889c7 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Choose Story Ending_zhmt", + "evaluation": { + "accuracy": 0.9238914626075446 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', 
template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c7d07f0bc402a8bc64659f7ad0d662a11563f472 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Generate Ending_zhmt", + "evaluation": { + "accuracy": 0.6843150231634679 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f2cc73ab58a67c8e296e4d941608f9ee4ddceed1 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Novel Correct Ending_zhmt", + "evaluation": { + "accuracy": 0.9252150893448048 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3cccd12bcf6b155e7860a35ac58b67635ec70895 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Story Continuation and Options_zhmt", + "evaluation": { + "accuracy": 0.913302448709464 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', 
max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..22618bc9f7b3bfe018197f506330b9ac0ffcf243 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/Replace_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "Replace_frmt", + "evaluation": { + "accuracy": 0.6626506024096386 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..081254c848e81d983b60c29b734e4e228f88cff5 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "True or False_frmt", + "evaluation": { + "accuracy": 0.4578313253012048 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='True or False_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c1ee4a09d6ceaaba5fa1afaa5d2b69d1682d596 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": 
"Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "does underscore refer to_frmt", + "evaluation": { + "accuracy": 0.5783132530120482 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='does underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf0b9d6d126b11ffd3d6820319d25b229d8cbdb --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "stand for_frmt", + "evaluation": { + "accuracy": 0.5421686746987951 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d4c32dc9b18cbb25f767896325a96dbae2516d8 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "underscore refer to_frmt", + "evaluation": { + "accuracy": 0.6265060240963856 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json 
b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e34759d77b70ee482aade2ca041c153ed8b79e63 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "Replace_ptmt", + "evaluation": { + "accuracy": 0.6273764258555133 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='Replace_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..69542cc44b7cff9aeb10f59d8455b7853479e3d3 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "True or False_ptmt", + "evaluation": { + "accuracy": 0.532319391634981 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='True or False_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..44700ff9b05ead1e066bcce7df9fa17041b6f8f2 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "does underscore refer to_ptmt", + "evaluation": { + "accuracy": 0.596958174904943 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, 
split='test', target_max_length=256, template_config_name='pt', template_name='does underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5fdda9ecd6b979210133cd38a86a29e9aa3bab1f --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "stand for_ptmt", + "evaluation": { + "accuracy": 0.5399239543726235 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='stand for_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e2aaeea1b9fb349a7ff9e70ec10f10dca34a10c --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "underscore refer to_ptmt", + "evaluation": { + "accuracy": 0.623574144486692 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7e97551910c6ec94f2103fe0bb0d23e98453bb09 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "Replace_zhmt", + "evaluation": { + "accuracy": 0.7202380952380952 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3a5fe840ade7d5406e9be3869192447224fe6c64 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "True or False_zhmt", + "evaluation": { + "accuracy": 0.5099206349206349 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..05a41bc50f8a282e2b75434bb1662975be85e479 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "does underscore refer to_zhmt", + "evaluation": { + "accuracy": 0.6746031746031746 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2e81e46585f08d06cfa91467de512a7b59dced33 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", 
+ "dataset_config_name": "zh", + "template_name": "stand for_zhmt", + "evaluation": { + "accuracy": 0.5654761904761905 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d26b81ae8659f7cc0d23004b1dd7982239f52469 --- /dev/null +++ b/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "underscore refer to_zhmt", + "evaluation": { + "accuracy": 0.7638888888888888 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/merged.csv b/evaluation_xcopawinostorymt/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..52bd41e5c505fd7c47e2c1749f8939257d057fec --- /dev/null +++ b/evaluation_xcopawinostorymt/merged.csv @@ -0,0 +1,86 @@ +dataset,prompt,metric,value +xcopa_id,C1 or C2? premise_idmt,accuracy,0.57 +xcopa_id,best_option_idmt,accuracy,0.78 +xcopa_id,cause_effect_idmt,accuracy,0.84 +xcopa_id,i_am_hesitating_idmt,accuracy,0.84 +xcopa_id,plausible_alternatives_idmt,accuracy,0.83 +xcopa_id,median,accuracy,0.83 +xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6 +xcopa_sw,best_option_swmt,accuracy,0.59 +xcopa_sw,cause_effect_swmt,accuracy,0.63 +xcopa_sw,i_am_hesitating_swmt,accuracy,0.67 +xcopa_sw,plausible_alternatives_swmt,accuracy,0.62 +xcopa_sw,median,accuracy,0.62 +xcopa_ta,C1 or C2? premise_tamt,accuracy,0.64 +xcopa_ta,best_option_tamt,accuracy,0.56 +xcopa_ta,cause_effect_tamt,accuracy,0.62 +xcopa_ta,i_am_hesitating_tamt,accuracy,0.64 +xcopa_ta,plausible_alternatives_tamt,accuracy,0.63 +xcopa_ta,median,accuracy,0.63 +xcopa_vi,C1 or C2? premise_vimt,accuracy,0.61 +xcopa_vi,best_option_vimt,accuracy,0.77 +xcopa_vi,cause_effect_vimt,accuracy,0.89 +xcopa_vi,i_am_hesitating_vimt,accuracy,0.85 +xcopa_vi,plausible_alternatives_vimt,accuracy,0.87 +xcopa_vi,median,accuracy,0.85 +xcopa_zh,C1 or C2? 
premise_zhmt,accuracy,0.63 +xcopa_zh,best_option_zhmt,accuracy,0.75 +xcopa_zh,cause_effect_zhmt,accuracy,0.83 +xcopa_zh,i_am_hesitating_zhmt,accuracy,0.84 +xcopa_zh,plausible_alternatives_zhmt,accuracy,0.86 +xcopa_zh,median,accuracy,0.83 +xstory_cloze_ar,Answer Given options_armt,accuracy,0.8941098610191925 +xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.9404367968232958 +xstory_cloze_ar,Generate Ending_armt,accuracy,0.6598279285241562 +xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.9272005294506949 +xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.9172733289212442 +xstory_cloze_ar,median,accuracy,0.9172733289212442 +xstory_cloze_es,Answer Given options_esmt,accuracy,0.9311714096624751 +xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.9549966909331569 +xstory_cloze_es,Generate Ending_esmt,accuracy,0.7405691594970218 +xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.9490403706154864 +xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.9523494374586366 +xstory_cloze_es,median,accuracy,0.9490403706154864 +xstory_cloze_eu,Answer Given options_eumt,accuracy,0.7326273990734613 +xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.8682991396426207 +xstory_cloze_eu,Generate Ending_eumt,accuracy,0.6293845135671741 +xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.8305757776307081 +xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.8259430840502978 +xstory_cloze_eu,median,accuracy,0.8259430840502978 +xstory_cloze_hi,Answer Given options_himt,accuracy,0.8530774321641297 +xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.8914626075446724 +xstory_cloze_hi,Generate Ending_himt,accuracy,0.6644606221045665 +xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.8821972203838517 +xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.8735936465916612 +xstory_cloze_hi,median,accuracy,0.8735936465916612 +xstory_cloze_id,Answer Given options_idmt,accuracy,0.8682991396426207 +xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.927862342819325 +xstory_cloze_id,Generate Ending_idmt,accuracy,0.6929185969556585 +xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.9086697551290536 +xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.9159497021839841 +xstory_cloze_id,median,accuracy,0.9086697551290536 +xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.913964262078094 +xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.9238914626075446 +xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6843150231634679 +xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.9252150893448048 +xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.913302448709464 +xstory_cloze_zh,median,accuracy,0.913964262078094 +xwinograd_fr,Replace_frmt,accuracy,0.6626506024096386 +xwinograd_fr,True or False_frmt,accuracy,0.4578313253012048 +xwinograd_fr,does underscore refer to_frmt,accuracy,0.5783132530120482 +xwinograd_fr,stand for_frmt,accuracy,0.5421686746987951 +xwinograd_fr,underscore refer to_frmt,accuracy,0.6265060240963856 +xwinograd_fr,median,accuracy,0.5783132530120482 +xwinograd_pt,Replace_ptmt,accuracy,0.6273764258555133 +xwinograd_pt,True or False_ptmt,accuracy,0.532319391634981 +xwinograd_pt,does underscore refer to_ptmt,accuracy,0.596958174904943 +xwinograd_pt,stand for_ptmt,accuracy,0.5399239543726235 +xwinograd_pt,underscore refer to_ptmt,accuracy,0.623574144486692 +xwinograd_pt,median,accuracy,0.596958174904943 +xwinograd_zh,Replace_zhmt,accuracy,0.7202380952380952 +xwinograd_zh,True or False_zhmt,accuracy,0.5099206349206349 +xwinograd_zh,does 
underscore refer to_zhmt,accuracy,0.6746031746031746 +xwinograd_zh,stand for_zhmt,accuracy,0.5654761904761905 +xwinograd_zh,underscore refer to_zhmt,accuracy,0.7638888888888888 +xwinograd_zh,median,accuracy,0.6746031746031746 +multiple,average,multiple,0.7855970749932859 diff --git a/evaluation_xcopawinostorymt/merged.json b/evaluation_xcopawinostorymt/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..6ba79edf768cf12f26f0a9792c6b293b2ffeffc7 --- /dev/null +++ b/evaluation_xcopawinostorymt/merged.json @@ -0,0 +1 @@ +{"Muennighoff/xstory_cloze_ar": {"Answer Given options_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8941098610191925}, "template_name": "Answer Given options_armt"}, "Choose Story Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9404367968232958}, "template_name": "Choose Story Ending_armt"}, "Generate Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6598279285241562}, "template_name": "Generate Ending_armt"}, "Novel Correct Ending_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9272005294506949}, "template_name": "Novel Correct Ending_armt"}, "Story Continuation and Options_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9172733289212442}, "template_name": "Story Continuation and Options_armt"}}, "Muennighoff/xstory_cloze_es": {"Answer Given options_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9311714096624751}, "template_name": "Answer Given options_esmt"}, "Choose Story Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9549966909331569}, "template_name": "Choose Story Ending_esmt"}, "Generate Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7405691594970218}, "template_name": "Generate Ending_esmt"}, "Novel Correct Ending_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9490403706154864}, "template_name": "Novel Correct Ending_esmt"}, "Story Continuation and Options_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9523494374586366}, "template_name": "Story Continuation and Options_esmt"}}, "Muennighoff/xstory_cloze_eu": {"Answer Given options_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.7326273990734613}, "template_name": "Answer Given options_eumt"}, "Choose Story Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8682991396426207}, "template_name": "Choose Story Ending_eumt"}, "Generate Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6293845135671741}, "template_name": "Generate Ending_eumt"}, "Novel Correct Ending_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8305757776307081}, "template_name": "Novel Correct Ending_eumt"}, "Story Continuation and Options_eumt": {"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "eu", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8259430840502978}, "template_name": "Story Continuation and Options_eumt"}}, "Muennighoff/xstory_cloze_hi": {"Answer Given options_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8530774321641297}, "template_name": "Answer Given options_himt"}, "Choose Story Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8914626075446724}, "template_name": "Choose Story Ending_himt"}, "Generate Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6644606221045665}, "template_name": "Generate Ending_himt"}, "Novel Correct Ending_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8821972203838517}, "template_name": "Novel Correct Ending_himt"}, "Story Continuation and Options_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8735936465916612}, "template_name": "Story Continuation and Options_himt"}}, "Muennighoff/xstory_cloze_id": {"Answer Given options_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.8682991396426207}, "template_name": "Answer Given options_idmt"}, "Choose Story Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.927862342819325}, "template_name": "Choose Story Ending_idmt"}, "Generate Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6929185969556585}, "template_name": "Generate Ending_idmt"}, "Novel Correct Ending_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9086697551290536}, "template_name": "Novel Correct Ending_idmt"}, "Story Continuation and Options_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9159497021839841}, "template_name": "Story Continuation and Options_idmt"}}, "Muennighoff/xstory_cloze_zh": {"Answer Given options_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.913964262078094}, "template_name": "Answer Given options_zhmt"}, "Choose Story Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9238914626075446}, "template_name": "Choose Story Ending_zhmt"}, "Generate Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.6843150231634679}, "template_name": "Generate Ending_zhmt"}, "Novel Correct Ending_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.9252150893448048}, "template_name": "Novel Correct Ending_zhmt"}, "Story Continuation and Options_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xstory_cloze", "evaluation": {"accuracy": 0.913302448709464}, "template_name": "Story Continuation and Options_zhmt"}}, "Muennighoff/xwinograd_fr": {"Replace_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6626506024096386}, "template_name": "Replace_frmt"}, "True or False_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='True or False_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.4578313253012048}, "template_name": "True or False_frmt"}, "does underscore refer to_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='does underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5783132530120482}, "template_name": "does underscore refer to_frmt"}, "stand for_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5421686746987951}, "template_name": "stand for_frmt"}, "underscore refer to_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6265060240963856}, "template_name": "underscore refer to_frmt"}}, "Muennighoff/xwinograd_pt": {"Replace_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='Replace_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6273764258555133}, "template_name": "Replace_ptmt"}, "True or False_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='True or False_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.532319391634981}, "template_name": "True or False_ptmt"}, "does underscore refer to_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='does underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.596958174904943}, "template_name": "does underscore refer to_ptmt"}, "stand for_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='stand for_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5399239543726235}, "template_name": "stand for_ptmt"}, "underscore refer to_ptmt": {"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "pt", 
"dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.623574144486692}, "template_name": "underscore refer to_ptmt"}}, "Muennighoff/xwinograd_zh": {"Replace_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.7202380952380952}, "template_name": "Replace_zhmt"}, "True or False_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5099206349206349}, "template_name": "True or False_zhmt"}, "does underscore refer to_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.6746031746031746}, "template_name": "does underscore refer to_zhmt"}, "stand for_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.5654761904761905}, "template_name": "stand for_zhmt"}, "underscore refer to_zhmt": {"arguments": "Namespace(config_name=None, 
dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "Muennighoff/xwinograd", "evaluation": {"accuracy": 0.7638888888888888}, "template_name": "underscore refer to_zhmt"}}, "xcopa_id": {"C1 or C2? premise_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='C1 or C2? premise_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.57}, "template_name": "C1 or C2? premise_idmt"}, "best_option_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='best_option_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.78}, "template_name": "best_option_idmt"}, "cause_effect_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='cause_effect_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "cause_effect_idmt"}, "i_am_hesitating_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='i_am_hesitating_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "i_am_hesitating_idmt"}, "plausible_alternatives_idmt": {"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='plausible_alternatives_idmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "id", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.83}, "template_name": "plausible_alternatives_idmt"}}, "xcopa_sw": {"C1 or C2? premise_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='C1 or C2? premise_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.6}, "template_name": "C1 or C2? 
premise_swmt"}, "best_option_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='best_option_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.59}, "template_name": "best_option_swmt"}, "cause_effect_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='cause_effect_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "cause_effect_swmt"}, "i_am_hesitating_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='i_am_hesitating_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.67}, "template_name": "i_am_hesitating_swmt"}, "plausible_alternatives_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='plausible_alternatives_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "plausible_alternatives_swmt"}}, "xcopa_ta": {"C1 or C2? 
premise_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='C1 or C2? premise_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "C1 or C2? premise_tamt"}, "best_option_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='best_option_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.56}, "template_name": "best_option_tamt"}, "cause_effect_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='cause_effect_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.62}, "template_name": "cause_effect_tamt"}, "i_am_hesitating_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='i_am_hesitating_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.64}, "template_name": "i_am_hesitating_tamt"}, "plausible_alternatives_tamt": {"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='plausible_alternatives_tamt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ta", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "plausible_alternatives_tamt"}}, "xcopa_vi": {"C1 or C2? premise_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='C1 or C2? premise_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.61}, "template_name": "C1 or C2? premise_vimt"}, "best_option_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='best_option_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.77}, "template_name": "best_option_vimt"}, "cause_effect_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='cause_effect_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.89}, "template_name": "cause_effect_vimt"}, "i_am_hesitating_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='i_am_hesitating_vimt', tokenizer_name=None, use_slow_tokenizer=False)", 
"dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.85}, "template_name": "i_am_hesitating_vimt"}, "plausible_alternatives_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='plausible_alternatives_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.87}, "template_name": "plausible_alternatives_vimt"}}, "xcopa_zh": {"C1 or C2? premise_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.63}, "template_name": "C1 or C2? premise_zhmt"}, "best_option_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.75}, "template_name": "best_option_zhmt"}, "cause_effect_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.83}, "template_name": "cause_effect_zhmt"}, "i_am_hesitating_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.84}, "template_name": "i_am_hesitating_zhmt"}, "plausible_alternatives_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xcopa", "evaluation": {"accuracy": 0.86}, "template_name": "plausible_alternatives_zhmt"}}} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a3b159b6975c5a2a3ba1704d4a114cff18e67241 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "C1 or C2? premise_idmt", + "evaluation": { + "accuracy": 0.57 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='C1 or C2? 
premise_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json b/evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..855f53dfb43806d722065ba2cfa8ca448756a2cb --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "best_option_idmt", + "evaluation": { + "accuracy": 0.78 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='best_option_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json b/evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c68da0c2acb059fa1add54ef45470d452c3e9c07 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "cause_effect_idmt", + "evaluation": { + "accuracy": 0.84 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='cause_effect_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bc354e5e99e62eebea0b523de3823573281b5791 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "i_am_hesitating_idmt", + "evaluation": { + "accuracy": 0.84 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='i_am_hesitating_idmt', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..56547788a32feb5d2b997d3590f637dae6eb98dc --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "plausible_alternatives_idmt", + "evaluation": { + "accuracy": 0.83 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='plausible_alternatives_idmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ece2dd50d4d67e426e5e519747b1841c335c8c2c --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "C1 or C2? premise_swmt", + "evaluation": { + "accuracy": 0.6 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='C1 or C2? 
premise_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json b/evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd7fd8221b9ed98ae38d201ec20eb69fd81e899 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "best_option_swmt", + "evaluation": { + "accuracy": 0.59 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='best_option_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb58372c78dd53c849b27c08b98c3e9c6d8eaad --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "cause_effect_swmt", + "evaluation": { + "accuracy": 0.63 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='cause_effect_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e803205adcfefba9f30d43af3290e2004e39e3aa --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "i_am_hesitating_swmt", + "evaluation": { + "accuracy": 0.67 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='i_am_hesitating_swmt', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..51f2499b03c23ca354dfe8be026a5d87885a7929 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "sw", + "template_name": "plausible_alternatives_swmt", + "evaluation": { + "accuracy": 0.62 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='plausible_alternatives_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f3ef16661ff501e41d7fb1a38c1aa3bb5289d109 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "C1 or C2? premise_tamt", + "evaluation": { + "accuracy": 0.64 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='C1 or C2? 
premise_tamt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json b/evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..947f7537af74f40d992e21e57b4e0b3031461954 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "best_option_tamt", + "evaluation": { + "accuracy": 0.56 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='best_option_tamt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8b22c4ea50f2cbf069ce00e68da7ce949e4b60a3 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "cause_effect_tamt", + "evaluation": { + "accuracy": 0.62 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='cause_effect_tamt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a246afec199aa339abb27753067bd74ff6298781 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "i_am_hesitating_tamt", + "evaluation": { + "accuracy": 0.64 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='i_am_hesitating_tamt', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..010f33175cc3862712980ab18b2b8cacda387d00 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "ta", + "template_name": "plausible_alternatives_tamt", + "evaluation": { + "accuracy": 0.63 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='plausible_alternatives_tamt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a11e4478e8d0d68f2b1da5f504c2412c75fdba56 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "C1 or C2? premise_vimt", + "evaluation": { + "accuracy": 0.61 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='C1 or C2? 
premise_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json b/evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a4969d11187ca6c988435275f85d713f14da8ba7 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "best_option_vimt", + "evaluation": { + "accuracy": 0.77 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='best_option_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f3e2a8ef414dda19185126456aba80cc3c9dd64 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "cause_effect_vimt", + "evaluation": { + "accuracy": 0.89 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='cause_effect_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c4786bd77a9ec21e35c1b62f2ec5a5a548975afe --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "i_am_hesitating_vimt", + "evaluation": { + "accuracy": 0.85 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='i_am_hesitating_vimt', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..45f092dd4589031c1a20671997c0f94b75e80371 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "vi", + "template_name": "plausible_alternatives_vimt", + "evaluation": { + "accuracy": 0.87 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='plausible_alternatives_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6fa79011c4d9ffa2bebb87d9c24606bc43b1d35d --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "C1 or C2? premise_zhmt", + "evaluation": { + "accuracy": 0.63 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? 
premise_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json b/evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b69455120fb83b58f3f2d20dc0a6de9499b8c25 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "best_option_zhmt", + "evaluation": { + "accuracy": 0.75 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0d45d6dec2e02a88c4efae655983cf388b732b83 --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "cause_effect_zhmt", + "evaluation": { + "accuracy": 0.83 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7f0f51c3e84c028612690851eafd25fa8693114c --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "i_am_hesitating_zhmt", + "evaluation": { + "accuracy": 0.84 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhmt', 
tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e0ebbeb1c92528c677e0c3041a5520d00eb246a --- /dev/null +++ b/evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "plausible_alternatives_zhmt", + "evaluation": { + "accuracy": 0.86 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json b/evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1151e88244939e66270073875e6352cbcb194777 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ar/GPT-3_style_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "GPT-3 style_arht", + "evaluation": { + "accuracy": 0.40441767068273093 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json b/evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3e50548b0ae0c7bda585ddb4ea99660d84f1f86a --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ar/MNLI_crowdsource_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "MNLI crowdsource_arht", + "evaluation": { + "accuracy": 0.43012048192771085 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI 
crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json b/evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0d4880853137e81090cda44fc001e9425693628e --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ar/can_we_infer_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "can we infer_arht", + "evaluation": { + "accuracy": 0.3610441767068273 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json b/evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..43aade15d507b7981f09fef192ecbd4de817e416 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ar/guaranteed_possible_impossible_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "guaranteed/possible/impossible_arht", + "evaluation": { + "accuracy": 0.3642570281124498 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json b/evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7673b1d800b7e181915339d05873c3f4f9046b7 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ar/justified_in_saying_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "justified in saying_arht", + "evaluation": { + "accuracy": 0.37309236947791163 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', 
target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json b/evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c0ab7199857150c8bdbf32568948c735ac767b6d --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/es/GPT-3_style_esht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "GPT-3 style_esht", + "evaluation": { + "accuracy": 0.5698795180722892 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json b/evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c71d79218ce58a8e449580613f75a4c2e19e1e8e --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/es/MNLI_crowdsource_esht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "MNLI crowdsource_esht", + "evaluation": { + "accuracy": 0.342570281124498 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json b/evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e42be28ecd114e6f6f4d88689f566e7a5aab611 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/es/can_we_infer_esht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "can we infer_esht", + "evaluation": { + "accuracy": 0.46546184738955826 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, 
template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json b/evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0c99dceb99ba9331895c687b2c4e5f3a7f92578e --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/es/guaranteed_possible_impossible_esht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "guaranteed/possible/impossible_esht", + "evaluation": { + "accuracy": 0.5526104417670683 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json b/evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb77a7a139dd8b9c24ea015e0d02823933a7e16a --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/es/justified_in_saying_esht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "justified in saying_esht", + "evaluation": { + "accuracy": 0.4321285140562249 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json b/evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b13c026436426907cc26ba42d716088ce4b59b4 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/fr/GPT-3_style_frht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "GPT-3 style_frht", + "evaluation": { + "accuracy": 0.4995983935742972 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json b/evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7d7e02d1a9f66af72a883aa01d74e2dde9197e98 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/fr/MNLI_crowdsource_frht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "MNLI crowdsource_frht", + "evaluation": { + "accuracy": 0.4004016064257028 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json b/evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a407e7ae562371e6260c90b1ea0a3c9107aa37f7 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/fr/can_we_infer_frht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "can we infer_frht", + "evaluation": { + "accuracy": 0.5694779116465863 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json b/evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..aa655ebc807cb20ba71577ff3194250f5616a1f3 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/fr/guaranteed_possible_impossible_frht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "guaranteed/possible/impossible_frht", + "evaluation": { + "accuracy": 0.5152610441767068 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json b/evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..014ade2a465f303693ffe5cab94fbdd643774842 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/fr/justified_in_saying_frht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "justified in saying_frht", + "evaluation": { + "accuracy": 0.5493975903614458 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json b/evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a4e92a96761a9a536d4bb8a5e4ba4f4061b7f539 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/hi/GPT-3_style_hiht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "GPT-3 style_hiht", + "evaluation": { + "accuracy": 0.44417670682730925 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json b/evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8efef72cd5d3fc77ef9b4366770aaae329a5fb7a --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/hi/MNLI_crowdsource_hiht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "MNLI crowdsource_hiht", + "evaluation": { + "accuracy": 0.5236947791164659 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json b/evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..778e69f3893a372b47a6fbdc28161f775335d96c --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/hi/can_we_infer_hiht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "can we infer_hiht", + "evaluation": { + "accuracy": 0.4963855421686747 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json b/evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1ebc1461a52c3cde14887dca4097c766c74b2fbb --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/hi/guaranteed_possible_impossible_hiht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "guaranteed/possible/impossible_hiht", + "evaluation": { + "accuracy": 0.4493975903614458 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json b/evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d4e84c9aaefa0f61a8a742fc3d8391391085d071 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/hi/justified_in_saying_hiht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "justified in saying_hiht", + "evaluation": { + "accuracy": 0.4963855421686747 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', 
dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/merged.csv b/evaluation_xnlihtmt/xnliht/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..38974d265a2b337eb24fd64b0a27d6af10f7d5c6 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/merged.csv @@ -0,0 +1,50 @@ +dataset,prompt,metric,value +xnli_ar,GPT-3 style_arht,accuracy,0.40441767068273093 +xnli_ar,MNLI crowdsource_arht,accuracy,0.43012048192771085 +xnli_ar,can we infer_arht,accuracy,0.3610441767068273 +xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.3642570281124498 +xnli_ar,justified in saying_arht,accuracy,0.37309236947791163 +xnli_ar,median,accuracy,0.37309236947791163 +xnli_es,GPT-3 style_esht,accuracy,0.5698795180722892 +xnli_es,MNLI crowdsource_esht,accuracy,0.342570281124498 +xnli_es,can we infer_esht,accuracy,0.46546184738955826 +xnli_es,guaranteed/possible/impossible_esht,accuracy,0.5526104417670683 +xnli_es,justified in saying_esht,accuracy,0.4321285140562249 +xnli_es,median,accuracy,0.46546184738955826 +xnli_fr,GPT-3 style_frht,accuracy,0.4995983935742972 +xnli_fr,MNLI crowdsource_frht,accuracy,0.4004016064257028 +xnli_fr,can we infer_frht,accuracy,0.5694779116465863 +xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.5152610441767068 +xnli_fr,justified in saying_frht,accuracy,0.5493975903614458 +xnli_fr,median,accuracy,0.5152610441767068 +xnli_hi,GPT-3 style_hiht,accuracy,0.44417670682730925 +xnli_hi,MNLI crowdsource_hiht,accuracy,0.5236947791164659 +xnli_hi,can we infer_hiht,accuracy,0.4963855421686747 +xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.4493975903614458 +xnli_hi,justified in saying_hiht,accuracy,0.4963855421686747 +xnli_hi,median,accuracy,0.4963855421686747 +xnli_sw,GPT-3 style_swht,accuracy,0.39397590361445783 +xnli_sw,MNLI crowdsource_swht,accuracy,0.3329317269076305 +xnli_sw,can we infer_swht,accuracy,0.4285140562248996 +xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.38433734939759034 +xnli_sw,justified in saying_swht,accuracy,0.41967871485943775 +xnli_sw,median,accuracy,0.39397590361445783 +xnli_ur,GPT-3 style_urht,accuracy,0.463855421686747 +xnli_ur,MNLI crowdsource_urht,accuracy,0.40441767068273093 +xnli_ur,can we infer_urht,accuracy,0.3895582329317269 +xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.3405622489959839 +xnli_ur,justified in saying_urht,accuracy,0.43293172690763054 +xnli_ur,median,accuracy,0.40441767068273093 +xnli_vi,GPT-3 style_viht,accuracy,0.5261044176706827 +xnli_vi,MNLI crowdsource_viht,accuracy,0.39879518072289155 +xnli_vi,can we infer_viht,accuracy,0.5481927710843374 +xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.43694779116465865 +xnli_vi,justified in saying_viht,accuracy,0.46546184738955826 +xnli_vi,median,accuracy,0.46546184738955826 +xnli_zh,GPT-3 style_zhht,accuracy,0.36947791164658633 +xnli_zh,MNLI crowdsource_zhht,accuracy,0.3457831325301205 +xnli_zh,can we infer_zhht,accuracy,0.3441767068273092 
+xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.4923694779116466 +xnli_zh,justified in saying_zhht,accuracy,0.3927710843373494 +xnli_zh,median,accuracy,0.36947791164658633 +multiple,average,multiple,0.4354417670682731 diff --git a/evaluation_xnlihtmt/xnliht/merged.json b/evaluation_xnlihtmt/xnliht/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..2d843c2753dd0cbff8718e6589bb67fa198770c6 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/merged.json @@ -0,0 +1 @@ +{"xnli_ar": {"GPT-3 style_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40441767068273093}, "template_name": "GPT-3 style_arht"}, "MNLI crowdsource_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43012048192771085}, "template_name": "MNLI crowdsource_arht"}, "can we infer_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3610441767068273}, "template_name": "can we infer_arht"}, "guaranteed/possible/impossible_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', 
tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3642570281124498}, "template_name": "guaranteed/possible/impossible_arht"}, "justified in saying_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37309236947791163}, "template_name": "justified in saying_arht"}}, "xnli_es": {"GPT-3 style_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5698795180722892}, "template_name": "GPT-3 style_esht"}, "MNLI crowdsource_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.342570281124498}, "template_name": "MNLI crowdsource_esht"}, "can we infer_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.46546184738955826}, "template_name": "can we infer_esht"}, "guaranteed/possible/impossible_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', 
dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5526104417670683}, "template_name": "guaranteed/possible/impossible_esht"}, "justified in saying_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4321285140562249}, "template_name": "justified in saying_esht"}}, "xnli_fr": {"GPT-3 style_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4995983935742972}, "template_name": "GPT-3 style_frht"}, "MNLI crowdsource_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4004016064257028}, "template_name": "MNLI crowdsource_frht"}, "can we infer_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5694779116465863}, "template_name": "can we infer_frht"}, "guaranteed/possible/impossible_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5152610441767068}, "template_name": "guaranteed/possible/impossible_frht"}, "justified in saying_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5493975903614458}, "template_name": "justified in saying_frht"}}, "xnli_hi": {"GPT-3 style_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44417670682730925}, "template_name": "GPT-3 style_hiht"}, "MNLI crowdsource_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', 
template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5236947791164659}, "template_name": "MNLI crowdsource_hiht"}, "can we infer_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4963855421686747}, "template_name": "can we infer_hiht"}, "guaranteed/possible/impossible_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4493975903614458}, "template_name": "guaranteed/possible/impossible_hiht"}, "justified in saying_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4963855421686747}, "template_name": "justified in saying_hiht"}}, "xnli_sw": {"GPT-3 style_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39397590361445783}, "template_name": "GPT-3 style_swht"}, "MNLI crowdsource_swht": {"arguments": 
"Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3329317269076305}, "template_name": "MNLI crowdsource_swht"}, "can we infer_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4285140562248996}, "template_name": "can we infer_swht"}, "guaranteed/possible/impossible_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.38433734939759034}, "template_name": "guaranteed/possible/impossible_swht"}, "justified in saying_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.41967871485943775}, "template_name": "justified in saying_swht"}}, "xnli_ur": {"GPT-3 style_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.463855421686747}, "template_name": "GPT-3 style_urht"}, "MNLI crowdsource_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40441767068273093}, "template_name": "MNLI crowdsource_urht"}, "can we infer_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3895582329317269}, "template_name": "can we infer_urht"}, "guaranteed/possible/impossible_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3405622489959839}, "template_name": "guaranteed/possible/impossible_urht"}, "justified in saying_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified 
in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43293172690763054}, "template_name": "justified in saying_urht"}}, "xnli_vi": {"GPT-3 style_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5261044176706827}, "template_name": "GPT-3 style_viht"}, "MNLI crowdsource_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39879518072289155}, "template_name": "MNLI crowdsource_viht"}, "can we infer_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5481927710843374}, "template_name": "can we infer_viht"}, "guaranteed/possible/impossible_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43694779116465865}, "template_name": "guaranteed/possible/impossible_viht"}, "justified in saying_viht": {"arguments": "Namespace(config_name=None, 
dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.46546184738955826}, "template_name": "justified in saying_viht"}}, "xnli_zh": {"GPT-3 style_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.36947791164658633}, "template_name": "GPT-3 style_zhht"}, "MNLI crowdsource_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3457831325301205}, "template_name": "MNLI crowdsource_zhht"}, "can we infer_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3441767068273092}, "template_name": "can we infer_zhht"}, "guaranteed/possible/impossible_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4923694779116466}, "template_name": "guaranteed/possible/impossible_zhht"}, "justified in saying_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3927710843373494}, "template_name": "justified in saying_zhht"}}} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json b/evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7b7b923b78fb24fbccb524ad75690014a17cac7 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/sw/GPT-3_style_swht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "GPT-3 style_swht", + "evaluation": { + "accuracy": 0.39397590361445783 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json b/evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..de9fd9c9a0227ffb16da0b5303649cad3dfccb9f --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/sw/MNLI_crowdsource_swht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "MNLI crowdsource_swht", + "evaluation": { + "accuracy": 0.3329317269076305 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, 
split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json b/evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac81892cb9c74f6107dafad135338f6d9683507d --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/sw/can_we_infer_swht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "can we infer_swht", + "evaluation": { + "accuracy": 0.4285140562248996 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json b/evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9ba9a00506c2e6fc4a16f3e21fd815d44dbca4a5 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/sw/guaranteed_possible_impossible_swht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "guaranteed/possible/impossible_swht", + "evaluation": { + "accuracy": 0.38433734939759034 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json b/evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5b3e944be5fd9d9d99fb1b9a48e933a0837aa80f --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/sw/justified_in_saying_swht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "justified in saying_swht", + "evaluation": { + "accuracy": 0.41967871485943775 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', 
pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json b/evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2a8b24bdd46252fb9a65c95158e410ba06d95808 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ur/GPT-3_style_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "GPT-3 style_urht", + "evaluation": { + "accuracy": 0.463855421686747 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json b/evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..daec90e79a511836f4cb80c164414151fd805e8d --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ur/MNLI_crowdsource_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "MNLI crowdsource_urht", + "evaluation": { + "accuracy": 0.40441767068273093 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json b/evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0b83201fc43fdb6641cc4783c7008f87aed55bad --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ur/can_we_infer_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "can we infer_urht", + "evaluation": { + "accuracy": 0.3895582329317269 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5c63d51d73fa542f757e07787912f164ca05d995 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ur/guaranteed_possible_impossible_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "guaranteed/possible/impossible_urht", + "evaluation": { + "accuracy": 0.3405622489959839 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json b/evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..bdd9008231968eb2e6743a6a45934a07906fad79 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/ur/justified_in_saying_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "justified in saying_urht", + "evaluation": { + "accuracy": 0.43293172690763054 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json b/evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4e01f9fb887fe7de2f4fe46d01345fd92a6d510 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/vi/GPT-3_style_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "GPT-3 style_viht", + "evaluation": { + "accuracy": 0.5261044176706827 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json b/evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8520acbf68d09bc928c0dfb7b737a41e17594de3 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/vi/MNLI_crowdsource_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "MNLI crowdsource_viht", + "evaluation": { + "accuracy": 0.39879518072289155 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json b/evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7b0bdaec60efc2784c288f22715d50e346280108 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/vi/can_we_infer_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "can we infer_viht", + "evaluation": { + "accuracy": 0.5481927710843374 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json b/evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..097d91dcde196e9d9496f46fa4581108a8bd4494 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/vi/guaranteed_possible_impossible_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "guaranteed/possible/impossible_viht", + "evaluation": { + "accuracy": 0.43694779116465865 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json b/evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5efd3a1377077b27e6da271aae29d9ef8e3b883c --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/vi/justified_in_saying_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "justified in saying_viht", + "evaluation": { + "accuracy": 0.46546184738955826 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json b/evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..553709670d2c50865f09e85fa119e026995287ea --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/zh/GPT-3_style_zhht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "GPT-3 style_zhht", + "evaluation": { + "accuracy": 0.36947791164658633 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json b/evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..efa8c119c81082c69eba765ada8008d2f34e146e --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/zh/MNLI_crowdsource_zhht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "MNLI crowdsource_zhht", + "evaluation": { + "accuracy": 0.3457831325301205 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', 
max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json b/evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f8cbc3083571d601c942d7fc15d93640c6c7eaa8 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/zh/can_we_infer_zhht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "can we infer_zhht", + "evaluation": { + "accuracy": 0.3441767068273092 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json b/evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a1e72172e6d797ddefbbd510348113cc78ac67a2 --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/zh/guaranteed_possible_impossible_zhht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "guaranteed/possible/impossible_zhht", + "evaluation": { + "accuracy": 0.4923694779116466 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json b/evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0ba1b45a09e30711abbcfd3ff8c6bf9ae068fd --- /dev/null +++ b/evaluation_xnlihtmt/xnliht/zh/justified_in_saying_zhht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "justified in saying_zhht", + "evaluation": { + "accuracy": 0.3927710843373494 + }, + "arguments": "Namespace(config_name=None, 
dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json b/evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d3df8a608a2418831da7a727daaf768fc29d643a --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ar/GPT-3_style_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "GPT-3 style_armt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json b/evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..484fea8074d4096940c1cf3e4cb14838e1ba76f8 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ar/MNLI_crowdsource_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "MNLI crowdsource_armt", + "evaluation": { + "accuracy": 0.4542168674698795 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json b/evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e9b7ea9ed0effc3aa4313b68b2bdebbd1a48c5a6 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ar/can_we_infer_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "can we infer_armt", + "evaluation": { + "accuracy": 0.41967871485943775 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', 
dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json b/evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c8ceb85c15b119101189af34b5928207747a25b --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ar/guaranteed_possible_impossible_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "guaranteed/possible/impossible_armt", + "evaluation": { + "accuracy": 0.3795180722891566 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json b/evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f770884c9af2373782a2991969239d8e9ac36957 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ar/justified_in_saying_armt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "justified in saying_armt", + "evaluation": { + "accuracy": 0.44016064257028115 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_armt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json b/evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb0db17b3f51d113c8094890d8e3387427676096 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/es/GPT-3_style_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "GPT-3 style_esmt", + "evaluation": { + "accuracy": 0.5381526104417671 + }, + 
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json b/evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c0297e49e925ab677bb63ea7548be8f16ca193e0 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/es/MNLI_crowdsource_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "MNLI crowdsource_esmt", + "evaluation": { + "accuracy": 0.4951807228915663 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json b/evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..16f5ae1d0c3637385fc3b3ac9109610bcab41caf --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/es/can_we_infer_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "can we infer_esmt", + "evaluation": { + "accuracy": 0.4951807228915663 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json b/evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7de5aec8f541506b3bd55756b31ee8e84edd71de --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/es/guaranteed_possible_impossible_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "guaranteed/possible/impossible_esmt", + "evaluation": { + 
"accuracy": 0.3349397590361446 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json b/evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..026c7c34328b7831028b3709d9a8918c87d9cbb6 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/es/justified_in_saying_esmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "justified in saying_esmt", + "evaluation": { + "accuracy": 0.4955823293172691 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json b/evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f7274d952c0de2c942814fd124fdbaca283485ff --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/fr/GPT-3_style_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "GPT-3 style_frmt", + "evaluation": { + "accuracy": 0.4746987951807229 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json b/evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4e2e6908b8e98c832eae2b7c01606c6a7e4fbe3e --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/fr/MNLI_crowdsource_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "MNLI crowdsource_frmt", + "evaluation": 
{ + "accuracy": 0.3538152610441767 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json b/evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..747591c5d05092765d34ce831b80c8247c686b4d --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/fr/can_we_infer_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "can we infer_frmt", + "evaluation": { + "accuracy": 0.5481927710843374 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json b/evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e68c0401faae36b7d5dd65ad7f70ff83b2017a99 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/fr/guaranteed_possible_impossible_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", + "template_name": "guaranteed/possible/impossible_frmt", + "evaluation": { + "accuracy": 0.5200803212851406 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json b/evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e090b6a7eb4dd90f5841a558b077302d3215dc06 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/fr/justified_in_saying_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "fr", 
+ "template_name": "justified in saying_frmt", + "evaluation": { + "accuracy": 0.5317269076305221 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json b/evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6d2f4a90d39fd21e0d000fd2bddde19b805d14b7 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/hi/GPT-3_style_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "GPT-3 style_himt", + "evaluation": { + "accuracy": 0.43734939759036146 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json b/evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..abe331491e50afd38a294d6ba0a85fe7938d63ed --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/hi/MNLI_crowdsource_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "MNLI crowdsource_himt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json b/evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..15a23fa63fdb4337027f4313554e5ea759f1b6ff --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/hi/can_we_infer_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "can we 
infer_himt", + "evaluation": { + "accuracy": 0.4795180722891566 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json b/evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ed3e0ab27f59a22926fd7a9183629f8a4874116f --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/hi/guaranteed_possible_impossible_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "guaranteed/possible/impossible_himt", + "evaluation": { + "accuracy": 0.44136546184738956 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json b/evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f63b529de724ddad08f41a3e982301790a159f8 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/hi/justified_in_saying_himt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "hi", + "template_name": "justified in saying_himt", + "evaluation": { + "accuracy": 0.4931726907630522 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_himt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/merged.csv b/evaluation_xnlihtmt/xnlimt/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..75c13d2a1e9948b86e1e443ac0845ada11e93b59 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/merged.csv @@ -0,0 +1,50 @@ +dataset,prompt,metric,value +xnli_ar,GPT-3 style_armt,accuracy,0.3333333333333333 
+xnli_ar,MNLI crowdsource_armt,accuracy,0.4542168674698795 +xnli_ar,can we infer_armt,accuracy,0.41967871485943775 +xnli_ar,guaranteed/possible/impossible_armt,accuracy,0.3795180722891566 +xnli_ar,justified in saying_armt,accuracy,0.44016064257028115 +xnli_ar,median,accuracy,0.41967871485943775 +xnli_es,GPT-3 style_esmt,accuracy,0.5381526104417671 +xnli_es,MNLI crowdsource_esmt,accuracy,0.4951807228915663 +xnli_es,can we infer_esmt,accuracy,0.4951807228915663 +xnli_es,guaranteed/possible/impossible_esmt,accuracy,0.3349397590361446 +xnli_es,justified in saying_esmt,accuracy,0.4955823293172691 +xnli_es,median,accuracy,0.4951807228915663 +xnli_fr,GPT-3 style_frmt,accuracy,0.4746987951807229 +xnli_fr,MNLI crowdsource_frmt,accuracy,0.3538152610441767 +xnli_fr,can we infer_frmt,accuracy,0.5481927710843374 +xnli_fr,guaranteed/possible/impossible_frmt,accuracy,0.5200803212851406 +xnli_fr,justified in saying_frmt,accuracy,0.5317269076305221 +xnli_fr,median,accuracy,0.5200803212851406 +xnli_hi,GPT-3 style_himt,accuracy,0.43734939759036146 +xnli_hi,MNLI crowdsource_himt,accuracy,0.3333333333333333 +xnli_hi,can we infer_himt,accuracy,0.4795180722891566 +xnli_hi,guaranteed/possible/impossible_himt,accuracy,0.44136546184738956 +xnli_hi,justified in saying_himt,accuracy,0.4931726907630522 +xnli_hi,median,accuracy,0.44136546184738956 +xnli_sw,GPT-3 style_swmt,accuracy,0.3357429718875502 +xnli_sw,MNLI crowdsource_swmt,accuracy,0.3353413654618474 +xnli_sw,can we infer_swmt,accuracy,0.3682730923694779 +xnli_sw,guaranteed/possible/impossible_swmt,accuracy,0.351004016064257 +xnli_sw,justified in saying_swmt,accuracy,0.36305220883534134 +xnli_sw,median,accuracy,0.351004016064257 +xnli_ur,GPT-3 style_urmt,accuracy,0.3586345381526104 +xnli_ur,MNLI crowdsource_urmt,accuracy,0.3369477911646586 +xnli_ur,can we infer_urmt,accuracy,0.351004016064257 +xnli_ur,guaranteed/possible/impossible_urmt,accuracy,0.3337349397590361 +xnli_ur,justified in saying_urmt,accuracy,0.3381526104417671 +xnli_ur,median,accuracy,0.3381526104417671 +xnli_vi,GPT-3 style_vimt,accuracy,0.3333333333333333 +xnli_vi,MNLI crowdsource_vimt,accuracy,0.3333333333333333 +xnli_vi,can we infer_vimt,accuracy,0.3333333333333333 +xnli_vi,guaranteed/possible/impossible_vimt,accuracy,0.3333333333333333 +xnli_vi,justified in saying_vimt,accuracy,0.3333333333333333 +xnli_vi,median,accuracy,0.3333333333333333 +xnli_zh,GPT-3 style_zhmt,accuracy,0.5224899598393574 +xnli_zh,MNLI crowdsource_zhmt,accuracy,0.4542168674698795 +xnli_zh,can we infer_zhmt,accuracy,0.5184738955823294 +xnli_zh,guaranteed/possible/impossible_zhmt,accuracy,0.334136546184739 +xnli_zh,justified in saying_zhmt,accuracy,0.4955823293172691 +xnli_zh,median,accuracy,0.4955823293172691 +multiple,average,multiple,0.4242971887550201 diff --git a/evaluation_xnlihtmt/xnlimt/merged.json b/evaluation_xnlihtmt/xnlimt/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..6093ec1c3c21248845b11f0f8607678434710f15 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/merged.json @@ -0,0 +1 @@ +{"xnli_ar": {"GPT-3 style_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, 
prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_armt"}, "MNLI crowdsource_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4542168674698795}, "template_name": "MNLI crowdsource_armt"}, "can we infer_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.41967871485943775}, "template_name": "can we infer_armt"}, "guaranteed/possible/impossible_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3795180722891566}, "template_name": "guaranteed/possible/impossible_armt"}, "justified in saying_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44016064257028115}, "template_name": 
"justified in saying_armt"}}, "xnli_es": {"GPT-3 style_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5381526104417671}, "template_name": "GPT-3 style_esmt"}, "MNLI crowdsource_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4951807228915663}, "template_name": "MNLI crowdsource_esmt"}, "can we infer_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4951807228915663}, "template_name": "can we infer_esmt"}, "guaranteed/possible/impossible_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3349397590361446}, "template_name": "guaranteed/possible/impossible_esmt"}, "justified in saying_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4955823293172691}, "template_name": "justified in saying_esmt"}}, "xnli_fr": {"GPT-3 style_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4746987951807229}, "template_name": "GPT-3 style_frmt"}, "MNLI crowdsource_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3538152610441767}, "template_name": "MNLI crowdsource_frmt"}, "can we infer_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5481927710843374}, "template_name": "can we infer_frmt"}, "guaranteed/possible/impossible_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5200803212851406}, "template_name": "guaranteed/possible/impossible_frmt"}, "justified in saying_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5317269076305221}, "template_name": "justified in saying_frmt"}}, "xnli_hi": {"GPT-3 style_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.43734939759036146}, "template_name": "GPT-3 style_himt"}, "MNLI crowdsource_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_himt"}, "can we infer_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 
0.4795180722891566}, "template_name": "can we infer_himt"}, "guaranteed/possible/impossible_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44136546184738956}, "template_name": "guaranteed/possible/impossible_himt"}, "justified in saying_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4931726907630522}, "template_name": "justified in saying_himt"}}, "xnli_sw": {"GPT-3 style_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3357429718875502}, "template_name": "GPT-3 style_swmt"}, "MNLI crowdsource_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3353413654618474}, "template_name": "MNLI crowdsource_swmt"}, "can we infer_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3682730923694779}, "template_name": "can we infer_swmt"}, "guaranteed/possible/impossible_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.351004016064257}, "template_name": "guaranteed/possible/impossible_swmt"}, "justified in saying_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.36305220883534134}, "template_name": "justified in saying_swmt"}}, "xnli_ur": {"GPT-3 style_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3586345381526104}, "template_name": "GPT-3 style_urmt"}, "MNLI crowdsource_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', 
pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3369477911646586}, "template_name": "MNLI crowdsource_urmt"}, "can we infer_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.351004016064257}, "template_name": "can we infer_urmt"}, "guaranteed/possible/impossible_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3337349397590361}, "template_name": "guaranteed/possible/impossible_urmt"}, "justified in saying_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3381526104417671}, "template_name": "justified in saying_urmt"}}, "xnli_vi": {"GPT-3 style_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", 
"evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_vimt"}, "MNLI crowdsource_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_vimt"}, "can we infer_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "can we infer_vimt"}, "guaranteed/possible/impossible_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "guaranteed/possible/impossible_vimt"}, "justified in saying_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "justified in saying_vimt"}}, "xnli_zh": {"GPT-3 style_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5224899598393574}, "template_name": "GPT-3 style_zhmt"}, "MNLI crowdsource_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4542168674698795}, "template_name": "MNLI crowdsource_zhmt"}, "can we infer_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5184738955823294}, "template_name": "can we infer_zhmt"}, "guaranteed/possible/impossible_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.334136546184739}, "template_name": "guaranteed/possible/impossible_zhmt"}, "justified in saying_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, 
per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4955823293172691}, "template_name": "justified in saying_zhmt"}}} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json b/evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7af2ade81619d1d5d680f533d2d718004ac47e9f --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/sw/GPT-3_style_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "GPT-3 style_swmt", + "evaluation": { + "accuracy": 0.3357429718875502 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json b/evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a7435f3ab6279405fe0758f869cb8e13c33482e --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/sw/MNLI_crowdsource_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "MNLI crowdsource_swmt", + "evaluation": { + "accuracy": 0.3353413654618474 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json b/evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..a09fc6d4bbbeee4f4f9f9c803acfaf3b1c48c451 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/sw/can_we_infer_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "can we infer_swmt", + "evaluation": { + "accuracy": 0.3682730923694779 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json b/evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f15426fe5979ede1ee47fc0edfcd025c24963e85 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/sw/guaranteed_possible_impossible_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "guaranteed/possible/impossible_swmt", + "evaluation": { + "accuracy": 0.351004016064257 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json b/evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..90394faebea44e1c6d7894b2cc89c522ebd8df7a --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/sw/justified_in_saying_swmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "sw", + "template_name": "justified in saying_swmt", + "evaluation": { + "accuracy": 0.36305220883534134 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json b/evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0282d0be6303786a180f50618e07877161f505b3 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ur/GPT-3_style_urmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "GPT-3 style_urmt", + "evaluation": { + "accuracy": 0.3586345381526104 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json b/evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..611968c8b90be64d7d60113395460c0fd457d2b2 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ur/MNLI_crowdsource_urmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "MNLI crowdsource_urmt", + "evaluation": { + "accuracy": 0.3369477911646586 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json b/evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c1ae10e00ad59b9af33932760d44cc12ea1f8ee --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ur/can_we_infer_urmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "can we infer_urmt", + "evaluation": { + "accuracy": 0.351004016064257 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json b/evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..35f9483bd5e19259d6f473f0c4d973e39f80fca4 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ur/guaranteed_possible_impossible_urmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "guaranteed/possible/impossible_urmt", + "evaluation": { + "accuracy": 0.3337349397590361 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, 
dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json b/evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca92fbad1769d545db2d32b0cc2cd21fc2531536 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/ur/justified_in_saying_urmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "justified in saying_urmt", + "evaluation": { + "accuracy": 0.3381526104417671 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json b/evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..58246bf4d433606693cb97adc8622b6fa1c74e4c --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/vi/GPT-3_style_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "GPT-3 style_vimt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json b/evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..5dd3868927989b006df5b17daa4cc23d5238ad22 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/vi/MNLI_crowdsource_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "MNLI crowdsource_vimt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', 
debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json b/evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b73e0f205f3841b05d86a9449c601b8b6f111e5e --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/vi/can_we_infer_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "can we infer_vimt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json b/evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..39992e80223b4a30b931b52f708a7838f6061c8c --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/vi/guaranteed_possible_impossible_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "guaranteed/possible/impossible_vimt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json b/evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e723b8a7e36d3a1063dfd79f26ee3443b922fbb8 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/vi/justified_in_saying_vimt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "justified in saying_vimt", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": 
"Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_vimt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json b/evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..e1ae998ee8df46751b4fe8f0cb439cb1a29acbea --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/zh/GPT-3_style_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "GPT-3 style_zhmt", + "evaluation": { + "accuracy": 0.5224899598393574 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json b/evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2b524f2a25b623cbb28fe5d4ebd40ac24d4578c2 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/zh/MNLI_crowdsource_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "MNLI crowdsource_zhmt", + "evaluation": { + "accuracy": 0.4542168674698795 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json b/evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7a9e5b9cd1bd435fe6320882569918515362eecd --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/zh/can_we_infer_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "can we infer_zhmt", + "evaluation": { + "accuracy": 0.5184738955823294 + }, + "arguments": "Namespace(config_name=None, 
dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json b/evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9af567d30784f4d6ccd77e76af4f9c84323d92db --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/zh/guaranteed_possible_impossible_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "guaranteed/possible/impossible_zhmt", + "evaluation": { + "accuracy": 0.334136546184739 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json b/evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..9c6db375f86ea1876f4eca2db924d921d1ce7b11 --- /dev/null +++ b/evaluation_xnlihtmt/xnlimt/zh/justified_in_saying_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "zh", + "template_name": "justified in saying_zhmt", + "evaluation": { + "accuracy": 0.4955823293172691 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/176bt0/tr13-176b-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..745df138d046295c44b52b3a17342a4b8ef7345c --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,852 @@ +{ + "metadata": { + "total_size": 352494542848 + }, + "weight_map": { + "h.0.input_layernorm.bias": "pytorch_model_00002-of-00072.bin", + "h.0.input_layernorm.weight": "pytorch_model_00002-of-00072.bin", + 
"h.0.mlp.dense_4h_to_h.bias": "pytorch_model_00002-of-00072.bin", + "h.0.mlp.dense_4h_to_h.weight": "pytorch_model_00002-of-00072.bin", + "h.0.mlp.dense_h_to_4h.bias": "pytorch_model_00002-of-00072.bin", + "h.0.mlp.dense_h_to_4h.weight": "pytorch_model_00002-of-00072.bin", + "h.0.post_attention_layernorm.bias": "pytorch_model_00002-of-00072.bin", + "h.0.post_attention_layernorm.weight": "pytorch_model_00002-of-00072.bin", + "h.0.self_attention.dense.bias": "pytorch_model_00002-of-00072.bin", + "h.0.self_attention.dense.weight": "pytorch_model_00002-of-00072.bin", + "h.0.self_attention.query_key_value.bias": "pytorch_model_00002-of-00072.bin", + "h.0.self_attention.query_key_value.weight": "pytorch_model_00002-of-00072.bin", + "h.1.input_layernorm.bias": "pytorch_model_00003-of-00072.bin", + "h.1.input_layernorm.weight": "pytorch_model_00003-of-00072.bin", + "h.1.mlp.dense_4h_to_h.bias": "pytorch_model_00003-of-00072.bin", + "h.1.mlp.dense_4h_to_h.weight": "pytorch_model_00003-of-00072.bin", + "h.1.mlp.dense_h_to_4h.bias": "pytorch_model_00003-of-00072.bin", + "h.1.mlp.dense_h_to_4h.weight": "pytorch_model_00003-of-00072.bin", + "h.1.post_attention_layernorm.bias": "pytorch_model_00003-of-00072.bin", + "h.1.post_attention_layernorm.weight": "pytorch_model_00003-of-00072.bin", + "h.1.self_attention.dense.bias": "pytorch_model_00003-of-00072.bin", + "h.1.self_attention.dense.weight": "pytorch_model_00003-of-00072.bin", + "h.1.self_attention.query_key_value.bias": "pytorch_model_00003-of-00072.bin", + "h.1.self_attention.query_key_value.weight": "pytorch_model_00003-of-00072.bin", + "h.10.input_layernorm.bias": "pytorch_model_00012-of-00072.bin", + "h.10.input_layernorm.weight": "pytorch_model_00012-of-00072.bin", + "h.10.mlp.dense_4h_to_h.bias": "pytorch_model_00012-of-00072.bin", + "h.10.mlp.dense_4h_to_h.weight": "pytorch_model_00012-of-00072.bin", + "h.10.mlp.dense_h_to_4h.bias": "pytorch_model_00012-of-00072.bin", + "h.10.mlp.dense_h_to_4h.weight": "pytorch_model_00012-of-00072.bin", + "h.10.post_attention_layernorm.bias": "pytorch_model_00012-of-00072.bin", + "h.10.post_attention_layernorm.weight": "pytorch_model_00012-of-00072.bin", + "h.10.self_attention.dense.bias": "pytorch_model_00012-of-00072.bin", + "h.10.self_attention.dense.weight": "pytorch_model_00012-of-00072.bin", + "h.10.self_attention.query_key_value.bias": "pytorch_model_00012-of-00072.bin", + "h.10.self_attention.query_key_value.weight": "pytorch_model_00012-of-00072.bin", + "h.11.input_layernorm.bias": "pytorch_model_00013-of-00072.bin", + "h.11.input_layernorm.weight": "pytorch_model_00013-of-00072.bin", + "h.11.mlp.dense_4h_to_h.bias": "pytorch_model_00013-of-00072.bin", + "h.11.mlp.dense_4h_to_h.weight": "pytorch_model_00013-of-00072.bin", + "h.11.mlp.dense_h_to_4h.bias": "pytorch_model_00013-of-00072.bin", + "h.11.mlp.dense_h_to_4h.weight": "pytorch_model_00013-of-00072.bin", + "h.11.post_attention_layernorm.bias": "pytorch_model_00013-of-00072.bin", + "h.11.post_attention_layernorm.weight": "pytorch_model_00013-of-00072.bin", + "h.11.self_attention.dense.bias": "pytorch_model_00013-of-00072.bin", + "h.11.self_attention.dense.weight": "pytorch_model_00013-of-00072.bin", + "h.11.self_attention.query_key_value.bias": "pytorch_model_00013-of-00072.bin", + "h.11.self_attention.query_key_value.weight": "pytorch_model_00013-of-00072.bin", + "h.12.input_layernorm.bias": "pytorch_model_00014-of-00072.bin", + "h.12.input_layernorm.weight": "pytorch_model_00014-of-00072.bin", + "h.12.mlp.dense_4h_to_h.bias": 
"pytorch_model_00014-of-00072.bin", + "h.12.mlp.dense_4h_to_h.weight": "pytorch_model_00014-of-00072.bin", + "h.12.mlp.dense_h_to_4h.bias": "pytorch_model_00014-of-00072.bin", + "h.12.mlp.dense_h_to_4h.weight": "pytorch_model_00014-of-00072.bin", + "h.12.post_attention_layernorm.bias": "pytorch_model_00014-of-00072.bin", + "h.12.post_attention_layernorm.weight": "pytorch_model_00014-of-00072.bin", + "h.12.self_attention.dense.bias": "pytorch_model_00014-of-00072.bin", + "h.12.self_attention.dense.weight": "pytorch_model_00014-of-00072.bin", + "h.12.self_attention.query_key_value.bias": "pytorch_model_00014-of-00072.bin", + "h.12.self_attention.query_key_value.weight": "pytorch_model_00014-of-00072.bin", + "h.13.input_layernorm.bias": "pytorch_model_00015-of-00072.bin", + "h.13.input_layernorm.weight": "pytorch_model_00015-of-00072.bin", + "h.13.mlp.dense_4h_to_h.bias": "pytorch_model_00015-of-00072.bin", + "h.13.mlp.dense_4h_to_h.weight": "pytorch_model_00015-of-00072.bin", + "h.13.mlp.dense_h_to_4h.bias": "pytorch_model_00015-of-00072.bin", + "h.13.mlp.dense_h_to_4h.weight": "pytorch_model_00015-of-00072.bin", + "h.13.post_attention_layernorm.bias": "pytorch_model_00015-of-00072.bin", + "h.13.post_attention_layernorm.weight": "pytorch_model_00015-of-00072.bin", + "h.13.self_attention.dense.bias": "pytorch_model_00015-of-00072.bin", + "h.13.self_attention.dense.weight": "pytorch_model_00015-of-00072.bin", + "h.13.self_attention.query_key_value.bias": "pytorch_model_00015-of-00072.bin", + "h.13.self_attention.query_key_value.weight": "pytorch_model_00015-of-00072.bin", + "h.14.input_layernorm.bias": "pytorch_model_00016-of-00072.bin", + "h.14.input_layernorm.weight": "pytorch_model_00016-of-00072.bin", + "h.14.mlp.dense_4h_to_h.bias": "pytorch_model_00016-of-00072.bin", + "h.14.mlp.dense_4h_to_h.weight": "pytorch_model_00016-of-00072.bin", + "h.14.mlp.dense_h_to_4h.bias": "pytorch_model_00016-of-00072.bin", + "h.14.mlp.dense_h_to_4h.weight": "pytorch_model_00016-of-00072.bin", + "h.14.post_attention_layernorm.bias": "pytorch_model_00016-of-00072.bin", + "h.14.post_attention_layernorm.weight": "pytorch_model_00016-of-00072.bin", + "h.14.self_attention.dense.bias": "pytorch_model_00016-of-00072.bin", + "h.14.self_attention.dense.weight": "pytorch_model_00016-of-00072.bin", + "h.14.self_attention.query_key_value.bias": "pytorch_model_00016-of-00072.bin", + "h.14.self_attention.query_key_value.weight": "pytorch_model_00016-of-00072.bin", + "h.15.input_layernorm.bias": "pytorch_model_00017-of-00072.bin", + "h.15.input_layernorm.weight": "pytorch_model_00017-of-00072.bin", + "h.15.mlp.dense_4h_to_h.bias": "pytorch_model_00017-of-00072.bin", + "h.15.mlp.dense_4h_to_h.weight": "pytorch_model_00017-of-00072.bin", + "h.15.mlp.dense_h_to_4h.bias": "pytorch_model_00017-of-00072.bin", + "h.15.mlp.dense_h_to_4h.weight": "pytorch_model_00017-of-00072.bin", + "h.15.post_attention_layernorm.bias": "pytorch_model_00017-of-00072.bin", + "h.15.post_attention_layernorm.weight": "pytorch_model_00017-of-00072.bin", + "h.15.self_attention.dense.bias": "pytorch_model_00017-of-00072.bin", + "h.15.self_attention.dense.weight": "pytorch_model_00017-of-00072.bin", + "h.15.self_attention.query_key_value.bias": "pytorch_model_00017-of-00072.bin", + "h.15.self_attention.query_key_value.weight": "pytorch_model_00017-of-00072.bin", + "h.16.input_layernorm.bias": "pytorch_model_00018-of-00072.bin", + "h.16.input_layernorm.weight": "pytorch_model_00018-of-00072.bin", + "h.16.mlp.dense_4h_to_h.bias": 
"pytorch_model_00018-of-00072.bin", + "h.16.mlp.dense_4h_to_h.weight": "pytorch_model_00018-of-00072.bin", + "h.16.mlp.dense_h_to_4h.bias": "pytorch_model_00018-of-00072.bin", + "h.16.mlp.dense_h_to_4h.weight": "pytorch_model_00018-of-00072.bin", + "h.16.post_attention_layernorm.bias": "pytorch_model_00018-of-00072.bin", + "h.16.post_attention_layernorm.weight": "pytorch_model_00018-of-00072.bin", + "h.16.self_attention.dense.bias": "pytorch_model_00018-of-00072.bin", + "h.16.self_attention.dense.weight": "pytorch_model_00018-of-00072.bin", + "h.16.self_attention.query_key_value.bias": "pytorch_model_00018-of-00072.bin", + "h.16.self_attention.query_key_value.weight": "pytorch_model_00018-of-00072.bin", + "h.17.input_layernorm.bias": "pytorch_model_00019-of-00072.bin", + "h.17.input_layernorm.weight": "pytorch_model_00019-of-00072.bin", + "h.17.mlp.dense_4h_to_h.bias": "pytorch_model_00019-of-00072.bin", + "h.17.mlp.dense_4h_to_h.weight": "pytorch_model_00019-of-00072.bin", + "h.17.mlp.dense_h_to_4h.bias": "pytorch_model_00019-of-00072.bin", + "h.17.mlp.dense_h_to_4h.weight": "pytorch_model_00019-of-00072.bin", + "h.17.post_attention_layernorm.bias": "pytorch_model_00019-of-00072.bin", + "h.17.post_attention_layernorm.weight": "pytorch_model_00019-of-00072.bin", + "h.17.self_attention.dense.bias": "pytorch_model_00019-of-00072.bin", + "h.17.self_attention.dense.weight": "pytorch_model_00019-of-00072.bin", + "h.17.self_attention.query_key_value.bias": "pytorch_model_00019-of-00072.bin", + "h.17.self_attention.query_key_value.weight": "pytorch_model_00019-of-00072.bin", + "h.18.input_layernorm.bias": "pytorch_model_00020-of-00072.bin", + "h.18.input_layernorm.weight": "pytorch_model_00020-of-00072.bin", + "h.18.mlp.dense_4h_to_h.bias": "pytorch_model_00020-of-00072.bin", + "h.18.mlp.dense_4h_to_h.weight": "pytorch_model_00020-of-00072.bin", + "h.18.mlp.dense_h_to_4h.bias": "pytorch_model_00020-of-00072.bin", + "h.18.mlp.dense_h_to_4h.weight": "pytorch_model_00020-of-00072.bin", + "h.18.post_attention_layernorm.bias": "pytorch_model_00020-of-00072.bin", + "h.18.post_attention_layernorm.weight": "pytorch_model_00020-of-00072.bin", + "h.18.self_attention.dense.bias": "pytorch_model_00020-of-00072.bin", + "h.18.self_attention.dense.weight": "pytorch_model_00020-of-00072.bin", + "h.18.self_attention.query_key_value.bias": "pytorch_model_00020-of-00072.bin", + "h.18.self_attention.query_key_value.weight": "pytorch_model_00020-of-00072.bin", + "h.19.input_layernorm.bias": "pytorch_model_00021-of-00072.bin", + "h.19.input_layernorm.weight": "pytorch_model_00021-of-00072.bin", + "h.19.mlp.dense_4h_to_h.bias": "pytorch_model_00021-of-00072.bin", + "h.19.mlp.dense_4h_to_h.weight": "pytorch_model_00021-of-00072.bin", + "h.19.mlp.dense_h_to_4h.bias": "pytorch_model_00021-of-00072.bin", + "h.19.mlp.dense_h_to_4h.weight": "pytorch_model_00021-of-00072.bin", + "h.19.post_attention_layernorm.bias": "pytorch_model_00021-of-00072.bin", + "h.19.post_attention_layernorm.weight": "pytorch_model_00021-of-00072.bin", + "h.19.self_attention.dense.bias": "pytorch_model_00021-of-00072.bin", + "h.19.self_attention.dense.weight": "pytorch_model_00021-of-00072.bin", + "h.19.self_attention.query_key_value.bias": "pytorch_model_00021-of-00072.bin", + "h.19.self_attention.query_key_value.weight": "pytorch_model_00021-of-00072.bin", + "h.2.input_layernorm.bias": "pytorch_model_00004-of-00072.bin", + "h.2.input_layernorm.weight": "pytorch_model_00004-of-00072.bin", + "h.2.mlp.dense_4h_to_h.bias": 
"pytorch_model_00004-of-00072.bin", + "h.2.mlp.dense_4h_to_h.weight": "pytorch_model_00004-of-00072.bin", + "h.2.mlp.dense_h_to_4h.bias": "pytorch_model_00004-of-00072.bin", + "h.2.mlp.dense_h_to_4h.weight": "pytorch_model_00004-of-00072.bin", + "h.2.post_attention_layernorm.bias": "pytorch_model_00004-of-00072.bin", + "h.2.post_attention_layernorm.weight": "pytorch_model_00004-of-00072.bin", + "h.2.self_attention.dense.bias": "pytorch_model_00004-of-00072.bin", + "h.2.self_attention.dense.weight": "pytorch_model_00004-of-00072.bin", + "h.2.self_attention.query_key_value.bias": "pytorch_model_00004-of-00072.bin", + "h.2.self_attention.query_key_value.weight": "pytorch_model_00004-of-00072.bin", + "h.20.input_layernorm.bias": "pytorch_model_00022-of-00072.bin", + "h.20.input_layernorm.weight": "pytorch_model_00022-of-00072.bin", + "h.20.mlp.dense_4h_to_h.bias": "pytorch_model_00022-of-00072.bin", + "h.20.mlp.dense_4h_to_h.weight": "pytorch_model_00022-of-00072.bin", + "h.20.mlp.dense_h_to_4h.bias": "pytorch_model_00022-of-00072.bin", + "h.20.mlp.dense_h_to_4h.weight": "pytorch_model_00022-of-00072.bin", + "h.20.post_attention_layernorm.bias": "pytorch_model_00022-of-00072.bin", + "h.20.post_attention_layernorm.weight": "pytorch_model_00022-of-00072.bin", + "h.20.self_attention.dense.bias": "pytorch_model_00022-of-00072.bin", + "h.20.self_attention.dense.weight": "pytorch_model_00022-of-00072.bin", + "h.20.self_attention.query_key_value.bias": "pytorch_model_00022-of-00072.bin", + "h.20.self_attention.query_key_value.weight": "pytorch_model_00022-of-00072.bin", + "h.21.input_layernorm.bias": "pytorch_model_00023-of-00072.bin", + "h.21.input_layernorm.weight": "pytorch_model_00023-of-00072.bin", + "h.21.mlp.dense_4h_to_h.bias": "pytorch_model_00023-of-00072.bin", + "h.21.mlp.dense_4h_to_h.weight": "pytorch_model_00023-of-00072.bin", + "h.21.mlp.dense_h_to_4h.bias": "pytorch_model_00023-of-00072.bin", + "h.21.mlp.dense_h_to_4h.weight": "pytorch_model_00023-of-00072.bin", + "h.21.post_attention_layernorm.bias": "pytorch_model_00023-of-00072.bin", + "h.21.post_attention_layernorm.weight": "pytorch_model_00023-of-00072.bin", + "h.21.self_attention.dense.bias": "pytorch_model_00023-of-00072.bin", + "h.21.self_attention.dense.weight": "pytorch_model_00023-of-00072.bin", + "h.21.self_attention.query_key_value.bias": "pytorch_model_00023-of-00072.bin", + "h.21.self_attention.query_key_value.weight": "pytorch_model_00023-of-00072.bin", + "h.22.input_layernorm.bias": "pytorch_model_00024-of-00072.bin", + "h.22.input_layernorm.weight": "pytorch_model_00024-of-00072.bin", + "h.22.mlp.dense_4h_to_h.bias": "pytorch_model_00024-of-00072.bin", + "h.22.mlp.dense_4h_to_h.weight": "pytorch_model_00024-of-00072.bin", + "h.22.mlp.dense_h_to_4h.bias": "pytorch_model_00024-of-00072.bin", + "h.22.mlp.dense_h_to_4h.weight": "pytorch_model_00024-of-00072.bin", + "h.22.post_attention_layernorm.bias": "pytorch_model_00024-of-00072.bin", + "h.22.post_attention_layernorm.weight": "pytorch_model_00024-of-00072.bin", + "h.22.self_attention.dense.bias": "pytorch_model_00024-of-00072.bin", + "h.22.self_attention.dense.weight": "pytorch_model_00024-of-00072.bin", + "h.22.self_attention.query_key_value.bias": "pytorch_model_00024-of-00072.bin", + "h.22.self_attention.query_key_value.weight": "pytorch_model_00024-of-00072.bin", + "h.23.input_layernorm.bias": "pytorch_model_00025-of-00072.bin", + "h.23.input_layernorm.weight": "pytorch_model_00025-of-00072.bin", + "h.23.mlp.dense_4h_to_h.bias": "pytorch_model_00025-of-00072.bin", 
+ "h.23.mlp.dense_4h_to_h.weight": "pytorch_model_00025-of-00072.bin", + "h.23.mlp.dense_h_to_4h.bias": "pytorch_model_00025-of-00072.bin", + "h.23.mlp.dense_h_to_4h.weight": "pytorch_model_00025-of-00072.bin", + "h.23.post_attention_layernorm.bias": "pytorch_model_00025-of-00072.bin", + "h.23.post_attention_layernorm.weight": "pytorch_model_00025-of-00072.bin", + "h.23.self_attention.dense.bias": "pytorch_model_00025-of-00072.bin", + "h.23.self_attention.dense.weight": "pytorch_model_00025-of-00072.bin", + "h.23.self_attention.query_key_value.bias": "pytorch_model_00025-of-00072.bin", + "h.23.self_attention.query_key_value.weight": "pytorch_model_00025-of-00072.bin", + "h.24.input_layernorm.bias": "pytorch_model_00026-of-00072.bin", + "h.24.input_layernorm.weight": "pytorch_model_00026-of-00072.bin", + "h.24.mlp.dense_4h_to_h.bias": "pytorch_model_00026-of-00072.bin", + "h.24.mlp.dense_4h_to_h.weight": "pytorch_model_00026-of-00072.bin", + "h.24.mlp.dense_h_to_4h.bias": "pytorch_model_00026-of-00072.bin", + "h.24.mlp.dense_h_to_4h.weight": "pytorch_model_00026-of-00072.bin", + "h.24.post_attention_layernorm.bias": "pytorch_model_00026-of-00072.bin", + "h.24.post_attention_layernorm.weight": "pytorch_model_00026-of-00072.bin", + "h.24.self_attention.dense.bias": "pytorch_model_00026-of-00072.bin", + "h.24.self_attention.dense.weight": "pytorch_model_00026-of-00072.bin", + "h.24.self_attention.query_key_value.bias": "pytorch_model_00026-of-00072.bin", + "h.24.self_attention.query_key_value.weight": "pytorch_model_00026-of-00072.bin", + "h.25.input_layernorm.bias": "pytorch_model_00027-of-00072.bin", + "h.25.input_layernorm.weight": "pytorch_model_00027-of-00072.bin", + "h.25.mlp.dense_4h_to_h.bias": "pytorch_model_00027-of-00072.bin", + "h.25.mlp.dense_4h_to_h.weight": "pytorch_model_00027-of-00072.bin", + "h.25.mlp.dense_h_to_4h.bias": "pytorch_model_00027-of-00072.bin", + "h.25.mlp.dense_h_to_4h.weight": "pytorch_model_00027-of-00072.bin", + "h.25.post_attention_layernorm.bias": "pytorch_model_00027-of-00072.bin", + "h.25.post_attention_layernorm.weight": "pytorch_model_00027-of-00072.bin", + "h.25.self_attention.dense.bias": "pytorch_model_00027-of-00072.bin", + "h.25.self_attention.dense.weight": "pytorch_model_00027-of-00072.bin", + "h.25.self_attention.query_key_value.bias": "pytorch_model_00027-of-00072.bin", + "h.25.self_attention.query_key_value.weight": "pytorch_model_00027-of-00072.bin", + "h.26.input_layernorm.bias": "pytorch_model_00028-of-00072.bin", + "h.26.input_layernorm.weight": "pytorch_model_00028-of-00072.bin", + "h.26.mlp.dense_4h_to_h.bias": "pytorch_model_00028-of-00072.bin", + "h.26.mlp.dense_4h_to_h.weight": "pytorch_model_00028-of-00072.bin", + "h.26.mlp.dense_h_to_4h.bias": "pytorch_model_00028-of-00072.bin", + "h.26.mlp.dense_h_to_4h.weight": "pytorch_model_00028-of-00072.bin", + "h.26.post_attention_layernorm.bias": "pytorch_model_00028-of-00072.bin", + "h.26.post_attention_layernorm.weight": "pytorch_model_00028-of-00072.bin", + "h.26.self_attention.dense.bias": "pytorch_model_00028-of-00072.bin", + "h.26.self_attention.dense.weight": "pytorch_model_00028-of-00072.bin", + "h.26.self_attention.query_key_value.bias": "pytorch_model_00028-of-00072.bin", + "h.26.self_attention.query_key_value.weight": "pytorch_model_00028-of-00072.bin", + "h.27.input_layernorm.bias": "pytorch_model_00029-of-00072.bin", + "h.27.input_layernorm.weight": "pytorch_model_00029-of-00072.bin", + "h.27.mlp.dense_4h_to_h.bias": "pytorch_model_00029-of-00072.bin", + 
"h.27.mlp.dense_4h_to_h.weight": "pytorch_model_00029-of-00072.bin", + "h.27.mlp.dense_h_to_4h.bias": "pytorch_model_00029-of-00072.bin", + "h.27.mlp.dense_h_to_4h.weight": "pytorch_model_00029-of-00072.bin", + "h.27.post_attention_layernorm.bias": "pytorch_model_00029-of-00072.bin", + "h.27.post_attention_layernorm.weight": "pytorch_model_00029-of-00072.bin", + "h.27.self_attention.dense.bias": "pytorch_model_00029-of-00072.bin", + "h.27.self_attention.dense.weight": "pytorch_model_00029-of-00072.bin", + "h.27.self_attention.query_key_value.bias": "pytorch_model_00029-of-00072.bin", + "h.27.self_attention.query_key_value.weight": "pytorch_model_00029-of-00072.bin", + "h.28.input_layernorm.bias": "pytorch_model_00030-of-00072.bin", + "h.28.input_layernorm.weight": "pytorch_model_00030-of-00072.bin", + "h.28.mlp.dense_4h_to_h.bias": "pytorch_model_00030-of-00072.bin", + "h.28.mlp.dense_4h_to_h.weight": "pytorch_model_00030-of-00072.bin", + "h.28.mlp.dense_h_to_4h.bias": "pytorch_model_00030-of-00072.bin", + "h.28.mlp.dense_h_to_4h.weight": "pytorch_model_00030-of-00072.bin", + "h.28.post_attention_layernorm.bias": "pytorch_model_00030-of-00072.bin", + "h.28.post_attention_layernorm.weight": "pytorch_model_00030-of-00072.bin", + "h.28.self_attention.dense.bias": "pytorch_model_00030-of-00072.bin", + "h.28.self_attention.dense.weight": "pytorch_model_00030-of-00072.bin", + "h.28.self_attention.query_key_value.bias": "pytorch_model_00030-of-00072.bin", + "h.28.self_attention.query_key_value.weight": "pytorch_model_00030-of-00072.bin", + "h.29.input_layernorm.bias": "pytorch_model_00031-of-00072.bin", + "h.29.input_layernorm.weight": "pytorch_model_00031-of-00072.bin", + "h.29.mlp.dense_4h_to_h.bias": "pytorch_model_00031-of-00072.bin", + "h.29.mlp.dense_4h_to_h.weight": "pytorch_model_00031-of-00072.bin", + "h.29.mlp.dense_h_to_4h.bias": "pytorch_model_00031-of-00072.bin", + "h.29.mlp.dense_h_to_4h.weight": "pytorch_model_00031-of-00072.bin", + "h.29.post_attention_layernorm.bias": "pytorch_model_00031-of-00072.bin", + "h.29.post_attention_layernorm.weight": "pytorch_model_00031-of-00072.bin", + "h.29.self_attention.dense.bias": "pytorch_model_00031-of-00072.bin", + "h.29.self_attention.dense.weight": "pytorch_model_00031-of-00072.bin", + "h.29.self_attention.query_key_value.bias": "pytorch_model_00031-of-00072.bin", + "h.29.self_attention.query_key_value.weight": "pytorch_model_00031-of-00072.bin", + "h.3.input_layernorm.bias": "pytorch_model_00005-of-00072.bin", + "h.3.input_layernorm.weight": "pytorch_model_00005-of-00072.bin", + "h.3.mlp.dense_4h_to_h.bias": "pytorch_model_00005-of-00072.bin", + "h.3.mlp.dense_4h_to_h.weight": "pytorch_model_00005-of-00072.bin", + "h.3.mlp.dense_h_to_4h.bias": "pytorch_model_00005-of-00072.bin", + "h.3.mlp.dense_h_to_4h.weight": "pytorch_model_00005-of-00072.bin", + "h.3.post_attention_layernorm.bias": "pytorch_model_00005-of-00072.bin", + "h.3.post_attention_layernorm.weight": "pytorch_model_00005-of-00072.bin", + "h.3.self_attention.dense.bias": "pytorch_model_00005-of-00072.bin", + "h.3.self_attention.dense.weight": "pytorch_model_00005-of-00072.bin", + "h.3.self_attention.query_key_value.bias": "pytorch_model_00005-of-00072.bin", + "h.3.self_attention.query_key_value.weight": "pytorch_model_00005-of-00072.bin", + "h.30.input_layernorm.bias": "pytorch_model_00032-of-00072.bin", + "h.30.input_layernorm.weight": "pytorch_model_00032-of-00072.bin", + "h.30.mlp.dense_4h_to_h.bias": "pytorch_model_00032-of-00072.bin", + "h.30.mlp.dense_4h_to_h.weight": 
"pytorch_model_00032-of-00072.bin", + "h.30.mlp.dense_h_to_4h.bias": "pytorch_model_00032-of-00072.bin", + "h.30.mlp.dense_h_to_4h.weight": "pytorch_model_00032-of-00072.bin", + "h.30.post_attention_layernorm.bias": "pytorch_model_00032-of-00072.bin", + "h.30.post_attention_layernorm.weight": "pytorch_model_00032-of-00072.bin", + "h.30.self_attention.dense.bias": "pytorch_model_00032-of-00072.bin", + "h.30.self_attention.dense.weight": "pytorch_model_00032-of-00072.bin", + "h.30.self_attention.query_key_value.bias": "pytorch_model_00032-of-00072.bin", + "h.30.self_attention.query_key_value.weight": "pytorch_model_00032-of-00072.bin", + "h.31.input_layernorm.bias": "pytorch_model_00033-of-00072.bin", + "h.31.input_layernorm.weight": "pytorch_model_00033-of-00072.bin", + "h.31.mlp.dense_4h_to_h.bias": "pytorch_model_00033-of-00072.bin", + "h.31.mlp.dense_4h_to_h.weight": "pytorch_model_00033-of-00072.bin", + "h.31.mlp.dense_h_to_4h.bias": "pytorch_model_00033-of-00072.bin", + "h.31.mlp.dense_h_to_4h.weight": "pytorch_model_00033-of-00072.bin", + "h.31.post_attention_layernorm.bias": "pytorch_model_00033-of-00072.bin", + "h.31.post_attention_layernorm.weight": "pytorch_model_00033-of-00072.bin", + "h.31.self_attention.dense.bias": "pytorch_model_00033-of-00072.bin", + "h.31.self_attention.dense.weight": "pytorch_model_00033-of-00072.bin", + "h.31.self_attention.query_key_value.bias": "pytorch_model_00033-of-00072.bin", + "h.31.self_attention.query_key_value.weight": "pytorch_model_00033-of-00072.bin", + "h.32.input_layernorm.bias": "pytorch_model_00034-of-00072.bin", + "h.32.input_layernorm.weight": "pytorch_model_00034-of-00072.bin", + "h.32.mlp.dense_4h_to_h.bias": "pytorch_model_00034-of-00072.bin", + "h.32.mlp.dense_4h_to_h.weight": "pytorch_model_00034-of-00072.bin", + "h.32.mlp.dense_h_to_4h.bias": "pytorch_model_00034-of-00072.bin", + "h.32.mlp.dense_h_to_4h.weight": "pytorch_model_00034-of-00072.bin", + "h.32.post_attention_layernorm.bias": "pytorch_model_00034-of-00072.bin", + "h.32.post_attention_layernorm.weight": "pytorch_model_00034-of-00072.bin", + "h.32.self_attention.dense.bias": "pytorch_model_00034-of-00072.bin", + "h.32.self_attention.dense.weight": "pytorch_model_00034-of-00072.bin", + "h.32.self_attention.query_key_value.bias": "pytorch_model_00034-of-00072.bin", + "h.32.self_attention.query_key_value.weight": "pytorch_model_00034-of-00072.bin", + "h.33.input_layernorm.bias": "pytorch_model_00035-of-00072.bin", + "h.33.input_layernorm.weight": "pytorch_model_00035-of-00072.bin", + "h.33.mlp.dense_4h_to_h.bias": "pytorch_model_00035-of-00072.bin", + "h.33.mlp.dense_4h_to_h.weight": "pytorch_model_00035-of-00072.bin", + "h.33.mlp.dense_h_to_4h.bias": "pytorch_model_00035-of-00072.bin", + "h.33.mlp.dense_h_to_4h.weight": "pytorch_model_00035-of-00072.bin", + "h.33.post_attention_layernorm.bias": "pytorch_model_00035-of-00072.bin", + "h.33.post_attention_layernorm.weight": "pytorch_model_00035-of-00072.bin", + "h.33.self_attention.dense.bias": "pytorch_model_00035-of-00072.bin", + "h.33.self_attention.dense.weight": "pytorch_model_00035-of-00072.bin", + "h.33.self_attention.query_key_value.bias": "pytorch_model_00035-of-00072.bin", + "h.33.self_attention.query_key_value.weight": "pytorch_model_00035-of-00072.bin", + "h.34.input_layernorm.bias": "pytorch_model_00036-of-00072.bin", + "h.34.input_layernorm.weight": "pytorch_model_00036-of-00072.bin", + "h.34.mlp.dense_4h_to_h.bias": "pytorch_model_00036-of-00072.bin", + "h.34.mlp.dense_4h_to_h.weight": 
"pytorch_model_00036-of-00072.bin", + "h.34.mlp.dense_h_to_4h.bias": "pytorch_model_00036-of-00072.bin", + "h.34.mlp.dense_h_to_4h.weight": "pytorch_model_00036-of-00072.bin", + "h.34.post_attention_layernorm.bias": "pytorch_model_00036-of-00072.bin", + "h.34.post_attention_layernorm.weight": "pytorch_model_00036-of-00072.bin", + "h.34.self_attention.dense.bias": "pytorch_model_00036-of-00072.bin", + "h.34.self_attention.dense.weight": "pytorch_model_00036-of-00072.bin", + "h.34.self_attention.query_key_value.bias": "pytorch_model_00036-of-00072.bin", + "h.34.self_attention.query_key_value.weight": "pytorch_model_00036-of-00072.bin", + "h.35.input_layernorm.bias": "pytorch_model_00037-of-00072.bin", + "h.35.input_layernorm.weight": "pytorch_model_00037-of-00072.bin", + "h.35.mlp.dense_4h_to_h.bias": "pytorch_model_00037-of-00072.bin", + "h.35.mlp.dense_4h_to_h.weight": "pytorch_model_00037-of-00072.bin", + "h.35.mlp.dense_h_to_4h.bias": "pytorch_model_00037-of-00072.bin", + "h.35.mlp.dense_h_to_4h.weight": "pytorch_model_00037-of-00072.bin", + "h.35.post_attention_layernorm.bias": "pytorch_model_00037-of-00072.bin", + "h.35.post_attention_layernorm.weight": "pytorch_model_00037-of-00072.bin", + "h.35.self_attention.dense.bias": "pytorch_model_00037-of-00072.bin", + "h.35.self_attention.dense.weight": "pytorch_model_00037-of-00072.bin", + "h.35.self_attention.query_key_value.bias": "pytorch_model_00037-of-00072.bin", + "h.35.self_attention.query_key_value.weight": "pytorch_model_00037-of-00072.bin", + "h.36.input_layernorm.bias": "pytorch_model_00038-of-00072.bin", + "h.36.input_layernorm.weight": "pytorch_model_00038-of-00072.bin", + "h.36.mlp.dense_4h_to_h.bias": "pytorch_model_00038-of-00072.bin", + "h.36.mlp.dense_4h_to_h.weight": "pytorch_model_00038-of-00072.bin", + "h.36.mlp.dense_h_to_4h.bias": "pytorch_model_00038-of-00072.bin", + "h.36.mlp.dense_h_to_4h.weight": "pytorch_model_00038-of-00072.bin", + "h.36.post_attention_layernorm.bias": "pytorch_model_00038-of-00072.bin", + "h.36.post_attention_layernorm.weight": "pytorch_model_00038-of-00072.bin", + "h.36.self_attention.dense.bias": "pytorch_model_00038-of-00072.bin", + "h.36.self_attention.dense.weight": "pytorch_model_00038-of-00072.bin", + "h.36.self_attention.query_key_value.bias": "pytorch_model_00038-of-00072.bin", + "h.36.self_attention.query_key_value.weight": "pytorch_model_00038-of-00072.bin", + "h.37.input_layernorm.bias": "pytorch_model_00039-of-00072.bin", + "h.37.input_layernorm.weight": "pytorch_model_00039-of-00072.bin", + "h.37.mlp.dense_4h_to_h.bias": "pytorch_model_00039-of-00072.bin", + "h.37.mlp.dense_4h_to_h.weight": "pytorch_model_00039-of-00072.bin", + "h.37.mlp.dense_h_to_4h.bias": "pytorch_model_00039-of-00072.bin", + "h.37.mlp.dense_h_to_4h.weight": "pytorch_model_00039-of-00072.bin", + "h.37.post_attention_layernorm.bias": "pytorch_model_00039-of-00072.bin", + "h.37.post_attention_layernorm.weight": "pytorch_model_00039-of-00072.bin", + "h.37.self_attention.dense.bias": "pytorch_model_00039-of-00072.bin", + "h.37.self_attention.dense.weight": "pytorch_model_00039-of-00072.bin", + "h.37.self_attention.query_key_value.bias": "pytorch_model_00039-of-00072.bin", + "h.37.self_attention.query_key_value.weight": "pytorch_model_00039-of-00072.bin", + "h.38.input_layernorm.bias": "pytorch_model_00040-of-00072.bin", + "h.38.input_layernorm.weight": "pytorch_model_00040-of-00072.bin", + "h.38.mlp.dense_4h_to_h.bias": "pytorch_model_00040-of-00072.bin", + "h.38.mlp.dense_4h_to_h.weight": 
"pytorch_model_00040-of-00072.bin", + "h.38.mlp.dense_h_to_4h.bias": "pytorch_model_00040-of-00072.bin", + "h.38.mlp.dense_h_to_4h.weight": "pytorch_model_00040-of-00072.bin", + "h.38.post_attention_layernorm.bias": "pytorch_model_00040-of-00072.bin", + "h.38.post_attention_layernorm.weight": "pytorch_model_00040-of-00072.bin", + "h.38.self_attention.dense.bias": "pytorch_model_00040-of-00072.bin", + "h.38.self_attention.dense.weight": "pytorch_model_00040-of-00072.bin", + "h.38.self_attention.query_key_value.bias": "pytorch_model_00040-of-00072.bin", + "h.38.self_attention.query_key_value.weight": "pytorch_model_00040-of-00072.bin", + "h.39.input_layernorm.bias": "pytorch_model_00041-of-00072.bin", + "h.39.input_layernorm.weight": "pytorch_model_00041-of-00072.bin", + "h.39.mlp.dense_4h_to_h.bias": "pytorch_model_00041-of-00072.bin", + "h.39.mlp.dense_4h_to_h.weight": "pytorch_model_00041-of-00072.bin", + "h.39.mlp.dense_h_to_4h.bias": "pytorch_model_00041-of-00072.bin", + "h.39.mlp.dense_h_to_4h.weight": "pytorch_model_00041-of-00072.bin", + "h.39.post_attention_layernorm.bias": "pytorch_model_00041-of-00072.bin", + "h.39.post_attention_layernorm.weight": "pytorch_model_00041-of-00072.bin", + "h.39.self_attention.dense.bias": "pytorch_model_00041-of-00072.bin", + "h.39.self_attention.dense.weight": "pytorch_model_00041-of-00072.bin", + "h.39.self_attention.query_key_value.bias": "pytorch_model_00041-of-00072.bin", + "h.39.self_attention.query_key_value.weight": "pytorch_model_00041-of-00072.bin", + "h.4.input_layernorm.bias": "pytorch_model_00006-of-00072.bin", + "h.4.input_layernorm.weight": "pytorch_model_00006-of-00072.bin", + "h.4.mlp.dense_4h_to_h.bias": "pytorch_model_00006-of-00072.bin", + "h.4.mlp.dense_4h_to_h.weight": "pytorch_model_00006-of-00072.bin", + "h.4.mlp.dense_h_to_4h.bias": "pytorch_model_00006-of-00072.bin", + "h.4.mlp.dense_h_to_4h.weight": "pytorch_model_00006-of-00072.bin", + "h.4.post_attention_layernorm.bias": "pytorch_model_00006-of-00072.bin", + "h.4.post_attention_layernorm.weight": "pytorch_model_00006-of-00072.bin", + "h.4.self_attention.dense.bias": "pytorch_model_00006-of-00072.bin", + "h.4.self_attention.dense.weight": "pytorch_model_00006-of-00072.bin", + "h.4.self_attention.query_key_value.bias": "pytorch_model_00006-of-00072.bin", + "h.4.self_attention.query_key_value.weight": "pytorch_model_00006-of-00072.bin", + "h.40.input_layernorm.bias": "pytorch_model_00042-of-00072.bin", + "h.40.input_layernorm.weight": "pytorch_model_00042-of-00072.bin", + "h.40.mlp.dense_4h_to_h.bias": "pytorch_model_00042-of-00072.bin", + "h.40.mlp.dense_4h_to_h.weight": "pytorch_model_00042-of-00072.bin", + "h.40.mlp.dense_h_to_4h.bias": "pytorch_model_00042-of-00072.bin", + "h.40.mlp.dense_h_to_4h.weight": "pytorch_model_00042-of-00072.bin", + "h.40.post_attention_layernorm.bias": "pytorch_model_00042-of-00072.bin", + "h.40.post_attention_layernorm.weight": "pytorch_model_00042-of-00072.bin", + "h.40.self_attention.dense.bias": "pytorch_model_00042-of-00072.bin", + "h.40.self_attention.dense.weight": "pytorch_model_00042-of-00072.bin", + "h.40.self_attention.query_key_value.bias": "pytorch_model_00042-of-00072.bin", + "h.40.self_attention.query_key_value.weight": "pytorch_model_00042-of-00072.bin", + "h.41.input_layernorm.bias": "pytorch_model_00043-of-00072.bin", + "h.41.input_layernorm.weight": "pytorch_model_00043-of-00072.bin", + "h.41.mlp.dense_4h_to_h.bias": "pytorch_model_00043-of-00072.bin", + "h.41.mlp.dense_4h_to_h.weight": "pytorch_model_00043-of-00072.bin", + 
"h.41.mlp.dense_h_to_4h.bias": "pytorch_model_00043-of-00072.bin", + "h.41.mlp.dense_h_to_4h.weight": "pytorch_model_00043-of-00072.bin", + "h.41.post_attention_layernorm.bias": "pytorch_model_00043-of-00072.bin", + "h.41.post_attention_layernorm.weight": "pytorch_model_00043-of-00072.bin", + "h.41.self_attention.dense.bias": "pytorch_model_00043-of-00072.bin", + "h.41.self_attention.dense.weight": "pytorch_model_00043-of-00072.bin", + "h.41.self_attention.query_key_value.bias": "pytorch_model_00043-of-00072.bin", + "h.41.self_attention.query_key_value.weight": "pytorch_model_00043-of-00072.bin", + "h.42.input_layernorm.bias": "pytorch_model_00044-of-00072.bin", + "h.42.input_layernorm.weight": "pytorch_model_00044-of-00072.bin", + "h.42.mlp.dense_4h_to_h.bias": "pytorch_model_00044-of-00072.bin", + "h.42.mlp.dense_4h_to_h.weight": "pytorch_model_00044-of-00072.bin", + "h.42.mlp.dense_h_to_4h.bias": "pytorch_model_00044-of-00072.bin", + "h.42.mlp.dense_h_to_4h.weight": "pytorch_model_00044-of-00072.bin", + "h.42.post_attention_layernorm.bias": "pytorch_model_00044-of-00072.bin", + "h.42.post_attention_layernorm.weight": "pytorch_model_00044-of-00072.bin", + "h.42.self_attention.dense.bias": "pytorch_model_00044-of-00072.bin", + "h.42.self_attention.dense.weight": "pytorch_model_00044-of-00072.bin", + "h.42.self_attention.query_key_value.bias": "pytorch_model_00044-of-00072.bin", + "h.42.self_attention.query_key_value.weight": "pytorch_model_00044-of-00072.bin", + "h.43.input_layernorm.bias": "pytorch_model_00045-of-00072.bin", + "h.43.input_layernorm.weight": "pytorch_model_00045-of-00072.bin", + "h.43.mlp.dense_4h_to_h.bias": "pytorch_model_00045-of-00072.bin", + "h.43.mlp.dense_4h_to_h.weight": "pytorch_model_00045-of-00072.bin", + "h.43.mlp.dense_h_to_4h.bias": "pytorch_model_00045-of-00072.bin", + "h.43.mlp.dense_h_to_4h.weight": "pytorch_model_00045-of-00072.bin", + "h.43.post_attention_layernorm.bias": "pytorch_model_00045-of-00072.bin", + "h.43.post_attention_layernorm.weight": "pytorch_model_00045-of-00072.bin", + "h.43.self_attention.dense.bias": "pytorch_model_00045-of-00072.bin", + "h.43.self_attention.dense.weight": "pytorch_model_00045-of-00072.bin", + "h.43.self_attention.query_key_value.bias": "pytorch_model_00045-of-00072.bin", + "h.43.self_attention.query_key_value.weight": "pytorch_model_00045-of-00072.bin", + "h.44.input_layernorm.bias": "pytorch_model_00046-of-00072.bin", + "h.44.input_layernorm.weight": "pytorch_model_00046-of-00072.bin", + "h.44.mlp.dense_4h_to_h.bias": "pytorch_model_00046-of-00072.bin", + "h.44.mlp.dense_4h_to_h.weight": "pytorch_model_00046-of-00072.bin", + "h.44.mlp.dense_h_to_4h.bias": "pytorch_model_00046-of-00072.bin", + "h.44.mlp.dense_h_to_4h.weight": "pytorch_model_00046-of-00072.bin", + "h.44.post_attention_layernorm.bias": "pytorch_model_00046-of-00072.bin", + "h.44.post_attention_layernorm.weight": "pytorch_model_00046-of-00072.bin", + "h.44.self_attention.dense.bias": "pytorch_model_00046-of-00072.bin", + "h.44.self_attention.dense.weight": "pytorch_model_00046-of-00072.bin", + "h.44.self_attention.query_key_value.bias": "pytorch_model_00046-of-00072.bin", + "h.44.self_attention.query_key_value.weight": "pytorch_model_00046-of-00072.bin", + "h.45.input_layernorm.bias": "pytorch_model_00047-of-00072.bin", + "h.45.input_layernorm.weight": "pytorch_model_00047-of-00072.bin", + "h.45.mlp.dense_4h_to_h.bias": "pytorch_model_00047-of-00072.bin", + "h.45.mlp.dense_4h_to_h.weight": "pytorch_model_00047-of-00072.bin", + 
"h.45.mlp.dense_h_to_4h.bias": "pytorch_model_00047-of-00072.bin", + "h.45.mlp.dense_h_to_4h.weight": "pytorch_model_00047-of-00072.bin", + "h.45.post_attention_layernorm.bias": "pytorch_model_00047-of-00072.bin", + "h.45.post_attention_layernorm.weight": "pytorch_model_00047-of-00072.bin", + "h.45.self_attention.dense.bias": "pytorch_model_00047-of-00072.bin", + "h.45.self_attention.dense.weight": "pytorch_model_00047-of-00072.bin", + "h.45.self_attention.query_key_value.bias": "pytorch_model_00047-of-00072.bin", + "h.45.self_attention.query_key_value.weight": "pytorch_model_00047-of-00072.bin", + "h.46.input_layernorm.bias": "pytorch_model_00048-of-00072.bin", + "h.46.input_layernorm.weight": "pytorch_model_00048-of-00072.bin", + "h.46.mlp.dense_4h_to_h.bias": "pytorch_model_00048-of-00072.bin", + "h.46.mlp.dense_4h_to_h.weight": "pytorch_model_00048-of-00072.bin", + "h.46.mlp.dense_h_to_4h.bias": "pytorch_model_00048-of-00072.bin", + "h.46.mlp.dense_h_to_4h.weight": "pytorch_model_00048-of-00072.bin", + "h.46.post_attention_layernorm.bias": "pytorch_model_00048-of-00072.bin", + "h.46.post_attention_layernorm.weight": "pytorch_model_00048-of-00072.bin", + "h.46.self_attention.dense.bias": "pytorch_model_00048-of-00072.bin", + "h.46.self_attention.dense.weight": "pytorch_model_00048-of-00072.bin", + "h.46.self_attention.query_key_value.bias": "pytorch_model_00048-of-00072.bin", + "h.46.self_attention.query_key_value.weight": "pytorch_model_00048-of-00072.bin", + "h.47.input_layernorm.bias": "pytorch_model_00049-of-00072.bin", + "h.47.input_layernorm.weight": "pytorch_model_00049-of-00072.bin", + "h.47.mlp.dense_4h_to_h.bias": "pytorch_model_00049-of-00072.bin", + "h.47.mlp.dense_4h_to_h.weight": "pytorch_model_00049-of-00072.bin", + "h.47.mlp.dense_h_to_4h.bias": "pytorch_model_00049-of-00072.bin", + "h.47.mlp.dense_h_to_4h.weight": "pytorch_model_00049-of-00072.bin", + "h.47.post_attention_layernorm.bias": "pytorch_model_00049-of-00072.bin", + "h.47.post_attention_layernorm.weight": "pytorch_model_00049-of-00072.bin", + "h.47.self_attention.dense.bias": "pytorch_model_00049-of-00072.bin", + "h.47.self_attention.dense.weight": "pytorch_model_00049-of-00072.bin", + "h.47.self_attention.query_key_value.bias": "pytorch_model_00049-of-00072.bin", + "h.47.self_attention.query_key_value.weight": "pytorch_model_00049-of-00072.bin", + "h.48.input_layernorm.bias": "pytorch_model_00050-of-00072.bin", + "h.48.input_layernorm.weight": "pytorch_model_00050-of-00072.bin", + "h.48.mlp.dense_4h_to_h.bias": "pytorch_model_00050-of-00072.bin", + "h.48.mlp.dense_4h_to_h.weight": "pytorch_model_00050-of-00072.bin", + "h.48.mlp.dense_h_to_4h.bias": "pytorch_model_00050-of-00072.bin", + "h.48.mlp.dense_h_to_4h.weight": "pytorch_model_00050-of-00072.bin", + "h.48.post_attention_layernorm.bias": "pytorch_model_00050-of-00072.bin", + "h.48.post_attention_layernorm.weight": "pytorch_model_00050-of-00072.bin", + "h.48.self_attention.dense.bias": "pytorch_model_00050-of-00072.bin", + "h.48.self_attention.dense.weight": "pytorch_model_00050-of-00072.bin", + "h.48.self_attention.query_key_value.bias": "pytorch_model_00050-of-00072.bin", + "h.48.self_attention.query_key_value.weight": "pytorch_model_00050-of-00072.bin", + "h.49.input_layernorm.bias": "pytorch_model_00051-of-00072.bin", + "h.49.input_layernorm.weight": "pytorch_model_00051-of-00072.bin", + "h.49.mlp.dense_4h_to_h.bias": "pytorch_model_00051-of-00072.bin", + "h.49.mlp.dense_4h_to_h.weight": "pytorch_model_00051-of-00072.bin", + 
"h.49.mlp.dense_h_to_4h.bias": "pytorch_model_00051-of-00072.bin", + "h.49.mlp.dense_h_to_4h.weight": "pytorch_model_00051-of-00072.bin", + "h.49.post_attention_layernorm.bias": "pytorch_model_00051-of-00072.bin", + "h.49.post_attention_layernorm.weight": "pytorch_model_00051-of-00072.bin", + "h.49.self_attention.dense.bias": "pytorch_model_00051-of-00072.bin", + "h.49.self_attention.dense.weight": "pytorch_model_00051-of-00072.bin", + "h.49.self_attention.query_key_value.bias": "pytorch_model_00051-of-00072.bin", + "h.49.self_attention.query_key_value.weight": "pytorch_model_00051-of-00072.bin", + "h.5.input_layernorm.bias": "pytorch_model_00007-of-00072.bin", + "h.5.input_layernorm.weight": "pytorch_model_00007-of-00072.bin", + "h.5.mlp.dense_4h_to_h.bias": "pytorch_model_00007-of-00072.bin", + "h.5.mlp.dense_4h_to_h.weight": "pytorch_model_00007-of-00072.bin", + "h.5.mlp.dense_h_to_4h.bias": "pytorch_model_00007-of-00072.bin", + "h.5.mlp.dense_h_to_4h.weight": "pytorch_model_00007-of-00072.bin", + "h.5.post_attention_layernorm.bias": "pytorch_model_00007-of-00072.bin", + "h.5.post_attention_layernorm.weight": "pytorch_model_00007-of-00072.bin", + "h.5.self_attention.dense.bias": "pytorch_model_00007-of-00072.bin", + "h.5.self_attention.dense.weight": "pytorch_model_00007-of-00072.bin", + "h.5.self_attention.query_key_value.bias": "pytorch_model_00007-of-00072.bin", + "h.5.self_attention.query_key_value.weight": "pytorch_model_00007-of-00072.bin", + "h.50.input_layernorm.bias": "pytorch_model_00052-of-00072.bin", + "h.50.input_layernorm.weight": "pytorch_model_00052-of-00072.bin", + "h.50.mlp.dense_4h_to_h.bias": "pytorch_model_00052-of-00072.bin", + "h.50.mlp.dense_4h_to_h.weight": "pytorch_model_00052-of-00072.bin", + "h.50.mlp.dense_h_to_4h.bias": "pytorch_model_00052-of-00072.bin", + "h.50.mlp.dense_h_to_4h.weight": "pytorch_model_00052-of-00072.bin", + "h.50.post_attention_layernorm.bias": "pytorch_model_00052-of-00072.bin", + "h.50.post_attention_layernorm.weight": "pytorch_model_00052-of-00072.bin", + "h.50.self_attention.dense.bias": "pytorch_model_00052-of-00072.bin", + "h.50.self_attention.dense.weight": "pytorch_model_00052-of-00072.bin", + "h.50.self_attention.query_key_value.bias": "pytorch_model_00052-of-00072.bin", + "h.50.self_attention.query_key_value.weight": "pytorch_model_00052-of-00072.bin", + "h.51.input_layernorm.bias": "pytorch_model_00053-of-00072.bin", + "h.51.input_layernorm.weight": "pytorch_model_00053-of-00072.bin", + "h.51.mlp.dense_4h_to_h.bias": "pytorch_model_00053-of-00072.bin", + "h.51.mlp.dense_4h_to_h.weight": "pytorch_model_00053-of-00072.bin", + "h.51.mlp.dense_h_to_4h.bias": "pytorch_model_00053-of-00072.bin", + "h.51.mlp.dense_h_to_4h.weight": "pytorch_model_00053-of-00072.bin", + "h.51.post_attention_layernorm.bias": "pytorch_model_00053-of-00072.bin", + "h.51.post_attention_layernorm.weight": "pytorch_model_00053-of-00072.bin", + "h.51.self_attention.dense.bias": "pytorch_model_00053-of-00072.bin", + "h.51.self_attention.dense.weight": "pytorch_model_00053-of-00072.bin", + "h.51.self_attention.query_key_value.bias": "pytorch_model_00053-of-00072.bin", + "h.51.self_attention.query_key_value.weight": "pytorch_model_00053-of-00072.bin", + "h.52.input_layernorm.bias": "pytorch_model_00054-of-00072.bin", + "h.52.input_layernorm.weight": "pytorch_model_00054-of-00072.bin", + "h.52.mlp.dense_4h_to_h.bias": "pytorch_model_00054-of-00072.bin", + "h.52.mlp.dense_4h_to_h.weight": "pytorch_model_00054-of-00072.bin", + "h.52.mlp.dense_h_to_4h.bias": 
"pytorch_model_00054-of-00072.bin", + "h.52.mlp.dense_h_to_4h.weight": "pytorch_model_00054-of-00072.bin", + "h.52.post_attention_layernorm.bias": "pytorch_model_00054-of-00072.bin", + "h.52.post_attention_layernorm.weight": "pytorch_model_00054-of-00072.bin", + "h.52.self_attention.dense.bias": "pytorch_model_00054-of-00072.bin", + "h.52.self_attention.dense.weight": "pytorch_model_00054-of-00072.bin", + "h.52.self_attention.query_key_value.bias": "pytorch_model_00054-of-00072.bin", + "h.52.self_attention.query_key_value.weight": "pytorch_model_00054-of-00072.bin", + "h.53.input_layernorm.bias": "pytorch_model_00055-of-00072.bin", + "h.53.input_layernorm.weight": "pytorch_model_00055-of-00072.bin", + "h.53.mlp.dense_4h_to_h.bias": "pytorch_model_00055-of-00072.bin", + "h.53.mlp.dense_4h_to_h.weight": "pytorch_model_00055-of-00072.bin", + "h.53.mlp.dense_h_to_4h.bias": "pytorch_model_00055-of-00072.bin", + "h.53.mlp.dense_h_to_4h.weight": "pytorch_model_00055-of-00072.bin", + "h.53.post_attention_layernorm.bias": "pytorch_model_00055-of-00072.bin", + "h.53.post_attention_layernorm.weight": "pytorch_model_00055-of-00072.bin", + "h.53.self_attention.dense.bias": "pytorch_model_00055-of-00072.bin", + "h.53.self_attention.dense.weight": "pytorch_model_00055-of-00072.bin", + "h.53.self_attention.query_key_value.bias": "pytorch_model_00055-of-00072.bin", + "h.53.self_attention.query_key_value.weight": "pytorch_model_00055-of-00072.bin", + "h.54.input_layernorm.bias": "pytorch_model_00056-of-00072.bin", + "h.54.input_layernorm.weight": "pytorch_model_00056-of-00072.bin", + "h.54.mlp.dense_4h_to_h.bias": "pytorch_model_00056-of-00072.bin", + "h.54.mlp.dense_4h_to_h.weight": "pytorch_model_00056-of-00072.bin", + "h.54.mlp.dense_h_to_4h.bias": "pytorch_model_00056-of-00072.bin", + "h.54.mlp.dense_h_to_4h.weight": "pytorch_model_00056-of-00072.bin", + "h.54.post_attention_layernorm.bias": "pytorch_model_00056-of-00072.bin", + "h.54.post_attention_layernorm.weight": "pytorch_model_00056-of-00072.bin", + "h.54.self_attention.dense.bias": "pytorch_model_00056-of-00072.bin", + "h.54.self_attention.dense.weight": "pytorch_model_00056-of-00072.bin", + "h.54.self_attention.query_key_value.bias": "pytorch_model_00056-of-00072.bin", + "h.54.self_attention.query_key_value.weight": "pytorch_model_00056-of-00072.bin", + "h.55.input_layernorm.bias": "pytorch_model_00057-of-00072.bin", + "h.55.input_layernorm.weight": "pytorch_model_00057-of-00072.bin", + "h.55.mlp.dense_4h_to_h.bias": "pytorch_model_00057-of-00072.bin", + "h.55.mlp.dense_4h_to_h.weight": "pytorch_model_00057-of-00072.bin", + "h.55.mlp.dense_h_to_4h.bias": "pytorch_model_00057-of-00072.bin", + "h.55.mlp.dense_h_to_4h.weight": "pytorch_model_00057-of-00072.bin", + "h.55.post_attention_layernorm.bias": "pytorch_model_00057-of-00072.bin", + "h.55.post_attention_layernorm.weight": "pytorch_model_00057-of-00072.bin", + "h.55.self_attention.dense.bias": "pytorch_model_00057-of-00072.bin", + "h.55.self_attention.dense.weight": "pytorch_model_00057-of-00072.bin", + "h.55.self_attention.query_key_value.bias": "pytorch_model_00057-of-00072.bin", + "h.55.self_attention.query_key_value.weight": "pytorch_model_00057-of-00072.bin", + "h.56.input_layernorm.bias": "pytorch_model_00058-of-00072.bin", + "h.56.input_layernorm.weight": "pytorch_model_00058-of-00072.bin", + "h.56.mlp.dense_4h_to_h.bias": "pytorch_model_00058-of-00072.bin", + "h.56.mlp.dense_4h_to_h.weight": "pytorch_model_00058-of-00072.bin", + "h.56.mlp.dense_h_to_4h.bias": 
"pytorch_model_00058-of-00072.bin", + "h.56.mlp.dense_h_to_4h.weight": "pytorch_model_00058-of-00072.bin", + "h.56.post_attention_layernorm.bias": "pytorch_model_00058-of-00072.bin", + "h.56.post_attention_layernorm.weight": "pytorch_model_00058-of-00072.bin", + "h.56.self_attention.dense.bias": "pytorch_model_00058-of-00072.bin", + "h.56.self_attention.dense.weight": "pytorch_model_00058-of-00072.bin", + "h.56.self_attention.query_key_value.bias": "pytorch_model_00058-of-00072.bin", + "h.56.self_attention.query_key_value.weight": "pytorch_model_00058-of-00072.bin", + "h.57.input_layernorm.bias": "pytorch_model_00059-of-00072.bin", + "h.57.input_layernorm.weight": "pytorch_model_00059-of-00072.bin", + "h.57.mlp.dense_4h_to_h.bias": "pytorch_model_00059-of-00072.bin", + "h.57.mlp.dense_4h_to_h.weight": "pytorch_model_00059-of-00072.bin", + "h.57.mlp.dense_h_to_4h.bias": "pytorch_model_00059-of-00072.bin", + "h.57.mlp.dense_h_to_4h.weight": "pytorch_model_00059-of-00072.bin", + "h.57.post_attention_layernorm.bias": "pytorch_model_00059-of-00072.bin", + "h.57.post_attention_layernorm.weight": "pytorch_model_00059-of-00072.bin", + "h.57.self_attention.dense.bias": "pytorch_model_00059-of-00072.bin", + "h.57.self_attention.dense.weight": "pytorch_model_00059-of-00072.bin", + "h.57.self_attention.query_key_value.bias": "pytorch_model_00059-of-00072.bin", + "h.57.self_attention.query_key_value.weight": "pytorch_model_00059-of-00072.bin", + "h.58.input_layernorm.bias": "pytorch_model_00060-of-00072.bin", + "h.58.input_layernorm.weight": "pytorch_model_00060-of-00072.bin", + "h.58.mlp.dense_4h_to_h.bias": "pytorch_model_00060-of-00072.bin", + "h.58.mlp.dense_4h_to_h.weight": "pytorch_model_00060-of-00072.bin", + "h.58.mlp.dense_h_to_4h.bias": "pytorch_model_00060-of-00072.bin", + "h.58.mlp.dense_h_to_4h.weight": "pytorch_model_00060-of-00072.bin", + "h.58.post_attention_layernorm.bias": "pytorch_model_00060-of-00072.bin", + "h.58.post_attention_layernorm.weight": "pytorch_model_00060-of-00072.bin", + "h.58.self_attention.dense.bias": "pytorch_model_00060-of-00072.bin", + "h.58.self_attention.dense.weight": "pytorch_model_00060-of-00072.bin", + "h.58.self_attention.query_key_value.bias": "pytorch_model_00060-of-00072.bin", + "h.58.self_attention.query_key_value.weight": "pytorch_model_00060-of-00072.bin", + "h.59.input_layernorm.bias": "pytorch_model_00061-of-00072.bin", + "h.59.input_layernorm.weight": "pytorch_model_00061-of-00072.bin", + "h.59.mlp.dense_4h_to_h.bias": "pytorch_model_00061-of-00072.bin", + "h.59.mlp.dense_4h_to_h.weight": "pytorch_model_00061-of-00072.bin", + "h.59.mlp.dense_h_to_4h.bias": "pytorch_model_00061-of-00072.bin", + "h.59.mlp.dense_h_to_4h.weight": "pytorch_model_00061-of-00072.bin", + "h.59.post_attention_layernorm.bias": "pytorch_model_00061-of-00072.bin", + "h.59.post_attention_layernorm.weight": "pytorch_model_00061-of-00072.bin", + "h.59.self_attention.dense.bias": "pytorch_model_00061-of-00072.bin", + "h.59.self_attention.dense.weight": "pytorch_model_00061-of-00072.bin", + "h.59.self_attention.query_key_value.bias": "pytorch_model_00061-of-00072.bin", + "h.59.self_attention.query_key_value.weight": "pytorch_model_00061-of-00072.bin", + "h.6.input_layernorm.bias": "pytorch_model_00008-of-00072.bin", + "h.6.input_layernorm.weight": "pytorch_model_00008-of-00072.bin", + "h.6.mlp.dense_4h_to_h.bias": "pytorch_model_00008-of-00072.bin", + "h.6.mlp.dense_4h_to_h.weight": "pytorch_model_00008-of-00072.bin", + "h.6.mlp.dense_h_to_4h.bias": 
"pytorch_model_00008-of-00072.bin", + "h.6.mlp.dense_h_to_4h.weight": "pytorch_model_00008-of-00072.bin", + "h.6.post_attention_layernorm.bias": "pytorch_model_00008-of-00072.bin", + "h.6.post_attention_layernorm.weight": "pytorch_model_00008-of-00072.bin", + "h.6.self_attention.dense.bias": "pytorch_model_00008-of-00072.bin", + "h.6.self_attention.dense.weight": "pytorch_model_00008-of-00072.bin", + "h.6.self_attention.query_key_value.bias": "pytorch_model_00008-of-00072.bin", + "h.6.self_attention.query_key_value.weight": "pytorch_model_00008-of-00072.bin", + "h.60.input_layernorm.bias": "pytorch_model_00062-of-00072.bin", + "h.60.input_layernorm.weight": "pytorch_model_00062-of-00072.bin", + "h.60.mlp.dense_4h_to_h.bias": "pytorch_model_00062-of-00072.bin", + "h.60.mlp.dense_4h_to_h.weight": "pytorch_model_00062-of-00072.bin", + "h.60.mlp.dense_h_to_4h.bias": "pytorch_model_00062-of-00072.bin", + "h.60.mlp.dense_h_to_4h.weight": "pytorch_model_00062-of-00072.bin", + "h.60.post_attention_layernorm.bias": "pytorch_model_00062-of-00072.bin", + "h.60.post_attention_layernorm.weight": "pytorch_model_00062-of-00072.bin", + "h.60.self_attention.dense.bias": "pytorch_model_00062-of-00072.bin", + "h.60.self_attention.dense.weight": "pytorch_model_00062-of-00072.bin", + "h.60.self_attention.query_key_value.bias": "pytorch_model_00062-of-00072.bin", + "h.60.self_attention.query_key_value.weight": "pytorch_model_00062-of-00072.bin", + "h.61.input_layernorm.bias": "pytorch_model_00063-of-00072.bin", + "h.61.input_layernorm.weight": "pytorch_model_00063-of-00072.bin", + "h.61.mlp.dense_4h_to_h.bias": "pytorch_model_00063-of-00072.bin", + "h.61.mlp.dense_4h_to_h.weight": "pytorch_model_00063-of-00072.bin", + "h.61.mlp.dense_h_to_4h.bias": "pytorch_model_00063-of-00072.bin", + "h.61.mlp.dense_h_to_4h.weight": "pytorch_model_00063-of-00072.bin", + "h.61.post_attention_layernorm.bias": "pytorch_model_00063-of-00072.bin", + "h.61.post_attention_layernorm.weight": "pytorch_model_00063-of-00072.bin", + "h.61.self_attention.dense.bias": "pytorch_model_00063-of-00072.bin", + "h.61.self_attention.dense.weight": "pytorch_model_00063-of-00072.bin", + "h.61.self_attention.query_key_value.bias": "pytorch_model_00063-of-00072.bin", + "h.61.self_attention.query_key_value.weight": "pytorch_model_00063-of-00072.bin", + "h.62.input_layernorm.bias": "pytorch_model_00064-of-00072.bin", + "h.62.input_layernorm.weight": "pytorch_model_00064-of-00072.bin", + "h.62.mlp.dense_4h_to_h.bias": "pytorch_model_00064-of-00072.bin", + "h.62.mlp.dense_4h_to_h.weight": "pytorch_model_00064-of-00072.bin", + "h.62.mlp.dense_h_to_4h.bias": "pytorch_model_00064-of-00072.bin", + "h.62.mlp.dense_h_to_4h.weight": "pytorch_model_00064-of-00072.bin", + "h.62.post_attention_layernorm.bias": "pytorch_model_00064-of-00072.bin", + "h.62.post_attention_layernorm.weight": "pytorch_model_00064-of-00072.bin", + "h.62.self_attention.dense.bias": "pytorch_model_00064-of-00072.bin", + "h.62.self_attention.dense.weight": "pytorch_model_00064-of-00072.bin", + "h.62.self_attention.query_key_value.bias": "pytorch_model_00064-of-00072.bin", + "h.62.self_attention.query_key_value.weight": "pytorch_model_00064-of-00072.bin", + "h.63.input_layernorm.bias": "pytorch_model_00065-of-00072.bin", + "h.63.input_layernorm.weight": "pytorch_model_00065-of-00072.bin", + "h.63.mlp.dense_4h_to_h.bias": "pytorch_model_00065-of-00072.bin", + "h.63.mlp.dense_4h_to_h.weight": "pytorch_model_00065-of-00072.bin", + "h.63.mlp.dense_h_to_4h.bias": 
"pytorch_model_00065-of-00072.bin", + "h.63.mlp.dense_h_to_4h.weight": "pytorch_model_00065-of-00072.bin", + "h.63.post_attention_layernorm.bias": "pytorch_model_00065-of-00072.bin", + "h.63.post_attention_layernorm.weight": "pytorch_model_00065-of-00072.bin", + "h.63.self_attention.dense.bias": "pytorch_model_00065-of-00072.bin", + "h.63.self_attention.dense.weight": "pytorch_model_00065-of-00072.bin", + "h.63.self_attention.query_key_value.bias": "pytorch_model_00065-of-00072.bin", + "h.63.self_attention.query_key_value.weight": "pytorch_model_00065-of-00072.bin", + "h.64.input_layernorm.bias": "pytorch_model_00066-of-00072.bin", + "h.64.input_layernorm.weight": "pytorch_model_00066-of-00072.bin", + "h.64.mlp.dense_4h_to_h.bias": "pytorch_model_00066-of-00072.bin", + "h.64.mlp.dense_4h_to_h.weight": "pytorch_model_00066-of-00072.bin", + "h.64.mlp.dense_h_to_4h.bias": "pytorch_model_00066-of-00072.bin", + "h.64.mlp.dense_h_to_4h.weight": "pytorch_model_00066-of-00072.bin", + "h.64.post_attention_layernorm.bias": "pytorch_model_00066-of-00072.bin", + "h.64.post_attention_layernorm.weight": "pytorch_model_00066-of-00072.bin", + "h.64.self_attention.dense.bias": "pytorch_model_00066-of-00072.bin", + "h.64.self_attention.dense.weight": "pytorch_model_00066-of-00072.bin", + "h.64.self_attention.query_key_value.bias": "pytorch_model_00066-of-00072.bin", + "h.64.self_attention.query_key_value.weight": "pytorch_model_00066-of-00072.bin", + "h.65.input_layernorm.bias": "pytorch_model_00067-of-00072.bin", + "h.65.input_layernorm.weight": "pytorch_model_00067-of-00072.bin", + "h.65.mlp.dense_4h_to_h.bias": "pytorch_model_00067-of-00072.bin", + "h.65.mlp.dense_4h_to_h.weight": "pytorch_model_00067-of-00072.bin", + "h.65.mlp.dense_h_to_4h.bias": "pytorch_model_00067-of-00072.bin", + "h.65.mlp.dense_h_to_4h.weight": "pytorch_model_00067-of-00072.bin", + "h.65.post_attention_layernorm.bias": "pytorch_model_00067-of-00072.bin", + "h.65.post_attention_layernorm.weight": "pytorch_model_00067-of-00072.bin", + "h.65.self_attention.dense.bias": "pytorch_model_00067-of-00072.bin", + "h.65.self_attention.dense.weight": "pytorch_model_00067-of-00072.bin", + "h.65.self_attention.query_key_value.bias": "pytorch_model_00067-of-00072.bin", + "h.65.self_attention.query_key_value.weight": "pytorch_model_00067-of-00072.bin", + "h.66.input_layernorm.bias": "pytorch_model_00068-of-00072.bin", + "h.66.input_layernorm.weight": "pytorch_model_00068-of-00072.bin", + "h.66.mlp.dense_4h_to_h.bias": "pytorch_model_00068-of-00072.bin", + "h.66.mlp.dense_4h_to_h.weight": "pytorch_model_00068-of-00072.bin", + "h.66.mlp.dense_h_to_4h.bias": "pytorch_model_00068-of-00072.bin", + "h.66.mlp.dense_h_to_4h.weight": "pytorch_model_00068-of-00072.bin", + "h.66.post_attention_layernorm.bias": "pytorch_model_00068-of-00072.bin", + "h.66.post_attention_layernorm.weight": "pytorch_model_00068-of-00072.bin", + "h.66.self_attention.dense.bias": "pytorch_model_00068-of-00072.bin", + "h.66.self_attention.dense.weight": "pytorch_model_00068-of-00072.bin", + "h.66.self_attention.query_key_value.bias": "pytorch_model_00068-of-00072.bin", + "h.66.self_attention.query_key_value.weight": "pytorch_model_00068-of-00072.bin", + "h.67.input_layernorm.bias": "pytorch_model_00069-of-00072.bin", + "h.67.input_layernorm.weight": "pytorch_model_00069-of-00072.bin", + "h.67.mlp.dense_4h_to_h.bias": "pytorch_model_00069-of-00072.bin", + "h.67.mlp.dense_4h_to_h.weight": "pytorch_model_00069-of-00072.bin", + "h.67.mlp.dense_h_to_4h.bias": 
"pytorch_model_00069-of-00072.bin", + "h.67.mlp.dense_h_to_4h.weight": "pytorch_model_00069-of-00072.bin", + "h.67.post_attention_layernorm.bias": "pytorch_model_00069-of-00072.bin", + "h.67.post_attention_layernorm.weight": "pytorch_model_00069-of-00072.bin", + "h.67.self_attention.dense.bias": "pytorch_model_00069-of-00072.bin", + "h.67.self_attention.dense.weight": "pytorch_model_00069-of-00072.bin", + "h.67.self_attention.query_key_value.bias": "pytorch_model_00069-of-00072.bin", + "h.67.self_attention.query_key_value.weight": "pytorch_model_00069-of-00072.bin", + "h.68.input_layernorm.bias": "pytorch_model_00070-of-00072.bin", + "h.68.input_layernorm.weight": "pytorch_model_00070-of-00072.bin", + "h.68.mlp.dense_4h_to_h.bias": "pytorch_model_00070-of-00072.bin", + "h.68.mlp.dense_4h_to_h.weight": "pytorch_model_00070-of-00072.bin", + "h.68.mlp.dense_h_to_4h.bias": "pytorch_model_00070-of-00072.bin", + "h.68.mlp.dense_h_to_4h.weight": "pytorch_model_00070-of-00072.bin", + "h.68.post_attention_layernorm.bias": "pytorch_model_00070-of-00072.bin", + "h.68.post_attention_layernorm.weight": "pytorch_model_00070-of-00072.bin", + "h.68.self_attention.dense.bias": "pytorch_model_00070-of-00072.bin", + "h.68.self_attention.dense.weight": "pytorch_model_00070-of-00072.bin", + "h.68.self_attention.query_key_value.bias": "pytorch_model_00070-of-00072.bin", + "h.68.self_attention.query_key_value.weight": "pytorch_model_00070-of-00072.bin", + "h.69.input_layernorm.bias": "pytorch_model_00071-of-00072.bin", + "h.69.input_layernorm.weight": "pytorch_model_00071-of-00072.bin", + "h.69.mlp.dense_4h_to_h.bias": "pytorch_model_00071-of-00072.bin", + "h.69.mlp.dense_4h_to_h.weight": "pytorch_model_00071-of-00072.bin", + "h.69.mlp.dense_h_to_4h.bias": "pytorch_model_00071-of-00072.bin", + "h.69.mlp.dense_h_to_4h.weight": "pytorch_model_00071-of-00072.bin", + "h.69.post_attention_layernorm.bias": "pytorch_model_00071-of-00072.bin", + "h.69.post_attention_layernorm.weight": "pytorch_model_00071-of-00072.bin", + "h.69.self_attention.dense.bias": "pytorch_model_00071-of-00072.bin", + "h.69.self_attention.dense.weight": "pytorch_model_00071-of-00072.bin", + "h.69.self_attention.query_key_value.bias": "pytorch_model_00071-of-00072.bin", + "h.69.self_attention.query_key_value.weight": "pytorch_model_00071-of-00072.bin", + "h.7.input_layernorm.bias": "pytorch_model_00009-of-00072.bin", + "h.7.input_layernorm.weight": "pytorch_model_00009-of-00072.bin", + "h.7.mlp.dense_4h_to_h.bias": "pytorch_model_00009-of-00072.bin", + "h.7.mlp.dense_4h_to_h.weight": "pytorch_model_00009-of-00072.bin", + "h.7.mlp.dense_h_to_4h.bias": "pytorch_model_00009-of-00072.bin", + "h.7.mlp.dense_h_to_4h.weight": "pytorch_model_00009-of-00072.bin", + "h.7.post_attention_layernorm.bias": "pytorch_model_00009-of-00072.bin", + "h.7.post_attention_layernorm.weight": "pytorch_model_00009-of-00072.bin", + "h.7.self_attention.dense.bias": "pytorch_model_00009-of-00072.bin", + "h.7.self_attention.dense.weight": "pytorch_model_00009-of-00072.bin", + "h.7.self_attention.query_key_value.bias": "pytorch_model_00009-of-00072.bin", + "h.7.self_attention.query_key_value.weight": "pytorch_model_00009-of-00072.bin", + "h.8.input_layernorm.bias": "pytorch_model_00010-of-00072.bin", + "h.8.input_layernorm.weight": "pytorch_model_00010-of-00072.bin", + "h.8.mlp.dense_4h_to_h.bias": "pytorch_model_00010-of-00072.bin", + "h.8.mlp.dense_4h_to_h.weight": "pytorch_model_00010-of-00072.bin", + "h.8.mlp.dense_h_to_4h.bias": "pytorch_model_00010-of-00072.bin", + 
"h.8.mlp.dense_h_to_4h.weight": "pytorch_model_00010-of-00072.bin", + "h.8.post_attention_layernorm.bias": "pytorch_model_00010-of-00072.bin", + "h.8.post_attention_layernorm.weight": "pytorch_model_00010-of-00072.bin", + "h.8.self_attention.dense.bias": "pytorch_model_00010-of-00072.bin", + "h.8.self_attention.dense.weight": "pytorch_model_00010-of-00072.bin", + "h.8.self_attention.query_key_value.bias": "pytorch_model_00010-of-00072.bin", + "h.8.self_attention.query_key_value.weight": "pytorch_model_00010-of-00072.bin", + "h.9.input_layernorm.bias": "pytorch_model_00011-of-00072.bin", + "h.9.input_layernorm.weight": "pytorch_model_00011-of-00072.bin", + "h.9.mlp.dense_4h_to_h.bias": "pytorch_model_00011-of-00072.bin", + "h.9.mlp.dense_4h_to_h.weight": "pytorch_model_00011-of-00072.bin", + "h.9.mlp.dense_h_to_4h.bias": "pytorch_model_00011-of-00072.bin", + "h.9.mlp.dense_h_to_4h.weight": "pytorch_model_00011-of-00072.bin", + "h.9.post_attention_layernorm.bias": "pytorch_model_00011-of-00072.bin", + "h.9.post_attention_layernorm.weight": "pytorch_model_00011-of-00072.bin", + "h.9.self_attention.dense.bias": "pytorch_model_00011-of-00072.bin", + "h.9.self_attention.dense.weight": "pytorch_model_00011-of-00072.bin", + "h.9.self_attention.query_key_value.bias": "pytorch_model_00011-of-00072.bin", + "h.9.self_attention.query_key_value.weight": "pytorch_model_00011-of-00072.bin", + "ln_f.bias": "pytorch_model_00072-of-00072.bin", + "ln_f.weight": "pytorch_model_00072-of-00072.bin", + "word_embeddings.weight": "pytorch_model_00001-of-00072.bin", + "word_embeddings_layernorm.bias": "pytorch_model_00001-of-00072.bin", + "word_embeddings_layernorm.weight": "pytorch_model_00001-of-00072.bin" + } +} diff --git a/pytorch_model_00001-of-00072.bin b/pytorch_model_00001-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..ecd963607dcab9f787e6ae07fbc9ff75ee9639e9 --- /dev/null +++ b/pytorch_model_00001-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65db970a7cbf356cfb1b5f2e9aa52dd049b5aae8af93fbb21b5b8bcaf6fb8a11 +size 7193290147 diff --git a/pytorch_model_00002-of-00072.bin b/pytorch_model_00002-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..5be9137509143f8c55587293fd9c1a47edd6ca05 --- /dev/null +++ b/pytorch_model_00002-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f0ce45fed83e1ec25d55c029c98bcf4e80d60061f795127a9bd0921b996be4 +size 4932877601 diff --git a/pytorch_model_00003-of-00072.bin b/pytorch_model_00003-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..1087e4f15999f90574989e5a8924a457ab21a46d --- /dev/null +++ b/pytorch_model_00003-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd924c1e57cf4471ee13947adba8d5a3b79c6e36816a3d17c063bc4c31540f1 +size 4932877601 diff --git a/pytorch_model_00004-of-00072.bin b/pytorch_model_00004-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e6217b3490cc51d017fd17b2be7e5b769fc9d3e --- /dev/null +++ b/pytorch_model_00004-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3369ffe896a27acfa30048b700050413ec52dcd873f523be85f75e6970e4cf7c +size 4932877601 diff --git a/pytorch_model_00005-of-00072.bin b/pytorch_model_00005-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..e85ab18f6e52527d814d1fe04e32bb4f0ae49e40 --- /dev/null +++ b/pytorch_model_00005-of-00072.bin 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64b51c9c1bae3fc0cb701f0461103979cfe3afe1fffa287556587fc64855e76 +size 4932877601 diff --git a/pytorch_model_00006-of-00072.bin b/pytorch_model_00006-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..84ebb422ebfe5fcd187fb7c0c69b6eec7bc4198d --- /dev/null +++ b/pytorch_model_00006-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df976238fa2ae2f2c2f967d24b904984eef5d742a6e3cc512ec4f7b1d48d3f7 +size 4932877601 diff --git a/pytorch_model_00007-of-00072.bin b/pytorch_model_00007-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..c59e4b7de47d01f84eef29cf9cec826744f4ecf7 --- /dev/null +++ b/pytorch_model_00007-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc04d1489906bfaccd07d2c5660154a70f8dde7900f4fd65880544a4656df83 +size 4932877601 diff --git a/pytorch_model_00008-of-00072.bin b/pytorch_model_00008-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a62edff36d3c153292359543ffe6d7a84e183a5 --- /dev/null +++ b/pytorch_model_00008-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dcf4b4e6ca89eab1afe1a41440d3299845a6482bc7fcbbafd98dc4e60d7ecbe +size 4932877601 diff --git a/pytorch_model_00009-of-00072.bin b/pytorch_model_00009-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..387cb5da600208b77b52335e69aa6ada15895441 --- /dev/null +++ b/pytorch_model_00009-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a1fb878ef7d6195e0bcff9254bdb24356d352548a2988efb36804727c891ba +size 4932877601 diff --git a/pytorch_model_00010-of-00072.bin b/pytorch_model_00010-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..019fc67ddc9a88fe5ccde0738475416fa767fdba --- /dev/null +++ b/pytorch_model_00010-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934fed4f1c6bae08b341889c53375ba59aca432c5d1dd013e1706a045f8336ba +size 4932877601 diff --git a/pytorch_model_00011-of-00072.bin b/pytorch_model_00011-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..137ef213549331a85576478c94eb92f568bcf9f1 --- /dev/null +++ b/pytorch_model_00011-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00de7ac3a3b83fe7401eab45b49fa71fc884850f69ba4c6d8372f4e362ed9da9 +size 4932877601 diff --git a/pytorch_model_00012-of-00072.bin b/pytorch_model_00012-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..3bfe823d3cce0b51b1b8ed0cc6643291b0561728 --- /dev/null +++ b/pytorch_model_00012-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4699cd92e5cab0bf1e45f7660254896e39e2812d5648e8d9335cb48e2fc3b5e0 +size 4932877665 diff --git a/pytorch_model_00013-of-00072.bin b/pytorch_model_00013-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f1fcc477118e86ff620df04c5b82856d84e42d7 --- /dev/null +++ b/pytorch_model_00013-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef696b624b5a43aeb953120acb3aec873582f867e94aacd4c8c5ae4d120f5464 +size 4932877665 diff --git a/pytorch_model_00014-of-00072.bin b/pytorch_model_00014-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..4be6b173f08c9f384f08854b43e10dc0f42cb618 --- /dev/null +++ 
b/pytorch_model_00014-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d55aae2ed5666382aea4dc3c9ea875de8b4a482d7ac11d8dd8dd9a593d6e663 +size 4932877665 diff --git a/pytorch_model_00015-of-00072.bin b/pytorch_model_00015-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..c132cde3ad77462c773d3c40faa61952f325438c --- /dev/null +++ b/pytorch_model_00015-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50378f45b27905aead05109d7f5f99dcb33b52571d63ab66cc04fe7f41140d1a +size 4932877665 diff --git a/pytorch_model_00016-of-00072.bin b/pytorch_model_00016-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..d21c5e20f33822e0b706fd0009d35c2fe1c7734a --- /dev/null +++ b/pytorch_model_00016-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4a99a434d265da6753b9e0a330fe98dea879fbb45c80f41a2d95068ebb83bba +size 4932877665 diff --git a/pytorch_model_00017-of-00072.bin b/pytorch_model_00017-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..60fe9950d0248bdccf3b28d1d074f9610dabb49a --- /dev/null +++ b/pytorch_model_00017-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4b40b082de11687c3579a98429ddd4822934c9273e7c6014d75a59477d9d9e +size 4932877665 diff --git a/pytorch_model_00018-of-00072.bin b/pytorch_model_00018-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8f64d9f956a177990ea038a6d2d69fc6379eb28 --- /dev/null +++ b/pytorch_model_00018-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b93d9739fff12fbe8c243e7594a1d4a83a640c7ab6cf8d409d6819e130e314c3 +size 4932877665 diff --git a/pytorch_model_00019-of-00072.bin b/pytorch_model_00019-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..d859e955d350a4c1840297c2dc3111c1a62162d0 --- /dev/null +++ b/pytorch_model_00019-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc6e5fe0a9ce342b4e6ae8f1807a7f3b26f268a585900321032d7ff4b001022 +size 4932877665 diff --git a/pytorch_model_00020-of-00072.bin b/pytorch_model_00020-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..d73aa5b9ed99da0a5ac8d7942b4a5345d1478884 --- /dev/null +++ b/pytorch_model_00020-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae53093d4a6fab3092db2fa968a076f2aa8d5898760b1d248b42bef77455bef +size 4932877665 diff --git a/pytorch_model_00021-of-00072.bin b/pytorch_model_00021-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1a41008a8cd8ddc3430c4c8227051e8e9e29a46 --- /dev/null +++ b/pytorch_model_00021-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8e0f20437e58c5a4d8a866563f6ae5778545be0413fad6c327c4e3c02370af +size 4932877665 diff --git a/pytorch_model_00022-of-00072.bin b/pytorch_model_00022-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f80ff0261be637f4e2a489d934279ccb0ffc8bb --- /dev/null +++ b/pytorch_model_00022-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21d9d721b37d329cbb337e6551db810adc264bcd095217bd8b91ceb427f02f2 +size 4932877665 diff --git a/pytorch_model_00023-of-00072.bin b/pytorch_model_00023-of-00072.bin new file mode 100644 index 
0000000000000000000000000000000000000000..06094809075daaa245d78677e42e452977c3ce79 --- /dev/null +++ b/pytorch_model_00023-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a147cb67c9d437a6c7e34a0ef94f16153bbbd276b4038afae0644e950b9f256 +size 4932877665 diff --git a/pytorch_model_00024-of-00072.bin b/pytorch_model_00024-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..2197e33b3f0e3ec7e8c1078f9e188a40ab50e28f --- /dev/null +++ b/pytorch_model_00024-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c47810cf25c46dab8a84bd058d8b62aa1c4ff7e8cd76d1078870a997a34291 +size 4932877665 diff --git a/pytorch_model_00025-of-00072.bin b/pytorch_model_00025-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..e83c6c195b51237520a694b8d23702160e97b777 --- /dev/null +++ b/pytorch_model_00025-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44478168a6bfe9bf5ec911c71e085d02d7c438497db9768ab6deb22f8aaeddd9 +size 4932877665 diff --git a/pytorch_model_00026-of-00072.bin b/pytorch_model_00026-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a78a4765df86c3d02ba907c754ac3fb57fe1170 --- /dev/null +++ b/pytorch_model_00026-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1600f2e296599bfcd65cfe95f5abd3174c9c39f183f5b113518ac7215e0fd0ad +size 4932877665 diff --git a/pytorch_model_00027-of-00072.bin b/pytorch_model_00027-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8a4f417b0465267cc6af5d5f608969bc348e9c8 --- /dev/null +++ b/pytorch_model_00027-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2af6dc349eecc7cf1c3b241c8f907f2bfe5135f3f5717677b1c64d161ac36d +size 4932877665 diff --git a/pytorch_model_00028-of-00072.bin b/pytorch_model_00028-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..7701fc57b0e2ca0e0fc8851d9700890bff2238d2 --- /dev/null +++ b/pytorch_model_00028-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b800e1337334ab94dcb6ba8649a4394a63be359a8a2d65f85b9c4e65104c3cc +size 4932877665 diff --git a/pytorch_model_00029-of-00072.bin b/pytorch_model_00029-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..038e7a4d4a0bf38f51949f4c7d336ff976f0ba7c --- /dev/null +++ b/pytorch_model_00029-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811dfec075f98f099055ffe062377f78cd9c2089630fa54fe257b22b55cf5762 +size 4932877665 diff --git a/pytorch_model_00030-of-00072.bin b/pytorch_model_00030-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b528f943f91c51fcc0d407ab85b33a440342fc1 --- /dev/null +++ b/pytorch_model_00030-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a69cd1ff317de9e306f0c71af8b9ac517f3d604b5b3ce29337398298e38bb7 +size 4932877665 diff --git a/pytorch_model_00031-of-00072.bin b/pytorch_model_00031-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..7643349b5a418a568600c3d46a44515b7565425a --- /dev/null +++ b/pytorch_model_00031-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79561355347dd74961c26d307eae0eb23649e190e83ff92c929ba4181d03bd4e +size 4932877665 diff --git a/pytorch_model_00032-of-00072.bin b/pytorch_model_00032-of-00072.bin 
new file mode 100644 index 0000000000000000000000000000000000000000..b3783fde9b575597698dd764b2e48eb474d07fc1 --- /dev/null +++ b/pytorch_model_00032-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5766b6e0ebd0151046b4a2f4a60d67b487cd1caf6bb57e36aa3d5f5ab3cdad1a +size 4932877665 diff --git a/pytorch_model_00033-of-00072.bin b/pytorch_model_00033-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..78f177dd2a58a2869a78268acc04fa79d1966d9d --- /dev/null +++ b/pytorch_model_00033-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46716794858776c7ba2a5a7794dd378105598a969412360f0579e80983a0a90 +size 4932877665 diff --git a/pytorch_model_00034-of-00072.bin b/pytorch_model_00034-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..23df4c51d5fd02ad0906ae33d00ed4f241e56e2d --- /dev/null +++ b/pytorch_model_00034-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac35f5bd58dae341c7da44c6df74572a902f0dbe3cbdfe92e1c775ad41335c5 +size 4932877665 diff --git a/pytorch_model_00035-of-00072.bin b/pytorch_model_00035-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..de1e88db41372f180fb036b990255818df1daf38 --- /dev/null +++ b/pytorch_model_00035-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61598ced238787c61c87e09781c2a246dc00a6b5344e170570035af885283473 +size 4932877665 diff --git a/pytorch_model_00036-of-00072.bin b/pytorch_model_00036-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..9281d620d0b7eca2c20da924ba3dfe3afaf1c500 --- /dev/null +++ b/pytorch_model_00036-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cb95a292cf711d8dc555f1496797eb3cd99cea06b8f9a7f445b822e0ffdc0aa +size 4932877665 diff --git a/pytorch_model_00037-of-00072.bin b/pytorch_model_00037-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..dae33db70cb6adeb5379f00652b9a3cad80a52a7 --- /dev/null +++ b/pytorch_model_00037-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78dd209f2220a1dcf72b2158c86ec62c9aec6effb8cffff2cde2aaeb06b02173 +size 4932877665 diff --git a/pytorch_model_00038-of-00072.bin b/pytorch_model_00038-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..43526daf15ce30cea8a2aaeb38c6775b2d593b9a --- /dev/null +++ b/pytorch_model_00038-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:661fcd4bc01ad8a472f456f73452732eeb0abfcd3038f59af339fb35d49b4cfc +size 4932877665 diff --git a/pytorch_model_00039-of-00072.bin b/pytorch_model_00039-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0a08ffe1e34f71f7954763ef4068b4b9be81c05 --- /dev/null +++ b/pytorch_model_00039-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39095aeccaf81bffdbcc11340703985b14e6725a51c7f2da277d158ed8a042bf +size 4932877665 diff --git a/pytorch_model_00040-of-00072.bin b/pytorch_model_00040-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..9babe6c013fbb772bcc1bc5c87664bb0f3923f72 --- /dev/null +++ b/pytorch_model_00040-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ede312e6624b886c462757cb55c2d0f4289ae6b923679008a265cc97cebd130 +size 4932877665 diff --git a/pytorch_model_00041-of-00072.bin 
b/pytorch_model_00041-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..6bdbfe95bba5d02c4df80f86bafcf4db4b74012d --- /dev/null +++ b/pytorch_model_00041-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3077475fe3a99618151a31eafc7e9f655a78682d3c3382b5faa0cc55b19d49a +size 4932877665 diff --git a/pytorch_model_00042-of-00072.bin b/pytorch_model_00042-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..acbc5db8c0f9e7657ec7f18396c356bf7c8bd2c7 --- /dev/null +++ b/pytorch_model_00042-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad9b3ff95974db4ae51cc9880751c2576e1ef1a01a48b48dcdcd7d618b66c1c8 +size 4932877665 diff --git a/pytorch_model_00043-of-00072.bin b/pytorch_model_00043-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdc0a7815dbea330df707e309e7e489c718d4bd7 --- /dev/null +++ b/pytorch_model_00043-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed41fac6da73f98e55c1cca814a961da95cc32fd97311ca95c1963a1e8658f7 +size 4932877665 diff --git a/pytorch_model_00044-of-00072.bin b/pytorch_model_00044-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..fde236368b19cb80dfddccf13c3b162a19259409 --- /dev/null +++ b/pytorch_model_00044-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ccaca307715c40e136a16e4018542e8f34a9939976018a76efa00254dcf698 +size 4932877665 diff --git a/pytorch_model_00045-of-00072.bin b/pytorch_model_00045-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..f58ccc6a1c2bea9a993232552ae42d8913e2b070 --- /dev/null +++ b/pytorch_model_00045-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a852af7a92bcbf7671d5b0cdfb2e1183d087e6b4bc911d5a7c5e7982fbd5b3 +size 4932877665 diff --git a/pytorch_model_00046-of-00072.bin b/pytorch_model_00046-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..1557728c187735e2109b11e5001cd50a5fe06ae7 --- /dev/null +++ b/pytorch_model_00046-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e04df8e4f1c3b70f98bad37ab42b6a01e14b9f3c2d185e2124e2da9aa259cdf +size 4932877665 diff --git a/pytorch_model_00047-of-00072.bin b/pytorch_model_00047-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3fc2caee5848df435945557e044746acd66bd21 --- /dev/null +++ b/pytorch_model_00047-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7345e00239e580e4dbdce3efbb2d980bab81778f149074a48994032a48dd12a +size 4932877665 diff --git a/pytorch_model_00048-of-00072.bin b/pytorch_model_00048-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c3ae0a067d96a079c835fa22fa22b1d2bd05306 --- /dev/null +++ b/pytorch_model_00048-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a1a10ec0f2490ea0098d9335c4dd35ccde41829e229c07877d7d66fd255f77 +size 4932877665 diff --git a/pytorch_model_00049-of-00072.bin b/pytorch_model_00049-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..40cb1252eca479272a86ba722c34e10c2eda7b09 --- /dev/null +++ b/pytorch_model_00049-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9dd396dec4f18f9f9f35658cc650a8af51fa24436434e24e7fe8c572d850ff +size 4932877665 diff --git 
a/pytorch_model_00050-of-00072.bin b/pytorch_model_00050-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..e024152eb90f8817ae365a4b6538024a54d6a1c9 --- /dev/null +++ b/pytorch_model_00050-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673a48b108ae4fe6adc4f0dc325bf4802a7e76fafe2dfcfef81e981e2216a71e +size 4932877665 diff --git a/pytorch_model_00051-of-00072.bin b/pytorch_model_00051-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..0aea561edd42eb4c4c67caf8adef8542d5c09804 --- /dev/null +++ b/pytorch_model_00051-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb56c256a955a215d8f23aa469275d9bc4c330b1ae9d5b6f7b96187b69d37be +size 4932877665 diff --git a/pytorch_model_00052-of-00072.bin b/pytorch_model_00052-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..f524ef0cd5d3a6a57e0317643f49460527f93f40 --- /dev/null +++ b/pytorch_model_00052-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fdd8c8dba14cb4aa94b83321ffbab3843404bf054afa5c42e72642b37f3325 +size 4932877665 diff --git a/pytorch_model_00053-of-00072.bin b/pytorch_model_00053-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..5db16f2e50c4b720b2a09fc27d20322deb9c26f7 --- /dev/null +++ b/pytorch_model_00053-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc5ba6b2bda3387f0e1bcbee67eadd9983320e8677b9d20c88967d7f36254ae +size 4932877665 diff --git a/pytorch_model_00054-of-00072.bin b/pytorch_model_00054-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..29c4f1d966c1847ac6d6e946a82818d7d5e0893e --- /dev/null +++ b/pytorch_model_00054-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a446f09763d087e54964642a4212e0841584701a9fad709d563624fce369e4a9 +size 4932877665 diff --git a/pytorch_model_00055-of-00072.bin b/pytorch_model_00055-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9a17b00468cf9645e82e60f0fac3e85f1a6c944 --- /dev/null +++ b/pytorch_model_00055-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4358881dfd444620c4211feb24b18e7b2892fefabd28e8f69aaedfa6bdaa56 +size 4932877665 diff --git a/pytorch_model_00056-of-00072.bin b/pytorch_model_00056-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c8e607e96a04e5552cd71fc4653ead78d25451b --- /dev/null +++ b/pytorch_model_00056-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff8819a56e3be153cc1cbe45ba48548b4ab40b0adced9e93efd4bd40e527f7a +size 4932877665 diff --git a/pytorch_model_00057-of-00072.bin b/pytorch_model_00057-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..19161f053d45767d61bccf25f4d780c7d5e9d00b --- /dev/null +++ b/pytorch_model_00057-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f9a6e15735ca348fed8217770f09317eaec67de2449f424b957f322527da4d8 +size 4932877665 diff --git a/pytorch_model_00058-of-00072.bin b/pytorch_model_00058-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9f2d2ecc5082bdfa3757e69e0f58d60f8a996df --- /dev/null +++ b/pytorch_model_00058-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335a592871934558a3985f1278007378e9d620478f92689447c89c443789b47f 
+size 4932877665 diff --git a/pytorch_model_00059-of-00072.bin b/pytorch_model_00059-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..006234583bb09bfe6b73aabb2415e573ecb08f75 --- /dev/null +++ b/pytorch_model_00059-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1e976ed27f92f6c69bcd8421bce4f4afb2ac1dfe2840b42b4283dd21e1d869 +size 4932877665 diff --git a/pytorch_model_00060-of-00072.bin b/pytorch_model_00060-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..f58e8b4b5d06678646360294e654ba778fa42f03 --- /dev/null +++ b/pytorch_model_00060-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26dc4a3eb31d19828a3ff7e4f3c7e4b65b5ed041629b96118ae9c351e92c9e51 +size 4932877665 diff --git a/pytorch_model_00061-of-00072.bin b/pytorch_model_00061-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..723b8278d62fea80463a6322caf0b7c9be74332b --- /dev/null +++ b/pytorch_model_00061-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d90c7c77a866a2bea06a42dc04ce2b646fda876d55e0602838655885d4e2ba2 +size 4932877665 diff --git a/pytorch_model_00062-of-00072.bin b/pytorch_model_00062-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..657244e473051f1b3f7321178240b2b98de166a2 --- /dev/null +++ b/pytorch_model_00062-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d286c2b609a8cb831ea87a59fd05ad22074cb2c919f616e8f46c0b02cf637a25 +size 4932877665 diff --git a/pytorch_model_00063-of-00072.bin b/pytorch_model_00063-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..4af17bd467154b1da873afe78d3350efb9b93bcf --- /dev/null +++ b/pytorch_model_00063-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d98a26df2951206a3b4615233d197f8a2908420928cf654800ea040eafe7ca1c +size 4932877665 diff --git a/pytorch_model_00064-of-00072.bin b/pytorch_model_00064-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8db680c5db5d0b948a85cec1e52e965395a4cfd --- /dev/null +++ b/pytorch_model_00064-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76438d2b847b03f746cf0171588b0fa91874de1582b81b9c0ab678b9dd777a7e +size 4932877665 diff --git a/pytorch_model_00065-of-00072.bin b/pytorch_model_00065-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9a7a0f90c22f04d1810bbe4672c043dd47a877a --- /dev/null +++ b/pytorch_model_00065-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0daceafac988ad9419669376ef4b6a94ac6d44e0cd35fe7a2a4c91decb4ba412 +size 4932877665 diff --git a/pytorch_model_00066-of-00072.bin b/pytorch_model_00066-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ff56320c9a992eb77554d05ed54b2a469d94813 --- /dev/null +++ b/pytorch_model_00066-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03460b66795ef7a178444b9ebaf653d4db993034a29435d3c4eedac89b6c6f9 +size 4932877665 diff --git a/pytorch_model_00067-of-00072.bin b/pytorch_model_00067-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..ecfc14f44105d2d523bfa6b5d4588ec9c2e867c2 --- /dev/null +++ b/pytorch_model_00067-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ec3e836923ebe41162eabaac7f1955bd3ad3eaae853ee89591f971afe9912a9c +size 4932877665 diff --git a/pytorch_model_00068-of-00072.bin b/pytorch_model_00068-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca4a5641bd8745e056e25538ffa1869695181780 --- /dev/null +++ b/pytorch_model_00068-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9fccb4f85598e19343f1d12d913a9c0b32e5572d61f793c8356144ead38654 +size 4932877665 diff --git a/pytorch_model_00069-of-00072.bin b/pytorch_model_00069-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..e31ed98ed992deb6b304e7ca4bcb37b99d1bdd80 --- /dev/null +++ b/pytorch_model_00069-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbde52ea1b98a99e8684def8b40be7ce5a2e5c940a14f2a25547d4b217fe689 +size 4932877665 diff --git a/pytorch_model_00070-of-00072.bin b/pytorch_model_00070-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7ea310f8384ac30044d96eae75f6b13e0554c93 --- /dev/null +++ b/pytorch_model_00070-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a4b0304fb2e75a5cd041e7fcc5d265e923d13adaea6aaf1559240b44c6db84 +size 4932877665 diff --git a/pytorch_model_00071-of-00072.bin b/pytorch_model_00071-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..3bd48b29a4d910a57a651a584dfa914c956c247a --- /dev/null +++ b/pytorch_model_00071-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e13dca10345acd4b460bd5c9710b1838a5c642e093b2d1fc8119a860c49496f +size 4932877665 diff --git a/pytorch_model_00072-of-00072.bin b/pytorch_model_00072-of-00072.bin new file mode 100644 index 0000000000000000000000000000000000000000..d7ec2c89a4666a961a0ad378c458e77e01a4ace2 --- /dev/null +++ b/pytorch_model_00072-of-00072.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe030ef15c5368fc211aef57867cf14706e021b316fb7928e41505ec7301b0d +size 58279 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..25bc39604f72700b3b8e10bd69bb2f227157edd1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..370bd68e20b4b6574ee05b213a74b244e3f492f3 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa39cd4b1500feb205bcce3b9703a4373414cafe4970e0657b413f7ddd2a9d3 +size 14500438 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..57576ae0ccd46e2a851bcfb912454077fb374c7e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "name_or_path": "bigscience/tokenizer", "special_tokens_map_file": null, "tokenizer_class":"BloomTokenizerFast", "padding_side":"left"}
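The pieces above fit together as follows: the `weight_map` in the shard index pairs every parameter name with the `pytorch_model_000XX-of-00072.bin` shard that stores it, each shard is committed as a Git LFS pointer (`version`/`oid`/`size`) that resolves to the real file after `git lfs pull`, and the tokenizer files are consumed directly by `transformers`. The sketch below assumes a local clone of this repository at `./bloom-checkpoint` with LFS objects fetched; the directory name, the index filename `pytorch_model.bin.index.json`, the choice of `BloomForCausalLM` (the config above lists `BloomModel`; the LM head is tied to `word_embeddings.weight`), and `device_map="auto"` (which needs `accelerate`) are illustrative assumptions, not guarantees from this repo.

```python
import json

import torch
from transformers import AutoTokenizer, BloomForCausalLM

CKPT_DIR = "./bloom-checkpoint"  # assumption: local clone of this repo, `git lfs pull` already run

# 1) The shard index: weight_map maps each parameter name to the shard file holding it.
with open(f"{CKPT_DIR}/pytorch_model.bin.index.json") as f:  # standard transformers index name (assumed)
    weight_map = json.load(f)["weight_map"]
print(weight_map["h.63.mlp.dense_h_to_4h.weight"])  # -> pytorch_model_00065-of-00072.bin
print(weight_map["word_embeddings.weight"])         # -> pytorch_model_00001-of-00072.bin

# 2) A single shard is an ordinary PyTorch state-dict fragment and can be inspected on its own.
shard = torch.load(f"{CKPT_DIR}/pytorch_model_00072-of-00072.bin", map_location="cpu")
print(sorted(shard.keys()))  # ['ln_f.bias', 'ln_f.weight'], matching the index above

# 3) Normal loading path: from_pretrained walks the same index to pull in every shard.
tokenizer = AutoTokenizer.from_pretrained(CKPT_DIR)  # reads tokenizer.json / tokenizer_config.json
model = BloomForCausalLM.from_pretrained(
    CKPT_DIR,
    torch_dtype=torch.bfloat16,  # matches the bfloat16 dtype used in the evaluation arguments
    device_map="auto",           # assumption: requires the `accelerate` package
)

inputs = tokenizer("Translate to English: Je t'aime.", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))
```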