Rm eval

Browse files

Files changed (15) hide show

evaluation/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json +0 -9
evaluation/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json +0 -9
evaluation/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json +0 -9
evaluation/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json +0 -9
evaluation/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json +0 -9
evaluation/Muennighoff_xwinograd/ru/Replace/results.json +0 -9
evaluation/Muennighoff_xwinograd/ru/True_or_False/results.json +0 -9
evaluation/Muennighoff_xwinograd/ru/does_underscore_refer_to/results.json +0 -9
evaluation/Muennighoff_xwinograd/ru/stand_for/results.json +0 -9
evaluation/Muennighoff_xwinograd/ru/underscore_refer_to/results.json +0 -9
evaluation/xnli/ru/GPT-3_style/results.json +0 -9
evaluation/xnli/ru/MNLI_crowdsource/results.json +0 -9
evaluation/xnli/ru/can_we_infer/results.json +0 -9
evaluation/xnli/ru/guaranteed_possible_impossible/results.json +0 -9
evaluation/xnli/ru/justified_in_saying/results.json +0 -9

evaluation/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xstory_cloze",
-  "dataset_config_name": "ru",
-  "template_name": "Answer Given options",
-  "evaluation": {
-    "accuracy": 0.6340172071475844
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Answer Given options', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xstory_cloze",
-  "dataset_config_name": "ru",
-  "template_name": "Choose Story Ending",
-  "evaluation": {
-    "accuracy": 0.7908669755129054
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Choose Story Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xstory_cloze",
-  "dataset_config_name": "ru",
-  "template_name": "Generate Ending",
-  "evaluation": {
-    "accuracy": 0.5532759761747187
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Generate Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xstory_cloze",
-  "dataset_config_name": "ru",
-  "template_name": "Novel Correct Ending",
-  "evaluation": {
-    "accuracy": 0.7379219060225016
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Novel Correct Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xstory_cloze",
-  "dataset_config_name": "ru",
-  "template_name": "Story Continuation and Options",
-  "evaluation": {
-    "accuracy": 0.7531436135009927
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Story Continuation and Options', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xwinograd/ru/Replace/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xwinograd",
-  "dataset_config_name": "ru",
-  "template_name": "Replace",
-  "evaluation": {
-    "accuracy": 0.5396825396825397
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='Replace', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xwinograd/ru/True_or_False/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xwinograd",
-  "dataset_config_name": "ru",
-  "template_name": "True or False",
-  "evaluation": {
-    "accuracy": 0.46984126984126984
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='True or False', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xwinograd/ru/does_underscore_refer_to/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xwinograd",
-  "dataset_config_name": "ru",
-  "template_name": "does underscore refer to",
-  "evaluation": {
-    "accuracy": 0.5079365079365079
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='does underscore refer to', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xwinograd/ru/stand_for/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xwinograd",
-  "dataset_config_name": "ru",
-  "template_name": "stand for",
-  "evaluation": {
-    "accuracy": 0.4888888888888889
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='stand for', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/Muennighoff_xwinograd/ru/underscore_refer_to/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "Muennighoff/xwinograd",
-  "dataset_config_name": "ru",
-  "template_name": "underscore refer to",
-  "evaluation": {
-    "accuracy": 0.5079365079365079
-  },
-  "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='underscore refer to', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/xnli/ru/GPT-3_style/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "xnli",
-  "dataset_config_name": "ru",
-  "template_name": "GPT-3 style",
-  "evaluation": {
-    "accuracy": 0.5
-  },
-  "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='GPT-3 style', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/xnli/ru/MNLI_crowdsource/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "xnli",
-  "dataset_config_name": "ru",
-  "template_name": "MNLI crowdsource",
-  "evaluation": {
-    "accuracy": 0.38473895582329315
-  },
-  "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='MNLI crowdsource', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/xnli/ru/can_we_infer/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "xnli",
-  "dataset_config_name": "ru",
-  "template_name": "can we infer",
-  "evaluation": {
-    "accuracy": 0.4971887550200803
-  },
-  "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='can we infer', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/xnli/ru/guaranteed_possible_impossible/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "xnli",
-  "dataset_config_name": "ru",
-  "template_name": "guaranteed/possible/impossible",
-  "evaluation": {
-    "accuracy": 0.36626506024096384
-  },
-  "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='guaranteed/possible/impossible', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}

evaluation/xnli/ru/justified_in_saying/results.json DELETED Viewed

@@ -1,9 +0,0 @@
-{
-  "dataset_name": "xnli",
-  "dataset_config_name": "ru",
-  "template_name": "justified in saying",
-  "evaluation": {
-    "accuracy": 0.46265060240963857
-  },
-  "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='justified in saying', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
-}