Muennighoff commited on
Commit
5131329
1 Parent(s): 5b9bfb7
evaluation/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xstory_cloze",
3
- "dataset_config_name": "ru",
4
- "template_name": "Answer Given options",
5
- "evaluation": {
6
- "accuracy": 0.6340172071475844
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Answer Given options', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xstory_cloze",
3
- "dataset_config_name": "ru",
4
- "template_name": "Choose Story Ending",
5
- "evaluation": {
6
- "accuracy": 0.7908669755129054
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Choose Story Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xstory_cloze",
3
- "dataset_config_name": "ru",
4
- "template_name": "Generate Ending",
5
- "evaluation": {
6
- "accuracy": 0.5532759761747187
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Generate Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xstory_cloze",
3
- "dataset_config_name": "ru",
4
- "template_name": "Novel Correct Ending",
5
- "evaluation": {
6
- "accuracy": 0.7379219060225016
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Novel Correct Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xstory_cloze",
3
- "dataset_config_name": "ru",
4
- "template_name": "Story Continuation and Options",
5
- "evaluation": {
6
- "accuracy": 0.7531436135009927
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Story Continuation and Options', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xwinograd/ru/Replace/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xwinograd",
3
- "dataset_config_name": "ru",
4
- "template_name": "Replace",
5
- "evaluation": {
6
- "accuracy": 0.5396825396825397
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='Replace', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xwinograd/ru/True_or_False/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xwinograd",
3
- "dataset_config_name": "ru",
4
- "template_name": "True or False",
5
- "evaluation": {
6
- "accuracy": 0.46984126984126984
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='True or False', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xwinograd/ru/does_underscore_refer_to/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xwinograd",
3
- "dataset_config_name": "ru",
4
- "template_name": "does underscore refer to",
5
- "evaluation": {
6
- "accuracy": 0.5079365079365079
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='does underscore refer to', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xwinograd/ru/stand_for/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xwinograd",
3
- "dataset_config_name": "ru",
4
- "template_name": "stand for",
5
- "evaluation": {
6
- "accuracy": 0.4888888888888889
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='stand for', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/Muennighoff_xwinograd/ru/underscore_refer_to/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "Muennighoff/xwinograd",
3
- "dataset_config_name": "ru",
4
- "template_name": "underscore refer to",
5
- "evaluation": {
6
- "accuracy": 0.5079365079365079
7
- },
8
- "arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='underscore refer to', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/xnli/ru/GPT-3_style/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "xnli",
3
- "dataset_config_name": "ru",
4
- "template_name": "GPT-3 style",
5
- "evaluation": {
6
- "accuracy": 0.5
7
- },
8
- "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='GPT-3 style', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/xnli/ru/MNLI_crowdsource/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "xnli",
3
- "dataset_config_name": "ru",
4
- "template_name": "MNLI crowdsource",
5
- "evaluation": {
6
- "accuracy": 0.38473895582329315
7
- },
8
- "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='MNLI crowdsource', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/xnli/ru/can_we_infer/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "xnli",
3
- "dataset_config_name": "ru",
4
- "template_name": "can we infer",
5
- "evaluation": {
6
- "accuracy": 0.4971887550200803
7
- },
8
- "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='can we infer', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/xnli/ru/guaranteed_possible_impossible/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "xnli",
3
- "dataset_config_name": "ru",
4
- "template_name": "guaranteed/possible/impossible",
5
- "evaluation": {
6
- "accuracy": 0.36626506024096384
7
- },
8
- "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='guaranteed/possible/impossible', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }
 
 
 
 
 
 
 
 
 
 
evaluation/xnli/ru/justified_in_saying/results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "dataset_name": "xnli",
3
- "dataset_config_name": "ru",
4
- "template_name": "justified in saying",
5
- "evaluation": {
6
- "accuracy": 0.46265060240963857
7
- },
8
- "arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='justified in saying', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
9
- }