Muennighoff
commited on
Commit
•
5131329
1
Parent(s):
5b9bfb7
Rm eval
Browse files- evaluation/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json +0 -9
- evaluation/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json +0 -9
- evaluation/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json +0 -9
- evaluation/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json +0 -9
- evaluation/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json +0 -9
- evaluation/Muennighoff_xwinograd/ru/Replace/results.json +0 -9
- evaluation/Muennighoff_xwinograd/ru/True_or_False/results.json +0 -9
- evaluation/Muennighoff_xwinograd/ru/does_underscore_refer_to/results.json +0 -9
- evaluation/Muennighoff_xwinograd/ru/stand_for/results.json +0 -9
- evaluation/Muennighoff_xwinograd/ru/underscore_refer_to/results.json +0 -9
- evaluation/xnli/ru/GPT-3_style/results.json +0 -9
- evaluation/xnli/ru/MNLI_crowdsource/results.json +0 -9
- evaluation/xnli/ru/can_we_infer/results.json +0 -9
- evaluation/xnli/ru/guaranteed_possible_impossible/results.json +0 -9
- evaluation/xnli/ru/justified_in_saying/results.json +0 -9
evaluation/Muennighoff_xstory_cloze/ru/Answer_Given_options/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "Answer Given options",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.6340172071475844
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Answer Given options', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xstory_cloze/ru/Choose_Story_Ending/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "Choose Story Ending",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.7908669755129054
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Choose Story Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xstory_cloze/ru/Generate_Ending/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "Generate Ending",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.5532759761747187
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Generate Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xstory_cloze/ru/Novel_Correct_Ending/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "Novel Correct Ending",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.7379219060225016
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Novel Correct Ending', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xstory_cloze/ru/Story_Continuation_and_Options/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xstory_cloze",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "Story Continuation and Options",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.7531436135009927
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xstory_cloze', dataset_config_name='ru', template_config_name='en', template_name='Story Continuation and Options', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xwinograd/ru/Replace/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xwinograd",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "Replace",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.5396825396825397
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='Replace', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xwinograd/ru/True_or_False/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xwinograd",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "True or False",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.46984126984126984
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='True or False', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xwinograd/ru/does_underscore_refer_to/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xwinograd",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "does underscore refer to",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.5079365079365079
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='does underscore refer to', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xwinograd/ru/stand_for/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xwinograd",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "stand for",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.4888888888888889
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='stand for', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/Muennighoff_xwinograd/ru/underscore_refer_to/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "Muennighoff/xwinograd",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "underscore refer to",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.5079365079365079
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='Muennighoff/xwinograd', dataset_config_name='ru', template_config_name='en', template_name='underscore refer to', split='test', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/xnli/ru/GPT-3_style/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "xnli",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "GPT-3 style",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.5
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='GPT-3 style', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/xnli/ru/MNLI_crowdsource/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "xnli",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "MNLI crowdsource",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.38473895582329315
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='MNLI crowdsource', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/xnli/ru/can_we_infer/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "xnli",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "can we infer",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.4971887550200803
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='can we infer', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/xnli/ru/guaranteed_possible_impossible/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "xnli",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "guaranteed/possible/impossible",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.36626506024096384
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='guaranteed/possible/impossible', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
evaluation/xnli/ru/justified_in_saying/results.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"dataset_name": "xnli",
|
3 |
-
"dataset_config_name": "ru",
|
4 |
-
"template_name": "justified in saying",
|
5 |
-
"evaluation": {
|
6 |
-
"accuracy": 0.46265060240963857
|
7 |
-
},
|
8 |
-
"arguments": "Namespace(dataset_name='xnli', dataset_config_name='ru', template_config_name='en', template_name='justified in saying', split='validation', max_length=2048, target_max_length=256, pad_to_max_length=False, model_name_or_path='bloomz-7b1-xp3ru', config_name=None, tokenizer_name=None, use_slow_tokenizer=False, per_device_eval_batch_size=4, output_dir='bloomz-7b1-xp3ru/evaluation', debug=False, prefixlm=False, dtype='float16', nospace=False, scratchpad=False)"
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|