Muennighoff committed on
Commit
9f02120
1 Parent(s): cee198c
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. config.json +31 -0
  3. evaluation/xnliht/ar/GPT-3_style_arht/results.json +9 -0
  4. evaluation/xnliht/ar/MNLI_crowdsource_arht/results.json +9 -0
  5. evaluation/xnliht/ar/can_we_infer_arht/results.json +9 -0
  6. evaluation/xnliht/ar/guaranteed_possible_impossible_arht/results.json +9 -0
  7. evaluation/xnliht/ar/justified_in_saying_arht/results.json +9 -0
  8. evaluation/xnliht/es/GPT-3_style_esht/results.json +9 -0
  9. evaluation/xnliht/es/MNLI_crowdsource_esht/results.json +9 -0
  10. evaluation/xnliht/es/can_we_infer_esht/results.json +9 -0
  11. evaluation/xnliht/es/guaranteed_possible_impossible_esht/results.json +9 -0
  12. evaluation/xnliht/es/justified_in_saying_esht/results.json +9 -0
  13. evaluation/xnliht/fr/GPT-3_style_frht/results.json +9 -0
  14. evaluation/xnliht/fr/MNLI_crowdsource_frht/results.json +9 -0
  15. evaluation/xnliht/fr/can_we_infer_frht/results.json +9 -0
  16. evaluation/xnliht/fr/guaranteed_possible_impossible_frht/results.json +9 -0
  17. evaluation/xnliht/fr/justified_in_saying_frht/results.json +9 -0
  18. evaluation/xnliht/hi/GPT-3_style_hiht/results.json +9 -0
  19. evaluation/xnliht/hi/MNLI_crowdsource_hiht/results.json +9 -0
  20. evaluation/xnliht/hi/can_we_infer_hiht/results.json +9 -0
  21. evaluation/xnliht/hi/guaranteed_possible_impossible_hiht/results.json +9 -0
  22. evaluation/xnliht/hi/justified_in_saying_hiht/results.json +9 -0
  23. evaluation/xnliht/merged.csv +50 -0
  24. evaluation/xnliht/merged.json +1 -0
  25. evaluation/xnliht/sw/GPT-3_style_swht/results.json +9 -0
  26. evaluation/xnliht/sw/MNLI_crowdsource_swht/results.json +9 -0
  27. evaluation/xnliht/sw/can_we_infer_swht/results.json +9 -0
  28. evaluation/xnliht/sw/guaranteed_possible_impossible_swht/results.json +9 -0
  29. evaluation/xnliht/sw/justified_in_saying_swht/results.json +9 -0
  30. evaluation/xnliht/ur/GPT-3_style_urht/results.json +9 -0
  31. evaluation/xnliht/ur/MNLI_crowdsource_urht/results.json +9 -0
  32. evaluation/xnliht/ur/can_we_infer_urht/results.json +9 -0
  33. evaluation/xnliht/ur/guaranteed_possible_impossible_urht/results.json +9 -0
  34. evaluation/xnliht/ur/justified_in_saying_urht/results.json +9 -0
  35. evaluation/xnliht/vi/GPT-3_style_viht/results.json +9 -0
  36. evaluation/xnliht/vi/MNLI_crowdsource_viht/results.json +9 -0
  37. evaluation/xnliht/vi/can_we_infer_viht/results.json +9 -0
  38. evaluation/xnliht/vi/guaranteed_possible_impossible_viht/results.json +9 -0
  39. evaluation/xnliht/vi/justified_in_saying_viht/results.json +9 -0
  40. evaluation/xnliht/zh/GPT-3_style_zhht/results.json +9 -0
  41. evaluation/xnliht/zh/MNLI_crowdsource_zhht/results.json +9 -0
  42. evaluation/xnliht/zh/can_we_infer_zhht/results.json +9 -0
  43. evaluation/xnliht/zh/guaranteed_possible_impossible_zhht/results.json +9 -0
  44. evaluation/xnliht/zh/justified_in_saying_zhht/results.json +9 -0
  45. evaluation/xnlimt/ar/GPT-3_style_armt/results.json +9 -0
  46. evaluation/xnlimt/ar/MNLI_crowdsource_armt/results.json +9 -0
  47. evaluation/xnlimt/ar/can_we_infer_armt/results.json +9 -0
  48. evaluation/xnlimt/ar/guaranteed_possible_impossible_armt/results.json +9 -0
  49. evaluation/xnlimt/ar/justified_in_saying_armt/results.json +9 -0
  50. evaluation/xnlimt/es/GPT-3_style_esmt/results.json +9 -0
.gitattributes CHANGED
@@ -30,3 +30,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
30
  *.zip filter=lfs diff=lfs merge=lfs -text
31
  *.zst filter=lfs diff=lfs merge=lfs -text
32
  *tfevents* filter=lfs diff=lfs merge=lfs -text
33
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_residual_connection_post_layernorm": false,
3
+ "architectures": [
4
+ "BloomModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "attention_softmax_in_fp32": true,
8
+ "bias_dropout_fusion": true,
9
+ "bos_token_id": 1,
10
+ "eos_token_id": 2,
11
+ "hidden_dropout": 0.0,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "masked_softmax_fusion": true,
15
+ "model_type": "bloom",
16
+ "n_embed": 4096,
17
+ "n_inner": null,
18
+ "n_layer": 30,
19
+ "num_attention_heads": 32,
20
+ "offset_alibi": 100,
21
+ "pad_token_id": 3,
22
+ "pretraining_tp": 4,
23
+ "seq_length": 2048,
24
+ "skip_bias_add": true,
25
+ "skip_bias_add_qkv": false,
26
+ "slow_but_exact": false,
27
+ "transformers_version": "4.21.0.dev0",
28
+ "unk_token_id": 0,
29
+ "use_cache": true,
30
+ "vocab_size": 250880
31
+ }
evaluation/xnliht/ar/GPT-3_style_arht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "GPT-3 style_arht",
5
+ "evaluation": {
6
+ "accuracy": 0.4610441767068273
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ar/MNLI_crowdsource_arht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "MNLI crowdsource_arht",
5
+ "evaluation": {
6
+ "accuracy": 0.3899598393574297
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ar/can_we_infer_arht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "can we infer_arht",
5
+ "evaluation": {
6
+ "accuracy": 0.3550200803212851
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ar/guaranteed_possible_impossible_arht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "guaranteed/possible/impossible_arht",
5
+ "evaluation": {
6
+ "accuracy": 0.45461847389558235
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ar/justified_in_saying_arht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "justified in saying_arht",
5
+ "evaluation": {
6
+ "accuracy": 0.3538152610441767
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/es/GPT-3_style_esht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "es",
4
+ "template_name": "GPT-3 style_esht",
5
+ "evaluation": {
6
+ "accuracy": 0.5313253012048192
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/es/MNLI_crowdsource_esht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "es",
4
+ "template_name": "MNLI crowdsource_esht",
5
+ "evaluation": {
6
+ "accuracy": 0.334136546184739
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/es/can_we_infer_esht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "es",
4
+ "template_name": "can we infer_esht",
5
+ "evaluation": {
6
+ "accuracy": 0.36987951807228914
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/es/guaranteed_possible_impossible_esht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "es",
4
+ "template_name": "guaranteed/possible/impossible_esht",
5
+ "evaluation": {
6
+ "accuracy": 0.4686746987951807
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/es/justified_in_saying_esht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "es",
4
+ "template_name": "justified in saying_esht",
5
+ "evaluation": {
6
+ "accuracy": 0.37630522088353413
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/fr/GPT-3_style_frht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "GPT-3 style_frht",
5
+ "evaluation": {
6
+ "accuracy": 0.5345381526104418
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/fr/MNLI_crowdsource_frht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "MNLI crowdsource_frht",
5
+ "evaluation": {
6
+ "accuracy": 0.3357429718875502
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/fr/can_we_infer_frht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "can we infer_frht",
5
+ "evaluation": {
6
+ "accuracy": 0.5224899598393574
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/fr/guaranteed_possible_impossible_frht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "guaranteed/possible/impossible_frht",
5
+ "evaluation": {
6
+ "accuracy": 0.46586345381526106
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/fr/justified_in_saying_frht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "fr",
4
+ "template_name": "justified in saying_frht",
5
+ "evaluation": {
6
+ "accuracy": 0.4891566265060241
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/hi/GPT-3_style_hiht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "hi",
4
+ "template_name": "GPT-3 style_hiht",
5
+ "evaluation": {
6
+ "accuracy": 0.3325301204819277
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/hi/MNLI_crowdsource_hiht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "hi",
4
+ "template_name": "MNLI crowdsource_hiht",
5
+ "evaluation": {
6
+ "accuracy": 0.470281124497992
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/hi/can_we_infer_hiht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "hi",
4
+ "template_name": "can we infer_hiht",
5
+ "evaluation": {
6
+ "accuracy": 0.37309236947791163
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/hi/guaranteed_possible_impossible_hiht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "hi",
4
+ "template_name": "guaranteed/possible/impossible_hiht",
5
+ "evaluation": {
6
+ "accuracy": 0.3514056224899598
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/hi/justified_in_saying_hiht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "hi",
4
+ "template_name": "justified in saying_hiht",
5
+ "evaluation": {
6
+ "accuracy": 0.3746987951807229
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/merged.csv ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset,prompt,metric,value
2
+ xnli_ar,GPT-3 style_arht,accuracy,0.4610441767068273
3
+ xnli_ar,MNLI crowdsource_arht,accuracy,0.3899598393574297
4
+ xnli_ar,can we infer_arht,accuracy,0.3550200803212851
5
+ xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.45461847389558235
6
+ xnli_ar,justified in saying_arht,accuracy,0.3538152610441767
7
+ xnli_ar,median,accuracy,0.3899598393574297
8
+ xnli_es,GPT-3 style_esht,accuracy,0.5313253012048192
9
+ xnli_es,MNLI crowdsource_esht,accuracy,0.334136546184739
10
+ xnli_es,can we infer_esht,accuracy,0.36987951807228914
11
+ xnli_es,guaranteed/possible/impossible_esht,accuracy,0.4686746987951807
12
+ xnli_es,justified in saying_esht,accuracy,0.37630522088353413
13
+ xnli_es,median,accuracy,0.37630522088353413
14
+ xnli_fr,GPT-3 style_frht,accuracy,0.5345381526104418
15
+ xnli_fr,MNLI crowdsource_frht,accuracy,0.3357429718875502
16
+ xnli_fr,can we infer_frht,accuracy,0.5224899598393574
17
+ xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.46586345381526106
18
+ xnli_fr,justified in saying_frht,accuracy,0.4891566265060241
19
+ xnli_fr,median,accuracy,0.4891566265060241
20
+ xnli_hi,GPT-3 style_hiht,accuracy,0.3325301204819277
21
+ xnli_hi,MNLI crowdsource_hiht,accuracy,0.470281124497992
22
+ xnli_hi,can we infer_hiht,accuracy,0.37309236947791163
23
+ xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.3514056224899598
24
+ xnli_hi,justified in saying_hiht,accuracy,0.3746987951807229
25
+ xnli_hi,median,accuracy,0.37309236947791163
26
+ xnli_sw,GPT-3 style_swht,accuracy,0.336144578313253
27
+ xnli_sw,MNLI crowdsource_swht,accuracy,0.3333333333333333
28
+ xnli_sw,can we infer_swht,accuracy,0.3453815261044177
29
+ xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.35582329317269074
30
+ xnli_sw,justified in saying_swht,accuracy,0.3269076305220884
31
+ xnli_sw,median,accuracy,0.336144578313253
32
+ xnli_ur,GPT-3 style_urht,accuracy,0.4
33
+ xnli_ur,MNLI crowdsource_urht,accuracy,0.3562248995983936
34
+ xnli_ur,can we infer_urht,accuracy,0.3349397590361446
35
+ xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.37630522088353413
36
+ xnli_ur,justified in saying_urht,accuracy,0.3405622489959839
37
+ xnli_ur,median,accuracy,0.3562248995983936
38
+ xnli_vi,GPT-3 style_viht,accuracy,0.5265060240963856
39
+ xnli_vi,MNLI crowdsource_viht,accuracy,0.37710843373493974
40
+ xnli_vi,can we infer_viht,accuracy,0.5116465863453815
41
+ xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.3578313253012048
42
+ xnli_vi,justified in saying_viht,accuracy,0.5028112449799197
43
+ xnli_vi,median,accuracy,0.5028112449799197
44
+ xnli_zh,GPT-3 style_zhht,accuracy,0.3196787148594378
45
+ xnli_zh,MNLI crowdsource_zhht,accuracy,0.38112449799196785
46
+ xnli_zh,can we infer_zhht,accuracy,0.40642570281124496
47
+ xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.344578313253012
48
+ xnli_zh,justified in saying_zhht,accuracy,0.3369477911646586
49
+ xnli_zh,median,accuracy,0.344578313253012
50
+ multiple,average,multiple,0.39603413654618475
evaluation/xnliht/merged.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"xnli_ar": {"GPT-3 style_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4610441767068273}, "template_name": "GPT-3 style_arht"}, "MNLI crowdsource_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3899598393574297}, "template_name": "MNLI crowdsource_arht"}, "can we infer_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3550200803212851}, "template_name": "can we infer_arht"}, "guaranteed/possible/impossible_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.45461847389558235}, "template_name": "guaranteed/possible/impossible_arht"}, "justified in saying_arht": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, 
use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3538152610441767}, "template_name": "justified in saying_arht"}}, "xnli_es": {"GPT-3 style_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5313253012048192}, "template_name": "GPT-3 style_esht"}, "MNLI crowdsource_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.334136546184739}, "template_name": "MNLI crowdsource_esht"}, "can we infer_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.36987951807228914}, "template_name": "can we infer_esht"}, "guaranteed/possible/impossible_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4686746987951807}, "template_name": "guaranteed/possible/impossible_esht"}, "justified in saying_esht": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, 
split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37630522088353413}, "template_name": "justified in saying_esht"}}, "xnli_fr": {"GPT-3 style_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5345381526104418}, "template_name": "GPT-3 style_frht"}, "MNLI crowdsource_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3357429718875502}, "template_name": "MNLI crowdsource_frht"}, "can we infer_frht": {"arguments": "Namespace(config_name=None, 
dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5224899598393574}, "template_name": "can we infer_frht"}, "guaranteed/possible/impossible_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.46586345381526106}, "template_name": "guaranteed/possible/impossible_frht"}, "justified in saying_frht": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4891566265060241}, "template_name": "justified in saying_frht"}}, "xnli_hi": {"GPT-3 style_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3325301204819277}, "template_name": "GPT-3 style_hiht"}, "MNLI crowdsource_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": 
"hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.470281124497992}, "template_name": "MNLI crowdsource_hiht"}, "can we infer_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37309236947791163}, "template_name": "can we infer_hiht"}, "guaranteed/possible/impossible_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3514056224899598}, "template_name": "guaranteed/possible/impossible_hiht"}, "justified in saying_hiht": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3746987951807229}, "template_name": "justified in saying_hiht"}}, "xnli_sw": {"GPT-3 style_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.336144578313253}, "template_name": "GPT-3 style_swht"}, "MNLI crowdsource_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, 
template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_swht"}, "can we infer_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3453815261044177}, "template_name": "can we infer_swht"}, "guaranteed/possible/impossible_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.35582329317269074}, "template_name": "guaranteed/possible/impossible_swht"}, "justified in saying_swht": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', 
dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3269076305220884}, "template_name": "justified in saying_swht"}}, "xnli_ur": {"GPT-3 style_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4}, "template_name": "GPT-3 style_urht"}, "MNLI crowdsource_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, 
prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3562248995983936}, "template_name": "MNLI crowdsource_urht"}, "can we infer_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3349397590361446}, "template_name": "can we infer_urht"}, "guaranteed/possible/impossible_urht": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37630522088353413}, "template_name": "guaranteed/possible/impossible_urht"}, "justified in saying_urht": {"arguments": 
"Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3405622489959839}, "template_name": "justified in saying_urht"}}, "xnli_vi": {"GPT-3 style_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5265060240963856}, "template_name": "GPT-3 style_viht"}, "MNLI crowdsource_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', 
output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.37710843373493974}, "template_name": "MNLI crowdsource_viht"}, "can we infer_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5116465863453815}, "template_name": "can we infer_viht"}, "guaranteed/possible/impossible_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)", 
"dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3578313253012048}, "template_name": "guaranteed/possible/impossible_viht"}, "justified in saying_viht": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5028112449799197}, "template_name": "justified in saying_viht"}}, "xnli_zh": {"GPT-3 style_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3196787148594378}, "template_name": "GPT-3 style_zhht"}, "MNLI crowdsource_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, 
model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.38112449799196785}, "template_name": "MNLI crowdsource_zhht"}, "can we infer_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40642570281124496}, "template_name": "can we infer_zhht"}, "guaranteed/possible/impossible_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, 
template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.344578313253012}, "template_name": "guaranteed/possible/impossible_zhht"}, "justified in saying_zhht": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3369477911646586}, "template_name": "justified in saying_zhht"}}}
evaluation/xnliht/sw/GPT-3_style_swht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "sw",
4
+ "template_name": "GPT-3 style_swht",
5
+ "evaluation": {
6
+ "accuracy": 0.336144578313253
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/sw/MNLI_crowdsource_swht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "sw",
4
+ "template_name": "MNLI crowdsource_swht",
5
+ "evaluation": {
6
+ "accuracy": 0.3333333333333333
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/sw/can_we_infer_swht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "sw",
4
+ "template_name": "can we infer_swht",
5
+ "evaluation": {
6
+ "accuracy": 0.3453815261044177
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/sw/guaranteed_possible_impossible_swht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "sw",
4
+ "template_name": "guaranteed/possible/impossible_swht",
5
+ "evaluation": {
6
+ "accuracy": 0.35582329317269074
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/sw/justified_in_saying_swht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "sw",
4
+ "template_name": "justified in saying_swht",
5
+ "evaluation": {
6
+ "accuracy": 0.3269076305220884
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ur/GPT-3_style_urht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ur",
4
+ "template_name": "GPT-3 style_urht",
5
+ "evaluation": {
6
+ "accuracy": 0.4
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ur/MNLI_crowdsource_urht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ur",
4
+ "template_name": "MNLI crowdsource_urht",
5
+ "evaluation": {
6
+ "accuracy": 0.3562248995983936
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ur/can_we_infer_urht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ur",
4
+ "template_name": "can we infer_urht",
5
+ "evaluation": {
6
+ "accuracy": 0.3349397590361446
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ur/guaranteed_possible_impossible_urht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ur",
4
+ "template_name": "guaranteed/possible/impossible_urht",
5
+ "evaluation": {
6
+ "accuracy": 0.37630522088353413
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/ur/justified_in_saying_urht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ur",
4
+ "template_name": "justified in saying_urht",
5
+ "evaluation": {
6
+ "accuracy": 0.3405622489959839
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/vi/GPT-3_style_viht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "vi",
4
+ "template_name": "GPT-3 style_viht",
5
+ "evaluation": {
6
+ "accuracy": 0.5265060240963856
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/vi/MNLI_crowdsource_viht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "vi",
4
+ "template_name": "MNLI crowdsource_viht",
5
+ "evaluation": {
6
+ "accuracy": 0.37710843373493974
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/vi/can_we_infer_viht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "vi",
4
+ "template_name": "can we infer_viht",
5
+ "evaluation": {
6
+ "accuracy": 0.5116465863453815
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/vi/guaranteed_possible_impossible_viht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "vi",
4
+ "template_name": "guaranteed/possible/impossible_viht",
5
+ "evaluation": {
6
+ "accuracy": 0.3578313253012048
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/vi/justified_in_saying_viht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "vi",
4
+ "template_name": "justified in saying_viht",
5
+ "evaluation": {
6
+ "accuracy": 0.5028112449799197
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/zh/GPT-3_style_zhht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "GPT-3 style_zhht",
5
+ "evaluation": {
6
+ "accuracy": 0.3196787148594378
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/zh/MNLI_crowdsource_zhht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "MNLI crowdsource_zhht",
5
+ "evaluation": {
6
+ "accuracy": 0.38112449799196785
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/zh/can_we_infer_zhht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "can we infer_zhht",
5
+ "evaluation": {
6
+ "accuracy": 0.40642570281124496
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/zh/guaranteed_possible_impossible_zhht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "guaranteed/possible/impossible_zhht",
5
+ "evaluation": {
6
+ "accuracy": 0.344578313253012
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnliht/zh/justified_in_saying_zhht/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "zh",
4
+ "template_name": "justified in saying_zhht",
5
+ "evaluation": {
6
+ "accuracy": 0.3369477911646586
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnlimt/ar/GPT-3_style_armt/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "GPT-3 style_armt",
5
+ "evaluation": {
6
+ "accuracy": 0.3333333333333333
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_armt', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnlimt/ar/MNLI_crowdsource_armt/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "MNLI crowdsource_armt",
5
+ "evaluation": {
6
+ "accuracy": 0.4855421686746988
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_armt', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnlimt/ar/can_we_infer_armt/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "can we infer_armt",
5
+ "evaluation": {
6
+ "accuracy": 0.3413654618473896
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_armt', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnlimt/ar/guaranteed_possible_impossible_armt/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "guaranteed/possible/impossible_armt",
5
+ "evaluation": {
6
+ "accuracy": 0.35542168674698793
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_armt', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnlimt/ar/justified_in_saying_armt/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "ar",
4
+ "template_name": "justified in saying_armt",
5
+ "evaluation": {
6
+ "accuracy": 0.3465863453815261
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_armt', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }
evaluation/xnlimt/es/GPT-3_style_esmt/results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_name": "xnli",
3
+ "dataset_config_name": "es",
4
+ "template_name": "GPT-3 style_esmt",
5
+ "evaluation": {
6
+ "accuracy": 0.5385542168674698
7
+ },
8
+ "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/tr13f-6b3-ml-t0-lmtoks341b-t0toks4b-xp3mt/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
9
+ }