Muennighoff's picture
Organize files
b53b384
raw
history blame
No virus
31.9 kB
{"xnli_ar": {"GPT-3 style_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_armt"}, "MNLI crowdsource_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4751004016064257}, "template_name": "MNLI crowdsource_armt"}, "can we infer_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "can we infer_armt"}, "guaranteed/possible/impossible_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.45823293172690766}, "template_name": "guaranteed/possible/impossible_armt"}, "justified in saying_armt": {"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_armt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ar", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3337349397590361}, "template_name": "justified in saying_armt"}}, "xnli_es": {"GPT-3 style_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5895582329317269}, "template_name": "GPT-3 style_esmt"}, "MNLI crowdsource_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5100401606425703}, "template_name": "MNLI crowdsource_esmt"}, "can we infer_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "can we infer_esmt"}, "guaranteed/possible/impossible_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3389558232931727}, "template_name": "guaranteed/possible/impossible_esmt"}, "justified in saying_esmt": {"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "es", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "justified in saying_esmt"}}, "xnli_fr": {"GPT-3 style_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4967871485943775}, "template_name": "GPT-3 style_frmt"}, "MNLI crowdsource_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_frmt"}, "can we infer_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.5586345381526104}, "template_name": "can we infer_frmt"}, "guaranteed/possible/impossible_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44096385542168676}, "template_name": "guaranteed/possible/impossible_frmt"}, "justified in saying_frmt": {"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "fr", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4899598393574297}, "template_name": "justified in saying_frmt"}}, "xnli_hi": {"GPT-3 style_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.4393574297188755}, "template_name": "GPT-3 style_himt"}, "MNLI crowdsource_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_himt"}, "can we infer_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3610441767068273}, "template_name": "can we infer_himt"}, "guaranteed/possible/impossible_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.38072289156626504}, "template_name": "guaranteed/possible/impossible_himt"}, "justified in saying_himt": {"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_himt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "hi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.39759036144578314}, "template_name": "justified in saying_himt"}}, "xnli_sw": {"GPT-3 style_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_swmt"}, "MNLI crowdsource_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3337349397590361}, "template_name": "MNLI crowdsource_swmt"}, "can we infer_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.334136546184739}, "template_name": "can we infer_swmt"}, "guaranteed/possible/impossible_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3321285140562249}, "template_name": "guaranteed/possible/impossible_swmt"}, "justified in saying_swmt": {"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "sw", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3357429718875502}, "template_name": "justified in saying_swmt"}}, "xnli_ur": {"GPT-3 style_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3718875502008032}, "template_name": "GPT-3 style_urmt"}, "MNLI crowdsource_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3421686746987952}, "template_name": "MNLI crowdsource_urmt"}, "can we infer_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.36666666666666664}, "template_name": "can we infer_urmt"}, "guaranteed/possible/impossible_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "guaranteed/possible/impossible_urmt"}, "justified in saying_urmt": {"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "ur", "dataset_name": "xnli", "evaluation": {"accuracy": 0.378714859437751}, "template_name": "justified in saying_urmt"}}, "xnli_vi": {"GPT-3 style_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "GPT-3 style_vimt"}, "MNLI crowdsource_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_vimt"}, "can we infer_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "can we infer_vimt"}, "guaranteed/possible/impossible_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3389558232931727}, "template_name": "guaranteed/possible/impossible_vimt"}, "justified in saying_vimt": {"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_vimt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "vi", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "justified in saying_vimt"}}, "xnli_zh": {"GPT-3 style_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3895582329317269}, "template_name": "GPT-3 style_zhmt"}, "MNLI crowdsource_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3333333333333333}, "template_name": "MNLI crowdsource_zhmt"}, "can we infer_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.40602409638554215}, "template_name": "can we infer_zhmt"}, "guaranteed/possible/impossible_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.44136546184738956}, "template_name": "guaranteed/possible/impossible_zhmt"}, "justified in saying_zhmt": {"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='bfloat16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/176bt0/p31lossseq_global_step498/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhmt', tokenizer_name=None, use_slow_tokenizer=False)", "dataset_config_name": "zh", "dataset_name": "xnli", "evaluation": {"accuracy": 0.3405622489959839}, "template_name": "justified in saying_zhmt"}}}