Muennighoff's picture
Organize
8e048e8
raw history blame
No virus
4.91 kB
dataset,prompt,metric,value
xcopa_id,C1 or C2? premise_idmt,accuracy,0.57
xcopa_id,best_option_idmt,accuracy,0.78
xcopa_id,cause_effect_idmt,accuracy,0.84
xcopa_id,i_am_hesitating_idmt,accuracy,0.84
xcopa_id,plausible_alternatives_idmt,accuracy,0.83
xcopa_id,median,accuracy,0.83
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6
xcopa_sw,best_option_swmt,accuracy,0.59
xcopa_sw,cause_effect_swmt,accuracy,0.63
xcopa_sw,i_am_hesitating_swmt,accuracy,0.67
xcopa_sw,plausible_alternatives_swmt,accuracy,0.62
xcopa_sw,median,accuracy,0.62
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.64
xcopa_ta,best_option_tamt,accuracy,0.56
xcopa_ta,cause_effect_tamt,accuracy,0.62
xcopa_ta,i_am_hesitating_tamt,accuracy,0.64
xcopa_ta,plausible_alternatives_tamt,accuracy,0.63
xcopa_ta,median,accuracy,0.63
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.61
xcopa_vi,best_option_vimt,accuracy,0.77
xcopa_vi,cause_effect_vimt,accuracy,0.89
xcopa_vi,i_am_hesitating_vimt,accuracy,0.85
xcopa_vi,plausible_alternatives_vimt,accuracy,0.87
xcopa_vi,median,accuracy,0.85
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.63
xcopa_zh,best_option_zhmt,accuracy,0.75
xcopa_zh,cause_effect_zhmt,accuracy,0.83
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.84
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.86
xcopa_zh,median,accuracy,0.83
xstory_cloze_ar,Answer Given options_armt,accuracy,0.8941098610191925
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.9404367968232958
xstory_cloze_ar,Generate Ending_armt,accuracy,0.6598279285241562
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.9272005294506949
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.9172733289212442
xstory_cloze_ar,median,accuracy,0.9172733289212442
xstory_cloze_es,Answer Given options_esmt,accuracy,0.9311714096624751
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.9549966909331569
xstory_cloze_es,Generate Ending_esmt,accuracy,0.7405691594970218
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.9490403706154864
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.9523494374586366
xstory_cloze_es,median,accuracy,0.9490403706154864
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.7326273990734613
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.8682991396426207
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.6293845135671741
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.8305757776307081
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.8259430840502978
xstory_cloze_eu,median,accuracy,0.8259430840502978
xstory_cloze_hi,Answer Given options_himt,accuracy,0.8530774321641297
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.8914626075446724
xstory_cloze_hi,Generate Ending_himt,accuracy,0.6644606221045665
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.8821972203838517
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.8735936465916612
xstory_cloze_hi,median,accuracy,0.8735936465916612
xstory_cloze_id,Answer Given options_idmt,accuracy,0.8682991396426207
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.927862342819325
xstory_cloze_id,Generate Ending_idmt,accuracy,0.6929185969556585
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.9086697551290536
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.9159497021839841
xstory_cloze_id,median,accuracy,0.9086697551290536
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.913964262078094
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.9238914626075446
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6843150231634679
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.9252150893448048
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.913302448709464
xstory_cloze_zh,median,accuracy,0.913964262078094
xwinograd_fr,Replace_frmt,accuracy,0.6626506024096386
xwinograd_fr,True or False_frmt,accuracy,0.4578313253012048
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5783132530120482
xwinograd_fr,stand for_frmt,accuracy,0.5421686746987951
xwinograd_fr,underscore refer to_frmt,accuracy,0.6265060240963856
xwinograd_fr,median,accuracy,0.5783132530120482
xwinograd_pt,Replace_ptmt,accuracy,0.6273764258555133
xwinograd_pt,True or False_ptmt,accuracy,0.532319391634981
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.596958174904943
xwinograd_pt,stand for_ptmt,accuracy,0.5399239543726235
xwinograd_pt,underscore refer to_ptmt,accuracy,0.623574144486692
xwinograd_pt,median,accuracy,0.596958174904943
xwinograd_zh,Replace_zhmt,accuracy,0.7202380952380952
xwinograd_zh,True or False_zhmt,accuracy,0.5099206349206349
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.6746031746031746
xwinograd_zh,stand for_zhmt,accuracy,0.5654761904761905
xwinograd_zh,underscore refer to_zhmt,accuracy,0.7638888888888888
xwinograd_zh,median,accuracy,0.6746031746031746
multiple,average,multiple,0.7855970749932859