File size: 4,906 Bytes
899c702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
dataset,prompt,metric,value
xcopa_id,C1 or C2? premise_idmt,accuracy,0.49
xcopa_id,best_option_idmt,accuracy,0.67
xcopa_id,cause_effect_idmt,accuracy,0.62
xcopa_id,i_am_hesitating_idmt,accuracy,0.64
xcopa_id,plausible_alternatives_idmt,accuracy,0.6
xcopa_id,median,accuracy,0.62
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6
xcopa_sw,best_option_swmt,accuracy,0.59
xcopa_sw,cause_effect_swmt,accuracy,0.55
xcopa_sw,i_am_hesitating_swmt,accuracy,0.6
xcopa_sw,plausible_alternatives_swmt,accuracy,0.53
xcopa_sw,median,accuracy,0.59
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.64
xcopa_ta,best_option_tamt,accuracy,0.52
xcopa_ta,cause_effect_tamt,accuracy,0.61
xcopa_ta,i_am_hesitating_tamt,accuracy,0.63
xcopa_ta,plausible_alternatives_tamt,accuracy,0.56
xcopa_ta,median,accuracy,0.61
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.52
xcopa_vi,best_option_vimt,accuracy,0.64
xcopa_vi,cause_effect_vimt,accuracy,0.74
xcopa_vi,i_am_hesitating_vimt,accuracy,0.71
xcopa_vi,plausible_alternatives_vimt,accuracy,0.68
xcopa_vi,median,accuracy,0.68
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.61
xcopa_zh,best_option_zhmt,accuracy,0.72
xcopa_zh,cause_effect_zhmt,accuracy,0.81
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.8
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.65
xcopa_zh,median,accuracy,0.72
xstory_cloze_ar,Answer Given options_armt,accuracy,0.7412309728656519
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.8305757776307081
xstory_cloze_ar,Generate Ending_armt,accuracy,0.5506287227001986
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.800132362673726
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.8087359364659166
xstory_cloze_ar,median,accuracy,0.800132362673726
xstory_cloze_es,Answer Given options_esmt,accuracy,0.8385175380542687
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.8894771674387822
xstory_cloze_es,Generate Ending_esmt,accuracy,0.6479152878888154
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.8497683653209794
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.8815354070152217
xstory_cloze_es,median,accuracy,0.8497683653209794
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.5724685638649901
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.7081403044341495
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.5016545334215751
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.598941098610192
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.7035076108537393
xstory_cloze_eu,median,accuracy,0.598941098610192
xstory_cloze_hi,Answer Given options_himt,accuracy,0.6982131039046989
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.8060886829913965
xstory_cloze_hi,Generate Ending_himt,accuracy,0.5651886168100596
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.7332892124420913
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.7948378557246857
xstory_cloze_hi,median,accuracy,0.7332892124420913
xstory_cloze_id,Answer Given options_idmt,accuracy,0.6823295830575777
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.7974851091992058
xstory_cloze_id,Generate Ending_idmt,accuracy,0.5473196558570483
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.6896095301125083
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.8332230311052283
xstory_cloze_id,median,accuracy,0.6896095301125083
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.7703507610853739
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.8510919920582396
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.5804103242885507
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.8120450033090668
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.8471211118464593
xstory_cloze_zh,median,accuracy,0.8120450033090668
xwinograd_fr,Replace_frmt,accuracy,0.5180722891566265
xwinograd_fr,True or False_frmt,accuracy,0.5060240963855421
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5662650602409639
xwinograd_fr,stand for_frmt,accuracy,0.4578313253012048
xwinograd_fr,underscore refer to_frmt,accuracy,0.5783132530120482
xwinograd_fr,median,accuracy,0.5180722891566265
xwinograd_pt,Replace_ptmt,accuracy,0.5285171102661597
xwinograd_pt,True or False_ptmt,accuracy,0.5399239543726235
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.5247148288973384
xwinograd_pt,stand for_ptmt,accuracy,0.5361216730038023
xwinograd_pt,underscore refer to_ptmt,accuracy,0.5171102661596958
xwinograd_pt,median,accuracy,0.5285171102661597
xwinograd_zh,Replace_zhmt,accuracy,0.6071428571428571
xwinograd_zh,True or False_zhmt,accuracy,0.5079365079365079
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.5238095238095238
xwinograd_zh,stand for_zhmt,accuracy,0.5059523809523809
xwinograd_zh,underscore refer to_zhmt,accuracy,0.5793650793650794
xwinograd_zh,median,accuracy,0.5238095238095238
multiple,average,multiple,0.6624417496929196