Delete human_leaderboard_scores.json
Browse files- human_leaderboard_scores.json +0 -186
human_leaderboard_scores.json
DELETED
@@ -1,186 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"claude2###human": {
|
3 |
-
"Alpaca 7B": {
|
4 |
-
"nq": {
|
5 |
-
"abstain": 27.0,
|
6 |
-
"entailment": 21.90920951194924,
|
7 |
-
"neutral": 49.65358081796437,
|
8 |
-
"contradiction": 28.437209670086382
|
9 |
-
},
|
10 |
-
"msmarco": {
|
11 |
-
"abstain": 2.0,
|
12 |
-
"entailment": 59.01711667017789,
|
13 |
-
"neutral": 19.84909648174954,
|
14 |
-
"contradiction": 21.13378684807256
|
15 |
-
},
|
16 |
-
"dolly": {
|
17 |
-
"abstain": 13.0,
|
18 |
-
"entailment": 76.98572340813719,
|
19 |
-
"neutral": 12.884738186462325,
|
20 |
-
"contradiction": 10.129538405400474
|
21 |
-
},
|
22 |
-
"avg": {
|
23 |
-
"abstain": 14.000000000000002,
|
24 |
-
"entailment": 54.57677389363435,
|
25 |
-
"neutral": 25.933701849399526,
|
26 |
-
"contradiction": 19.489524256966117
|
27 |
-
}
|
28 |
-
},
|
29 |
-
"GPT-3.5-Turbo": {
|
30 |
-
"nq": {
|
31 |
-
"abstain": 1.0,
|
32 |
-
"entailment": 58.8535769373559,
|
33 |
-
"neutral": 22.130219091003404,
|
34 |
-
"contradiction": 19.016203971640692
|
35 |
-
},
|
36 |
-
"msmarco": {
|
37 |
-
"abstain": 20.0,
|
38 |
-
"entailment": 77.3299637383689,
|
39 |
-
"neutral": 6.634321975916804,
|
40 |
-
"contradiction": 16.035714285714285
|
41 |
-
},
|
42 |
-
"dolly": {
|
43 |
-
"abstain": 0.0,
|
44 |
-
"entailment": 93.69698079698081,
|
45 |
-
"neutral": 2.682251082251082,
|
46 |
-
"contradiction": 3.6207681207681204
|
47 |
-
},
|
48 |
-
"avg": {
|
49 |
-
"abstain": 7.000000000000001,
|
50 |
-
"entailment": 76.64014084432196,
|
51 |
-
"neutral": 10.716353248415016,
|
52 |
-
"contradiction": 12.643505907263023
|
53 |
-
}
|
54 |
-
},
|
55 |
-
"Claude 2": {
|
56 |
-
"nq": {
|
57 |
-
"abstain": 21.0,
|
58 |
-
"entailment": 36.24974533202381,
|
59 |
-
"neutral": 60.93093966511689,
|
60 |
-
"contradiction": 2.819315002859307
|
61 |
-
},
|
62 |
-
"msmarco": {
|
63 |
-
"abstain": 6.0,
|
64 |
-
"entailment": 88.95130578641216,
|
65 |
-
"neutral": 6.450995812697939,
|
66 |
-
"contradiction": 4.5976984008898905
|
67 |
-
},
|
68 |
-
"dolly": {
|
69 |
-
"abstain": 8.0,
|
70 |
-
"entailment": 90.86864524364525,
|
71 |
-
"neutral": 6.670880448054362,
|
72 |
-
"contradiction": 2.4604743083003955
|
73 |
-
},
|
74 |
-
"avg": {
|
75 |
-
"abstain": 11.666666666666668,
|
76 |
-
"entailment": 73.90591693421882,
|
77 |
-
"neutral": 22.768523928901285,
|
78 |
-
"contradiction": 3.3255591368798907
|
79 |
-
}
|
80 |
-
},
|
81 |
-
"InstructGPT": {
|
82 |
-
"nq": {
|
83 |
-
"abstain": 5.0,
|
84 |
-
"entailment": 20.438596491228072,
|
85 |
-
"neutral": 25.30701754385965,
|
86 |
-
"contradiction": 54.254385964912274
|
87 |
-
},
|
88 |
-
"msmarco": {
|
89 |
-
"abstain": 13.0,
|
90 |
-
"entailment": 65.80729296246537,
|
91 |
-
"neutral": 13.403575989782887,
|
92 |
-
"contradiction": 20.78913104775174
|
93 |
-
},
|
94 |
-
"dolly": {
|
95 |
-
"abstain": 1.0,
|
96 |
-
"entailment": 81.58865825532492,
|
97 |
-
"neutral": 5.608465608465608,
|
98 |
-
"contradiction": 12.802876136209468
|
99 |
-
},
|
100 |
-
"avg": {
|
101 |
-
"abstain": 6.333333333333334,
|
102 |
-
"entailment": 56.029104347609696,
|
103 |
-
"neutral": 14.68155114952268,
|
104 |
-
"contradiction": 29.289344502867635
|
105 |
-
}
|
106 |
-
},
|
107 |
-
"Falcon 40B Instruct": {
|
108 |
-
"nq": {
|
109 |
-
"abstain": 27.0,
|
110 |
-
"entailment": 37.96803652968036,
|
111 |
-
"neutral": 17.123287671232877,
|
112 |
-
"contradiction": 44.90867579908676
|
113 |
-
},
|
114 |
-
"msmarco": {
|
115 |
-
"abstain": 17.0,
|
116 |
-
"entailment": 61.28370625358577,
|
117 |
-
"neutral": 17.053930005737232,
|
118 |
-
"contradiction": 21.662363740676994
|
119 |
-
},
|
120 |
-
"dolly": {
|
121 |
-
"abstain": 3.0,
|
122 |
-
"entailment": 78.37657474255414,
|
123 |
-
"neutral": 13.978295473140834,
|
124 |
-
"contradiction": 7.645129784305042
|
125 |
-
},
|
126 |
-
"avg": {
|
127 |
-
"abstain": 15.66666666666667,
|
128 |
-
"entailment": 61.10965231518591,
|
129 |
-
"neutral": 15.894746448106131,
|
130 |
-
"contradiction": 22.99560123670796
|
131 |
-
}
|
132 |
-
},
|
133 |
-
"GPT-4": {
|
134 |
-
"nq": {
|
135 |
-
"abstain": 0.0,
|
136 |
-
"entailment": 71.44246031746032,
|
137 |
-
"neutral": 15.671428571428569,
|
138 |
-
"contradiction": 12.88611111111111
|
139 |
-
},
|
140 |
-
"msmarco": {
|
141 |
-
"abstain": 13.0,
|
142 |
-
"entailment": 91.79110724749671,
|
143 |
-
"neutral": 6.772111143307898,
|
144 |
-
"contradiction": 1.4367816091954022
|
145 |
-
},
|
146 |
-
"dolly": {
|
147 |
-
"abstain": 8.0,
|
148 |
-
"entailment": 97.77950310559007,
|
149 |
-
"neutral": 1.224120082815735,
|
150 |
-
"contradiction": 0.9963768115942028
|
151 |
-
},
|
152 |
-
"avg": {
|
153 |
-
"abstain": 7.000000000000001,
|
154 |
-
"entailment": 86.47235357703416,
|
155 |
-
"neutral": 8.132385570715742,
|
156 |
-
"contradiction": 5.395260852250098
|
157 |
-
}
|
158 |
-
},
|
159 |
-
"LLaMA 2 70B Chat": {
|
160 |
-
"nq": {
|
161 |
-
"abstain": 6.0,
|
162 |
-
"entailment": 23.619620247386862,
|
163 |
-
"neutral": 62.5351563421684,
|
164 |
-
"contradiction": 13.84522341044474
|
165 |
-
},
|
166 |
-
"msmarco": {
|
167 |
-
"abstain": 4.0,
|
168 |
-
"entailment": 84.80608457890267,
|
169 |
-
"neutral": 11.166780978062148,
|
170 |
-
"contradiction": 4.0271344430351785
|
171 |
-
},
|
172 |
-
"dolly": {
|
173 |
-
"abstain": 0.0,
|
174 |
-
"entailment": 92.75111832611834,
|
175 |
-
"neutral": 4.0687229437229435,
|
176 |
-
"contradiction": 3.1801587301587304
|
177 |
-
},
|
178 |
-
"avg": {
|
179 |
-
"abstain": 3.3333333333333335,
|
180 |
-
"entailment": 67.71289743255467,
|
181 |
-
"neutral": 25.369613670448583,
|
182 |
-
"contradiction": 6.917488896996744
|
183 |
-
}
|
184 |
-
}
|
185 |
-
}
|
186 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|