ramiroluo commited on
Commit
2e24be9
1 Parent(s): 6c5153f

Delete human_leaderboard_scores.json

Browse files
Files changed (1) hide show
  1. human_leaderboard_scores.json +0 -186
human_leaderboard_scores.json DELETED
@@ -1,186 +0,0 @@
1
- {
2
- "claude2###human": {
3
- "Alpaca 7B": {
4
- "nq": {
5
- "abstain": 27.0,
6
- "entailment": 21.90920951194924,
7
- "neutral": 49.65358081796437,
8
- "contradiction": 28.437209670086382
9
- },
10
- "msmarco": {
11
- "abstain": 2.0,
12
- "entailment": 59.01711667017789,
13
- "neutral": 19.84909648174954,
14
- "contradiction": 21.13378684807256
15
- },
16
- "dolly": {
17
- "abstain": 13.0,
18
- "entailment": 76.98572340813719,
19
- "neutral": 12.884738186462325,
20
- "contradiction": 10.129538405400474
21
- },
22
- "avg": {
23
- "abstain": 14.000000000000002,
24
- "entailment": 54.57677389363435,
25
- "neutral": 25.933701849399526,
26
- "contradiction": 19.489524256966117
27
- }
28
- },
29
- "GPT-3.5-Turbo": {
30
- "nq": {
31
- "abstain": 1.0,
32
- "entailment": 58.8535769373559,
33
- "neutral": 22.130219091003404,
34
- "contradiction": 19.016203971640692
35
- },
36
- "msmarco": {
37
- "abstain": 20.0,
38
- "entailment": 77.3299637383689,
39
- "neutral": 6.634321975916804,
40
- "contradiction": 16.035714285714285
41
- },
42
- "dolly": {
43
- "abstain": 0.0,
44
- "entailment": 93.69698079698081,
45
- "neutral": 2.682251082251082,
46
- "contradiction": 3.6207681207681204
47
- },
48
- "avg": {
49
- "abstain": 7.000000000000001,
50
- "entailment": 76.64014084432196,
51
- "neutral": 10.716353248415016,
52
- "contradiction": 12.643505907263023
53
- }
54
- },
55
- "Claude 2": {
56
- "nq": {
57
- "abstain": 21.0,
58
- "entailment": 36.24974533202381,
59
- "neutral": 60.93093966511689,
60
- "contradiction": 2.819315002859307
61
- },
62
- "msmarco": {
63
- "abstain": 6.0,
64
- "entailment": 88.95130578641216,
65
- "neutral": 6.450995812697939,
66
- "contradiction": 4.5976984008898905
67
- },
68
- "dolly": {
69
- "abstain": 8.0,
70
- "entailment": 90.86864524364525,
71
- "neutral": 6.670880448054362,
72
- "contradiction": 2.4604743083003955
73
- },
74
- "avg": {
75
- "abstain": 11.666666666666668,
76
- "entailment": 73.90591693421882,
77
- "neutral": 22.768523928901285,
78
- "contradiction": 3.3255591368798907
79
- }
80
- },
81
- "InstructGPT": {
82
- "nq": {
83
- "abstain": 5.0,
84
- "entailment": 20.438596491228072,
85
- "neutral": 25.30701754385965,
86
- "contradiction": 54.254385964912274
87
- },
88
- "msmarco": {
89
- "abstain": 13.0,
90
- "entailment": 65.80729296246537,
91
- "neutral": 13.403575989782887,
92
- "contradiction": 20.78913104775174
93
- },
94
- "dolly": {
95
- "abstain": 1.0,
96
- "entailment": 81.58865825532492,
97
- "neutral": 5.608465608465608,
98
- "contradiction": 12.802876136209468
99
- },
100
- "avg": {
101
- "abstain": 6.333333333333334,
102
- "entailment": 56.029104347609696,
103
- "neutral": 14.68155114952268,
104
- "contradiction": 29.289344502867635
105
- }
106
- },
107
- "Falcon 40B Instruct": {
108
- "nq": {
109
- "abstain": 27.0,
110
- "entailment": 37.96803652968036,
111
- "neutral": 17.123287671232877,
112
- "contradiction": 44.90867579908676
113
- },
114
- "msmarco": {
115
- "abstain": 17.0,
116
- "entailment": 61.28370625358577,
117
- "neutral": 17.053930005737232,
118
- "contradiction": 21.662363740676994
119
- },
120
- "dolly": {
121
- "abstain": 3.0,
122
- "entailment": 78.37657474255414,
123
- "neutral": 13.978295473140834,
124
- "contradiction": 7.645129784305042
125
- },
126
- "avg": {
127
- "abstain": 15.66666666666667,
128
- "entailment": 61.10965231518591,
129
- "neutral": 15.894746448106131,
130
- "contradiction": 22.99560123670796
131
- }
132
- },
133
- "GPT-4": {
134
- "nq": {
135
- "abstain": 0.0,
136
- "entailment": 71.44246031746032,
137
- "neutral": 15.671428571428569,
138
- "contradiction": 12.88611111111111
139
- },
140
- "msmarco": {
141
- "abstain": 13.0,
142
- "entailment": 91.79110724749671,
143
- "neutral": 6.772111143307898,
144
- "contradiction": 1.4367816091954022
145
- },
146
- "dolly": {
147
- "abstain": 8.0,
148
- "entailment": 97.77950310559007,
149
- "neutral": 1.224120082815735,
150
- "contradiction": 0.9963768115942028
151
- },
152
- "avg": {
153
- "abstain": 7.000000000000001,
154
- "entailment": 86.47235357703416,
155
- "neutral": 8.132385570715742,
156
- "contradiction": 5.395260852250098
157
- }
158
- },
159
- "LLaMA 2 70B Chat": {
160
- "nq": {
161
- "abstain": 6.0,
162
- "entailment": 23.619620247386862,
163
- "neutral": 62.5351563421684,
164
- "contradiction": 13.84522341044474
165
- },
166
- "msmarco": {
167
- "abstain": 4.0,
168
- "entailment": 84.80608457890267,
169
- "neutral": 11.166780978062148,
170
- "contradiction": 4.0271344430351785
171
- },
172
- "dolly": {
173
- "abstain": 0.0,
174
- "entailment": 92.75111832611834,
175
- "neutral": 4.0687229437229435,
176
- "contradiction": 3.1801587301587304
177
- },
178
- "avg": {
179
- "abstain": 3.3333333333333335,
180
- "entailment": 67.71289743255467,
181
- "neutral": 25.369613670448583,
182
- "contradiction": 6.917488896996744
183
- }
184
- }
185
- }
186
- }