Spaces:
Sleeping
Sleeping
michal
committed on
Commit
•
3930665
1
Parent(s):
c8fccfb
Upload
Browse files
- app.py +0 -5
- leaderboards/ldek_en_accs.json +274 -0
- leaderboards/lek_en_accs.json +274 -0
- src/structures/ldek_structure.py +1 -1
app.py
CHANGED
@@ -37,11 +37,6 @@ main = gr.Blocks(css=custom_css)
|
|
37 |
with main:
|
38 |
with gr.Row():
|
39 |
with gr.Column():
|
40 |
-
# image = gr.Image("src/images/logo.png",
|
41 |
-
# show_download_button=False,
|
42 |
-
# show_share_button=False,
|
43 |
-
# show_fullscreen_button=False,
|
44 |
-
# container=False)
|
45 |
gr.HTML(polygon_svg)
|
46 |
with gr.Column():
|
47 |
gr.HTML(HEADER_TITLE)
|
|
|
37 |
with main:
|
38 |
with gr.Row():
|
39 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
40 |
gr.HTML(polygon_svg)
|
41 |
with gr.Column():
|
42 |
gr.HTML(HEADER_TITLE)
|
leaderboards/ldek_en_accs.json
ADDED
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"model_name": "model-meta-llama-Llama-3.1-8B-Instruct",
|
4 |
+
"2022 Spring": 49.494949494949495,
|
5 |
+
"2014 Spring": 42.857142857142854,
|
6 |
+
"2022 Fall": 45.91836734693878,
|
7 |
+
"2014 Fall": 46.1139896373057,
|
8 |
+
"2024 Spring": 47.20812182741117,
|
9 |
+
"2015 Spring": 54.3010752688172,
|
10 |
+
"2023 Spring": 46.96969696969697,
|
11 |
+
"2021 Fall": 48.98989898989899,
|
12 |
+
"2015 Fall": 47.89473684210526,
|
13 |
+
"2023 Fall": 49.743589743589745,
|
14 |
+
"2021 Spring": 46.93877551020408,
|
15 |
+
"2013 Spring": 47.42268041237113,
|
16 |
+
"2013 Fall": 46.596858638743456,
|
17 |
+
"overall_accuracy": 47.70569620253164
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"model_name": "model-BioMistral-BioMistral-7B",
|
21 |
+
"2022 Spring": 22.727272727272727,
|
22 |
+
"2014 Spring": 20.408163265306122,
|
23 |
+
"2022 Fall": 32.6530612244898,
|
24 |
+
"2014 Fall": 22.279792746113987,
|
25 |
+
"2024 Spring": 29.949238578680205,
|
26 |
+
"2015 Spring": 25.806451612903224,
|
27 |
+
"2023 Spring": 27.77777777777778,
|
28 |
+
"2021 Fall": 29.292929292929294,
|
29 |
+
"2015 Fall": 29.47368421052631,
|
30 |
+
"2023 Fall": 27.692307692307693,
|
31 |
+
"2021 Spring": 22.448979591836736,
|
32 |
+
"2013 Spring": 28.865979381443296,
|
33 |
+
"2013 Fall": 28.79581151832461,
|
34 |
+
"overall_accuracy": 26.78006329113924
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"model_name": "model-johnsnowlabs-JSL-MedLlama-3-8B-v2.0",
|
38 |
+
"2022 Spring": 38.88888888888889,
|
39 |
+
"2014 Spring": 34.69387755102041,
|
40 |
+
"2022 Fall": 40.30612244897959,
|
41 |
+
"2014 Fall": 37.82383419689119,
|
42 |
+
"2024 Spring": 41.62436548223351,
|
43 |
+
"2015 Spring": 40.32258064516129,
|
44 |
+
"2023 Spring": 36.868686868686865,
|
45 |
+
"2021 Fall": 44.94949494949495,
|
46 |
+
"2015 Fall": 35.26315789473684,
|
47 |
+
"2023 Fall": 37.43589743589744,
|
48 |
+
"2021 Spring": 40.816326530612244,
|
49 |
+
"2013 Spring": 41.23711340206185,
|
50 |
+
"2013 Fall": 42.40837696335078,
|
51 |
+
"overall_accuracy": 39.438291139240505
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"model_name": "model-Qwen-Qwen2.5-7B-Instruct",
|
55 |
+
"2022 Spring": 46.464646464646464,
|
56 |
+
"2014 Spring": 46.42857142857143,
|
57 |
+
"2022 Fall": 46.93877551020408,
|
58 |
+
"2014 Fall": 49.740932642487046,
|
59 |
+
"2024 Spring": 53.299492385786806,
|
60 |
+
"2015 Spring": 44.086021505376344,
|
61 |
+
"2023 Spring": 48.98989898989899,
|
62 |
+
"2021 Fall": 53.03030303030303,
|
63 |
+
"2015 Fall": 44.21052631578947,
|
64 |
+
"2023 Fall": 47.179487179487175,
|
65 |
+
"2021 Spring": 51.53061224489795,
|
66 |
+
"2013 Spring": 52.0618556701031,
|
67 |
+
"2013 Fall": 44.50261780104712,
|
68 |
+
"overall_accuracy": 48.37816455696203
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"model_name": "model-speakleash-Bielik-11B-v2.2-Instruct",
|
72 |
+
"2022 Spring": 39.8989898989899,
|
73 |
+
"2014 Spring": 36.734693877551024,
|
74 |
+
"2022 Fall": 38.265306122448976,
|
75 |
+
"2014 Fall": 44.04145077720207,
|
76 |
+
"2024 Spring": 50.25380710659898,
|
77 |
+
"2015 Spring": 39.247311827956985,
|
78 |
+
"2023 Spring": 38.88888888888889,
|
79 |
+
"2021 Fall": 42.92929292929293,
|
80 |
+
"2015 Fall": 42.10526315789473,
|
81 |
+
"2023 Fall": 43.58974358974359,
|
82 |
+
"2021 Spring": 48.97959183673469,
|
83 |
+
"2013 Spring": 49.48453608247423,
|
84 |
+
"2013 Fall": 42.93193717277487,
|
85 |
+
"overall_accuracy": 42.879746835443036
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"model_name": "model-gpt-4o-2024-08-06",
|
89 |
+
"2022 Spring": 74.74747474747475,
|
90 |
+
"2014 Spring": 72.44897959183673,
|
91 |
+
"2022 Fall": 71.42857142857143,
|
92 |
+
"2014 Fall": 74.09326424870466,
|
93 |
+
"2024 Spring": 73.09644670050761,
|
94 |
+
"2015 Spring": 66.66666666666666,
|
95 |
+
"2023 Spring": 71.21212121212122,
|
96 |
+
"2021 Fall": 74.74747474747475,
|
97 |
+
"2015 Fall": 71.05263157894737,
|
98 |
+
"2023 Fall": 70.76923076923077,
|
99 |
+
"2021 Spring": 77.55102040816327,
|
100 |
+
"2013 Spring": 72.68041237113401,
|
101 |
+
"2013 Fall": 71.72774869109948,
|
102 |
+
"overall_accuracy": 72.50791139240506
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"model_name": "model-OpenMeditron-Meditron3-8B",
|
106 |
+
"2022 Spring": 42.92929292929293,
|
107 |
+
"2014 Spring": 38.775510204081634,
|
108 |
+
"2022 Fall": 44.38775510204081,
|
109 |
+
"2014 Fall": 44.04145077720207,
|
110 |
+
"2024 Spring": 40.10152284263959,
|
111 |
+
"2015 Spring": 45.16129032258064,
|
112 |
+
"2023 Spring": 41.91919191919192,
|
113 |
+
"2021 Fall": 44.94949494949495,
|
114 |
+
"2015 Fall": 42.10526315789473,
|
115 |
+
"2023 Fall": 41.53846153846154,
|
116 |
+
"2021 Spring": 43.36734693877551,
|
117 |
+
"2013 Spring": 46.90721649484536,
|
118 |
+
"2013 Fall": 47.64397905759162,
|
119 |
+
"overall_accuracy": 43.35443037974683
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"model_name": "model-gpt-4o-mini-2024-07-18",
|
123 |
+
"2022 Spring": 54.54545454545454,
|
124 |
+
"2014 Spring": 55.61224489795919,
|
125 |
+
"2022 Fall": 56.12244897959183,
|
126 |
+
"2014 Fall": 56.994818652849744,
|
127 |
+
"2024 Spring": 56.34517766497462,
|
128 |
+
"2015 Spring": 56.98924731182796,
|
129 |
+
"2023 Spring": 56.060606060606055,
|
130 |
+
"2021 Fall": 55.55555555555556,
|
131 |
+
"2015 Fall": 54.21052631578947,
|
132 |
+
"2023 Fall": 58.97435897435898,
|
133 |
+
"2021 Spring": 62.755102040816325,
|
134 |
+
"2013 Spring": 58.24742268041238,
|
135 |
+
"2013 Fall": 57.06806282722513,
|
136 |
+
"overall_accuracy": 56.88291139240506
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"model_name": "model-OpenMeditron-Meditron3-70B",
|
140 |
+
"2022 Spring": 43.93939393939394,
|
141 |
+
"2014 Spring": 40.30612244897959,
|
142 |
+
"2022 Fall": 47.95918367346938,
|
143 |
+
"2014 Fall": 45.07772020725388,
|
144 |
+
"2024 Spring": 49.23857868020304,
|
145 |
+
"2015 Spring": 45.16129032258064,
|
146 |
+
"2023 Spring": 43.43434343434344,
|
147 |
+
"2021 Fall": 48.98989898989899,
|
148 |
+
"2015 Fall": 48.94736842105264,
|
149 |
+
"2023 Fall": 45.64102564102564,
|
150 |
+
"2021 Spring": 46.42857142857143,
|
151 |
+
"2013 Spring": 46.391752577319586,
|
152 |
+
"2013 Fall": 46.07329842931937,
|
153 |
+
"overall_accuracy": 45.96518987341772
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"model_name": "model-aaditya-Llama3-OpenBioLLM-70B",
|
157 |
+
"2022 Spring": 44.44444444444444,
|
158 |
+
"2014 Spring": 45.40816326530612,
|
159 |
+
"2022 Fall": 46.93877551020408,
|
160 |
+
"2014 Fall": 47.15025906735752,
|
161 |
+
"2024 Spring": 48.223350253807105,
|
162 |
+
"2015 Spring": 45.16129032258064,
|
163 |
+
"2023 Spring": 45.95959595959596,
|
164 |
+
"2021 Fall": 48.484848484848484,
|
165 |
+
"2015 Fall": 46.31578947368421,
|
166 |
+
"2023 Fall": 41.02564102564102,
|
167 |
+
"2021 Spring": 45.91836734693878,
|
168 |
+
"2013 Spring": 46.391752577319586,
|
169 |
+
"2013 Fall": 45.0261780104712,
|
170 |
+
"overall_accuracy": 45.88607594936709
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"model_name": "model-meta-llama-Llama-3.2-3B-Instruct",
|
174 |
+
"2022 Spring": 34.34343434343434,
|
175 |
+
"2014 Spring": 32.6530612244898,
|
176 |
+
"2022 Fall": 39.285714285714285,
|
177 |
+
"2014 Fall": 35.751295336787564,
|
178 |
+
"2024 Spring": 39.59390862944163,
|
179 |
+
"2015 Spring": 38.70967741935484,
|
180 |
+
"2023 Spring": 39.39393939393939,
|
181 |
+
"2021 Fall": 37.878787878787875,
|
182 |
+
"2015 Fall": 35.78947368421053,
|
183 |
+
"2023 Fall": 37.43589743589744,
|
184 |
+
"2021 Spring": 37.755102040816325,
|
185 |
+
"2013 Spring": 33.50515463917525,
|
186 |
+
"2013 Fall": 37.17277486910995,
|
187 |
+
"overall_accuracy": 36.86708860759494
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"model_name": "model-mistralai-Mistral-Large-Instruct-2407",
|
191 |
+
"2022 Spring": 61.61616161616161,
|
192 |
+
"2014 Spring": 61.73469387755102,
|
193 |
+
"2022 Fall": 60.204081632653065,
|
194 |
+
"2014 Fall": 63.212435233160626,
|
195 |
+
"2024 Spring": 64.9746192893401,
|
196 |
+
"2015 Spring": 62.365591397849464,
|
197 |
+
"2023 Spring": 60.60606060606061,
|
198 |
+
"2021 Fall": 62.62626262626263,
|
199 |
+
"2015 Fall": 61.578947368421055,
|
200 |
+
"2023 Fall": 59.48717948717949,
|
201 |
+
"2021 Spring": 64.28571428571429,
|
202 |
+
"2013 Spring": 63.91752577319587,
|
203 |
+
"2013 Fall": 61.25654450261781,
|
204 |
+
"overall_accuracy": 62.14398734177215
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"model_name": "model-meta-llama-Meta-Llama-3.1-70B-Instruct",
|
208 |
+
"2022 Spring": 65.15151515151516,
|
209 |
+
"2014 Spring": 62.755102040816325,
|
210 |
+
"2022 Fall": 62.244897959183675,
|
211 |
+
"2014 Fall": 64.24870466321244,
|
212 |
+
"2024 Spring": 59.89847715736041,
|
213 |
+
"2015 Spring": 68.27956989247312,
|
214 |
+
"2023 Spring": 64.64646464646465,
|
215 |
+
"2021 Fall": 62.121212121212125,
|
216 |
+
"2015 Fall": 63.1578947368421,
|
217 |
+
"2023 Fall": 56.92307692307692,
|
218 |
+
"2021 Spring": 67.85714285714286,
|
219 |
+
"2013 Spring": 61.34020618556701,
|
220 |
+
"2013 Fall": 62.82722513089005,
|
221 |
+
"overall_accuracy": 63.17246835443038
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"model_name": "model-ProbeMedicalYonseiMAILab-medllama3-v20",
|
225 |
+
"2022 Spring": 36.868686868686865,
|
226 |
+
"2014 Spring": 34.69387755102041,
|
227 |
+
"2022 Fall": 37.755102040816325,
|
228 |
+
"2014 Fall": 35.751295336787564,
|
229 |
+
"2024 Spring": 43.14720812182741,
|
230 |
+
"2015 Spring": 43.54838709677419,
|
231 |
+
"2023 Spring": 39.8989898989899,
|
232 |
+
"2021 Fall": 35.35353535353536,
|
233 |
+
"2015 Fall": 37.89473684210527,
|
234 |
+
"2023 Fall": 34.87179487179487,
|
235 |
+
"2021 Spring": 38.775510204081634,
|
236 |
+
"2013 Spring": 41.75257731958763,
|
237 |
+
"2013 Fall": 39.79057591623037,
|
238 |
+
"overall_accuracy": 38.449367088607595
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"model_name": "model-mistralai-Mistral-Small-Instruct-2409",
|
242 |
+
"2022 Spring": 37.878787878787875,
|
243 |
+
"2014 Spring": 45.91836734693878,
|
244 |
+
"2022 Fall": 45.91836734693878,
|
245 |
+
"2014 Fall": 43.005181347150256,
|
246 |
+
"2024 Spring": 45.68527918781726,
|
247 |
+
"2015 Spring": 44.623655913978496,
|
248 |
+
"2023 Spring": 40.909090909090914,
|
249 |
+
"2021 Fall": 40.4040404040404,
|
250 |
+
"2015 Fall": 44.21052631578947,
|
251 |
+
"2023 Fall": 42.05128205128205,
|
252 |
+
"2021 Spring": 46.42857142857143,
|
253 |
+
"2013 Spring": 44.329896907216494,
|
254 |
+
"2013 Fall": 43.97905759162304,
|
255 |
+
"overall_accuracy": 43.473101265822784
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"model_name": "model-Qwen-Qwen2.5-72B-Instruct",
|
259 |
+
"2022 Spring": 65.15151515151516,
|
260 |
+
"2014 Spring": 61.73469387755102,
|
261 |
+
"2022 Fall": 64.79591836734694,
|
262 |
+
"2014 Fall": 66.32124352331607,
|
263 |
+
"2024 Spring": 63.45177664974619,
|
264 |
+
"2015 Spring": 62.365591397849464,
|
265 |
+
"2023 Spring": 59.09090909090909,
|
266 |
+
"2021 Fall": 65.15151515151516,
|
267 |
+
"2015 Fall": 63.1578947368421,
|
268 |
+
"2023 Fall": 58.46153846153847,
|
269 |
+
"2021 Spring": 69.38775510204081,
|
270 |
+
"2013 Spring": 64.94845360824742,
|
271 |
+
"2013 Fall": 59.16230366492147,
|
272 |
+
"overall_accuracy": 63.33069620253164
|
273 |
+
}
|
274 |
+
]
|
leaderboards/lek_en_accs.json
ADDED
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"model_name": "model-meta-llama-Llama-3.1-8B-Instruct",
|
4 |
+
"2022 Spring": 63.775510204081634,
|
5 |
+
"2014 Spring": 60.51282051282051,
|
6 |
+
"2022 Fall": 63.775510204081634,
|
7 |
+
"2014 Fall": 64.58333333333334,
|
8 |
+
"2024 Spring": 74.61928934010153,
|
9 |
+
"2015 Spring": 61.111111111111114,
|
10 |
+
"2023 Spring": 63.4020618556701,
|
11 |
+
"2021 Fall": 61.855670103092784,
|
12 |
+
"2015 Fall": 62.63157894736842,
|
13 |
+
"2023 Fall": 70.61855670103093,
|
14 |
+
"2021 Spring": 70.0,
|
15 |
+
"2013 Spring": 61.979166666666664,
|
16 |
+
"2013 Fall": 61.78010471204188,
|
17 |
+
"overall_accuracy": 64.68960063266113
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"model_name": "model-BioMistral-BioMistral-7B",
|
21 |
+
"2022 Spring": 32.142857142857146,
|
22 |
+
"2014 Spring": 32.30769230769231,
|
23 |
+
"2022 Fall": 32.142857142857146,
|
24 |
+
"2014 Fall": 27.604166666666668,
|
25 |
+
"2024 Spring": 39.59390862944163,
|
26 |
+
"2015 Spring": 29.292929292929294,
|
27 |
+
"2023 Spring": 32.98969072164948,
|
28 |
+
"2021 Fall": 32.47422680412371,
|
29 |
+
"2015 Fall": 32.631578947368425,
|
30 |
+
"2023 Fall": 31.958762886597935,
|
31 |
+
"2021 Spring": 39.0,
|
32 |
+
"2013 Spring": 36.97916666666667,
|
33 |
+
"2013 Fall": 26.17801047120419,
|
34 |
+
"overall_accuracy": 32.74021352313167
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"model_name": "model-johnsnowlabs-JSL-MedLlama-3-8B-v2.0",
|
38 |
+
"2022 Spring": 54.59183673469388,
|
39 |
+
"2014 Spring": 49.23076923076923,
|
40 |
+
"2022 Fall": 51.02040816326531,
|
41 |
+
"2014 Fall": 54.166666666666664,
|
42 |
+
"2024 Spring": 62.944162436548226,
|
43 |
+
"2015 Spring": 51.515151515151516,
|
44 |
+
"2023 Spring": 53.09278350515464,
|
45 |
+
"2021 Fall": 56.18556701030928,
|
46 |
+
"2015 Fall": 53.1578947368421,
|
47 |
+
"2023 Fall": 56.18556701030928,
|
48 |
+
"2021 Spring": 55.00000000000001,
|
49 |
+
"2013 Spring": 48.95833333333333,
|
50 |
+
"2013 Fall": 57.06806282722513,
|
51 |
+
"overall_accuracy": 54.092526690391466
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"model_name": "model-Qwen-Qwen2.5-7B-Instruct",
|
55 |
+
"2022 Spring": 64.79591836734694,
|
56 |
+
"2014 Spring": 62.05128205128205,
|
57 |
+
"2022 Fall": 66.3265306122449,
|
58 |
+
"2014 Fall": 72.91666666666666,
|
59 |
+
"2024 Spring": 79.18781725888326,
|
60 |
+
"2015 Spring": 68.18181818181817,
|
61 |
+
"2023 Spring": 68.55670103092784,
|
62 |
+
"2021 Fall": 63.91752577319587,
|
63 |
+
"2015 Fall": 68.94736842105263,
|
64 |
+
"2023 Fall": 71.64948453608247,
|
65 |
+
"2021 Spring": 65.5,
|
66 |
+
"2013 Spring": 64.0625,
|
67 |
+
"2013 Fall": 64.3979057591623,
|
68 |
+
"overall_accuracy": 67.73428232502965
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"model_name": "model-speakleash-Bielik-11B-v2.2-Instruct",
|
72 |
+
"2022 Spring": 50.51020408163265,
|
73 |
+
"2014 Spring": 53.84615384615385,
|
74 |
+
"2022 Fall": 59.693877551020414,
|
75 |
+
"2014 Fall": 58.333333333333336,
|
76 |
+
"2024 Spring": 62.43654822335025,
|
77 |
+
"2015 Spring": 58.58585858585859,
|
78 |
+
"2023 Spring": 56.70103092783505,
|
79 |
+
"2021 Fall": 55.154639175257735,
|
80 |
+
"2015 Fall": 56.84210526315789,
|
81 |
+
"2023 Fall": 57.73195876288659,
|
82 |
+
"2021 Spring": 61.5,
|
83 |
+
"2013 Spring": 55.729166666666664,
|
84 |
+
"2013 Fall": 53.403141361256544,
|
85 |
+
"overall_accuracy": 56.97904310003954
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"model_name": "model-gpt-4o-2024-08-06",
|
89 |
+
"2022 Spring": 87.75510204081633,
|
90 |
+
"2014 Spring": 88.71794871794872,
|
91 |
+
"2022 Fall": 87.75510204081633,
|
92 |
+
"2014 Fall": 88.02083333333334,
|
93 |
+
"2024 Spring": 94.41624365482234,
|
94 |
+
"2015 Spring": 89.39393939393939,
|
95 |
+
"2023 Spring": 91.23711340206185,
|
96 |
+
"2021 Fall": 85.56701030927834,
|
97 |
+
"2015 Fall": 87.36842105263159,
|
98 |
+
"2023 Fall": 90.72164948453609,
|
99 |
+
"2021 Spring": 91.5,
|
100 |
+
"2013 Spring": 83.85416666666666,
|
101 |
+
"2013 Fall": 86.38743455497382,
|
102 |
+
"overall_accuracy": 88.69118228548834
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"model_name": "model-OpenMeditron-Meditron3-8B",
|
106 |
+
"2022 Spring": 56.63265306122449,
|
107 |
+
"2014 Spring": 57.43589743589743,
|
108 |
+
"2022 Fall": 59.183673469387756,
|
109 |
+
"2014 Fall": 64.58333333333334,
|
110 |
+
"2024 Spring": 65.98984771573603,
|
111 |
+
"2015 Spring": 58.58585858585859,
|
112 |
+
"2023 Spring": 64.43298969072166,
|
113 |
+
"2021 Fall": 59.27835051546392,
|
114 |
+
"2015 Fall": 59.473684210526315,
|
115 |
+
"2023 Fall": 61.34020618556701,
|
116 |
+
"2021 Spring": 61.5,
|
117 |
+
"2013 Spring": 57.291666666666664,
|
118 |
+
"2013 Fall": 58.63874345549738,
|
119 |
+
"overall_accuracy": 60.340055357848954
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"model_name": "model-mistralai-Mistral-Large-Instruct-2407",
|
123 |
+
"2022 Spring": 84.18367346938776,
|
124 |
+
"2014 Spring": 80.0,
|
125 |
+
"2022 Fall": 79.08163265306123,
|
126 |
+
"2014 Fall": 82.29166666666666,
|
127 |
+
"2024 Spring": 91.37055837563452,
|
128 |
+
"2015 Spring": 80.8080808080808,
|
129 |
+
"2023 Spring": 82.4742268041237,
|
130 |
+
"2021 Fall": 79.38144329896907,
|
131 |
+
"2015 Fall": 78.42105263157895,
|
132 |
+
"2023 Fall": 89.69072164948454,
|
133 |
+
"2021 Spring": 88.5,
|
134 |
+
"2013 Spring": 77.08333333333334,
|
135 |
+
"2013 Fall": 77.4869109947644,
|
136 |
+
"overall_accuracy": 82.40411229735074
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"model_name": "model-gpt-4o-mini-2024-07-18",
|
140 |
+
"2022 Spring": 70.40816326530613,
|
141 |
+
"2014 Spring": 75.8974358974359,
|
142 |
+
"2022 Fall": 75.0,
|
143 |
+
"2014 Fall": 78.64583333333334,
|
144 |
+
"2024 Spring": 85.27918781725889,
|
145 |
+
"2015 Spring": 72.72727272727273,
|
146 |
+
"2023 Spring": 76.80412371134021,
|
147 |
+
"2021 Fall": 72.68041237113401,
|
148 |
+
"2015 Fall": 72.63157894736842,
|
149 |
+
"2023 Fall": 84.5360824742268,
|
150 |
+
"2021 Spring": 77.0,
|
151 |
+
"2013 Spring": 69.79166666666666,
|
152 |
+
"2013 Fall": 75.39267015706807,
|
153 |
+
"overall_accuracy": 75.91933570581257
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"model_name": "model-OpenMeditron-Meditron3-70B",
|
157 |
+
"2022 Spring": 67.85714285714286,
|
158 |
+
"2014 Spring": 67.17948717948717,
|
159 |
+
"2022 Fall": 60.204081632653065,
|
160 |
+
"2014 Fall": 65.10416666666666,
|
161 |
+
"2024 Spring": 81.21827411167513,
|
162 |
+
"2015 Spring": 64.14141414141415,
|
163 |
+
"2023 Spring": 70.61855670103093,
|
164 |
+
"2021 Fall": 60.30927835051546,
|
165 |
+
"2015 Fall": 62.63157894736842,
|
166 |
+
"2023 Fall": 72.68041237113401,
|
167 |
+
"2021 Spring": 67.5,
|
168 |
+
"2013 Spring": 65.10416666666666,
|
169 |
+
"2013 Fall": 62.82722513089005,
|
170 |
+
"overall_accuracy": 66.74574930802689
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"model_name": "model-aaditya-Llama3-OpenBioLLM-70B",
|
174 |
+
"2022 Spring": 62.244897959183675,
|
175 |
+
"2014 Spring": 63.07692307692307,
|
176 |
+
"2022 Fall": 59.183673469387756,
|
177 |
+
"2014 Fall": 68.22916666666666,
|
178 |
+
"2024 Spring": 72.58883248730965,
|
179 |
+
"2015 Spring": 67.67676767676768,
|
180 |
+
"2023 Spring": 68.55670103092784,
|
181 |
+
"2021 Fall": 65.97938144329896,
|
182 |
+
"2015 Fall": 59.473684210526315,
|
183 |
+
"2023 Fall": 68.55670103092784,
|
184 |
+
"2021 Spring": 72.5,
|
185 |
+
"2013 Spring": 64.58333333333334,
|
186 |
+
"2013 Fall": 63.87434554973822,
|
187 |
+
"overall_accuracy": 65.91538157374455
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"model_name": "model-meta-llama-Llama-3.2-3B-Instruct",
|
191 |
+
"2022 Spring": 51.02040816326531,
|
192 |
+
"2014 Spring": 46.666666666666664,
|
193 |
+
"2022 Fall": 51.02040816326531,
|
194 |
+
"2014 Fall": 53.645833333333336,
|
195 |
+
"2024 Spring": 56.34517766497462,
|
196 |
+
"2015 Spring": 48.98989898989899,
|
197 |
+
"2023 Spring": 52.57731958762887,
|
198 |
+
"2021 Fall": 55.154639175257735,
|
199 |
+
"2015 Fall": 52.63157894736842,
|
200 |
+
"2023 Fall": 55.670103092783506,
|
201 |
+
"2021 Spring": 56.99999999999999,
|
202 |
+
"2013 Spring": 46.35416666666667,
|
203 |
+
"2013 Fall": 49.73821989528796,
|
204 |
+
"overall_accuracy": 52.07591933570581
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"model_name": "model-meta-llama-Meta-Llama-3.1-70B-Instruct",
|
208 |
+
"2022 Spring": 78.06122448979592,
|
209 |
+
"2014 Spring": 79.48717948717949,
|
210 |
+
"2022 Fall": 82.6530612244898,
|
211 |
+
"2014 Fall": 86.45833333333334,
|
212 |
+
"2024 Spring": 89.84771573604061,
|
213 |
+
"2015 Spring": 86.36363636363636,
|
214 |
+
"2023 Spring": 82.4742268041237,
|
215 |
+
"2021 Fall": 77.83505154639175,
|
216 |
+
"2015 Fall": 82.10526315789474,
|
217 |
+
"2023 Fall": 84.02061855670104,
|
218 |
+
"2021 Spring": 86.5,
|
219 |
+
"2013 Spring": 79.16666666666666,
|
220 |
+
"2013 Fall": 78.53403141361257,
|
221 |
+
"overall_accuracy": 82.60181890075128
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"model_name": "model-ProbeMedicalYonseiMAILab-medllama3-v20",
|
225 |
+
"2022 Spring": 49.48979591836735,
|
226 |
+
"2014 Spring": 49.743589743589745,
|
227 |
+
"2022 Fall": 57.14285714285714,
|
228 |
+
"2014 Fall": 52.604166666666664,
|
229 |
+
"2024 Spring": 56.852791878172596,
|
230 |
+
"2015 Spring": 45.45454545454545,
|
231 |
+
"2023 Spring": 55.154639175257735,
|
232 |
+
"2021 Fall": 52.0618556701031,
|
233 |
+
"2015 Fall": 47.89473684210526,
|
234 |
+
"2023 Fall": 57.73195876288659,
|
235 |
+
"2021 Spring": 53.5,
|
236 |
+
"2013 Spring": 48.95833333333333,
|
237 |
+
"2013 Fall": 52.879581151832454,
|
238 |
+
"overall_accuracy": 52.27362593910636
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"model_name": "model-mistralai-Mistral-Small-Instruct-2409",
|
242 |
+
"2022 Spring": 63.775510204081634,
|
243 |
+
"2014 Spring": 61.53846153846154,
|
244 |
+
"2022 Fall": 63.775510204081634,
|
245 |
+
"2014 Fall": 64.0625,
|
246 |
+
"2024 Spring": 73.09644670050761,
|
247 |
+
"2015 Spring": 58.58585858585859,
|
248 |
+
"2023 Spring": 67.0103092783505,
|
249 |
+
"2021 Fall": 63.4020618556701,
|
250 |
+
"2015 Fall": 58.94736842105262,
|
251 |
+
"2023 Fall": 68.55670103092784,
|
252 |
+
"2021 Spring": 65.0,
|
253 |
+
"2013 Spring": 58.333333333333336,
|
254 |
+
"2013 Fall": 61.78010471204188,
|
255 |
+
"overall_accuracy": 63.70106761565836
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"model_name": "model-Qwen-Qwen2.5-72B-Instruct",
|
259 |
+
"2022 Spring": 81.63265306122449,
|
260 |
+
"2014 Spring": 80.0,
|
261 |
+
"2022 Fall": 81.63265306122449,
|
262 |
+
"2014 Fall": 84.375,
|
263 |
+
"2024 Spring": 87.30964467005076,
|
264 |
+
"2015 Spring": 81.81818181818183,
|
265 |
+
"2023 Spring": 81.95876288659794,
|
266 |
+
"2021 Fall": 81.44329896907216,
|
267 |
+
"2015 Fall": 76.84210526315789,
|
268 |
+
"2023 Fall": 85.56701030927834,
|
269 |
+
"2021 Spring": 83.0,
|
270 |
+
"2013 Spring": 76.5625,
|
271 |
+
"2013 Fall": 82.72251308900523,
|
272 |
+
"overall_accuracy": 81.9296164491894
|
273 |
+
}
|
274 |
+
]
|
src/structures/ldek_structure.py
CHANGED
@@ -21,7 +21,7 @@ def generate_ORDER_LIST_LDEK_and_data_types(json_data):
|
|
21 |
data_types = ["markdown", "number"]
|
22 |
|
23 |
for key in json_data.keys():
|
24 |
-
if key not in ["model_name"
|
25 |
ORDER_LIST_LDEK.append(key)
|
26 |
data_types.append("number")
|
27 |
ORDER_LIST_LDEK[2:] = sorted(ORDER_LIST_LDEK[2:])
|
|
|
21 |
data_types = ["markdown", "number"]
|
22 |
|
23 |
for key in json_data.keys():
|
24 |
+
if key not in ["model_name"]:
|
25 |
ORDER_LIST_LDEK.append(key)
|
26 |
data_types.append("number")
|
27 |
ORDER_LIST_LDEK[2:] = sorted(ORDER_LIST_LDEK[2:])
|