michal committed on
Commit
54f0e5a
•
1 Parent(s): dfd3fd2
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Gradio entry point for the LLMs medical benchmark leaderboard.
#
# Builds one summary tab plus one tab per exam (PES, LDEK, LEK). Each exam tab
# has a checkbox group that controls which columns of its table are displayed.
import gradio as gr
from src.styles import custom_css

from src.structures.all_structure import (
    ALL_ACCS,
    STYLED
    # ALL_DATA_TYPES,
    # ALL_ORDER_LIST
)
from src.structures.pes_structure import (PES_ACCS,
    ORDER_LIST,
    DATA_TYPES,
    COLUMN_HEADERS,
    filter_data,
    filter_columns,
)
# Star import supplies HEADER_TITLE, LEADERBOARD_DESC, PES_DESC, LDEK_DESC,
# LEK_DESC, CITATION_LABEL and CITATION_CONTENT.
from src.abouts import *
from src.structures.lek_structure import (
    LEK_ACCS,
    ORDER_LIST_LEK,
    COLUMN_HEADERS_LEK,
    DATA_TYPES_LEK,
    filter_columns_lek
)
from src.structures.ldek_structure import (
    LDEK_ACCS,
    ORDER_LIST_LDEK,
    COLUMN_HEADERS_LDEK,
    DATA_TYPES_LDEK,
    filter_columns_ldek
)

main = gr.Blocks(css=custom_css)
with main:
    # Header row: logo in the first column, title HTML in the second.
    with gr.Row():
        with gr.Column():
            image = gr.Image(
                "src/images/logo.png",
                show_download_button=False,
                show_share_button=False,
                show_fullscreen_button=False,
                container=False,
            )
        with gr.Column():
            gr.HTML(HEADER_TITLE)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Summary tab: pre-styled table, no column selector.
        with gr.TabItem("🥇 Medical leaderboard"):
            gr.Markdown(LEADERBOARD_DESC)
            data_component_all = gr.components.Dataframe(
                value=STYLED,
                type="pandas",
                datatype=["markdown"],
                interactive=False,
                visible=True,
                show_label=True,
                column_widths=[400, 150, 150, 150, 150],
            )

        with gr.TabItem("🏥 PES"):
            gr.Markdown(PES_DESC)
            # Checkbox to toggle column visibility
            columns_selector = gr.CheckboxGroup(
                choices=ORDER_LIST,
                label="Select columns to display",
                value=ORDER_LIST,
            )
            # Dataframe component to display the leaderboard data
            data_component = gr.components.Dataframe(
                value=PES_ACCS,
                headers=COLUMN_HEADERS,
                type="pandas",
                datatype=DATA_TYPES,
                interactive=False,
                visible=True,
                # column_widths=[400] + [250] * (len(COLUMN_HEADERS) - 1)
            )

            def update_dataframe(selected_columns):
                """Return the PES table limited to the ticked columns."""
                return filter_columns(selected_columns)

            columns_selector.change(
                update_dataframe,
                inputs=columns_selector,
                outputs=data_component,
            )

        with gr.TabItem("🦷 LDEK"):
            gr.Markdown(LDEK_DESC)
            columns_selector_ldek = gr.CheckboxGroup(
                choices=ORDER_LIST_LDEK,
                label="Select columns to display",
                value=ORDER_LIST_LDEK,
            )
            data_component_ldek = gr.components.Dataframe(
                value=LDEK_ACCS,
                headers=COLUMN_HEADERS_LDEK,
                type="pandas",
                datatype=DATA_TYPES_LDEK,
                interactive=False,
                visible=True,
                column_widths=[400] + [155] * 23,
            )

            def update_dataframe_ldek(selected_columns):
                """Return the LDEK table limited to the ticked columns."""
                return filter_columns_ldek(selected_columns)

            columns_selector_ldek.change(
                update_dataframe_ldek,
                inputs=columns_selector_ldek,
                outputs=data_component_ldek,
            )

        with gr.TabItem("🩺 LEK"):
            gr.Markdown(LEK_DESC)
            columns_selector_lek = gr.CheckboxGroup(
                choices=ORDER_LIST_LEK,
                label="Select columns to display",
                value=ORDER_LIST_LEK,
            )
            # Own name for the LEK table; it previously reused the LDEK
            # variable, which hid the LDEK component after this point.
            data_component_lek = gr.components.Dataframe(
                value=LEK_ACCS,
                headers=COLUMN_HEADERS_LEK,
                type="pandas",
                datatype=DATA_TYPES_LEK,
                interactive=False,
                visible=True,
                column_widths=[400] + [155] * 23,
            )

            def update_dataframe_lek(selected_columns):
                """Return the LEK table limited to the ticked columns."""
                return filter_columns_lek(selected_columns)

            columns_selector_lek.change(
                update_dataframe_lek,
                inputs=columns_selector_lek,
                outputs=data_component_lek,
            )

    with gr.Column():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                label=CITATION_LABEL,
                value=CITATION_CONTENT,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

if __name__ == "__main__":
    main.launch()
leaderboards/pes_accuracy.json ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_name": "model-epfl-llm-meditron-7b",
4
+ "stomatologia dziecięca": 15.833333333333332,
5
+ "neurologia": 14.285714285714285,
6
+ "medycyna pracy": 17.796610169491526,
7
+ "medycyna rodzinna": 20.689655172413794,
8
+ "patomorfologia": 25.0,
9
+ "endokrynologia ginekologiczna i rozrodczość": 19.491525423728813,
10
+ "radiologia i diagnostyka obrazowa": 18.333333333333332,
11
+ "chirurgia szczękowo-twarzowa": 26.05042016806723,
12
+ "medycyna sportowa": 25.0,
13
+ "neonatologia": 25.423728813559322,
14
+ "ortopedia": 18.26086956521739,
15
+ "medycyna paliatywna": 20.0,
16
+ "chirurgia stomatologiczna": 16.94915254237288,
17
+ "neurologia dziecięca": 26.05042016806723,
18
+ "dermatologia i wenerologia": 15.966386554621847,
19
+ "diabetologia": 19.65811965811966,
20
+ "hematologia": 21.008403361344538,
21
+ "onkologia kliniczna": 21.367521367521366,
22
+ "protetyka stomatologiczna": 20.0,
23
+ "endokrynologia i diabetologia dziecięca": 21.008403361344538,
24
+ "neurochirurgia": 23.728813559322035,
25
+ "alergologia": 20.833333333333336,
26
+ "geriatria": 16.101694915254235,
27
+ "choroby wewnętrzne": 17.647058823529413,
28
+ "ginekologia onkologiczna": 13.445378151260504,
29
+ "otolaryngologia": 27.350427350427353,
30
+ "ortodoncja": 25.0,
31
+ "anestezjologia": 24.369747899159663,
32
+ "endokrynologia": 17.5,
33
+ "periodontologia": 11.76470588235294,
34
+ "psychiatria dzieci i mlodzieży": 19.166666666666668,
35
+ "psychiatria": 18.333333333333332,
36
+ "rehabilitacja medyczna": 16.94915254237288,
37
+ "perinatologia": 8.333333333333332,
38
+ "balneologia i medycyna fizykalna": 23.333333333333332,
39
+ "pediatria": 10.784313725490197,
40
+ "nefrologia": 22.5,
41
+ "choroby pΕ‚uc": 18.103448275862068,
42
+ "choroby zakaźne": 20.0,
43
+ "okulistyka": 17.094017094017094,
44
+ "kardiochirurgia": 14.166666666666666,
45
+ "chirurgia naczyniowa": 19.166666666666668,
46
+ "reumatologia": 18.96551724137931,
47
+ "chirurgia onkologiczna": 12.711864406779661,
48
+ "transplantologia kliniczna": 18.803418803418804,
49
+ "radioterapia onkologiczna": 26.666666666666668,
50
+ "gastroenterologia dziecięca": 14.166666666666666,
51
+ "kardiologia": 17.094017094017094,
52
+ "angiologia": 14.166666666666666,
53
+ "gastroenterologia": 17.543859649122805,
54
+ "chirurgia dziecięca": 16.80672268907563,
55
+ "hipertensjologia": 10.92436974789916,
56
+ "stomatologia zachowawcza": 26.05042016806723,
57
+ "chirurgia ogólna": 19.166666666666668,
58
+ "choroby pΕ‚uc dzieci": 24.166666666666668,
59
+ "położnictwo i ginekologia": 21.666666666666668,
60
+ "medycyna ratunkowa": 19.626168224299064,
61
+ "nefrologia dzieciΔ™ca": 20.168067226890756,
62
+ "endokrynologia ginekologiczna i rozrodczoΕ›Δ‡": 24.786324786324787,
63
+ "anestezjologia i intensywna terapia": 23.47826086956522,
64
+ "stomatologia dzieciΔ™ca": 23.52941176470588,
65
+ "ortopedia i traumatologia narzΔ…du ruchu": 21.428571428571427,
66
+ "poΕ‚oΕΌnictwo i ginekologia": 15.65217391304348,
67
+ "chirurgia szczΔ™kowo-twarzowa": 18.421052631578945,
68
+ "neurologia dzieciΔ™ca": 23.52941176470588,
69
+ "choroby zakaΕΊne": 18.333333333333332,
70
+ "gastroenterologia dzieciΔ™ca": 14.166666666666666,
71
+ "stomatologia zachowawcza z endodoncjΔ…": 19.491525423728813,
72
+ "endokrynologia i diabetologia dzieciΔ™ca": 26.27118644067797,
73
+ "psychiatria dzieci i mΕ‚odzieΕΌy": 16.94915254237288,
74
+ "chirurgia ogΓ³lna": 20.33898305084746,
75
+ "choroby wewnΔ™trzne": 18.556701030927837,
76
+ "diagnostyka laboratoryjna": 30.508474576271187,
77
+ "chirurgia dzieciΔ™ca": 22.88135593220339,
78
+ "otorynolaryngologia": 18.803418803418804,
79
+ "urologia dzieciΔ™ca": 25.663716814159294,
80
+ "zdrowie publiczne": 20.168067226890756,
81
+ "genetyka kliniczna": 19.491525423728813,
82
+ "medycyna nuklearna": 23.076923076923077,
83
+ "kardiologia dzieciΔ™ca": 15.789473684210526,
84
+ "chirurgia plastyczna": 20.0,
85
+ "chirurgia klatki piersiowej": 24.166666666666668,
86
+ "audiologia i foniatria": 18.421052631578945,
87
+ "intensywna terapia": 20.833333333333336,
88
+ "overall_accuracy": 19.74083822636161
89
+ },
90
+ {
91
+ "model_name": "model-speakleash-Bielik-11B-v2.2-Instruct",
92
+ "stomatologia dziecięca": 40.833333333333336,
93
+ "neurologia": 48.739495798319325,
94
+ "medycyna pracy": 42.3728813559322,
95
+ "medycyna rodzinna": 43.103448275862064,
96
+ "patomorfologia": 50.83333333333333,
97
+ "endokrynologia ginekologiczna i rozrodczość": 32.20338983050847,
98
+ "radiologia i diagnostyka obrazowa": 46.666666666666664,
99
+ "chirurgia szczękowo-twarzowa": 25.210084033613445,
100
+ "medycyna sportowa": 57.49999999999999,
101
+ "neonatologia": 46.61016949152542,
102
+ "ortopedia": 41.73913043478261,
103
+ "medycyna paliatywna": 45.83333333333333,
104
+ "chirurgia stomatologiczna": 36.440677966101696,
105
+ "neurologia dziecięca": 45.378151260504204,
106
+ "dermatologia i wenerologia": 37.81512605042017,
107
+ "diabetologia": 31.62393162393162,
108
+ "hematologia": 33.61344537815126,
109
+ "onkologia kliniczna": 44.44444444444444,
110
+ "protetyka stomatologiczna": 36.666666666666664,
111
+ "endokrynologia i diabetologia dziecięca": 44.537815126050425,
112
+ "neurochirurgia": 27.966101694915253,
113
+ "alergologia": 36.666666666666664,
114
+ "geriatria": 45.76271186440678,
115
+ "choroby wewnętrzne": 46.21848739495798,
116
+ "ginekologia onkologiczna": 42.016806722689076,
117
+ "otolaryngologia": 33.33333333333333,
118
+ "ortodoncja": 31.25,
119
+ "anestezjologia": 51.26050420168067,
120
+ "endokrynologia": 33.33333333333333,
121
+ "periodontologia": 36.97478991596639,
122
+ "psychiatria dzieci i mlodzieży": 55.833333333333336,
123
+ "psychiatria": 36.666666666666664,
124
+ "rehabilitacja medyczna": 53.38983050847458,
125
+ "perinatologia": 34.166666666666664,
126
+ "balneologia i medycyna fizykalna": 41.66666666666667,
127
+ "pediatria": 48.03921568627451,
128
+ "nefrologia": 33.33333333333333,
129
+ "choroby pΕ‚uc": 51.724137931034484,
130
+ "choroby zakaźne": 45.0,
131
+ "okulistyka": 45.2991452991453,
132
+ "kardiochirurgia": 37.5,
133
+ "chirurgia naczyniowa": 45.83333333333333,
134
+ "reumatologia": 36.206896551724135,
135
+ "chirurgia onkologiczna": 44.06779661016949,
136
+ "transplantologia kliniczna": 33.33333333333333,
137
+ "radioterapia onkologiczna": 41.66666666666667,
138
+ "gastroenterologia dziecięca": 39.166666666666664,
139
+ "kardiologia": 40.17094017094017,
140
+ "angiologia": 46.666666666666664,
141
+ "gastroenterologia": 45.614035087719294,
142
+ "chirurgia dziecięca": 42.857142857142854,
143
+ "hipertensjologia": 51.26050420168067,
144
+ "stomatologia zachowawcza": 27.73109243697479,
145
+ "chirurgia ogólna": 29.166666666666668,
146
+ "choroby pΕ‚uc dzieci": 48.333333333333336,
147
+ "położnictwo i ginekologia": 40.833333333333336,
148
+ "medycyna ratunkowa": 50.467289719626166,
149
+ "nefrologia dzieciΔ™ca": 50.42016806722689,
150
+ "endokrynologia ginekologiczna i rozrodczoΕ›Δ‡": 41.02564102564102,
151
+ "anestezjologia i intensywna terapia": 39.130434782608695,
152
+ "stomatologia dzieciΔ™ca": 32.773109243697476,
153
+ "ortopedia i traumatologia narzΔ…du ruchu": 40.17857142857143,
154
+ "poΕ‚oΕΌnictwo i ginekologia": 42.608695652173914,
155
+ "chirurgia szczΔ™kowo-twarzowa": 27.192982456140353,
156
+ "neurologia dzieciΔ™ca": 42.016806722689076,
157
+ "choroby zakaΕΊne": 40.0,
158
+ "gastroenterologia dzieciΔ™ca": 42.5,
159
+ "stomatologia zachowawcza z endodoncjΔ…": 36.440677966101696,
160
+ "endokrynologia i diabetologia dzieciΔ™ca": 33.89830508474576,
161
+ "psychiatria dzieci i mΕ‚odzieΕΌy": 59.32203389830508,
162
+ "chirurgia ogΓ³lna": 46.61016949152542,
163
+ "choroby wewnΔ™trzne": 38.144329896907216,
164
+ "diagnostyka laboratoryjna": 50.0,
165
+ "chirurgia dzieciΔ™ca": 29.66101694915254,
166
+ "otorynolaryngologia": 41.88034188034188,
167
+ "urologia dzieciΔ™ca": 35.39823008849557,
168
+ "zdrowie publiczne": 71.42857142857143,
169
+ "genetyka kliniczna": 38.13559322033898,
170
+ "medycyna nuklearna": 45.2991452991453,
171
+ "kardiologia dzieciΔ™ca": 42.98245614035088,
172
+ "chirurgia plastyczna": 45.83333333333333,
173
+ "chirurgia klatki piersiowej": 39.166666666666664,
174
+ "audiologia i foniatria": 48.24561403508772,
175
+ "intensywna terapia": 44.166666666666664,
176
+ "overall_accuracy": 41.759465478841875
177
+ },
178
+ {
179
+ "model_name": "model-meta-llama-Meta-Llama-3.1-8B-Instruct",
180
+ "stomatologia dziecięca": 27.500000000000004,
181
+ "neurologia": 50.42016806722689,
182
+ "medycyna pracy": 38.983050847457626,
183
+ "medycyna rodzinna": 35.3448275862069,
184
+ "patomorfologia": 46.666666666666664,
185
+ "endokrynologia ginekologiczna i rozrodczość": 27.11864406779661,
186
+ "radiologia i diagnostyka obrazowa": 40.833333333333336,
187
+ "chirurgia szczękowo-twarzowa": 26.89075630252101,
188
+ "medycyna sportowa": 45.0,
189
+ "neonatologia": 45.76271186440678,
190
+ "ortopedia": 40.0,
191
+ "medycyna paliatywna": 55.833333333333336,
192
+ "chirurgia stomatologiczna": 33.05084745762712,
193
+ "neurologia dziecięca": 38.655462184873954,
194
+ "dermatologia i wenerologia": 32.773109243697476,
195
+ "diabetologia": 41.88034188034188,
196
+ "hematologia": 41.17647058823529,
197
+ "onkologia kliniczna": 36.75213675213676,
198
+ "protetyka stomatologiczna": 38.333333333333336,
199
+ "endokrynologia i diabetologia dziecięca": 41.17647058823529,
200
+ "neurochirurgia": 27.966101694915253,
201
+ "alergologia": 37.5,
202
+ "geriatria": 44.91525423728814,
203
+ "choroby wewnętrzne": 47.05882352941176,
204
+ "ginekologia onkologiczna": 40.33613445378151,
205
+ "otolaryngologia": 26.495726495726498,
206
+ "ortodoncja": 27.67857142857143,
207
+ "anestezjologia": 48.739495798319325,
208
+ "endokrynologia": 30.833333333333336,
209
+ "periodontologia": 41.17647058823529,
210
+ "psychiatria dzieci i mlodzieży": 47.5,
211
+ "psychiatria": 32.5,
212
+ "rehabilitacja medyczna": 45.76271186440678,
213
+ "perinatologia": 36.666666666666664,
214
+ "balneologia i medycyna fizykalna": 38.333333333333336,
215
+ "pediatria": 39.21568627450981,
216
+ "nefrologia": 37.5,
217
+ "choroby pΕ‚uc": 50.86206896551724,
218
+ "choroby zakaźne": 44.166666666666664,
219
+ "okulistyka": 43.58974358974359,
220
+ "kardiochirurgia": 43.333333333333336,
221
+ "chirurgia naczyniowa": 35.833333333333336,
222
+ "reumatologia": 36.206896551724135,
223
+ "chirurgia onkologiczna": 37.28813559322034,
224
+ "transplantologia kliniczna": 32.47863247863248,
225
+ "radioterapia onkologiczna": 35.833333333333336,
226
+ "gastroenterologia dziecięca": 40.833333333333336,
227
+ "kardiologia": 37.60683760683761,
228
+ "angiologia": 40.833333333333336,
229
+ "gastroenterologia": 39.473684210526315,
230
+ "chirurgia dziecięca": 31.092436974789916,
231
+ "hipertensjologia": 46.21848739495798,
232
+ "stomatologia zachowawcza": 37.81512605042017,
233
+ "chirurgia ogólna": 25.0,
234
+ "choroby pΕ‚uc dzieci": 37.5,
235
+ "położnictwo i ginekologia": 36.666666666666664,
236
+ "medycyna ratunkowa": 50.467289719626166,
237
+ "nefrologia dzieciΔ™ca": 43.69747899159664,
238
+ "endokrynologia ginekologiczna i rozrodczoΕ›Δ‡": 42.73504273504273,
239
+ "anestezjologia i intensywna terapia": 36.52173913043478,
240
+ "stomatologia dzieciΔ™ca": 31.092436974789916,
241
+ "ortopedia i traumatologia narzΔ…du ruchu": 38.392857142857146,
242
+ "poΕ‚oΕΌnictwo i ginekologia": 39.130434782608695,
243
+ "chirurgia szczΔ™kowo-twarzowa": 26.31578947368421,
244
+ "neurologia dzieciΔ™ca": 41.17647058823529,
245
+ "choroby zakaΕΊne": 30.0,
246
+ "gastroenterologia dzieciΔ™ca": 38.333333333333336,
247
+ "stomatologia zachowawcza z endodoncjΔ…": 27.11864406779661,
248
+ "endokrynologia i diabetologia dzieciΔ™ca": 38.983050847457626,
249
+ "psychiatria dzieci i mΕ‚odzieΕΌy": 51.69491525423729,
250
+ "chirurgia ogΓ³lna": 41.52542372881356,
251
+ "choroby wewnΔ™trzne": 43.29896907216495,
252
+ "diagnostyka laboratoryjna": 48.30508474576271,
253
+ "chirurgia dzieciΔ™ca": 33.05084745762712,
254
+ "otorynolaryngologia": 36.75213675213676,
255
+ "urologia dzieciΔ™ca": 35.39823008849557,
256
+ "zdrowie publiczne": 61.34453781512605,
257
+ "genetyka kliniczna": 46.61016949152542,
258
+ "medycyna nuklearna": 43.58974358974359,
259
+ "kardiologia dzieciΔ™ca": 42.98245614035088,
260
+ "chirurgia plastyczna": 45.0,
261
+ "chirurgia klatki piersiowej": 35.833333333333336,
262
+ "audiologia i foniatria": 47.368421052631575,
263
+ "intensywna terapia": 45.0,
264
+ "overall_accuracy": 39.380441384895725
265
+ }
266
+ ]
leaderboards/r_ldek_report_scores.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_name": "model-epfl-llm-meditron-7b",
4
+ "2008_jesień": 22.727272727272727,
5
+ "2012_wiosna": 23.711340206185564,
6
+ "2010_jesień": 19.597989949748744,
7
+ "2009_wiosna": 16.161616161616163,
8
+ "2010_wiosna": 16.0,
9
+ "2011_wiosna": 19.696969696969695,
10
+ "2012_jesień": 23.46938775510204,
11
+ "2011_jesień": 24.747474747474747,
12
+ "2009_jesień": 18.5,
13
+ "2022_wiosna": 22.727272727272727,
14
+ "2014_wiosna": 17.346938775510203,
15
+ "2022_jesien": 22.75132275132275,
16
+ "2014_jesien": 16.580310880829018,
17
+ "2024_wiosna": 22.916666666666664,
18
+ "2015_wiosna": 19.35483870967742,
19
+ "2023_wiosna": 21.025641025641026,
20
+ "2021_jesien": 24.873096446700508,
21
+ "2015_jesien": 18.947368421052634,
22
+ "2023_jesien": 22.164948453608247,
23
+ "2021_wiosna": 24.489795918367346,
24
+ "2013_wiosna": 18.04123711340206,
25
+ "2013_jesien": 17.894736842105264,
26
+ "overall_accuracy": 20.624563038918666
27
+ },
28
+ {
29
+ "model_name": "model-speakleash-Bielik-11B-v2.2-Instruct",
30
+ "2008_jesień": 45.45454545454545,
31
+ "2012_wiosna": 46.391752577319586,
32
+ "2010_jesień": 52.26130653266332,
33
+ "2009_wiosna": 51.515151515151516,
34
+ "2010_wiosna": 48.5,
35
+ "2011_wiosna": 41.91919191919192,
36
+ "2012_jesień": 57.6530612244898,
37
+ "2011_jesień": 45.95959595959596,
38
+ "2009_jesień": 49.5,
39
+ "2022_wiosna": 40.4040404040404,
40
+ "2014_wiosna": 34.69387755102041,
41
+ "2022_jesien": 43.91534391534391,
42
+ "2014_jesien": 44.04145077720207,
43
+ "2024_wiosna": 51.041666666666664,
44
+ "2015_wiosna": 43.54838709677419,
45
+ "2023_wiosna": 42.05128205128205,
46
+ "2021_jesien": 44.16243654822335,
47
+ "2015_jesien": 40.0,
48
+ "2023_jesien": 38.144329896907216,
49
+ "2021_wiosna": 50.0,
50
+ "2013_wiosna": 47.93814432989691,
51
+ "2013_jesien": 43.15789473684211,
52
+ "overall_accuracy": 45.583780004660916
53
+ },
54
+ {
55
+ "model_name": "model-meta-llama-Meta-Llama-3.1-8B-Instruct",
56
+ "2008_jesień": 41.41414141414141,
57
+ "2012_wiosna": 41.23711340206185,
58
+ "2010_jesień": 43.21608040201005,
59
+ "2009_wiosna": 47.97979797979798,
60
+ "2010_wiosna": 46.5,
61
+ "2011_wiosna": 33.33333333333333,
62
+ "2012_jesień": 48.46938775510204,
63
+ "2011_jesień": 41.41414141414141,
64
+ "2009_jesień": 54.50000000000001,
65
+ "2022_wiosna": 40.4040404040404,
66
+ "2014_wiosna": 33.6734693877551,
67
+ "2022_jesien": 41.7989417989418,
68
+ "2014_jesien": 42.487046632124354,
69
+ "2024_wiosna": 39.0625,
70
+ "2015_wiosna": 42.473118279569896,
71
+ "2023_wiosna": 45.64102564102564,
72
+ "2021_jesien": 40.609137055837564,
73
+ "2015_jesien": 41.578947368421055,
74
+ "2023_jesien": 42.2680412371134,
75
+ "2021_wiosna": 41.3265306122449,
76
+ "2013_wiosna": 41.23711340206185,
77
+ "2013_jesien": 45.78947368421053,
78
+ "overall_accuracy": 42.57748776508972
79
+ },
80
+ {
81
+ "model_name": "model-meta-llama-Meta-Llama-3.1-70B-Instruct",
82
+ "2008_jesień": 62.121212121212125,
83
+ "2012_wiosna": 61.34020618556701,
84
+ "2010_jesień": 62.8140703517588,
85
+ "2009_wiosna": 69.1919191919192,
86
+ "2010_wiosna": 73.0,
87
+ "2011_wiosna": 59.59595959595959,
88
+ "2012_jesień": 69.89795918367348,
89
+ "2011_jesień": 65.65656565656566,
90
+ "2009_jesień": 68.0,
91
+ "2022_wiosna": 68.68686868686868,
92
+ "2014_wiosna": 54.59183673469388,
93
+ "2022_jesien": 64.02116402116403,
94
+ "2014_jesien": 60.62176165803109,
95
+ "2024_wiosna": 61.979166666666664,
96
+ "2015_wiosna": 60.215053763440864,
97
+ "2023_wiosna": 63.07692307692307,
98
+ "2021_jesien": 63.95939086294417,
99
+ "2015_jesien": 58.42105263157895,
100
+ "2023_jesien": 61.34020618556701,
101
+ "2021_wiosna": 68.36734693877551,
102
+ "2013_wiosna": 62.371134020618555,
103
+ "2013_jesien": 57.36842105263158,
104
+ "overall_accuracy": 63.528315078070385
105
+ },
106
+ {
107
+ "model_name": "model-mistralai-Mistral-Large-Instruct-2407",
108
+ "2008_jesień": 61.61616161616161,
109
+ "2012_wiosna": 54.123711340206185,
110
+ "2010_jesień": 62.311557788944725,
111
+ "2009_wiosna": 67.67676767676768,
112
+ "2010_wiosna": 69.5,
113
+ "2011_wiosna": 56.56565656565656,
114
+ "2012_jesień": 66.3265306122449,
115
+ "2011_jesień": 58.080808080808076,
116
+ "2009_jesień": 63.0,
117
+ "2022_wiosna": 54.54545454545454,
118
+ "2014_wiosna": 54.08163265306123,
119
+ "2022_jesien": 57.14285714285714,
120
+ "2014_jesien": 56.476683937823836,
121
+ "2024_wiosna": 56.770833333333336,
122
+ "2015_wiosna": 58.602150537634415,
123
+ "2023_wiosna": 57.948717948717956,
124
+ "2021_jesien": 55.83756345177665,
125
+ "2015_jesien": 60.0,
126
+ "2023_jesien": 48.45360824742268,
127
+ "2021_wiosna": 57.14285714285714,
128
+ "2013_wiosna": 61.34020618556701,
129
+ "2013_jesien": 55.78947368421052,
130
+ "overall_accuracy": 58.820787695175945
131
+ }
132
+ ]
leaderboards/r_lek_report_scores.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_name": "model-epfl-llm-meditron-7b",
4
+ "2011_wiosna": 19.696969696969695,
5
+ "2010_wiosna": 26.5,
6
+ "2008_jesień": 23.85786802030457,
7
+ "2011_jesień": 23.232323232323232,
8
+ "2009_jesień": 22.33502538071066,
9
+ "2010_jesień": 19.696969696969695,
10
+ "2012_wiosna": 24.5,
11
+ "2009_wiosna": 24.120603015075375,
12
+ "2012_jesień": 19.897959183673468,
13
+ "2022_wiosna": 19.3717277486911,
14
+ "2014_wiosna": 20.0,
15
+ "2022_jesien": 28.654970760233915,
16
+ "2014_jesien": 23.958333333333336,
17
+ "2024_wiosna": 23.809523809523807,
18
+ "2015_wiosna": 18.781725888324875,
19
+ "2023_wiosna": 20.46783625730994,
20
+ "2021_jesien": 19.170984455958546,
21
+ "2015_jesien": 22.22222222222222,
22
+ "2023_jesien": 23.4375,
23
+ "2021_wiosna": 15.656565656565657,
24
+ "2013_wiosna": 16.145833333333336,
25
+ "2013_jesien": 21.465968586387437,
26
+ "overall_accuracy": 21.64338148235851
27
+ },
28
+ {
29
+ "model_name": "model-speakleash-Bielik-11B-v2.2-Instruct",
30
+ "2011_wiosna": 59.59595959595959,
31
+ "2010_wiosna": 62.0,
32
+ "2008_jesień": 59.89847715736041,
33
+ "2011_jesień": 60.1010101010101,
34
+ "2009_jesień": 58.37563451776649,
35
+ "2010_jesień": 63.13131313131313,
36
+ "2012_wiosna": 61.0,
37
+ "2009_wiosna": 63.31658291457286,
38
+ "2012_jesień": 63.26530612244898,
39
+ "2022_wiosna": 58.63874345549738,
40
+ "2014_wiosna": 57.948717948717956,
41
+ "2022_jesien": 69.00584795321637,
42
+ "2014_jesien": 63.541666666666664,
43
+ "2024_wiosna": 66.07142857142857,
44
+ "2015_wiosna": 59.390862944162436,
45
+ "2023_wiosna": 66.08187134502924,
46
+ "2021_jesien": 62.17616580310881,
47
+ "2015_jesien": 61.904761904761905,
48
+ "2023_jesien": 63.541666666666664,
49
+ "2021_wiosna": 65.15151515151516,
50
+ "2013_wiosna": 61.458333333333336,
51
+ "2013_jesien": 61.78010471204188,
52
+ "overall_accuracy": 62.064882784750175
53
+ },
54
+ {
55
+ "model_name": "model-meta-llama-Meta-Llama-3.1-8B-Instruct",
56
+ "2011_wiosna": 52.02020202020202,
57
+ "2010_wiosna": 53.5,
58
+ "2008_jesień": 50.25380710659898,
59
+ "2011_jesień": 57.07070707070707,
60
+ "2009_jesień": 45.68527918781726,
61
+ "2010_jesień": 43.93939393939394,
62
+ "2012_wiosna": 54.0,
63
+ "2009_wiosna": 45.22613065326633,
64
+ "2012_jesień": 53.06122448979592,
65
+ "2022_wiosna": 45.54973821989529,
66
+ "2014_wiosna": 52.307692307692314,
67
+ "2022_jesien": 53.80116959064327,
68
+ "2014_jesien": 54.166666666666664,
69
+ "2024_wiosna": 57.14285714285714,
70
+ "2015_wiosna": 54.82233502538071,
71
+ "2023_wiosna": 53.216374269005854,
72
+ "2021_jesien": 51.813471502590666,
73
+ "2015_jesien": 44.44444444444444,
74
+ "2023_jesien": 50.0,
75
+ "2021_wiosna": 47.97979797979798,
76
+ "2013_wiosna": 54.6875,
77
+ "2013_jesien": 51.832460732984295,
78
+ "overall_accuracy": 51.148472649775044
79
+ },
80
+ {
81
+ "model_name": "model-meta-llama-Meta-Llama-3.1-70B-Instruct",
82
+ "2011_wiosna": 77.27272727272727,
83
+ "2010_wiosna": 82.0,
84
+ "2008_jesień": 79.69543147208121,
85
+ "2011_jesień": 83.33333333333334,
86
+ "2009_jesień": 81.21827411167513,
87
+ "2010_jesień": 80.8080808080808,
88
+ "2012_wiosna": 80.0,
89
+ "2009_wiosna": 72.8643216080402,
90
+ "2012_jesień": 81.12244897959184,
91
+ "2022_wiosna": 79.05759162303664,
92
+ "2014_wiosna": 76.41025641025641,
93
+ "2022_jesien": 82.45614035087719,
94
+ "2014_jesien": 87.5,
95
+ "2024_wiosna": 86.90476190476191,
96
+ "2015_wiosna": 77.66497461928934,
97
+ "2023_wiosna": 80.7017543859649,
98
+ "2021_jesien": 81.34715025906736,
99
+ "2015_jesien": 78.83597883597884,
100
+ "2023_jesien": 83.33333333333334,
101
+ "2021_wiosna": 84.84848484848484,
102
+ "2013_wiosna": 77.60416666666666,
103
+ "2013_jesien": 78.01047120418848,
104
+ "overall_accuracy": 80.53516457494672
105
+ },
106
+ {
107
+ "model_name": "model-mistralai-Mistral-Large-Instruct-2407",
108
+ "2011_wiosna": 76.26262626262627,
109
+ "2010_wiosna": 76.5,
110
+ "2008_jesień": 76.6497461928934,
111
+ "2011_jesień": 79.29292929292929,
112
+ "2009_jesień": 71.57360406091371,
113
+ "2010_jesień": 74.24242424242425,
114
+ "2012_wiosna": 77.5,
115
+ "2009_wiosna": 72.8643216080402,
116
+ "2012_jesień": 76.53061224489795,
117
+ "2022_wiosna": 73.29842931937172,
118
+ "2014_wiosna": 80.0,
119
+ "2022_jesien": 77.19298245614034,
120
+ "2014_jesien": 76.5625,
121
+ "2024_wiosna": 87.5,
122
+ "2015_wiosna": 75.1269035532995,
123
+ "2023_wiosna": 80.11695906432749,
124
+ "2021_jesien": 76.16580310880829,
125
+ "2015_jesien": 71.42857142857143,
126
+ "2023_jesien": 76.5625,
127
+ "2021_wiosna": 79.29292929292929,
128
+ "2013_wiosna": 71.875,
129
+ "2013_jesien": 75.91623036649214,
130
+ "overall_accuracy": 76.39119109637699
131
+ }
132
+ ]
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ APScheduler
2
+ black
3
+ datasets
4
+ gradio
5
+ gradio[oauth]
6
+ gradio_leaderboard==0.0.9
7
+ gradio_client
8
+ huggingface-hub>=0.18.0
9
+ matplotlib
10
+ numpy
11
+ pandas
12
+ python-dateutil
13
+ tqdm
14
+ transformers
15
+ tokenizers>=0.15.0
16
+ sentencepiece
src/abouts.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Static texts rendered by the leaderboard UI: page header, per-tab
# descriptions and the citation box.

# HTML shown at the top of the page, next to the logo.
HEADER_TITLE = """
<div style="display: flex; flex-wrap: wrap; justify-content: space-around; align-items: center;">
<div>
<h1 align="center" id="space-title">Adam Mickiewicz University's Center for Artificial Intelligence</h1>
<h2 align="center" id="space-subtitle">LLMs medical benchmark</h2>
</div>
</div>
"""

# Markdown descriptions, one per tab.
# TODO: the four descriptions below are still placeholder text.
LEADERBOARD_DESC = """## Test
"""

PES_DESC = """## ...
"""

LDEK_DESC = """## ...
"""

LEK_DESC = """## ...
"""

# TODO: placeholder citation label and body.
CITATION_LABEL = "TEST_LABEL"

CITATION_CONTENT = "CONTENT"
src/envs.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Environment-derived settings: Hub token, repository ids and local cache
# directories, plus a shared HfApi client built from the token.
import os

from huggingface_hub import HfApi

# Info to change for your repository
# ----------------------------------
TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org; None when HF_TOKEN is unset

OWNER = "MCiesiolka"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
# ----------------------------------

REPO_ID = f"{OWNER}/test_leaderboard"
QUEUE_REPO = f"{OWNER}/requests"
RESULTS_REPO = f"{OWNER}/results"

# If you setup a cache later, just change HF_HOME
# Falls back to the current working directory when HF_HOME is not set.
CACHE_PATH = os.getenv("HF_HOME", ".")

# Local caches
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

API = HfApi(token=TOKEN)
src/images/logo.png ADDED
src/structures/all_structure.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+
4
+ abs_path = Path(__file__).parent.parent.parent
5
+
6
def replace_models_names(model_name):
    """Convert a results-file model id into a Markdown link to Hugging Face.

    Ids look like ``model-<org>-<model>``. The ``model-`` prefix is dropped and
    the hyphen separating organisation from model becomes ``/``, e.g.
    ``model-epfl-llm-meditron-7b`` -> ``[epfl-llm/meditron-7b](https://huggingface.co/epfl-llm/meditron-7b)``.

    Args:
        model_name: Raw model id string from a leaderboard JSON file.

    Returns:
        A Markdown link ``[org/model](https://huggingface.co/org/model)``.
    """
    # Organisations whose own name contains a hyphen; for these the first
    # hyphen of the id is NOT the org/model separator.
    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

    # Strip only a leading "model-"; the same text inside a model name stays.
    prefix = 'model-'
    repo = model_name[len(prefix):] if model_name.startswith(prefix) else model_name

    for org in hyphenated_orgs:
        if repo.startswith(org):
            # Keep the org verbatim and split at the first hyphen after it, so
            # underscores and repeated org names in the model part survive.
            repo = org + repo[len(org):].replace('-', '/', 1)
            break
    else:
        # Plain organisation name: the first hyphen is the separator.
        repo = repo.replace('-', '/', 1)

    return f"[{repo}](https://huggingface.co/{repo})"
18
+
19
def load_json_data(file_path):
    """Load a leaderboard JSON file into a display-ready DataFrame.

    Args:
        file_path: Path of the JSON file, passed straight to ``pd.read_json``.

    Returns:
        A DataFrame in which any column holding dict values is converted to
        strings and every numeric column is rounded to two decimals.
    """
    frame = pd.read_json(file_path)

    # Nested JSON objects arrive as dict cells; stringify those columns so the
    # table only holds scalar values.
    for column in frame.columns:
        if frame[column].map(lambda cell: isinstance(cell, dict)).any():
            frame[column] = frame[column].apply(str)

    # Round all numeric columns in one step.
    numeric_columns = frame.select_dtypes(include='number').columns
    if not numeric_columns.empty:
        frame[numeric_columns] = frame[numeric_columns].round(2)

    return frame
30
+
31
# Result files whose "overall_accuracy" values feed the summary table.
file_paths = [
    str(abs_path / "leaderboards/pes_accuracy.json"),
    str(abs_path / "leaderboards/r_ldek_report_scores.json"),
    str(abs_path / "leaderboards/r_lek_report_scores.json"),
]

# Maps each model's Markdown link to its row:
# {"model_name": link, "overall_acc_from_<file stem>": accuracy, ...}
model_data = {}

for file_path in file_paths:
    # Use the file name (without ".json") as key. Path.stem works with any
    # path separator, unlike splitting the string on "/".
    file_key = Path(file_path).stem

    for _, row in load_json_data(file_path).iterrows():
        model_name = replace_models_names(row["model_name"])
        entry = model_data.setdefault(model_name, {"model_name": model_name})
        entry[f"overall_acc_from_{file_key}"] = row["overall_accuracy"]

ALL_ACCS = pd.DataFrame(list(model_data.values()))
ALL_ACCS = ALL_ACCS.rename(columns={'overall_acc_from_pes_accuracy': 'PES',
                                    'overall_acc_from_r_ldek_report_scores': 'LDEK',
                                    'overall_acc_from_r_lek_report_scores': 'LEK'})

score_columns = ['PES', 'LDEK', 'LEK']
# DataFrame.mean skips NaN by default, so a model absent from one file is
# averaged over the benchmarks it does have.
ALL_ACCS['Average'] = ALL_ACCS[score_columns].mean(axis=1).round(2)

# Final column order: model link, average, then the per-benchmark scores.
columns = ['model_name', 'Average'] + score_columns
ALL_ACCS = ALL_ACCS[columns]

# Highlight the best value in every score column, named explicitly rather
# than taken by position.
STYLED = ALL_ACCS.style.highlight_max(
    color='#ff7070',
    subset=['Average'] + score_columns).format(precision=2)
src/structures/ldek_structure.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+
4
+ abs_path = Path(__file__).parent.parent.parent
5
+
6
def replace_models_names(model_name):
    """Turn a result-file model name into a Markdown link to its Hugging Face repo.

    The input is expected to look like ``model-<org>-<repo>``. The leading
    ``model-`` is dropped and the dash separating organisation from
    repository becomes ``/``. Organisations whose own name contains a dash
    (``meta-llama``, ``epfl-llm``, ``01-ai``) are kept intact.

    Args:
        model_name: Raw model name string.

    Returns:
        ``"[<org>/<repo>](https://huggingface.co/<org>/<repo>)"``. A name
        with no separating dash is linked unchanged.
    """
    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

    # Strip only a leading marker; 'model-' inside a repo name is real text.
    name = model_name
    if name.startswith('model-'):
        name = name[len('model-'):]

    # Find the org/repo separator: the first dash, or for a known hyphenated
    # organisation the first dash after its name. Underscores in the name
    # are never touched.
    for org in hyphenated_orgs:
        if name.startswith(org):
            split_at = name.find('-', len(org))
            break
    else:
        split_at = name.find('-')

    if split_at == -1:
        repo_id = name
    else:
        repo_id = f"{name[:split_at]}/{name[split_at + 1:]}"
    return f"[{repo_id}](https://huggingface.co/{repo_id})"
18
+
19
def generate_ORDER_LIST_LDEK_and_data_types(json_data):
    """Derive the LDEK column order and matching datatypes from a sample row.

    Args:
        json_data: Mapping of column name to value for one leaderboard row.
            Only the keys are used.

    Returns:
        A ``(order_list, data_types)`` tuple of equal-length lists.
        ``order_list`` starts with ``"model_name"`` and
        ``"overall_accuracy"``, followed by the remaining keys sorted.
        ``data_types`` is ``"markdown"`` for the first column and
        ``"number"`` for every other one.
    """
    leading = ["model_name", "overall_accuracy"]
    remaining = sorted(key for key in json_data if key not in leading)

    order_list = leading + remaining
    data_types = ["markdown", "number"] + ["number"] * len(remaining)
    return order_list, data_types
29
+
30
def filter_columns_ldek(column_choices):
    """Return the LDEK table restricted to the chosen columns.

    Columns come back in ``ORDER_LIST_LDEK`` order, whatever the order of
    ``column_choices``.

    Args:
        column_choices: Collection of column names to keep.

    Returns:
        ``LDEK_ACCS`` indexed by the kept column names.
    """
    kept = []
    for name in ORDER_LIST_LDEK:
        if name in column_choices:
            kept.append(name)
    return LDEK_ACCS[kept]
33
+
34
def load_json_data(file_path, ORDER_LIST_LDEK):
    """Load the LDEK leaderboard JSON into a display-ready DataFrame.

    Args:
        file_path: Location of the JSON file; handed to ``pandas.read_json``.
        ORDER_LIST_LDEK: Column names in the desired display order. Columns
            of the file that are not in this list are dropped.

    Returns:
        A DataFrame whose dict-valued columns are stringified, whose
        ``model_name`` values have been passed through
        ``replace_models_names``, whose numeric columns are rounded to two
        decimals, and whose columns follow ``ORDER_LIST_LDEK``. If a
        ``"Score"`` column is present, rows are sorted by it in descending
        order.
    """
    frame = pd.read_json(file_path)

    # A column with any dict value is stringified as a whole.
    for column in frame.columns:
        if frame[column].map(lambda value: isinstance(value, dict)).any():
            frame[column] = frame[column].apply(str)

    frame["model_name"] = frame["model_name"].apply(replace_models_names)

    for column in frame.select_dtypes(include='number').columns:
        frame[column] = frame[column].round(2)

    ordered_columns = [col for col in ORDER_LIST_LDEK if col in frame.columns]
    frame = frame[ordered_columns]

    if "Score" in frame.columns:
        frame = frame.sort_values(by="Score", ascending=False)

    return frame
53
+
54
+
55
file_path = str(abs_path / "leaderboards/r_ldek_report_scores.json")

# Parse from the handle that was just opened, so the explicit UTF-8 encoding
# applies and the path is not opened a second time for the sample row.
with open(file_path, 'r', encoding='utf-8') as file:
    sample_data = pd.read_json(file).iloc[0].to_dict()  # Load the first row as a dict

ORDER_LIST_LDEK, DATA_TYPES_LDEK = generate_ORDER_LIST_LDEK_and_data_types(sample_data)
LDEK_ACCS = load_json_data(file_path, ORDER_LIST_LDEK)
COLUMN_HEADERS_LDEK = ORDER_LIST_LDEK

# NOTE(review): this prints at import time and looks like leftover debug
# output; kept so the module's import-time behavior is unchanged.
print(ORDER_LIST_LDEK)
src/structures/lek_structure.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+
4
+ abs_path = Path(__file__).parent.parent.parent
5
+
6
def replace_models_names(model_name):
    """Turn a result-file model name into a Markdown link to its Hugging Face repo.

    The input is expected to look like ``model-<org>-<repo>``. The leading
    ``model-`` is dropped and the dash separating organisation from
    repository becomes ``/``. Organisations whose own name contains a dash
    (``meta-llama``, ``epfl-llm``, ``01-ai``) are kept intact.

    Args:
        model_name: Raw model name string.

    Returns:
        ``"[<org>/<repo>](https://huggingface.co/<org>/<repo>)"``. A name
        with no separating dash is linked unchanged.
    """
    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

    # Strip only a leading marker; 'model-' inside a repo name is real text.
    name = model_name
    if name.startswith('model-'):
        name = name[len('model-'):]

    # Find the org/repo separator: the first dash, or for a known hyphenated
    # organisation the first dash after its name. Underscores in the name
    # are never touched.
    for org in hyphenated_orgs:
        if name.startswith(org):
            split_at = name.find('-', len(org))
            break
    else:
        split_at = name.find('-')

    if split_at == -1:
        repo_id = name
    else:
        repo_id = f"{name[:split_at]}/{name[split_at + 1:]}"
    return f"[{repo_id}](https://huggingface.co/{repo_id})"
18
+
19
def generate_ORDER_LIST_LEK_and_data_types(json_data):
    """Derive the LEK column order and matching datatypes from a sample row.

    Args:
        json_data: Mapping of column name to value for one leaderboard row.
            Only the keys are used.

    Returns:
        A ``(order_list, data_types)`` tuple of equal-length lists.
        ``order_list`` starts with ``"model_name"`` and
        ``"overall_accuracy"``, followed by the remaining keys sorted.
        ``data_types`` is ``"markdown"`` for the first column and
        ``"number"`` for every other one.
    """
    leading = ["model_name", "overall_accuracy"]
    remaining = sorted(key for key in json_data if key not in leading)

    order_list = leading + remaining
    data_types = ["markdown", "number"] + ["number"] * len(remaining)
    return order_list, data_types
29
+
30
def filter_columns_lek(column_choices):
    """Return the LEK table restricted to the chosen columns.

    Columns come back in ``ORDER_LIST_LEK`` order, whatever the order of
    ``column_choices``.

    Args:
        column_choices: Collection of column names to keep.

    Returns:
        ``LEK_ACCS`` indexed by the kept column names.
    """
    kept = []
    for name in ORDER_LIST_LEK:
        if name in column_choices:
            kept.append(name)
    return LEK_ACCS[kept]
33
+
34
def load_json_data(file_path, ORDER_LIST_LEK):
    """Load the LEK leaderboard JSON into a display-ready DataFrame.

    Args:
        file_path: Location of the JSON file; handed to ``pandas.read_json``.
        ORDER_LIST_LEK: Column names in the desired display order. Columns
            of the file that are not in this list are dropped.

    Returns:
        A DataFrame whose dict-valued columns are stringified, whose
        ``model_name`` values have been passed through
        ``replace_models_names``, whose numeric columns are rounded to two
        decimals, and whose columns follow ``ORDER_LIST_LEK``. If a
        ``"Score"`` column is present, rows are sorted by it in descending
        order.
    """
    frame = pd.read_json(file_path)

    # A column with any dict value is stringified as a whole.
    for column in frame.columns:
        if frame[column].map(lambda value: isinstance(value, dict)).any():
            frame[column] = frame[column].apply(str)

    frame["model_name"] = frame["model_name"].apply(replace_models_names)

    for column in frame.select_dtypes(include='number').columns:
        frame[column] = frame[column].round(2)

    ordered_columns = [col for col in ORDER_LIST_LEK if col in frame.columns]
    frame = frame[ordered_columns]

    if "Score" in frame.columns:
        frame = frame.sort_values(by="Score", ascending=False)

    return frame
53
+
54
+
55
file_path = str(abs_path / "leaderboards/r_lek_report_scores.json")

# Parse from the handle that was just opened, so the explicit UTF-8 encoding
# applies and the path is not opened a second time for the sample row.
with open(file_path, 'r', encoding='utf-8') as file:
    sample_data = pd.read_json(file).iloc[0].to_dict()  # Load the first row as a dict

ORDER_LIST_LEK, DATA_TYPES_LEK = generate_ORDER_LIST_LEK_and_data_types(sample_data)
LEK_ACCS = load_json_data(file_path, ORDER_LIST_LEK)
COLUMN_HEADERS_LEK = ORDER_LIST_LEK

# NOTE(review): this prints at import time and looks like leftover debug
# output; kept so the module's import-time behavior is unchanged.
print(ORDER_LIST_LEK)
src/structures/pes_structure.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+
4
+ abs_path = Path(__file__).parent.parent.parent
5
+
6
def replace_models_names(model_name):
    """Turn a result-file model name into a Markdown link to its Hugging Face repo.

    The input is expected to look like ``model-<org>-<repo>``. The leading
    ``model-`` is dropped and the dash separating organisation from
    repository becomes ``/``. Organisations whose own name contains a dash
    (``meta-llama``, ``epfl-llm``, ``01-ai``) are kept intact.

    Args:
        model_name: Raw model name string.

    Returns:
        ``"[<org>/<repo>](https://huggingface.co/<org>/<repo>)"``. A name
        with no separating dash is linked unchanged.
    """
    hyphenated_orgs = ('meta-llama', 'epfl-llm', '01-ai')

    # Strip only a leading marker; 'model-' inside a repo name is real text.
    name = model_name
    if name.startswith('model-'):
        name = name[len('model-'):]

    # Find the org/repo separator: the first dash, or for a known hyphenated
    # organisation the first dash after its name. Underscores in the name
    # are never touched.
    for org in hyphenated_orgs:
        if name.startswith(org):
            split_at = name.find('-', len(org))
            break
    else:
        split_at = name.find('-')

    if split_at == -1:
        repo_id = name
    else:
        repo_id = f"{name[:split_at]}/{name[split_at + 1:]}"
    return f"[{repo_id}](https://huggingface.co/{repo_id})"
18
+
19
+
20
def generate_order_list_and_data_types(json_data):
    """Derive the PES column order and matching datatypes from a sample row.

    Args:
        json_data: Mapping of column name to value for one leaderboard row.
            Only the keys are used.

    Returns:
        A ``(order_list, data_types)`` tuple of equal-length lists.
        ``order_list`` starts with ``"model_name"`` and
        ``"overall_accuracy"``, followed by the remaining keys sorted.
        ``data_types`` is ``"markdown"`` for the first column and
        ``"number"`` for every other one.
    """
    leading = ["model_name", "overall_accuracy"]
    remaining = sorted(key for key in json_data if key not in leading)

    order_list = leading + remaining
    data_types = ["markdown", "number"] + ["number"] * len(remaining)
    return order_list, data_types
30
+
31
def filter_data(selected_columns, search_query):
    """Return the PES table limited to chosen columns and matching model names.

    Args:
        selected_columns: List of column names to keep.
        search_query: Text matched case-insensitively against the
            ``model_name`` column. A valid regular expression is used as
            one; anything else is matched as literal text. An empty or
            ``None`` query disables filtering.

    Returns:
        The selected columns of ``PES_ACCS``, restricted to the rows whose
        model name matches the query.
    """
    import re  # function-scope import: only this function needs it

    df = PES_ACCS[selected_columns]
    if not search_query:
        return df

    # The query is free-form user text: input such as "(" is not a valid
    # pattern and would otherwise raise, so fall back to a literal match.
    try:
        re.compile(search_query)
        use_regex = True
    except re.error:
        use_regex = False

    # Match against the full table so the search still works when
    # 'model_name' is not one of the selected columns.
    mask = PES_ACCS['model_name'].str.contains(
        search_query, case=False, na=False, regex=use_regex
    )
    return df[mask]
36
+
37
def filter_columns(column_choices):
    """Return the PES table restricted to the chosen columns.

    Columns come back in ``ORDER_LIST`` order, whatever the order of
    ``column_choices``.

    Args:
        column_choices: Collection of column names to keep.

    Returns:
        ``PES_ACCS`` indexed by the kept column names.
    """
    kept = []
    for name in ORDER_LIST:
        if name in column_choices:
            kept.append(name)
    return PES_ACCS[kept]
40
+
41
def load_json_data(file_path, order_list):
    """Load the PES leaderboard JSON into a display-ready DataFrame.

    Args:
        file_path: Location of the JSON file; handed to ``pandas.read_json``.
        order_list: Column names in the desired display order. Columns of
            the file that are not in this list are dropped.

    Returns:
        A DataFrame whose dict-valued columns are stringified, whose
        ``model_name`` values have been passed through
        ``replace_models_names``, whose numeric columns are rounded to two
        decimals, and whose columns follow ``order_list``. If a ``"Score"``
        column is present, rows are sorted by it in descending order.
    """
    frame = pd.read_json(file_path)

    # A column with any dict value is stringified as a whole.
    for column in frame.columns:
        if frame[column].map(lambda value: isinstance(value, dict)).any():
            frame[column] = frame[column].apply(str)

    frame["model_name"] = frame["model_name"].apply(replace_models_names)

    for column in frame.select_dtypes(include='number').columns:
        frame[column] = frame[column].round(2)

    ordered_columns = [col for col in order_list if col in frame.columns]
    frame = frame[ordered_columns]

    if "Score" in frame.columns:
        frame = frame.sort_values(by="Score", ascending=False)

    return frame
60
file_path = str(abs_path / "leaderboards/pes_accuracy.json")

# Parse from the handle that was just opened, so the explicit UTF-8 encoding
# applies and the path is not opened a second time for the sample row.
with open(file_path, 'r', encoding='utf-8') as file:
    sample_data = pd.read_json(file).iloc[0].to_dict()  # Load the first row as a dict

ORDER_LIST, DATA_TYPES = generate_order_list_and_data_types(sample_data)
PES_ACCS = load_json_data(file_path, ORDER_LIST)
COLUMN_HEADERS = ORDER_LIST

# NOTE(review): this prints at import time and looks like leftover debug
# output; kept so the module's import-time behavior is unchanged.
print('test')
src/styles.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_css = """
2
+ .markdown-text {
3
+ font-size: 16px !important;
4
+ }
5
+ #models-to-add-text {
6
+ font-size: 18px !important;
7
+ }
8
+ #citation-button span {
9
+ font-size: 16px !important;
10
+ }
11
+ #citation-button textarea {
12
+ font-size: 16px !important;
13
+ }
14
+ #citation-button > label > button {
15
+ margin: 6px;
16
+ transform: scale(1.3);
17
+ }
18
+ #leaderboard-table {
19
+ margin-top: 15px
20
+ }
21
+ #leaderboard-table-lite {
22
+ margin-top: 15px
23
+ }
24
+ #search-bar-table-box > div:first-child {
25
+ background: none;
26
+ border: none;
27
+ }
28
+
29
+ #search-bar {
30
+ padding: 0px;
31
+ }
32
+ /* Hides the final AutoEvalColumn */
33
+ #llm-benchmark-tab-table table td:last-child,
34
+ #llm-benchmark-tab-table table th:last-child {
35
+ display: none;
36
+ }
37
+ /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
38
+ table td:first-child,
39
+ table th:first-child {
40
+ max-width: 400px;
41
+ overflow: auto;
42
+ white-space: nowrap;
43
+ }
44
+ table > tbody > tr > td:nth-child(3) > div {
45
+ overflow-x: auto;
46
+ width: 450px;
47
+ }
48
+ tbody span {
49
+ -webkit-user-select: text;
50
+ -moz-user-select: text;
51
+ -ms-user-select: text;
52
+ user-select: text;
53
+ }
54
+ .tab-buttons button {
55
+ font-size: 20px;
56
+ }
57
+ #scale-logo {
58
+ border-style: none !important;
59
+ box-shadow: none;
60
+ display: block;
61
+ margin-left: auto;
62
+ margin-right: auto;
63
+ max-width: 600px;
64
+ }
65
+ #scale-logo .download {
66
+ display: none;
67
+ }
68
+ #filter_type{
69
+ border: 0;
70
+ padding-left: 0;
71
+ padding-top: 0;
72
+ }
73
+ #filter_type label {
74
+ display: flex;
75
+ }
76
+ #filter_type label > span{
77
+ margin-top: var(--spacing-lg);
78
+ margin-right: 0.5em;
79
+ }
80
+ #filter_type label > .wrap{
81
+ width: 103px;
82
+ }
83
+ #filter_type label > .wrap .wrap-inner{
84
+ padding: 2px;
85
+ }
86
+ #filter_type label > .wrap .wrap-inner input{
87
+ width: 1px
88
+ }
89
+ #filter-columns-type{
90
+ border:0;
91
+ padding:0.5;
92
+ }
93
+ #filter-columns-size{
94
+ border:0;
95
+ padding:0.5;
96
+ }
97
+ #box-filter > .form{
98
+ border: 0
99
+ }
100
+ """
101
+
102
+ get_window_url_params = """
103
+ function(url_params) {
104
+ const params = new URLSearchParams(window.location.search);
105
+ url_params = Object.fromEntries(params);
106
+ return url_params;
107
+ }
108
+ """
src/utils.py ADDED
@@ -0,0 +1 @@
 
 
1
+