saattrupdan
committed on
Commit
•
9b382e3
1
Parent(s):
bd0b666
fix: Allow languages that do not have all tasks
Browse files
app.py
CHANGED
@@ -258,8 +258,18 @@ def update_model_ids_dropdown(
|
|
258 |
logger.info("No languages selected. Resetting model ids dropdown.")
|
259 |
return gr.update(choices=[], value=[])
|
260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
filtered_results_dfs = {
|
262 |
-
language: df
|
263 |
for language, df in results_dfs.items()
|
264 |
if language.name in language_names
|
265 |
}
|
@@ -337,7 +347,6 @@ def produce_radial_plot(
|
|
337 |
f"{language_names!r}..."
|
338 |
)
|
339 |
|
340 |
-
tasks = ALL_TASKS
|
341 |
languages = [ALL_LANGUAGES[language_name] for language_name in language_names]
|
342 |
|
343 |
results_dfs_filtered = {
|
@@ -346,6 +355,12 @@ def produce_radial_plot(
|
|
346 |
if language.name in language_names
|
347 |
}
|
348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
349 |
# Add all the evaluation results for each model
|
350 |
results: list[list[float]] = list()
|
351 |
for model_id in model_ids:
|
@@ -453,10 +468,7 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
|
|
453 |
lambda list_or_nan:
|
454 |
np.mean(list_or_nan) if list_or_nan == list_or_nan else list_or_nan
|
455 |
).dropna()
|
456 |
-
|
457 |
-
results_dfs[language] = pd.DataFrame()
|
458 |
-
else:
|
459 |
-
results_dfs[language] = results_df
|
460 |
|
461 |
logger.info("Successfully fetched results from ScandEval benchmark.")
|
462 |
|
|
|
258 |
logger.info("No languages selected. Resetting model ids dropdown.")
|
259 |
return gr.update(choices=[], value=[])
|
260 |
|
261 |
+
tasks = [
|
262 |
+
task
|
263 |
+
for task in ALL_TASKS
|
264 |
+
if all(
|
265 |
+
task in df.columns
|
266 |
+
for language, df in results_dfs.items()
|
267 |
+
if language.name in language_names
|
268 |
+
)
|
269 |
+
]
|
270 |
+
|
271 |
filtered_results_dfs = {
|
272 |
+
language: df[tasks]
|
273 |
for language, df in results_dfs.items()
|
274 |
if language.name in language_names
|
275 |
}
|
|
|
347 |
f"{language_names!r}..."
|
348 |
)
|
349 |
|
|
|
350 |
languages = [ALL_LANGUAGES[language_name] for language_name in language_names]
|
351 |
|
352 |
results_dfs_filtered = {
|
|
|
355 |
if language.name in language_names
|
356 |
}
|
357 |
|
358 |
+
tasks = [
|
359 |
+
task
|
360 |
+
for task in ALL_TASKS
|
361 |
+
if all(task in df.columns for df in results_dfs_filtered.values())
|
362 |
+
]
|
363 |
+
|
364 |
# Add all the evaluation results for each model
|
365 |
results: list[list[float]] = list()
|
366 |
for model_id in model_ids:
|
|
|
468 |
lambda list_or_nan:
|
469 |
np.mean(list_or_nan) if list_or_nan == list_or_nan else list_or_nan
|
470 |
).dropna()
|
471 |
+
results_dfs[language] = results_df
|
|
|
|
|
|
|
472 |
|
473 |
logger.info("Successfully fetched results from ScandEval benchmark.")
|
474 |
|