Spaces:
Running
Running
Aaron Mueller
committed on
Commit
·
4d561ee
1
Parent(s):
cde984f
COLS for multimodal track
Browse files
- app.py +2 -1
- src/display/utils.py +8 -0
- src/populate.py +1 -1
app.py
CHANGED
@@ -17,6 +17,7 @@ from src.display.utils import (
|
|
17 |
BENCHMARK_COLS,
|
18 |
BENCHMARK_COLS_MULTIMODAL,
|
19 |
COLS,
|
|
|
20 |
EVAL_COLS,
|
21 |
EVAL_TYPES,
|
22 |
AutoEvalColumn,
|
@@ -48,7 +49,7 @@ except Exception:
|
|
48 |
|
49 |
|
50 |
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
51 |
-
LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH,
|
52 |
|
53 |
(
|
54 |
finished_eval_queue_df,
|
|
|
17 |
BENCHMARK_COLS,
|
18 |
BENCHMARK_COLS_MULTIMODAL,
|
19 |
COLS,
|
20 |
+
COLS_MULTIMODAL,
|
21 |
EVAL_COLS,
|
22 |
EVAL_TYPES,
|
23 |
AutoEvalColumn,
|
|
|
49 |
|
50 |
|
51 |
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
|
52 |
+
LEADERBOARD_DF_MULTIMODAL = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS_MULTIMODAL, BENCHMARK_COLS_MULTIMODAL)
|
53 |
|
54 |
(
|
55 |
finished_eval_queue_df,
|
src/display/utils.py
CHANGED
@@ -22,18 +22,25 @@ class ColumnContent:
|
|
22 |
|
23 |
## Leaderboard columns
|
24 |
auto_eval_column_dict = []
|
|
|
25 |
# Init
|
26 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
27 |
#Scores
|
28 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
|
|
29 |
for task in Tasks:
|
30 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
|
|
|
|
31 |
# Model information
|
32 |
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
33 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
|
|
|
|
34 |
|
35 |
# We use make dataclass to dynamically fill the scores from Tasks
|
36 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
|
|
37 |
|
38 |
## For the queue columns in the submission tab
|
39 |
@dataclass(frozen=True)
|
@@ -53,6 +60,7 @@ class ModelDetails:
|
|
53 |
|
54 |
# Column selection
|
55 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
|
|
56 |
|
57 |
EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
|
58 |
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
|
|
22 |
|
23 |
## Leaderboard columns
|
24 |
auto_eval_column_dict = []
|
25 |
+
auto_eval_column_dict_multimodal = []
|
26 |
# Init
|
27 |
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
|
28 |
#Scores
|
29 |
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
|
30 |
+
auto_eval_column_dict_multimodal = auto_eval_column_dict
|
31 |
for task in Tasks:
|
32 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
33 |
+
for task in TasksMultimodal:
|
34 |
+
auto_eval_column_dict_multimodal.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
35 |
# Model information
|
36 |
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
37 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
38 |
+
auto_eval_column_dict_multimodal.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
39 |
+
auto_eval_column_dict_multimodal.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
40 |
|
41 |
# We use make dataclass to dynamically fill the scores from Tasks
|
42 |
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
|
43 |
+
AutoEvalColumnMultimodal = make_dataclass("AutoEvalColumnMultimodal", auto_eval_column_dict_multimodal, frozen=True)
|
44 |
|
45 |
## For the queue columns in the submission tab
|
46 |
@dataclass(frozen=True)
|
|
|
60 |
|
61 |
# Column selection
|
62 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
63 |
+
COLS_MULTIMODAL = [c.name for c in fields(AutoEvalColumnMultimodal) if not c.hidden]
|
64 |
|
65 |
EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
|
66 |
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
|
src/populate.py
CHANGED
@@ -23,7 +23,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
23 |
df = pd.DataFrame.from_records(all_data_json)
|
24 |
print(df)
|
25 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
26 |
-
df = df[
|
27 |
|
28 |
# filter out if any of the benchmarks have not been produced
|
29 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
|
|
23 |
df = pd.DataFrame.from_records(all_data_json)
|
24 |
print(df)
|
25 |
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
26 |
+
df = df[cols].round(decimals=1)
|
27 |
|
28 |
# filter out if any of the benchmarks have not been produced
|
29 |
df = df[has_no_nan_values(df, benchmark_cols)]
|