Spaces:
Build error
Build error
Make different aggs run faster (#5)
Browse files
- Make different aggs run faster (d59694170093223a311cd5b6c35dd522ed75d5bd)
app.py
CHANGED
|
@@ -13,7 +13,7 @@ Evaluation of H4 and community models across a diverse range of benchmarks from
|
|
| 13 |
BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
|
| 14 |
|
| 15 |
|
| 16 |
-
def get_leaderboard_df(
|
| 17 |
filepaths = list(Path("eval_results").rglob("*.json"))
|
| 18 |
|
| 19 |
# Parse filepaths to get unique models
|
|
@@ -128,6 +128,12 @@ def get_leaderboard_df(agg: str = "max"):
|
|
| 128 |
# Strip off date from model name
|
| 129 |
df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
# Drop date and aggregate results by model name
|
| 132 |
df = df.drop("Date", axis=1).groupby("Model").agg(agg).reset_index()
|
| 133 |
|
|
@@ -136,20 +142,12 @@ def get_leaderboard_df(agg: str = "max"):
|
|
| 136 |
# Convert all values to percentage
|
| 137 |
df[df.select_dtypes(include=["number"]).columns] *= 100.0
|
| 138 |
df = df.sort_values(by=["Average"], ascending=False)
|
| 139 |
-
|
| 140 |
return df
|
| 141 |
|
| 142 |
-
|
| 143 |
-
leaderboard_df = get_leaderboard_df()
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
def refresh(agg: str = "max"):
|
| 147 |
-
return get_leaderboard_df(agg=agg)
|
| 148 |
-
|
| 149 |
-
|
| 150 |
# Function to update the table based on search query
|
| 151 |
-
def filter_and_search(cols: list[str], search_query: str):
|
| 152 |
df = leaderboard_df
|
|
|
|
| 153 |
if len(search_query) > 0:
|
| 154 |
search_terms = search_query.split(";")
|
| 155 |
search_terms = [term.strip().lower() for term in search_terms]
|
|
@@ -189,19 +187,15 @@ with demo:
|
|
| 189 |
info="Select columns to display",
|
| 190 |
)
|
| 191 |
with gr.Group():
|
| 192 |
-
# leaderboard_df = get_leaderboard_df()
|
| 193 |
leaderboard_table = gr.Dataframe(
|
| 194 |
value=leaderboard_df,
|
| 195 |
wrap=True,
|
| 196 |
height=1000,
|
| 197 |
column_widths=[400, 110] + [(260 + len(c)) for c in leaderboard_df.columns[1:]],
|
| 198 |
)
|
| 199 |
-
with gr.Row():
|
| 200 |
-
refresh_button = gr.Button("Refresh")
|
| 201 |
|
| 202 |
-
cols_bar.change(filter_and_search, inputs=[cols_bar, search_bar], outputs=[leaderboard_table])
|
| 203 |
-
agg.change(
|
| 204 |
-
search_bar.submit(filter_and_search, inputs=[cols_bar, search_bar], outputs=[leaderboard_table])
|
| 205 |
-
refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table])
|
| 206 |
|
| 207 |
demo.launch()
|
|
|
|
| 13 |
BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
|
| 14 |
|
| 15 |
|
| 16 |
+
def get_leaderboard_df():
|
| 17 |
filepaths = list(Path("eval_results").rglob("*.json"))
|
| 18 |
|
| 19 |
# Parse filepaths to get unique models
|
|
|
|
| 128 |
# Strip off date from model name
|
| 129 |
df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
|
| 130 |
|
| 131 |
+
return df
|
| 132 |
+
|
| 133 |
+
leaderboard_df = get_leaderboard_df()
|
| 134 |
+
|
| 135 |
+
def agg_df(df, agg: str = "max"):
|
| 136 |
+
df = df.copy()
|
| 137 |
# Drop date and aggregate results by model name
|
| 138 |
df = df.drop("Date", axis=1).groupby("Model").agg(agg).reset_index()
|
| 139 |
|
|
|
|
| 142 |
# Convert all values to percentage
|
| 143 |
df[df.select_dtypes(include=["number"]).columns] *= 100.0
|
| 144 |
df = df.sort_values(by=["Average"], ascending=False)
|
|
|
|
| 145 |
return df
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
# Function to update the table based on search query
|
| 148 |
+
def filter_and_search(cols: list[str], search_query: str, agg: str):
|
| 149 |
df = leaderboard_df
|
| 150 |
+
df = agg_df(df, agg)
|
| 151 |
if len(search_query) > 0:
|
| 152 |
search_terms = search_query.split(";")
|
| 153 |
search_terms = [term.strip().lower() for term in search_terms]
|
|
|
|
| 187 |
info="Select columns to display",
|
| 188 |
)
|
| 189 |
with gr.Group():
|
|
|
|
| 190 |
leaderboard_table = gr.Dataframe(
|
| 191 |
value=leaderboard_df,
|
| 192 |
wrap=True,
|
| 193 |
height=1000,
|
| 194 |
column_widths=[400, 110] + [(260 + len(c)) for c in leaderboard_df.columns[1:]],
|
| 195 |
)
|
|
|
|
|
|
|
| 196 |
|
| 197 |
+
cols_bar.change(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
|
| 198 |
+
agg.change(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
|
| 199 |
+
search_bar.submit(filter_and_search, inputs=[cols_bar, search_bar, agg], outputs=[leaderboard_table])
|
|
|
|
| 200 |
|
| 201 |
demo.launch()
|