Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update the size vs. score graph
Browse files
app.py
CHANGED
@@ -295,33 +295,45 @@ def toggle_all_categories(action: str) -> list[gr.CheckboxGroup]:
|
|
295 |
return results
|
296 |
|
297 |
|
|
|
|
|
|
|
|
|
|
|
298 |
def plot_size_vs_score(df_filtered: pd.DataFrame) -> go.Figure:
|
299 |
df = ORIGINAL_DF[ORIGINAL_DF[AutoEvalColumn.row_id.name].isin(df_filtered[AutoEvalColumn.row_id.name])]
|
300 |
df = df[df["#Params (B)"] > 0]
|
301 |
-
|
302 |
-
df
|
|
|
303 |
df = df.rename(columns={"model_name_for_query": "Model", "Few-shot": "n-shot"})
|
304 |
df["model_name_without_org_name"] = df["Model"].str.split("/").str[-1] + " (" + df["n-shot"] + "-shot)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
fig = px.scatter(
|
306 |
df,
|
307 |
x="#Params (B)",
|
308 |
-
y="
|
309 |
text="model_name_without_org_name",
|
310 |
-
|
|
|
311 |
)
|
312 |
fig.update_traces(
|
313 |
-
hovertemplate="<b>%{customdata[0]}</b><br>#Params: %{x:.2f}B<br>n-shot: %{customdata[1]}<br
|
314 |
textposition="top right",
|
315 |
)
|
|
|
|
|
|
|
316 |
fig.update_layout(xaxis_range=[0, MAX_MODEL_SIZE * 1.2], yaxis_range=[0, 1])
|
317 |
return fig
|
318 |
|
319 |
|
320 |
-
TASK_AVG_NAME_MAP = {
|
321 |
-
c.name: c.task_type.name for c in fields(AutoEvalColumn) if c.average and c.task_type != TaskType.AVG
|
322 |
-
}
|
323 |
-
|
324 |
-
|
325 |
def plot_average_scores(df_filtered: pd.DataFrame) -> go.Figure:
|
326 |
df = ORIGINAL_DF[ORIGINAL_DF[AutoEvalColumn.row_id.name].isin(df_filtered[AutoEvalColumn.row_id.name])]
|
327 |
df = df[["model_name_for_query", "Few-shot"] + list(TASK_AVG_NAME_MAP.keys())]
|
|
|
295 |
return results
|
296 |
|
297 |
|
298 |
+
# Maps the name of every AutoEvalColumn field flagged `average` to the name of
# its task type; the field whose task type is TaskType.AVG is left out.
TASK_AVG_NAME_MAP = {
    column.name: column.task_type.name
    for column in fields(AutoEvalColumn)
    if column.average and column.task_type != TaskType.AVG
}
|
301 |
+
|
302 |
+
|
303 |
def plot_size_vs_score(df_filtered: pd.DataFrame) -> go.Figure:
    """Build a scatter plot of model size against the average scores.

    Rows of ``ORIGINAL_DF`` whose row id appears in ``df_filtered`` are
    selected, rows whose ``#Params (B)`` is not greater than zero are dropped,
    and the ``AVG`` column together with every column named in
    ``TASK_AVG_NAME_MAP`` is melted into long form so that each score column
    becomes its own colored trace.

    Args:
        df_filtered: Frame whose row-id column decides which rows of
            ``ORIGINAL_DF`` are plotted; no other column of it is read.

    Returns:
        A Plotly figure with one trace per score column. Only the ``AVG``
        trace is visible initially; every other trace is set to
        ``"legendonly"`` and can be enabled from the legend.
    """
    row_id = AutoEvalColumn.row_id.name
    avg_columns = ["AVG", *TASK_AVG_NAME_MAP]

    df = ORIGINAL_DF[ORIGINAL_DF[row_id].isin(df_filtered[row_id])]
    df = df[df["#Params (B)"] > 0]
    # Take an explicit copy: the frame is obtained by indexing ORIGINAL_DF and
    # is assigned to below, which pandas otherwise reports with a
    # SettingWithCopyWarning.
    df = df[["model_name_for_query", "#Params (B)", "Few-shot", *avg_columns]].copy()
    df[avg_columns] = df[avg_columns].astype(float)
    df = df.rename(columns={"model_name_for_query": "Model", "Few-shot": "n-shot"})
    # Point label: the model name without its "org/" prefix plus the shot count.
    df["model_name_without_org_name"] = df["Model"].str.split("/").str[-1] + " (" + df["n-shot"] + "-shot)"

    # Long form: one row per (model, score column) pair.
    df = pd.melt(
        df,
        id_vars=["Model", "model_name_without_org_name", "#Params (B)", "n-shot"],
        value_vars=avg_columns,
        var_name="Task_Category",
        value_name="Score",
    )

    fig = px.scatter(
        df,
        x="#Params (B)",
        y="Score",
        text="model_name_without_org_name",
        color="Task_Category",
        # The order here fixes the customdata indices used by the hovertemplate.
        hover_data=["Model", "n-shot", "Task_Category"],
    )
    fig.update_traces(
        hovertemplate="<b>%{customdata[0]}</b><br>#Params: %{x:.2f}B<br>n-shot: %{customdata[1]}<br>%{customdata[2]}: %{y:.4f}<extra></extra>",
        textposition="top right",
    )

    # Show only the overall average by default; the rest stay in the legend.
    for trace in fig.data:
        if trace.name != "AVG":
            trace.visible = "legendonly"

    fig.update_layout(xaxis_range=[0, MAX_MODEL_SIZE * 1.2], yaxis_range=[0, 1])
    return fig
|
335 |
|
336 |
|
|
|
|
|
|
|
|
|
|
|
337 |
def plot_average_scores(df_filtered: pd.DataFrame) -> go.Figure:
|
338 |
df = ORIGINAL_DF[ORIGINAL_DF[AutoEvalColumn.row_id.name].isin(df_filtered[AutoEvalColumn.row_id.name])]
|
339 |
df = df[["model_name_for_query", "Few-shot"] + list(TASK_AVG_NAME_MAP.keys())]
|