update metric area filter

Files changed:
- app.py (+23, -0)
- src/populate.py (+0, -4)
app.py CHANGED

@@ -24,6 +24,7 @@ def update_table(
     use_case_area_query: list,
     use_case_query: list,
     use_case_type_query: list,
+    metric_area_query: list,
 ):
     filtered_df = filter_llm_func(hidden_df, llm_query)
     filtered_df = filter_llm_provider_func(filtered_df, llm_provider_query)
@@ -34,7 +35,24 @@ def update_table(
     filtered_df = filter_use_case_area_func(filtered_df, use_case_area_query)
     filtered_df = filter_use_case_func(filtered_df, use_case_query)
     filtered_df = filter_use_case_type_func(filtered_df, use_case_type_query)
+    # Filtering by metric area
+    metric_area_maps = {
+        "Cost": ["Cost Band"],
+        "Accuracy": ["Accuracy", "Instruction Following", "Conciseness", "Completeness", "Factuality"],
+        "Speed (Latency)": ["Response Time (Sec)", "Mean Output Tokens"],
+        "Trust & Safety": ["Trust & Safety", "Safety", "Privacy", "Truthfulness", "CRM Bias"],
+    }
+    all_metric_cols = []
+    for area in metric_area_maps:
+        all_metric_cols = all_metric_cols + metric_area_maps[area]
+
+    columns_to_keep = list(set(columns).difference(set(all_metric_cols)))
+    for area in metric_area_query:
+        columns_to_keep = columns_to_keep + metric_area_maps[area]
+    columns = list(set(columns).intersection(set(columns_to_keep)))
+
     df = select_columns(filtered_df, columns)
+
     return df.style.map(highlight_cost_band_low, props="background-color: #b3d5a4")
 
 
@@ -60,6 +78,7 @@ def init_leaderboard_df(
     use_case_area_query: list,
     use_case_query: list,
     use_case_type_query: list,
+    metric_area_query: list,
 ):
 
     # Applying the style function
@@ -74,6 +93,7 @@ def init_leaderboard_df(
         use_case_area_query,
         use_case_query,
         use_case_type_query,
+        metric_area_query,
     )
 
 
@@ -232,6 +252,7 @@ with demo:
             filter_use_case_area.value,
             filter_use_case.value,
             filter_use_case_type.value,
+            filter_metric_area.value,
         ),
         headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
         datatype=TYPES,
@@ -256,6 +277,7 @@ with demo:
         filter_use_case_area,
         filter_use_case,
         filter_use_case_type,
+        filter_metric_area,
     ]:
         selector.change(
            update_table,
@@ -269,6 +291,7 @@ with demo:
            filter_use_case_area,
            filter_use_case,
            filter_use_case_type,
+           filter_metric_area,
        ],
        leaderboard_table,
        queue=True,
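Taken in isolation, the block added to update_table is pure set arithmetic over column names: strip out every metric column, then add back only the columns of the selected metric areas. Below is a minimal standalone sketch of just that step. filter_columns_by_metric_area is a hypothetical name for illustration (the diff keeps the logic inline), metric_area_maps is copied verbatim from the diff, and the DataFrame plumbing around select_columns is omitted.

# A minimal, standalone sketch of the metric-area step added to update_table.
# filter_columns_by_metric_area is a hypothetical helper name; in the diff the
# logic is inline. metric_area_maps is copied verbatim from the diff.
metric_area_maps = {
    "Cost": ["Cost Band"],
    "Accuracy": ["Accuracy", "Instruction Following", "Conciseness", "Completeness", "Factuality"],
    "Speed (Latency)": ["Response Time (Sec)", "Mean Output Tokens"],
    "Trust & Safety": ["Trust & Safety", "Safety", "Privacy", "Truthfulness", "CRM Bias"],
}

def filter_columns_by_metric_area(columns: list, metric_area_query: list) -> list:
    # Every column that belongs to some metric area.
    all_metric_cols = [c for cols in metric_area_maps.values() for c in cols]
    # Keep the non-metric columns unconditionally...
    columns_to_keep = list(set(columns).difference(all_metric_cols))
    # ...then add back the columns of each selected area.
    for area in metric_area_query:
        columns_to_keep += metric_area_maps[area]
    # Intersect with the originally shown columns so nothing new appears.
    return list(set(columns).intersection(columns_to_keep))

# With only "Cost" selected, accuracy and latency columns disappear:
shown = ["Model Name", "Cost Band", "Accuracy", "Response Time (Sec)"]
print(filter_columns_by_metric_area(shown, ["Cost"]))
# e.g. ['Cost Band', 'Model Name'] (set operations do not preserve order)

One consequence visible in the sketch: the set round-trips drop column ordering, so the inline version presumably leans on select_columns (not shown in the diff) to restore the display order.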
src/populate.py CHANGED

@@ -21,8 +21,6 @@ def get_leaderboard_df_crm(
         on="Use Case Name",
     )
 
-    ref_df = leaderboard_accuracy_df[["Model Name", "LLM Provider"]].drop_duplicates()
-
     leaderboard_cost_df = pd.read_csv(os.path.join(crm_results_path, "hf_leaderboard_latency_cost.csv"))
     leaderboard_cost_df = leaderboard_cost_df[~leaderboard_cost_df["Model Name"].isin(sf_finetuned_models)]
     leaderboard_accuracy_df = leaderboard_accuracy_df.join(
@@ -61,8 +59,6 @@ def get_leaderboard_df_crm(
         on=["Model Name"],
     )
 
-    leaderboard_ts_df = leaderboard_ts_df.join(ref_df.set_index("Model Name"), on="Model Name")
-
     leaderboard_accuracy_df = leaderboard_accuracy_df.sort_values(
         by=[AutoEvalColumn.accuracy_metric_average.name], ascending=False
    )
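The populate.py change deletes the ref_df lookup that attached "LLM Provider" to the trust-and-safety frame; the diff does not show where that column comes from now. For reference, here is a toy reproduction of the join pattern the deleted lines used. All DataFrame contents are made-up placeholders; only the column names mirror the diff.

import pandas as pd

# Toy stand-ins for the real CSV-backed frames (placeholder values).
leaderboard_ts_df = pd.DataFrame(
    {"Model Name": ["model-a", "model-b"], "Trust & Safety": [0.91, 0.87]}
)
ref_df = pd.DataFrame(
    {"Model Name": ["model-a", "model-b"], "LLM Provider": ["provider-x", "provider-y"]}
)

# The deleted lines attached "LLM Provider" via an index-aligned join:
# set_index makes "Model Name" the index of ref_df, and join matches it
# against the "Model Name" column of leaderboard_ts_df.
joined = leaderboard_ts_df.join(ref_df.set_index("Model Name"), on="Model Name")
print(joined)
# Output (roughly): each row gains an "LLM Provider" column matched by model.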