yibum committed
Commit c6ea0a2 · 1 Parent(s): 375e6bf

update metric area filter

Files changed (2)
  1. app.py +23 -0
  2. src/populate.py +0 -4
app.py CHANGED

@@ -24,6 +24,7 @@ def update_table(
     use_case_area_query: list,
     use_case_query: list,
     use_case_type_query: list,
+    metric_area_query: list,
 ):
     filtered_df = filter_llm_func(hidden_df, llm_query)
     filtered_df = filter_llm_provider_func(filtered_df, llm_provider_query)
@@ -34,7 +35,24 @@ def update_table(
     filtered_df = filter_use_case_area_func(filtered_df, use_case_area_query)
     filtered_df = filter_use_case_func(filtered_df, use_case_query)
     filtered_df = filter_use_case_type_func(filtered_df, use_case_type_query)
+    # Filtering by metric area
+    metric_area_maps = {
+        "Cost": ["Cost Band"],
+        "Accuracy": ["Accuracy", "Instruction Following", "Conciseness", "Completeness", "Factuality"],
+        "Speed (Latency)": ["Response Time (Sec)", "Mean Output Tokens"],
+        "Trust & Safety": ["Trust & Safety", "Safety", "Privacy", "Truthfulness", "CRM Bias"],
+    }
+    all_metric_cols = []
+    for area in metric_area_maps:
+        all_metric_cols = all_metric_cols + metric_area_maps[area]
+
+    columns_to_keep = list(set(columns).difference(set(all_metric_cols)))
+    for area in metric_area_query:
+        columns_to_keep = columns_to_keep + metric_area_maps[area]
+    columns = list(set(columns).intersection(set(columns_to_keep)))
+
     df = select_columns(filtered_df, columns)
+
     return df.style.map(highlight_cost_band_low, props="background-color: #b3d5a4")


@@ -60,6 +78,7 @@ def init_leaderboard_df(
     use_case_area_query: list,
     use_case_query: list,
     use_case_type_query: list,
+    metric_area_query: list,
 ):

     # Applying the style function
@@ -74,6 +93,7 @@ def init_leaderboard_df(
         use_case_area_query,
         use_case_query,
         use_case_type_query,
+        metric_area_query,
     )


@@ -232,6 +252,7 @@ with demo:
             filter_use_case_area.value,
             filter_use_case.value,
             filter_use_case_type.value,
+            filter_metric_area.value,
         ),
         headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
         datatype=TYPES,
@@ -256,6 +277,7 @@ with demo:
         filter_use_case_area,
         filter_use_case,
         filter_use_case_type,
+        filter_metric_area,
     ]:
         selector.change(
             update_table,
@@ -269,6 +291,7 @@ with demo:
                 filter_use_case_area,
                 filter_use_case,
                 filter_use_case_type,
+                filter_metric_area,
             ],
             leaderboard_table,
             queue=True,
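The substance of the app.py change is the metric-area column filter: a column mapped to a metric area stays visible only when that area is selected in the new `filter_metric_area` dropdown, while columns outside any metric area are always kept. A minimal, self-contained sketch of that logic follows; the helper name `filter_metric_area_columns` is hypothetical (the commit inlines this in `update_table`), and unlike the commit's set arithmetic this version preserves column order.

```python
# Minimal sketch of the metric-area filter added to update_table.
# filter_metric_area_columns is a hypothetical name for illustration only.
METRIC_AREA_MAPS = {
    "Cost": ["Cost Band"],
    "Accuracy": ["Accuracy", "Instruction Following", "Conciseness", "Completeness", "Factuality"],
    "Speed (Latency)": ["Response Time (Sec)", "Mean Output Tokens"],
    "Trust & Safety": ["Trust & Safety", "Safety", "Privacy", "Truthfulness", "CRM Bias"],
}

def filter_metric_area_columns(columns: list, metric_area_query: list) -> list:
    # Every column that belongs to some metric area.
    all_metric_cols = {col for cols in METRIC_AREA_MAPS.values() for col in cols}
    # Keep non-metric columns unconditionally...
    keep = set(columns) - all_metric_cols
    # ...then add back the columns of each selected metric area.
    for area in metric_area_query:
        keep |= set(METRIC_AREA_MAPS[area])
    return [col for col in columns if col in keep]

# With only "Cost" selected, accuracy and latency columns drop out:
print(filter_metric_area_columns(
    ["Model Name", "Cost Band", "Accuracy", "Response Time (Sec)"],
    ["Cost"],
))  # -> ['Model Name', 'Cost Band']
```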
src/populate.py CHANGED

@@ -21,8 +21,6 @@ def get_leaderboard_df_crm(
         on="Use Case Name",
     )

-    ref_df = leaderboard_accuracy_df[["Model Name", "LLM Provider"]].drop_duplicates()
-
     leaderboard_cost_df = pd.read_csv(os.path.join(crm_results_path, "hf_leaderboard_latency_cost.csv"))
     leaderboard_cost_df = leaderboard_cost_df[~leaderboard_cost_df["Model Name"].isin(sf_finetuned_models)]
     leaderboard_accuracy_df = leaderboard_accuracy_df.join(
@@ -61,8 +59,6 @@ def get_leaderboard_df_crm(
         on=["Model Name"],
     )

-    leaderboard_ts_df = leaderboard_ts_df.join(ref_df.set_index("Model Name"), on="Model Name")
-
     leaderboard_accuracy_df = leaderboard_accuracy_df.sort_values(
         by=[AutoEvalColumn.accuracy_metric_average.name], ascending=False
     )
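For context, the lines removed from src/populate.py built a deduplicated (Model Name, LLM Provider) lookup from the accuracy frame and re-joined it onto the trust & safety frame, enrichment that is no longer needed here. A toy sketch of the removed pattern, with illustrative stand-in data rather than the real leaderboard CSVs:

```python
import pandas as pd

# Toy stand-ins for the leaderboard frames; the values are illustrative.
leaderboard_accuracy_df = pd.DataFrame({
    "Model Name": ["gpt-4", "gpt-4", "llama-3"],
    "LLM Provider": ["OpenAI", "OpenAI", "Meta"],
    "Accuracy": [0.91, 0.84, 0.78],
})
leaderboard_ts_df = pd.DataFrame({
    "Model Name": ["gpt-4", "llama-3"],
    "Trust & Safety": [0.95, 0.88],
})

# The removed pattern: dedupe a (model, provider) lookup, then join it
# back onto the trust & safety frame keyed on "Model Name".
ref_df = leaderboard_accuracy_df[["Model Name", "LLM Provider"]].drop_duplicates()
leaderboard_ts_df = leaderboard_ts_df.join(ref_df.set_index("Model Name"), on="Model Name")
print(leaderboard_ts_df)  # each row now carries its LLM Provider
```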