taesiri committed on
Commit
75d681a
1 Parent(s): e0656c6
Files changed (1) hide show
  1. app.py +130 -9
app.py CHANGED
@@ -1,7 +1,11 @@
1
  import gradio as gr
2
  import pandas as pd
3
  from glob import glob
4
-
 
 
 
 
5
 
6
  # Load text benchmark results
7
  csv_results = glob("results/*.pkl")
@@ -30,9 +34,7 @@ cot_text_data = load_data(cot_text_results, "CoT Text Only")
30
  # cot_vision_data = load_data(cot_vision_results, "CoT Vision")
31
 
32
  # Combine all data into a single DataFrame
33
- all_data = pd.concat(
34
- [data, vision_data, cot_text_data], ignore_index=True
35
- )
36
 
37
  all_model_names = all_data["Model Name"].unique()
38
  all_text_only_model_names = list(
@@ -43,10 +45,13 @@ all_cot_text_only_models = list(
43
  )
44
 
45
 
 
 
46
 
47
  ## Continue with the cold code --
48
  # TODO: Update me to read from all_data for later
49
 
 
50
  # Load the csv files into a dict with keys being name of the file and values being the data
51
  data = {file: pd.read_pickle(file) for file in csv_results}
52
  # Load the vision files into a dict
@@ -145,7 +150,7 @@ def load_cot_vision_heatmap(evt: gr.SelectData):
145
 
146
 
147
  def calculate_order_by_first_substring(selected_models):
148
-
149
  first_columns = all_data[all_data["substring_index"] == 1]
150
  query_ids_df = first_columns[first_columns["Model Type"] == "Text Only"]
151
  query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
@@ -158,6 +163,7 @@ def calculate_order_by_first_substring(selected_models):
158
 
159
  text_only = all_data[all_data["Model Type"] == "Text Only"]
160
  text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
 
161
 
162
  query_ids = text_only_filtered.query_id.unique()
163
  text_only_filtered = (
@@ -180,9 +186,8 @@ def calculate_order_by_first_substring(selected_models):
180
  return text_only_filtered, number_of_queries, number_of_fsms
181
 
182
 
183
-
184
  def calculate_order_by_first_substring_cot(selected_models):
185
-
186
  first_columns = all_data[all_data["substring_index"] == 1]
187
  query_ids_df = first_columns[first_columns["Model Type"] == "CoT Text Only"]
188
  query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
@@ -195,6 +200,7 @@ def calculate_order_by_first_substring_cot(selected_models):
195
 
196
  text_only = all_data[all_data["Model Type"] == "CoT Text Only"]
197
  text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
 
198
 
199
  query_ids = text_only_filtered.query_id.unique()
200
  text_only_filtered = (
@@ -217,6 +223,108 @@ def calculate_order_by_first_substring_cot(selected_models):
217
  return text_only_filtered, number_of_queries, number_of_fsms
218
 
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  with gr.Blocks() as demo:
221
  gr.Markdown("# FSM Benchmark Leaderboard")
222
  with gr.Tab("Text-only Benchmark"):
@@ -273,6 +381,7 @@ with gr.Blocks() as demo:
273
  number_of_fsms = gr.Textbox(label="Number of included FSMs")
274
 
275
  constrained_leader_board_text = gr.Dataframe()
 
276
 
277
  included_models.select(
278
  fn=calculate_order_by_first_substring,
@@ -281,7 +390,6 @@ with gr.Blocks() as demo:
281
  queue=True,
282
  )
283
 
284
-
285
  with gr.Tab("Constraint Text-only Results (CoT)"):
286
  gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)")
287
  included_models_cot = gr.CheckboxGroup(
@@ -295,12 +403,25 @@ with gr.Blocks() as demo:
295
  number_of_fsms_cot = gr.Textbox(label="Number of included FSMs")
296
 
297
  constrained_leader_board_text_cot = gr.Dataframe()
 
298
 
299
  included_models_cot.select(
300
  fn=calculate_order_by_first_substring_cot,
301
  inputs=[included_models_cot],
302
- outputs=[constrained_leader_board_text_cot, number_of_queries_cot, number_of_fsms_cot],
 
 
 
 
303
  queue=True,
304
  )
305
 
 
 
 
 
 
 
 
 
306
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  from glob import glob
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from matplotlib.colors import ListedColormap, BoundaryNorm
7
+ from glob import glob
8
+ import os
9
 
10
  # Load text benchmark results
11
  csv_results = glob("results/*.pkl")
 
34
  # cot_vision_data = load_data(cot_vision_results, "CoT Vision")
35
 
36
  # Combine all data into a single DataFrame
37
+ all_data = pd.concat([data, vision_data, cot_text_data], ignore_index=True)
 
 
38
 
39
  all_model_names = all_data["Model Name"].unique()
40
  all_text_only_model_names = list(
 
45
  )
46
 
47
 
48
+ text_only_filtered_raw = None
49
+ text_only_filtered_raw_cot = None
50
 
51
  ## Continue with the cold code --
52
  # TODO: Update me to read from all_data for later
53
 
54
+
55
  # Load the csv files into a dict with keys being name of the file and values being the data
56
  data = {file: pd.read_pickle(file) for file in csv_results}
57
  # Load the vision files into a dict
 
150
 
151
 
152
  def calculate_order_by_first_substring(selected_models):
153
+ global text_only_filtered_raw
154
  first_columns = all_data[all_data["substring_index"] == 1]
155
  query_ids_df = first_columns[first_columns["Model Type"] == "Text Only"]
156
  query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
 
163
 
164
  text_only = all_data[all_data["Model Type"] == "Text Only"]
165
  text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
166
+ text_only_filtered_raw = text_only_filtered.copy()
167
 
168
  query_ids = text_only_filtered.query_id.unique()
169
  text_only_filtered = (
 
186
  return text_only_filtered, number_of_queries, number_of_fsms
187
 
188
 
 
189
  def calculate_order_by_first_substring_cot(selected_models):
190
+ global text_only_filtered_raw_cot
191
  first_columns = all_data[all_data["substring_index"] == 1]
192
  query_ids_df = first_columns[first_columns["Model Type"] == "CoT Text Only"]
193
  query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)]
 
200
 
201
  text_only = all_data[all_data["Model Type"] == "CoT Text Only"]
202
  text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)]
203
+ text_only_filtered_raw_cot = text_only_filtered.copy()
204
 
205
  query_ids = text_only_filtered.query_id.unique()
206
  text_only_filtered = (
 
223
  return text_only_filtered, number_of_queries, number_of_fsms
224
 
225
 
226
def _render_fsm_heatmap(raw_df, model_name):
    """Draw the FSM x substring-index heatmap of judge results for one model.

    Shared implementation behind ``generate_heatmap_for_specific_model`` and
    ``generate_heatmap_for_specific_model_cot``; the two only differ in which
    cached frame they pass in.

    Args:
        raw_df: DataFrame with at least the columns ``"Model Name"``,
            ``"num_states"``, ``"num_alphabet"``, ``"substring_index"`` and
            ``"parsed_judge_response"``, or ``None`` if nothing has been
            cached yet.
        model_name: Value matched against the ``"Model Name"`` column.

    Returns:
        A matplotlib ``Figure``, or ``None`` when there is nothing to plot
        (no cached data, no rows for ``model_name``, or no judge responses).
    """
    if raw_df is None:
        # The leaderboard callback has not populated the cache yet.
        return None

    # Explicit copy: adding a column to a filtered slice otherwise raises
    # pandas' SettingWithCopyWarning.
    model_df = raw_df[raw_df["Model Name"] == model_name].copy()
    if model_df.empty:
        # E.g. the selected value was not a model name; seaborn cannot draw
        # an empty frame.
        return None

    model_df["fsm_info"] = model_df.apply(
        lambda x: f"{x['num_states']} states, {x['num_alphabet']} alphabet", axis=1
    )
    model_df = model_df.sort_values(by=["num_states", "num_alphabet"])

    pivot_df = model_df.pivot_table(
        index="fsm_info",
        columns="substring_index",
        values="parsed_judge_response",
        aggfunc="first",
    )
    if pivot_df.empty:
        return None

    # pivot_table sorts its index lexicographically ("10 states" before
    # "2 states"), discarding the numeric sort above. Restore that order,
    # keeping only the labels the pivot actually produced.
    # NOTE(review): FSMs sharing the same (num_states, num_alphabet) collapse
    # into one row and only the first response is kept - confirm intended.
    ordered_labels = [
        label for label in model_df["fsm_info"].unique() if label in pivot_df.index
    ]
    # Cells without a response become -1 so they fall in the first colour bin.
    pivot_df = pivot_df.reindex(ordered_labels).fillna(-1).astype(float)

    # Three discrete bins centred on -1, 0 and 1:
    # -1 (no value) -> lightblue, 0 -> red, 1 -> green.
    cmap = ListedColormap(["lightblue", "red", "green"])
    bounds = [-1.5, -0.5, 0.5, 1.5]
    norm = BoundaryNorm(bounds, cmap.N)

    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(
        pivot_df,
        cmap=cmap,
        linewidths=1,
        linecolor="black",
        norm=norm,
        cbar=False,
        square=True,
        ax=ax,
    )
    # Use the Axes API instead of pyplot's global "current axes" so that
    # concurrent callbacks cannot decorate each other's figures.
    ax.set_title(f"Heatmap for Model: {model_name}", fontsize=20, weight="bold")
    ax.set_xlabel("Substring Index")
    ax.set_ylabel("FSM (States, Alphabet)")
    plt.setp(ax.get_xticklabels(), rotation=45)

    # Unregister the figure from pyplot; otherwise every call keeps one more
    # figure alive. The Figure object itself stays usable by the caller.
    plt.close(fig)
    return fig


def generate_heatmap_for_specific_model(model_name):
    """Build the heatmap for ``model_name`` from the cached "Text Only" rows.

    Reads the module-level ``text_only_filtered_raw`` frame that
    ``calculate_order_by_first_substring`` stores.

    Returns:
        A matplotlib ``Figure``, or ``None`` when there is nothing to plot.
    """
    return _render_fsm_heatmap(text_only_filtered_raw, model_name)


def generate_heatmap_for_specific_model_cot(model_name):
    """Build the heatmap for ``model_name`` from the cached "CoT Text Only" rows.

    Reads the module-level ``text_only_filtered_raw_cot`` frame that
    ``calculate_order_by_first_substring_cot`` stores.

    Returns:
        A matplotlib ``Figure``, or ``None`` when there is nothing to plot.
    """
    return _render_fsm_heatmap(text_only_filtered_raw_cot, model_name)
316
+
317
+
318
def show_constraint_heatmap(evt: gr.SelectData):
    """Gradio ``select`` handler for the constrained text-only leaderboard.

    Forwards the selected value to ``generate_heatmap_for_specific_model``
    and returns whatever it produces for the plot output.

    NOTE(review): ``evt.value`` is used as the model name as-is; presumably
    the user is expected to click a model-name cell - confirm.
    """
    return generate_heatmap_for_specific_model(evt.value)
321
+
322
+
323
def show_constraint_heatmap_cot(evt: gr.SelectData):
    """Gradio ``select`` handler for the constrained CoT leaderboard.

    Forwards the selected value to ``generate_heatmap_for_specific_model_cot``
    and returns whatever it produces for the plot output.

    NOTE(review): ``evt.value`` is used as the model name as-is; presumably
    the user is expected to click a model-name cell - confirm.
    """
    return generate_heatmap_for_specific_model_cot(evt.value)
326
+
327
+
328
  with gr.Blocks() as demo:
329
  gr.Markdown("# FSM Benchmark Leaderboard")
330
  with gr.Tab("Text-only Benchmark"):
 
381
  number_of_fsms = gr.Textbox(label="Number of included FSMs")
382
 
383
  constrained_leader_board_text = gr.Dataframe()
384
+ constrained_leader_board_plot = gr.Plot()
385
 
386
  included_models.select(
387
  fn=calculate_order_by_first_substring,
 
390
  queue=True,
391
  )
392
 
 
393
  with gr.Tab("Constraint Text-only Results (CoT)"):
394
  gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)")
395
  included_models_cot = gr.CheckboxGroup(
 
403
  number_of_fsms_cot = gr.Textbox(label="Number of included FSMs")
404
 
405
  constrained_leader_board_text_cot = gr.Dataframe()
406
+ constrained_leader_board_plot_cot = gr.Plot()
407
 
408
  included_models_cot.select(
409
  fn=calculate_order_by_first_substring_cot,
410
  inputs=[included_models_cot],
411
+ outputs=[
412
+ constrained_leader_board_text_cot,
413
+ number_of_queries_cot,
414
+ number_of_fsms_cot,
415
+ ],
416
  queue=True,
417
  )
418
 
419
+ constrained_leader_board_text.select(
420
+ fn=show_constraint_heatmap, outputs=[constrained_leader_board_plot]
421
+ )
422
+
423
+ constrained_leader_board_text_cot.select(
424
+ fn=show_constraint_heatmap_cot, outputs=[constrained_leader_board_plot_cot]
425
+ )
426
+
427
  demo.launch()