wassemgtk committed on
Commit
c2c6410
·
verified ·
1 Parent(s): 7d6243e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -21
app.py CHANGED
@@ -41,44 +41,57 @@ context_grounding_data = {
41
  "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
42
  }
43
 
44
- # Function to bold the highest score per column (excluding "Model Name")
45
- def format_table(df):
46
  styled_df = df.copy()
47
  numeric_columns = [col for col in df.columns if col != "Model Name"]
48
 
 
 
 
49
  for col in numeric_columns:
50
  if col in ["Baseline", "Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"]:
51
  # Convert string values (e.g., "0.95 (0.0)") to float for comparison, or use direct float values
52
- if any(" (" in str(x) for x in df[col]):
53
  # Handle string values with deltas (e.g., "0.95 (0.0)")
54
- values = [float(str(x).split(" (")[0]) for x in df[col]]
55
  else:
56
  # Handle direct float values
57
- values = df[col].astype(float)
58
 
59
  max_value = np.max(values)
60
- styled_df[col] = df[col].apply(lambda x: f"**{x}**" if (float(str(x).split(" (")[0]) if " (" in str(x) else float(x)) == max_value else x)
61
 
62
  return styled_df
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # Function to calculate top 3 models based on combined score (average of numeric columns)
65
  def get_top_3_models(robustness_df, context_grounding_df):
66
  # Combine numeric columns from both datasets
67
  numeric_cols_robustness = ["Baseline", "Robustness (Δ)"] # Columns with numeric or string-numeric data
68
  numeric_cols_context = ["Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"] # From context grounding
69
 
70
- # Extract numeric values for each column in robustness_df
71
  robustness_scores = pd.DataFrame()
72
  for col in numeric_cols_robustness:
73
- if any(" (" in str(x) for x in robustness_df[col]):
74
- # Handle string values with deltas (e.g., "0.95 (0.0)")
75
- robustness_scores[col] = robustness_df[col].apply(lambda x: float(str(x).split(" (")[0]) if " (" in str(x) else float(x))
76
- else:
77
- # Handle direct float values
78
- robustness_scores[col] = robustness_df[col].astype(float)
79
 
80
- # Extract numeric values for context_grounding_df (all are already float values)
81
- context_scores = context_grounding_df[numeric_cols_context].astype(float)
 
 
82
 
83
  # Combine scores by averaging
84
  combined_scores = (robustness_scores.mean(axis=1) + context_scores.mean(axis=1)) / 2
@@ -107,11 +120,11 @@ def create_leaderboard():
107
  robustness_df = pd.DataFrame(robustness_data)
108
  context_grounding_df = pd.DataFrame(context_grounding_data)
109
 
110
- # Format tables to bold highest scores
111
- robustness_df = format_table(robustness_df)
112
- context_grounding_df = format_table(context_grounding_df)
113
 
114
- # Get top 3 winners
115
  winners_df = get_top_3_models(robustness_df, context_grounding_df)
116
 
117
  # Create Gradio interface with a nice theme
@@ -122,14 +135,14 @@ def create_leaderboard():
122
  with gr.Column():
123
  with gr.Tab("Robustness Results"):
124
  gr.DataFrame(
125
- value=robustness_df,
126
  label="Robustness Results",
127
  wrap=True,
128
  elem_classes=["custom-table"]
129
  )
130
  with gr.Tab("Context Grounding Results"):
131
  gr.DataFrame(
132
- value=context_grounding_df,
133
  label="Context Grounding Results",
134
  wrap=True,
135
  elem_classes=["custom-table"]
 
41
  "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
42
  }
43
 
44
+ # Function to bold the highest score per column (excluding "Model Name") but keep original data for calculations
45
def format_table(df, original_df=None):
    """Return a copy of *df* with the best score of each score column in bold.

    Only the columns listed in ``_BOLDABLE_COLUMNS`` are touched; every other
    column (for example "Model Name") is copied through unchanged. Cells may
    be plain numbers or strings of the form ``"0.95 (0.0)"``; the number
    before the parenthesised delta is what gets compared. Every cell tied for
    the column maximum is wrapped in Markdown bold (``**...**``).

    Args:
        df: Table to style. It is never modified.
        original_df: Optional table holding the raw values to read scores
            from. It must contain the same score columns as *df*. When
            omitted, *df* itself is used as the source.

    Returns:
        A new DataFrame shaped like *df* whose score columns hold the source
        values, with the maximum of each column bolded.
    """
    # Reading from df directly is safe: it is only read, never written to.
    source_df = df if original_df is None else original_df
    styled_df = df.copy()

    for col in df.columns:
        if col not in _BOLDABLE_COLUMNS:
            continue

        cells = source_df[col]
        # Series.max() skips NaN, so a missing cell cannot hide the real
        # maximum; an empty or all-missing column yields NaN, which never
        # compares equal to anything and therefore bolds nothing.
        best = cells.map(_cell_score).astype(float).max()

        styled_df[col] = cells.map(
            lambda cell, best=best: (
                _bold(cell) if _cell_score(cell) == best else _unbold(cell)
            )
        )

    return styled_df


# Score columns whose maximum is highlighted.
_BOLDABLE_COLUMNS = (
    "Baseline",
    "Irrelevant Ctx",
    "No Ctx",
    "Ctx Grounding QA",
    "Ctx Grounding TG",
    "Ctx Grounding",
    "Robustness",
    "Compliance",
)


def _unbold(cell):
    """Strip Markdown bold markers from a string cell; pass others through."""
    return cell.replace("**", "") if isinstance(cell, str) else cell


def _bold(cell):
    """Wrap a cell in Markdown bold without doubling existing markers."""
    return f"**{_unbold(cell)}**"


def _cell_score(cell):
    """Return the leading number of a cell as a float, or NaN if missing."""
    if isinstance(cell, str):
        # "0.95 (0.0)" -> "0.95"; bold markers from an earlier pass are ignored.
        text = _unbold(cell).partition(" (")[0].strip()
        return float(text) if text else float("nan")
    if pd.isna(cell):
        return float("nan")
    return float(cell)
66
 
67
+ # Function to extract numeric value from a string (removing bold markup and deltas)
68
def extract_numeric(value):
    """Return the numeric score held in a table cell as a float.

    Accepts plain numbers as well as display strings such as ``"0.95"``,
    ``"0.95 (0.0)"`` or ``"**0.95 (0.0)**"``: Markdown bold markers are
    removed and anything from the first ``(`` onwards (the delta) is ignored.

    Args:
        value: A number, a string as described above, or a missing value
            (``None``, NaN, ``pd.NA``).

    Returns:
        The score as a float, or NaN when the cell is missing or blank.

    Raises:
        ValueError: If a non-blank string does not start with a valid number.
    """
    if isinstance(value, str):
        # partition("(") also copes with a delta written without a leading
        # space, e.g. "0.95(0.0)"; strip() drops the space left behind.
        number_text = value.replace("**", "").partition("(")[0].strip()
        if not number_text:
            # A blank cell is a missing score, same as NaN/None.
            return np.nan
        return float(number_text)

    if pd.isna(value):
        return np.nan

    return float(value)
79
+
80
  # Function to calculate top 3 models based on combined score (average of numeric columns)
81
  def get_top_3_models(robustness_df, context_grounding_df):
82
  # Combine numeric columns from both datasets
83
  numeric_cols_robustness = ["Baseline", "Robustness (Δ)"] # Columns with numeric or string-numeric data
84
  numeric_cols_context = ["Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"] # From context grounding
85
 
86
+ # Extract numeric values for each column in robustness_df, handling bold markup and deltas
87
  robustness_scores = pd.DataFrame()
88
  for col in numeric_cols_robustness:
89
+ robustness_scores[col] = robustness_df[col].apply(extract_numeric)
 
 
 
 
 
90
 
91
+ # Extract numeric values for context_grounding_df (all are already float values, but use extract_numeric for consistency)
92
+ context_scores = pd.DataFrame()
93
+ for col in numeric_cols_context:
94
+ context_scores[col] = context_grounding_df[col].apply(extract_numeric)
95
 
96
  # Combine scores by averaging
97
  combined_scores = (robustness_scores.mean(axis=1) + context_scores.mean(axis=1)) / 2
 
120
  robustness_df = pd.DataFrame(robustness_data)
121
  context_grounding_df = pd.DataFrame(context_grounding_data)
122
 
123
+ # Format tables to bold highest scores, but keep original data for calculations
124
+ formatted_robustness_df = format_table(robustness_df, robustness_df.copy()) # Pass original data for calculations
125
+ formatted_context_grounding_df = format_table(context_grounding_df, context_grounding_df.copy()) # Pass original data for calculations
126
 
127
+ # Get top 3 winners using the original (unformatted) DataFrames
128
  winners_df = get_top_3_models(robustness_df, context_grounding_df)
129
 
130
  # Create Gradio interface with a nice theme
 
135
  with gr.Column():
136
  with gr.Tab("Robustness Results"):
137
  gr.DataFrame(
138
+ value=formatted_robustness_df,
139
  label="Robustness Results",
140
  wrap=True,
141
  elem_classes=["custom-table"]
142
  )
143
  with gr.Tab("Context Grounding Results"):
144
  gr.DataFrame(
145
+ value=formatted_context_grounding_df,
146
  label="Context Grounding Results",
147
  wrap=True,
148
  elem_classes=["custom-table"]