shayekh committed on
Commit
4ec3cb6
·
verified ·
1 Parent(s): ac578b5

translation set updated

Browse files
Files changed (1) hide show
  1. app.py +96 -15
app.py CHANGED
@@ -15,7 +15,13 @@ Evaluating the chat, safety, reasoning, and translation capabilities of Multilin
15
 
16
  🌐 https://m-rewardbench.github.io/'''
17
 
18
- GOOGLE_SHEET_URL = "https://docs.google.com/spreadsheets/d/1qrD7plUdrBwAw7G6UeDVZAaV9ihxaNAcoiKwSaqotR4/export?gid=0&format=csv"
 
 
 
 
 
 
19
  # ABOUT_TEXT = """
20
  # <h1>
21
  # <span style="font-variant: small-caps;">M-RewardBench</span>: Evaluating Reward Models in Multilingual Settings
@@ -51,11 +57,41 @@ class AutoEvalColumn:
51
  })
52
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  def get_result_data():
55
- return pd.read_csv(GOOGLE_SHEET_URL)
 
56
 
 
 
57
 
58
- def init_leaderboard(dataframe):
 
59
  if dataframe is None or dataframe.empty:
60
  raise ValueError("Leaderboard DataFrame is empty or None.")
61
 
@@ -63,18 +99,18 @@ def init_leaderboard(dataframe):
63
  value=dataframe,
64
  datatype=[
65
  col["type"]
66
- for col in AutoEvalColumn.__dict__.values()
67
  if isinstance(col, dict)
68
  ],
69
  select_columns=SelectColumns(
70
  default_selection=[
71
  col["name"]
72
- for col in AutoEvalColumn.__dict__.values()
73
  if isinstance(col, dict) and col["displayed_by_default"]
74
  ],
75
  cant_deselect=[
76
  col["name"]
77
- for col in AutoEvalColumn.__dict__.values()
78
  if isinstance(col, dict) and col.get("never_hidden", False)
79
  ],
80
  label="Select Columns to Display:",
@@ -99,7 +135,7 @@ emojis = "πŸ”’ πŸ’¬ 🎯"
99
  model_types = {"Generative RM": "πŸ’¬", "DPO": "🎯", "Sequence Classifier": "πŸ”’"}
100
 
101
  from functools import partial
102
- def format_with_color(val, min_val=50, max_val=100):
103
  """
104
  Formats a value with inline green color gradient CSS.
105
  Returns an HTML string with bold, black text and muted green background.
@@ -111,6 +147,7 @@ def format_with_color(val, min_val=50, max_val=100):
111
 
112
  # Normalize value between 50 and 100 to 0-1 range
113
  normalized = (val - min_val) / (max_val - min_val)
 
114
  # Clamp value between 0 and 1
115
  normalized = max(0, min(1, normalized))
116
 
@@ -119,7 +156,12 @@ def format_with_color(val, min_val=50, max_val=100):
119
  intensity = int(50 + (150 * (1 - normalized)))
120
 
121
  # Return HTML with inline CSS - bold black text
122
- return f'<div val={val} style="background-color: rgb({intensity}, 200, {intensity}); color: black; font-weight: bold; text-align: center; vertical-align: middle;">{val*100:.1f}</div>'
 
 
 
 
 
123
 
124
  except (ValueError, TypeError):
125
  return str(val)
@@ -131,12 +173,10 @@ with demo:
131
  gr.Markdown(INTRODUCTION_TEXT)
132
 
133
  with gr.Tabs() as tabs:
134
- with gr.TabItem("πŸ… Leaderboard"):
135
  df = get_result_data()
136
  df["Model_Type"] = df["Model_Type"].map(model_types)
137
-
138
  df["Model"] = df.apply(format_model_link, axis=1)
139
-
140
  df["zho"] = df[["zho_Hans", "zho_Hant"]].mean(axis=1)
141
 
142
  columns = lang_ids.split("\t")
@@ -152,22 +192,63 @@ with demo:
152
 
153
  # df = df.style.applymap(apply_color_gradient, subset=['eng'])
154
  numeric_cols = df.select_dtypes(include=[np.number]).columns
 
 
155
 
156
 
157
  for col in numeric_cols:
158
  lang_format_with_color = partial(format_with_color,
159
- min_val=df[col].min(),
160
- max_val=df[col].max())
 
 
 
161
 
162
  df[col] = df[col].apply(lang_format_with_color)
163
 
164
-
165
  # for col in numeric_cols:
166
  # df[col] = (df[col] * 100).round(1).astype(str)
167
 
168
  AutoEvalColumn.add_columns_from_df(df, numeric_cols)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
- leaderboard = init_leaderboard(df)
 
 
 
 
 
 
 
171
 
172
  with gr.Row():
173
  with gr.Accordion("πŸ“š Citation", open=False):
 
15
 
16
  🌐 https://m-rewardbench.github.io/'''
17
 
18
+ # GOOGLE_SHEET_URL = "https://docs.google.com/spreadsheets/d/1qrD7plUdrBwAw7G6UeDVZAaV9ihxaNAcoiKwSaqotR4/export?gid=0&format=csv"
19
+
20
+ GOOGLE_SHEET_URLS = [
21
+ "https://docs.google.com/spreadsheets/d/1qrD7plUdrBwAw7G6UeDVZAaV9ihxaNAcoiKwSaqotR4/gviz/tq?tqx=out:csv&sheet=gt",
22
+ "https://docs.google.com/spreadsheets/d/1qrD7plUdrBwAw7G6UeDVZAaV9ihxaNAcoiKwSaqotR4/gviz/tq?tqx=out:csv&sheet=maple"
23
+ ]
24
+
25
  # ABOUT_TEXT = """
26
  # <h1>
27
  # <span style="font-variant: small-caps;">M-RewardBench</span>: Evaluating Reward Models in Multilingual Settings
 
57
  })
58
 
59
 
60
class AutoEvalColumnTranslation:
    """Column specifications for the translation leaderboard table.

    Each class attribute whose value is a dict describes one column through
    the keys ``name``, ``type``, ``displayed_by_default`` and
    ``never_hidden``. Two columns are predefined here; further ones are
    attached at runtime by :meth:`add_columns_from_df`.
    """

    # Model column: shown by default and flagged as never hidden.
    model = {
        "name": "Model",
        "type": "markdown",
        "displayed_by_default": True,
        "never_hidden": True,
    }

    # Model-type column, exposed under the short header "MT".
    model_type = {
        "name": "MT",
        "type": "markdown",
        "displayed_by_default": True,
        "never_hidden": True,
    }

    @classmethod
    def add_columns_from_df(cls, df, columns):
        """Attach one markdown column spec to the class per name in *columns*.

        Args:
            df: Not read by this method; kept so existing call sites that
                pass the DataFrame keep working.
            columns: Iterable of column-name strings.

        Each added spec is displayed by default and may be hidden. A name
        equal to ``"model"`` in any casing is skipped, because that column
        is predefined on the class.
        """
        for col in columns:
            if col.lower() == 'model':
                # Predefined above; do not shadow or duplicate it.
                continue
            setattr(cls, col, {
                "name": col,
                "type": "markdown",
                "displayed_by_default": True,
                "never_hidden": False,
            })
85
+
86
  def get_result_data():
87
+ return pd.read_csv(GOOGLE_SHEET_URLS[0])
88
+
89
 
90
def get_translation_data():
    """Read the second URL in GOOGLE_SHEET_URLS as CSV and return the DataFrame."""
    return pd.read_csv(GOOGLE_SHEET_URLS[1])
92
 
93
+
94
+ def init_leaderboard(dataframe, df_class):
95
  if dataframe is None or dataframe.empty:
96
  raise ValueError("Leaderboard DataFrame is empty or None.")
97
 
 
99
  value=dataframe,
100
  datatype=[
101
  col["type"]
102
+ for col in df_class.__dict__.values()
103
  if isinstance(col, dict)
104
  ],
105
  select_columns=SelectColumns(
106
  default_selection=[
107
  col["name"]
108
+ for col in df_class.__dict__.values()
109
  if isinstance(col, dict) and col["displayed_by_default"]
110
  ],
111
  cant_deselect=[
112
  col["name"]
113
+ for col in df_class.__dict__.values()
114
  if isinstance(col, dict) and col.get("never_hidden", False)
115
  ],
116
  label="Select Columns to Display:",
 
135
  model_types = {"Generative RM": "πŸ’¬", "DPO": "🎯", "Sequence Classifier": "πŸ”’"}
136
 
137
  from functools import partial
138
+ def format_with_color(val, min_val=50, max_val=100, scale=True):
139
  """
140
  Formats a value with inline green color gradient CSS.
141
  Returns an HTML string with bold, black text and muted green background.
 
147
 
148
  # Normalize value between 50 and 100 to 0-1 range
149
  normalized = (val - min_val) / (max_val - min_val)
150
+ # print(normalized)
151
  # Clamp value between 0 and 1
152
  normalized = max(0, min(1, normalized))
153
 
 
156
  intensity = int(50 + (150 * (1 - normalized)))
157
 
158
  # Return HTML with inline CSS - bold black text
159
+ show_val = val
160
+
161
+ if scale:
162
+ show_val = val*100
163
+
164
+ return f'<div val={val} style="background-color: rgb({intensity}, 200, {intensity}); color: black; font-weight: bold; text-align: center; vertical-align: middle;">{show_val:.1f}</div>'
165
 
166
  except (ValueError, TypeError):
167
  return str(val)
 
173
  gr.Markdown(INTRODUCTION_TEXT)
174
 
175
  with gr.Tabs() as tabs:
176
+ with gr.TabItem("πŸ… Main"):
177
  df = get_result_data()
178
  df["Model_Type"] = df["Model_Type"].map(model_types)
 
179
  df["Model"] = df.apply(format_model_link, axis=1)
 
180
  df["zho"] = df[["zho_Hans", "zho_Hant"]].mean(axis=1)
181
 
182
  columns = lang_ids.split("\t")
 
192
 
193
  # df = df.style.applymap(apply_color_gradient, subset=['eng'])
194
  numeric_cols = df.select_dtypes(include=[np.number]).columns
195
+ global_min = df.select_dtypes(include='number').min().min().astype(float)
196
+ global_max = df.select_dtypes(include='number').max().max().astype(float)
197
 
198
 
199
  for col in numeric_cols:
200
  lang_format_with_color = partial(format_with_color,
201
+ # min_val=df[col].min(),
202
+ # max_val=df[col].max(),
203
+ min_val=global_min,
204
+ max_val=global_max,
205
+ )
206
 
207
  df[col] = df[col].apply(lang_format_with_color)
208
 
 
209
  # for col in numeric_cols:
210
  # df[col] = (df[col] * 100).round(1).astype(str)
211
 
212
  AutoEvalColumn.add_columns_from_df(df, numeric_cols)
213
+ leaderboard = init_leaderboard(df, AutoEvalColumn)
214
+
215
+ with gr.TabItem("πŸ… Translation"):
216
+ df = get_translation_data()
217
+ df["Model_Type"] = df["Model_Type"].map(model_types)
218
+ df["Model"] = df.apply(format_model_link, axis=1)
219
+
220
+ df.rename(columns={
221
+ "Model_Type": "MT",
222
+ "Avg": "AVG",
223
+ }, inplace=True)
224
+
225
+ numeric_cols = df.select_dtypes(include=[np.number]).columns
226
+ # print(df[numeric_cols].min().min())
227
+ # print(df[numeric_cols].max().max())
228
+ global_min = df.select_dtypes(include='number').min().min().astype(float)
229
+ global_max = df.select_dtypes(include='number').max().max().astype(float)
230
+ # print(global_max)
231
+
232
+ for col in numeric_cols:
233
+ # print(df[col].min())
234
+ lang_format_with_color = partial(format_with_color,
235
+ min_val=global_min,
236
+ max_val=global_max,
237
+ # min_val=df[col].min(),
238
+ # max_val=df[col].max(),
239
+ scale=False)
240
+ df[col] = df[col].apply(lang_format_with_color)
241
+
242
+
243
 
244
+
245
+ # for col in numeric_cols:
246
+ # df[col] = (df[col] * 100).round(1).astype(str)
247
+
248
+ AutoEvalColumnTranslation.add_columns_from_df(df, numeric_cols)
249
+ leaderboard = init_leaderboard(df, AutoEvalColumnTranslation)
250
+
251
+
252
 
253
  with gr.Row():
254
  with gr.Accordion("πŸ“š Citation", open=False):