Commit dc8017a · committed by hysts (HF staff)
Parent(s): 1429fd4

Fix graphs

Files changed (4)
  1. app.py +5 -3
  2. src/display/utils.py +1 -0
  3. src/populate.py +3 -0
  4. style.css +6 -0
app.py CHANGED
@@ -165,7 +165,9 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:

     # Exclude 'always_here_cols' from 'columns' to avoid duplicates
     columns = [c for c in columns if c not in always_here_cols]
-    new_columns = always_here_cols + [c for c in COLS if c in df.columns and c in columns]
+    new_columns = (
+        always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.row_id.name]
+    )

     # Remove duplicates while preserving order
     seen = set()
@@ -306,7 +308,7 @@ def toggle_all_categories(action: str) -> list[gr.CheckboxGroup]:


 def plot_size_vs_score(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
-    df2 = hidden_df.iloc[df.index]
+    df2 = hidden_df[hidden_df[AutoEvalColumn.row_id.name].isin(df[AutoEvalColumn.row_id.name])]
     df2 = df2[df2["#Params (B)"] > 0]
     df2 = df2[["model_name_for_query", "#Params (B)", "AVG", "Few-shot"]]
     df2["AVG"] = df2["AVG"].astype(float)
@@ -333,7 +335,7 @@ TASK_AVG_NAME_MAP = {


 def plot_average_scores(df: pd.DataFrame, hidden_df: pd.DataFrame) -> go.Figure:
-    df2 = hidden_df.iloc[df.index]
+    df2 = hidden_df[hidden_df[AutoEvalColumn.row_id.name].isin(df[AutoEvalColumn.row_id.name])]
     df2 = df2[["model_name_for_query", "Few-shot"] + list(TASK_AVG_NAME_MAP.keys())]
     df2 = df2.rename(columns={"model_name_for_query": "Model", "Few-shot": "n-shot"})
     df2 = df2.rename(columns=TASK_AVG_NAME_MAP)
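Why the isin lookup fixes the plots: hidden_df.iloc[df.index] assumes the visible dataframe still shares row positions with the full hidden dataframe, which stops holding once the table has been filtered or re-ordered in the UI. Below is a minimal sketch of that failure mode with made-up data; the toy frames and the literal "ID" column are hypothetical stand-ins for hidden_df, the filtered df handed back by the UI, and AutoEvalColumn.row_id.name.

    import pandas as pd

    # Hypothetical leaderboard data standing in for hidden_df; "ID" mirrors
    # the row ID column added in this commit.
    hidden_df = pd.DataFrame(
        {
            "ID": [0, 1, 2, 3],
            "model_name_for_query": ["a", "b", "c", "d"],
            "AVG": [0.1, 0.4, 0.2, 0.3],
        }
    )

    # Simulate the visible table after the user filters it down to two rows;
    # the round trip through the UI typically leaves it with a fresh 0..n-1 index.
    visible_df = hidden_df[hidden_df["AVG"] > 0.25].reset_index(drop=True)

    # Old approach: positional lookup grabs the *first* len(visible_df) rows
    # of hidden_df, not the rows the user is actually looking at.
    wrong = hidden_df.iloc[visible_df.index]
    print(wrong["model_name_for_query"].tolist())  # ['a', 'b'] -- mismatch

    # New approach: match on the stable row ID carried by both frames.
    right = hidden_df[hidden_df["ID"].isin(visible_df["ID"])]
    print(right["model_name_for_query"].tolist())  # ['b', 'd'] -- correct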
src/display/utils.py CHANGED
@@ -63,6 +63,7 @@ auto_eval_column_dict.append(
 )
 auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False, dummy=True)])
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
+auto_eval_column_dict.append(["row_id", ColumnContent, ColumnContent("ID", "number", False, dummy=True)])

 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
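For context, a sketch of what the new row_id entry turns into once make_dataclass runs. The ColumnContent below is a simplified, frozen stand-in (the real dataclass in this repo likely carries more fields); only the row_id entry and the make_dataclass call mirror the diff.

    from dataclasses import dataclass, make_dataclass

    # Simplified stand-in for the repo's ColumnContent. Frozen so the instances
    # are hashable and can serve as dataclass defaults on recent Python versions.
    @dataclass(frozen=True)
    class ColumnContent:
        name: str
        type: str
        displayed_by_default: bool = True
        dummy: bool = False

    auto_eval_column_dict = [
        ["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)],
        # The entry added by this commit: a hidden numeric "ID" column.
        ["row_id", ColumnContent, ColumnContent("ID", "number", False, dummy=True)],
    ]

    # Each [attr_name, attr_type, default_instance] triple becomes a field of
    # the frozen dataclass, so other modules can refer to the column via
    # AutoEvalColumn.row_id.name instead of hard-coding the "ID" string.
    AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

    print(AutoEvalColumn.row_id.name)  # -> ID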
src/populate.py CHANGED
@@ -15,6 +15,9 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm

     df = pd.DataFrame.from_records(all_data_json)

+    # Add a row ID column
+    df[AutoEvalColumn.row_id.name] = range(len(df))
+
     score_cols = [
         "ALT E to J BLEU",
         "ALT J to E BLEU",
style.css CHANGED
@@ -135,3 +135,9 @@
     flex-direction: row;
     align-items: center;
 }
+
+/* Hides the final AutoEvalColumn */
+#llm-benchmark-tab-table table td:last-child,
+#llm-benchmark-tab-table table th:last-child {
+    display: none;
+}