sh1gechan committed
Commit d7a1307
Parent: 5008c13

Add a description of submittable models; add the shot count to the Model column

Files changed (3)
  1. src/about.py +8 -0
  2. src/display/formatting.py +10 -0
  3. src/populate.py +7 -3
src/about.py CHANGED
@@ -223,6 +223,10 @@ To reproduce our results, please follow the instructions of the evalution tool,
  ## Average Score Calculation
  The calculation of the average score (AVG) includes only the scores of datasets marked with a ⭐.
  
+ ### Note about large models
+ Currently, we support models up to 70B parameters. However, we are working on infrastructure improvements to accommodate larger models (70B+) in the near future. Stay tuned for updates!
+
+
  """
  
  LLM_BENCHMARKS_TEXT_JA = """
@@ -308,6 +312,10 @@ LLM_BENCHMARKS_TEXT_JA = """
  
  ## 平均スコアの計算について
  平均スコア (AVG) の計算には、⭐マークのついたスコアのみが含まれます
+
+ ### 大規模モデルに関する注意
+ 現在、70Bパラメータまでのモデルをサポートしています。より大規模なモデル(70Bよりも大きいもの)については、インフラストラクチャの改善を進めており、近い将来対応予定です。続報をお待ちください!
+
  """
  
  
src/display/formatting.py CHANGED
@@ -2,11 +2,21 @@ def model_hyperlink(link, model_name):
      return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
  
  
+ def model_hyperlink_with_shot(link, model_name, num_few_shot):
+     display_name = f"{model_name} ({num_few_shot}-shot)"
+     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{display_name}</a>'
+
+
  def make_clickable_model(model_name):
      link = f"https://huggingface.co/{model_name}"
      return model_hyperlink(link, model_name)
  
  
+ def make_clickable_model_with_shot(model_name, num_few_shot):
+     link = f"https://huggingface.co/{model_name}"
+     return model_hyperlink_with_shot(link, model_name, num_few_shot)
+
+
  def styled_error(error):
      return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
  
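For reference, a minimal usage sketch of the helpers added above. The model name and shot count below are hypothetical placeholders, not values taken from this commit.

from src.display.formatting import make_clickable_model_with_shot

# Hypothetical example values; any Hub model id and shot count would work the same way.
html = make_clickable_model_with_shot("org/model", 4)
# Expected result: an anchor linking to https://huggingface.co/org/model
# whose visible text is "org/model (4-shot)".
print(html)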
src/populate.py CHANGED
@@ -5,7 +5,7 @@ import datasets
  import pandas as pd
  
  from src.about import Tasks
- from src.display.formatting import has_no_nan_values, make_clickable_model
+ from src.display.formatting import has_no_nan_values, make_clickable_model, make_clickable_model_with_shot
  from src.display.utils import AutoEvalColumn, EvalQueueColumn
  
  # The values of these columns are in the range of 0-100
@@ -24,7 +24,8 @@ COLUMNS_TO_NORMALIZE = [
  
  def get_leaderboard_df(contents_repo: str, cols: list[str], benchmark_cols: list[str]) -> pd.DataFrame:
      df = datasets.load_dataset(contents_repo, split="train").to_pandas()
-     df["Model"] = df["model"].map(make_clickable_model)
+     # df["Model"] = df["model"].map(make_clickable_model)
+     df["Model"] = df.apply(lambda x: make_clickable_model_with_shot(x["model"], x["num_few_shot"]), axis=1)
      df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())
      df = df.rename(columns={task.value.metric: task.value.col_name for task in Tasks})
      df = df.rename(
@@ -72,7 +73,10 @@ def get_evaluation_queue_df(save_path: str, cols: list[str]) -> list[pd.DataFrame]:
              with open(file_path) as fp:
                  data = json.load(fp)
  
-             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+             # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+             data[EvalQueueColumn.model.name] = make_clickable_model_with_shot(
+                 data["model"], data["num_few_shot"]  # num_few_shot is always present, so access it directly
+             )
              data[EvalQueueColumn.revision.name] = data.get("revision", "main")
  
              all_evals.append(data)
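
As a sanity check, a minimal sketch of what the populate.py change does to the leaderboard dataframe, assuming a toy dataframe with the same "model" and "num_few_shot" columns. The rows below are illustrative, not data from the actual contents repo.

import pandas as pd

from src.display.formatting import make_clickable_model_with_shot

# Toy stand-in for the dataframe loaded from the contents repo (illustrative values only).
df = pd.DataFrame({"model": ["org-a/model-x", "org-b/model-y"], "num_few_shot": [0, 4]})

# Same pattern as get_leaderboard_df: build the Model column row by row from model id and shot count.
df["Model"] = df.apply(lambda x: make_clickable_model_with_shot(x["model"], x["num_few_shot"]), axis=1)

print(df["Model"].iloc[1])  # anchor whose visible text is "org-b/model-y (4-shot)"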