sh1gechan committed
Commit d7a1307
Parent: 5008c13

Add a description of submittable models; add the shot count to the Model column

Files changed (3)
  1. src/about.py +8 -0
  2. src/display/formatting.py +10 -0
  3. src/populate.py +7 -3
src/about.py CHANGED
@@ -223,6 +223,10 @@ To reproduce our results, please follow the instructions of the evalution tool,
  ## Average Score Calculation
  The calculation of the average score (AVG) includes only the scores of datasets marked with a ⭐.
  
+ ### Note about large models
+ Currently, we support models up to 70B parameters. However, we are working on infrastructure improvements to accommodate larger models (70B+) in the near future. Stay tuned for updates!
+
+
  """
  
  LLM_BENCHMARKS_TEXT_JA = """
@@ -308,6 +312,10 @@ LLM_BENCHMARKS_TEXT_JA = """
  
  ## 平均スコアの計算について
  平均スコア (AVG) の計算には、⭐マークのついたスコアのみが含まれます
+
+ ### 大規模モデルに関する注意
+ 現在、70Bパラメータまでのモデルをサポートしています。より大規模なモデル(70Bよりも大きいもの)については、インフラストラクチャの改善を進めており、近い将来対応予定です。続報をお待ちください!
+
  """
  
  
src/display/formatting.py CHANGED
@@ -2,11 +2,21 @@ def model_hyperlink(link, model_name):
      return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
  
  
+ def model_hyperlink_with_shot(link, model_name, num_few_shot):
+     display_name = f"{model_name} ({num_few_shot}-shot)"
+     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{display_name}</a>'
+
+
  def make_clickable_model(model_name):
      link = f"https://huggingface.co/{model_name}"
      return model_hyperlink(link, model_name)
  
  
+ def make_clickable_model_with_shot(model_name, num_few_shot):
+     link = f"https://huggingface.co/{model_name}"
+     return model_hyperlink_with_shot(link, model_name, num_few_shot)
+
+
  def styled_error(error):
      return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
  
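For reference, a minimal usage sketch of the helpers added above. The model name and shot count below are hypothetical placeholders, not values taken from this commit.

from src.display.formatting import make_clickable_model_with_shot

# Hypothetical example values; any Hub model id and shot count would work the same way.
html = make_clickable_model_with_shot("org/model", 4)
# Expected result: an anchor linking to https://huggingface.co/org/model
# whose visible text is "org/model (4-shot)".
print(html)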
src/populate.py CHANGED
@@ -5,7 +5,7 @@ import datasets
  import pandas as pd
  
  from src.about import Tasks
- from src.display.formatting import has_no_nan_values, make_clickable_model
+ from src.display.formatting import has_no_nan_values, make_clickable_model, make_clickable_model_with_shot
  from src.display.utils import AutoEvalColumn, EvalQueueColumn
  
  # The values of these columns are in the range of 0-100
@@ -24,7 +24,8 @@ COLUMNS_TO_NORMALIZE = [
  
  def get_leaderboard_df(contents_repo: str, cols: list[str], benchmark_cols: list[str]) -> pd.DataFrame:
      df = datasets.load_dataset(contents_repo, split="train").to_pandas()
-     df["Model"] = df["model"].map(make_clickable_model)
+     # df["Model"] = df["model"].map(make_clickable_model)
+     df["Model"] = df.apply(lambda x: make_clickable_model_with_shot(x["model"], x["num_few_shot"]), axis=1)
      df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())
      df = df.rename(columns={task.value.metric: task.value.col_name for task in Tasks})
      df = df.rename(
@@ -72,7 +73,10 @@ def get_evaluation_queue_df(save_path: str, cols: list[str]) -> list[pd.DataFrame]:
              with open(file_path) as fp:
                  data = json.load(fp)
  
-             data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+             # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+             data[EvalQueueColumn.model.name] = make_clickable_model_with_shot(
+                 data["model"], data["num_few_shot"]  # num_few_shot is always present, so access it directly
+             )
              data[EvalQueueColumn.revision.name] = data.get("revision", "main")
  
              all_evals.append(data)
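
As a sanity check, a minimal sketch of what the populate.py change does to the leaderboard dataframe, assuming a toy dataframe with the same "model" and "num_few_shot" columns. The rows below are illustrative, not data from the actual contents repo.

import pandas as pd

from src.display.formatting import make_clickable_model_with_shot

# Toy stand-in for the dataframe loaded from the contents repo (illustrative values only).
df = pd.DataFrame({"model": ["org-a/model-x", "org-b/model-y"], "num_few_shot": [0, 4]})

# Same pattern as get_leaderboard_df: build the Model column row by row from model id and shot count.
df["Model"] = df.apply(lambda x: make_clickable_model_with_shot(x["model"], x["num_few_shot"]), axis=1)

print(df["Model"].iloc[1])  # anchor whose visible text is "org-b/model-y (4-shot)"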