Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
submit可能なモデルの説明を追加, モデル列にshot数を追加
Browse files- src/about.py +8 -0
- src/display/formatting.py +10 -0
- src/populate.py +7 -3
src/about.py
CHANGED
@@ -223,6 +223,10 @@ To reproduce our results, please follow the instructions of the evaluation tool,
|
|
223 |
## Average Score Calculation
|
224 |
The calculation of the average score (AVG) includes only the scores of datasets marked with a ⭐.
|
225 |
|
|
|
|
|
|
|
|
|
226 |
"""
|
227 |
|
228 |
LLM_BENCHMARKS_TEXT_JA = """
|
@@ -308,6 +312,10 @@ LLM_BENCHMARKS_TEXT_JA = """
|
|
308 |
|
309 |
## 平均スコアの計算について
|
310 |
平均スコア (AVG) の計算には、⭐マークのついたスコアのみが含まれます
|
|
|
|
|
|
|
|
|
311 |
"""
|
312 |
|
313 |
|
|
|
223 |
## Average Score Calculation
|
224 |
The calculation of the average score (AVG) includes only the scores of datasets marked with a ⭐.
|
225 |
|
226 |
+
### Note about large models
|
227 |
+
Currently, we support models up to 70B parameters. However, we are working on infrastructure improvements to accommodate larger models (70B+) in the near future. Stay tuned for updates!
|
228 |
+
|
229 |
+
|
230 |
"""
|
231 |
|
232 |
LLM_BENCHMARKS_TEXT_JA = """
|
|
|
312 |
|
313 |
## 平均スコアの計算について
|
314 |
平均スコア (AVG) の計算には、⭐マークのついたスコアのみが含まれます
|
315 |
+
|
316 |
+
### 大規模モデルに関する注意
|
317 |
+
現在、70Bパラメータまでのモデルをサポートしています。より大規模なモデル(70Bよりも大きいもの)については、インフラストラクチャの改善を進めており、近い将来対応予定です。続報をお待ちください!
|
318 |
+
|
319 |
"""
|
320 |
|
321 |
|
src/display/formatting.py
CHANGED
@@ -2,11 +2,21 @@ def model_hyperlink(link, model_name):
|
|
2 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
5 |
def make_clickable_model(model_name):
    """Render *model_name* as an HTML link to its Hugging Face model page."""
    return model_hyperlink(f"https://huggingface.co/{model_name}", model_name)
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
10 |
def styled_error(error):
    """Wrap *error* in a centered, large, red HTML paragraph for display."""
    markup = f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
    return markup
|
12 |
|
|
|
2 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
3 |
|
4 |
|
5 |
+
def model_hyperlink_with_shot(link, model_name, num_few_shot):
    """Return an HTML hyperlink whose label appends the few-shot count.

    Args:
        link: Target URL for the anchor.
        model_name: Display name of the model.
        num_few_shot: Number of few-shot examples used for evaluation;
            rendered as "(<n>-shot)" after the model name.

    Returns:
        An HTML ``<a>`` string, styled identically to ``model_hyperlink``.
    """
    display_name = f"{model_name} ({num_few_shot}-shot)"
    # Delegate to model_hyperlink so the anchor markup/styling lives in one
    # place instead of being duplicated (the two copies could silently drift).
    return model_hyperlink(link, display_name)
|
8 |
+
|
9 |
+
|
10 |
def make_clickable_model(model_name):
    """Return an HTML anchor pointing at the model's Hugging Face page."""
    hf_url = "https://huggingface.co/" + model_name
    return model_hyperlink(hf_url, model_name)
|
13 |
|
14 |
|
15 |
+
def make_clickable_model_with_shot(model_name, num_few_shot):
    """Clickable Hugging Face link labelled with the model's few-shot count."""
    return model_hyperlink_with_shot(
        f"https://huggingface.co/{model_name}", model_name, num_few_shot
    )
|
18 |
+
|
19 |
+
|
20 |
def styled_error(error):
    """Format *error* as an HTML paragraph in large, centered red text."""
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{error}</p>"
|
22 |
|
src/populate.py
CHANGED
@@ -5,7 +5,7 @@ import datasets
|
|
5 |
import pandas as pd
|
6 |
|
7 |
from src.about import Tasks
|
8 |
-
from src.display.formatting import has_no_nan_values, make_clickable_model
|
9 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
10 |
|
11 |
# The values of these columns are in the range of 0-100
|
@@ -24,7 +24,8 @@ COLUMNS_TO_NORMALIZE = [
|
|
24 |
|
25 |
def get_leaderboard_df(contents_repo: str, cols: list[str], benchmark_cols: list[str]) -> pd.DataFrame:
|
26 |
df = datasets.load_dataset(contents_repo, split="train").to_pandas()
|
27 |
-
df["Model"] = df["model"].map(make_clickable_model)
|
|
|
28 |
df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())
|
29 |
df = df.rename(columns={task.value.metric: task.value.col_name for task in Tasks})
|
30 |
df = df.rename(
|
@@ -72,7 +73,10 @@ def get_evaluation_queue_df(save_path: str, cols: list[str]) -> list[pd.DataFram
|
|
72 |
with open(file_path) as fp:
|
73 |
data = json.load(fp)
|
74 |
|
75 |
-
data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
|
|
|
|
|
|
|
76 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
77 |
|
78 |
all_evals.append(data)
|
|
|
5 |
import pandas as pd
|
6 |
|
7 |
from src.about import Tasks
|
8 |
+
from src.display.formatting import has_no_nan_values, make_clickable_model, make_clickable_model_with_shot
|
9 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
10 |
|
11 |
# The values of these columns are in the range of 0-100
|
|
|
24 |
|
25 |
def get_leaderboard_df(contents_repo: str, cols: list[str], benchmark_cols: list[str]) -> pd.DataFrame:
|
26 |
df = datasets.load_dataset(contents_repo, split="train").to_pandas()
|
27 |
+
# df["Model"] = df["model"].map(make_clickable_model)
|
28 |
+
df["Model"] = df.apply(lambda x: make_clickable_model_with_shot(x["model"], x["num_few_shot"]), axis=1)
|
29 |
df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())
|
30 |
df = df.rename(columns={task.value.metric: task.value.col_name for task in Tasks})
|
31 |
df = df.rename(
|
|
|
73 |
with open(file_path) as fp:
|
74 |
data = json.load(fp)
|
75 |
|
76 |
+
# data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
|
77 |
+
data[EvalQueueColumn.model.name] = make_clickable_model_with_shot(
|
78 |
+
data["model"], data["num_few_shot"] # num_few_shotは必ず存在するため、直接参照
|
79 |
+
)
|
80 |
data[EvalQueueColumn.revision.name] = data.get("revision", "main")
|
81 |
|
82 |
all_evals.append(data)
|