Spaces:
Running
Running
Muennighoff
committed on
Commit
·
003d24d
1
Parent(s):
0d4db15
Updates
Browse files
app.py
CHANGED
@@ -1,55 +1,167 @@
|
|
1 |
import gradio as gr
|
2 |
-
import requests
|
3 |
import pandas as pd
|
4 |
-
from huggingface_hub.hf_api import SpaceInfo
|
5 |
from huggingface_hub import HfApi, hf_hub_download
|
6 |
from huggingface_hub.repocard import metadata_load
|
7 |
|
8 |
path = f"https://huggingface.co/api/spaces"
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def make_clickable_model(model_name):
|
24 |
-
#
|
25 |
-
model_name_show =
|
26 |
link = "https://huggingface.co/" + model_name
|
27 |
-
return
|
|
|
|
|
28 |
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
api = HfApi()
|
32 |
models = api.list_models(filter="mteb")
|
33 |
df_list = []
|
34 |
for model in models:
|
35 |
readme_path = hf_hub_download(model.modelId, filename="README.md")
|
36 |
meta = metadata_load(readme_path)
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
# Multilingual
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
out = {k: v for d in out for k, v in d.items()}
|
54 |
out["Model"] = make_clickable_model(model.modelId)
|
55 |
df_list.append(out)
|
@@ -58,31 +170,91 @@ def get_mteb_data(task="Clustering", metric="v_measure", lang=None):
|
|
58 |
cols = sorted(list(df.columns))
|
59 |
cols.insert(0, cols.pop(cols.index("Model")))
|
60 |
df = df[cols]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
df.fillna('', inplace=True)
|
63 |
-
return df.astype(str) # Cast to str as Gradio does not accept floats
|
64 |
|
65 |
block = gr.Blocks()
|
66 |
|
67 |
-
with block:
|
68 |
-
gr.Markdown(
|
|
|
|
|
69 |
with gr.Tabs():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
with gr.TabItem("Classification"):
|
71 |
with gr.TabItem("English"):
|
72 |
with gr.Row():
|
73 |
gr.Markdown("""Leaderboard for Classification""")
|
74 |
with gr.Row():
|
75 |
data_classification_en = gr.components.Dataframe(
|
76 |
-
|
|
|
77 |
type="pandas",
|
78 |
-
col_count=(
|
79 |
)
|
80 |
with gr.Row():
|
81 |
data_run = gr.Button("Refresh")
|
82 |
task_classification_en = gr.Variable(value="Classification")
|
83 |
metric_classification_en = gr.Variable(value="accuracy")
|
84 |
lang_classification_en = gr.Variable(value=["en"])
|
85 |
-
data_run.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
with gr.TabItem("Multilingual"):
|
87 |
with gr.Row():
|
88 |
gr.Markdown("""Multilingual Classification""")
|
@@ -95,7 +267,11 @@ with block:
|
|
95 |
data_run = gr.Button("Refresh")
|
96 |
task_classification = gr.Variable(value="Classification")
|
97 |
metric_classification = gr.Variable(value="accuracy")
|
98 |
-
data_run.click(
|
|
|
|
|
|
|
|
|
99 |
with gr.TabItem("Clustering"):
|
100 |
with gr.Row():
|
101 |
gr.Markdown("""Leaderboard for Clustering""")
|
@@ -108,7 +284,11 @@ with block:
|
|
108 |
data_run = gr.Button("Refresh")
|
109 |
task_clustering = gr.Variable(value="Clustering")
|
110 |
metric_clustering = gr.Variable(value="v_measure")
|
111 |
-
data_run.click(
|
|
|
|
|
|
|
|
|
112 |
with gr.TabItem("Retrieval"):
|
113 |
with gr.Row():
|
114 |
gr.Markdown("""Leaderboard for Retrieval""")
|
@@ -121,7 +301,9 @@ with block:
|
|
121 |
data_run = gr.Button("Refresh")
|
122 |
task_retrieval = gr.Variable(value="Retrieval")
|
123 |
metric_retrieval = gr.Variable(value="ndcg_at_10")
|
124 |
-
data_run.click(
|
|
|
|
|
125 |
with gr.TabItem("Reranking"):
|
126 |
with gr.Row():
|
127 |
gr.Markdown("""Leaderboard for Reranking""")
|
@@ -129,13 +311,15 @@ with block:
|
|
129 |
data_reranking = gr.components.Dataframe(
|
130 |
datatype=["markdown"] * 500,
|
131 |
type="pandas",
|
132 |
-
#col_count=(12, "fixed"),
|
133 |
)
|
134 |
with gr.Row():
|
135 |
data_run = gr.Button("Refresh")
|
136 |
task_reranking = gr.Variable(value="Reranking")
|
137 |
metric_reranking = gr.Variable(value="map")
|
138 |
-
data_run.click(
|
|
|
|
|
139 |
with gr.TabItem("STS"):
|
140 |
with gr.TabItem("English"):
|
141 |
with gr.Row():
|
@@ -150,7 +334,11 @@ with block:
|
|
150 |
task_sts_en = gr.Variable(value="STS")
|
151 |
metric_sts_en = gr.Variable(value="cos_sim_spearman")
|
152 |
lang_sts_en = gr.Variable(value=["en", "en-en"])
|
153 |
-
data_run.click(
|
|
|
|
|
|
|
|
|
154 |
with gr.TabItem("Multilingual"):
|
155 |
with gr.Row():
|
156 |
gr.Markdown("""Leaderboard for STS""")
|
@@ -176,23 +364,29 @@ with block:
|
|
176 |
data_run = gr.Button("Refresh")
|
177 |
task_summarization = gr.Variable(value="Summarization")
|
178 |
metric_summarization = gr.Variable(value="cos_sim_spearman")
|
179 |
-
data_run.click(
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
data_run = gr.Button("Refresh")
|
185 |
-
data_run.click(get_blocks_party_spaces, inputs=None, outputs=data)
|
186 |
# running the function on page load in addition to when the button is clicked
|
187 |
-
block.load(
|
188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
block.load(get_mteb_data, inputs=[task_clustering, metric_clustering], outputs=data_clustering)
|
190 |
block.load(get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval)
|
191 |
block.load(get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking)
|
192 |
-
block.load(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
|
193 |
-
block.load(
|
194 |
-
|
195 |
-
|
196 |
|
197 |
block.launch()
|
198 |
-
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import pandas as pd
|
|
|
3 |
from huggingface_hub import HfApi, hf_hub_download
|
4 |
from huggingface_hub.repocard import metadata_load
|
5 |
|
6 |
path = f"https://huggingface.co/api/spaces"
|
7 |
|
8 |
+
TASKS = [
|
9 |
+
"BitextMining",
|
10 |
+
"Classification",
|
11 |
+
"Clustering",
|
12 |
+
"PairClassification",
|
13 |
+
"Reranking",
|
14 |
+
"Retrieval",
|
15 |
+
"STS",
|
16 |
+
"Summarization",
|
17 |
+
]
|
18 |
+
|
19 |
+
TASK_LIST_CLASSIFICATION = [
|
20 |
+
"AmazonCounterfactualClassification (en)",
|
21 |
+
"AmazonPolarityClassification",
|
22 |
+
"AmazonReviewsClassification (en)",
|
23 |
+
"Banking77Classification",
|
24 |
+
"EmotionClassification",
|
25 |
+
"ImdbClassification",
|
26 |
+
"MassiveIntentClassification (en)",
|
27 |
+
"MassiveScenarioClassification (en)",
|
28 |
+
"MTOPDomainClassification (en)",
|
29 |
+
"MTOPIntentClassification (en)",
|
30 |
+
"ToxicConversationsClassification",
|
31 |
+
"TweetSentimentExtractionClassification",
|
32 |
+
]
|
33 |
+
|
34 |
+
TASK_LIST_CLUSTERING = [
|
35 |
+
"ArxivClusteringP2P",
|
36 |
+
"ArxivClusteringS2S",
|
37 |
+
"BiorxivClusteringP2P",
|
38 |
+
"BiorxivClusteringS2S",
|
39 |
+
"MedrxivClusteringP2P",
|
40 |
+
"MedrxivClusteringS2S",
|
41 |
+
"RedditClustering",
|
42 |
+
"RedditClusteringP2P",
|
43 |
+
"StackExchangeClustering",
|
44 |
+
"StackExchangeClusteringP2P",
|
45 |
+
"TwentyNewsgroupsClustering",
|
46 |
+
]
|
47 |
+
|
48 |
+
TASK_LIST_PAIR_CLASSIFICATION = [
|
49 |
+
"SprintDuplicateQuestions",
|
50 |
+
"TwitterSemEval2015",
|
51 |
+
"TwitterURLCorpus",
|
52 |
+
]
|
53 |
+
|
54 |
+
TASK_LIST_RERANKING = [
|
55 |
+
"AskUbuntuDupQuestions",
|
56 |
+
"MindSmallReranking",
|
57 |
+
"SciDocsRR",
|
58 |
+
"StackOverflowDupQuestions",
|
59 |
+
]
|
60 |
+
|
61 |
+
TASK_LIST_RETRIEVAL = [
|
62 |
+
"ArguAna",
|
63 |
+
"ClimateFEVER",
|
64 |
+
"CQADupstackRetrieval",
|
65 |
+
"DBPedia",
|
66 |
+
"FEVER",
|
67 |
+
"FiQA2018",
|
68 |
+
"HotpotQA",
|
69 |
+
"MSMARCO",
|
70 |
+
"NFCorpus",
|
71 |
+
"NQ",
|
72 |
+
"QuoraRetrieval",
|
73 |
+
"SCIDOCS",
|
74 |
+
"SciFact",
|
75 |
+
"Touche2020",
|
76 |
+
"TRECCOVID",
|
77 |
+
]
|
78 |
+
|
79 |
+
TASK_LIST_STS = [
|
80 |
+
"BIOSSES",
|
81 |
+
"SICK-R",
|
82 |
+
"STS12",
|
83 |
+
"STS13",
|
84 |
+
"STS14",
|
85 |
+
"STS15",
|
86 |
+
"STS16",
|
87 |
+
"STS17 (en-en)",
|
88 |
+
"STS22 (en)",
|
89 |
+
"STSBenchmark",
|
90 |
+
]
|
91 |
+
|
92 |
+
|
93 |
+
TASK_LIST_SUMMARIZATION = [
|
94 |
+
"SummEval",
|
95 |
+
]
|
96 |
+
|
97 |
+
TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
|
98 |
+
|
99 |
+
TASK_TO_TASK_LIST = {}
|
100 |
+
|
101 |
+
|
102 |
|
103 |
def make_clickable_model(model_name):
    """Return an HTML anchor that links to a model's Hugging Face page.

    Args:
        model_name: Repository id of the model, e.g. ``"user/model"``.

    Returns:
        An ``<a>`` tag (opening in a new tab, underlined) whose ``href`` is
        ``https://huggingface.co/<model_name>`` and whose text is the model
        name with the leading ``user/`` part removed.
    """
    # Remove user from model name. An id without a "/" has no user part, so
    # fall back to the full name rather than rendering a link with empty text.
    model_name_show = " ".join(model_name.split("/")[1:]) or model_name
    link = "https://huggingface.co/" + model_name
    return (
        f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
    )
|
110 |
|
111 |
|
112 |
+
TASK_TO_METRIC = {
|
113 |
+
"BitextMining": "f1",
|
114 |
+
"Clustering": "v_measure",
|
115 |
+
"Classification": "accuracy",
|
116 |
+
"PairClassification": "cos_sim_ap",
|
117 |
+
"Reranking": "map",
|
118 |
+
"Retrieval": "ndcg_at_10",
|
119 |
+
"STS": "cos_sim_spearman",
|
120 |
+
"Summarization": "cos_sim_spearman",
|
121 |
+
}
|
122 |
+
|
123 |
+
def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
|
124 |
api = HfApi()
|
125 |
models = api.list_models(filter="mteb")
|
126 |
df_list = []
|
127 |
for model in models:
|
128 |
readme_path = hf_hub_download(model.modelId, filename="README.md")
|
129 |
meta = metadata_load(readme_path)
|
130 |
+
# meta['model-index'][0]["results"] is list of elements like:
|
131 |
+
# {
|
132 |
+
# "task": {"type": "Classification"},
|
133 |
+
# "dataset": {
|
134 |
+
# "type": "mteb/amazon_massive_intent",
|
135 |
+
# "name": "MTEB MassiveIntentClassification (nb)",
|
136 |
+
# "config": "nb",
|
137 |
+
# "split": "test",
|
138 |
+
# },
|
139 |
+
# "metrics": [
|
140 |
+
# {"type": "accuracy", "value": 39.81506388702084},
|
141 |
+
# {"type": "f1", "value": 38.809586587791664},
|
142 |
+
# ],
|
143 |
+
# },
|
144 |
+
|
145 |
+
# Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
|
146 |
+
#if langs is None:
|
147 |
+
task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
|
148 |
+
out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
|
149 |
+
#else:
|
150 |
# Multilingual
|
151 |
+
# out = list(
|
152 |
+
# map(
|
153 |
+
# lambda x: {
|
154 |
+
# x["dataset"]["name"].replace("MTEB ", ""): round(
|
155 |
+
# list(filter(lambda x: x["type"] == metric, x["metrics"]))[0]["value"], 2
|
156 |
+
# )
|
157 |
+
# },
|
158 |
+
# filter(
|
159 |
+
# lambda x: (x.get("task", {}).get("type", "") in tasks)
|
160 |
+
# and (x.get("dataset", {}).get("config", "") in ("default", *langs)),
|
161 |
+
# meta["model-index"][0]["results"],
|
162 |
+
# ),
|
163 |
+
# )
|
164 |
+
# )
|
165 |
out = {k: v for d in out for k, v in d.items()}
|
166 |
out["Model"] = make_clickable_model(model.modelId)
|
167 |
df_list.append(out)
|
|
|
170 |
cols = sorted(list(df.columns))
|
171 |
cols.insert(0, cols.pop(cols.index("Model")))
|
172 |
df = df[cols]
|
173 |
+
# df.insert(1, "Average", df.mean(axis=1, skipna=False))
|
174 |
+
df.fillna("", inplace=True)
|
175 |
+
if cast_to_str:
|
176 |
+
return df.astype(str) # Cast to str as Gradio does not accept floats
|
177 |
+
return df
|
178 |
+
|
179 |
+
|
180 |
+
DATA_OVERALL = get_mteb_data(
|
181 |
+
tasks=[
|
182 |
+
"Classification",
|
183 |
+
"Clustering",
|
184 |
+
"PairClassification",
|
185 |
+
"Reranking",
|
186 |
+
"Retrieval",
|
187 |
+
"STS",
|
188 |
+
"Summarization",
|
189 |
+
],
|
190 |
+
langs=["en", "en-en"],
|
191 |
+
cast_to_str=False
|
192 |
+
)
|
193 |
+
|
194 |
+
DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
|
195 |
+
DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
|
196 |
+
DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
|
197 |
+
DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
|
198 |
+
DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
|
199 |
+
DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
|
200 |
+
DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
|
201 |
+
DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
|
202 |
+
DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
|
203 |
+
|
204 |
+
DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
|
205 |
+
DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
|
206 |
+
DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
|
207 |
+
DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
|
208 |
+
DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
|
209 |
+
DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
|
210 |
+
DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
|
211 |
+
|
212 |
+
DATA_OVERALL = DATA_OVERALL[["Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
|
213 |
|
|
|
|
|
214 |
|
215 |
block = gr.Blocks()
|
216 |
|
217 |
+
with block:
|
218 |
+
gr.Markdown(
|
219 |
+
"""Leaderboard for XX most popular Blocks Event Spaces. To learn more and join, see <a href="https://huggingface.co/Gradio-Blocks" target="_blank" style="text-decoration: underline">Blocks Party Event</a>"""
|
220 |
+
)
|
221 |
with gr.Tabs():
|
222 |
+
with gr.TabItem("Overall"):
|
223 |
+
with gr.Row():
|
224 |
+
gr.Markdown("""Average Scores""")
|
225 |
+
with gr.Row():
|
226 |
+
data_overall = gr.components.Dataframe(
|
227 |
+
DATA_OVERALL,
|
228 |
+
datatype="markdown",
|
229 |
+
type="pandas",
|
230 |
+
col_count=(len(DATA_OVERALL.columns), "fixed"),
|
231 |
+
wrap=True,
|
232 |
+
)
|
233 |
with gr.TabItem("Classification"):
|
234 |
with gr.TabItem("English"):
|
235 |
with gr.Row():
|
236 |
gr.Markdown("""Leaderboard for Classification""")
|
237 |
with gr.Row():
|
238 |
data_classification_en = gr.components.Dataframe(
|
239 |
+
DATA_CLASSIFICATION_EN,
|
240 |
+
datatype="markdown",
|
241 |
type="pandas",
|
242 |
+
col_count=(len(DATA_CLASSIFICATION_EN.columns), "fixed"),
|
243 |
)
|
244 |
with gr.Row():
|
245 |
data_run = gr.Button("Refresh")
|
246 |
task_classification_en = gr.Variable(value="Classification")
|
247 |
metric_classification_en = gr.Variable(value="accuracy")
|
248 |
lang_classification_en = gr.Variable(value=["en"])
|
249 |
+
data_run.click(
|
250 |
+
get_mteb_data,
|
251 |
+
inputs=[
|
252 |
+
task_classification_en,
|
253 |
+
metric_classification_en,
|
254 |
+
lang_classification_en,
|
255 |
+
],
|
256 |
+
outputs=data_classification_en,
|
257 |
+
)
|
258 |
with gr.TabItem("Multilingual"):
|
259 |
with gr.Row():
|
260 |
gr.Markdown("""Multilingual Classification""")
|
|
|
267 |
data_run = gr.Button("Refresh")
|
268 |
task_classification = gr.Variable(value="Classification")
|
269 |
metric_classification = gr.Variable(value="accuracy")
|
270 |
+
data_run.click(
|
271 |
+
get_mteb_data,
|
272 |
+
inputs=[task_classification, metric_classification],
|
273 |
+
outputs=data_classification,
|
274 |
+
)
|
275 |
with gr.TabItem("Clustering"):
|
276 |
with gr.Row():
|
277 |
gr.Markdown("""Leaderboard for Clustering""")
|
|
|
284 |
data_run = gr.Button("Refresh")
|
285 |
task_clustering = gr.Variable(value="Clustering")
|
286 |
metric_clustering = gr.Variable(value="v_measure")
|
287 |
+
data_run.click(
|
288 |
+
get_mteb_data,
|
289 |
+
inputs=[task_clustering, metric_clustering],
|
290 |
+
outputs=data_clustering,
|
291 |
+
)
|
292 |
with gr.TabItem("Retrieval"):
|
293 |
with gr.Row():
|
294 |
gr.Markdown("""Leaderboard for Retrieval""")
|
|
|
301 |
data_run = gr.Button("Refresh")
|
302 |
task_retrieval = gr.Variable(value="Retrieval")
|
303 |
metric_retrieval = gr.Variable(value="ndcg_at_10")
|
304 |
+
data_run.click(
|
305 |
+
get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval
|
306 |
+
)
|
307 |
with gr.TabItem("Reranking"):
|
308 |
with gr.Row():
|
309 |
gr.Markdown("""Leaderboard for Reranking""")
|
|
|
311 |
data_reranking = gr.components.Dataframe(
|
312 |
datatype=["markdown"] * 500,
|
313 |
type="pandas",
|
314 |
+
# col_count=(12, "fixed"),
|
315 |
)
|
316 |
with gr.Row():
|
317 |
data_run = gr.Button("Refresh")
|
318 |
task_reranking = gr.Variable(value="Reranking")
|
319 |
metric_reranking = gr.Variable(value="map")
|
320 |
+
data_run.click(
|
321 |
+
get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking
|
322 |
+
)
|
323 |
with gr.TabItem("STS"):
|
324 |
with gr.TabItem("English"):
|
325 |
with gr.Row():
|
|
|
334 |
task_sts_en = gr.Variable(value="STS")
|
335 |
metric_sts_en = gr.Variable(value="cos_sim_spearman")
|
336 |
lang_sts_en = gr.Variable(value=["en", "en-en"])
|
337 |
+
data_run.click(
|
338 |
+
get_mteb_data,
|
339 |
+
inputs=[task_sts_en, metric_sts_en, lang_sts_en],
|
340 |
+
outputs=data_sts_en,
|
341 |
+
)
|
342 |
with gr.TabItem("Multilingual"):
|
343 |
with gr.Row():
|
344 |
gr.Markdown("""Leaderboard for STS""")
|
|
|
364 |
data_run = gr.Button("Refresh")
|
365 |
task_summarization = gr.Variable(value="Summarization")
|
366 |
metric_summarization = gr.Variable(value="cos_sim_spearman")
|
367 |
+
data_run.click(
|
368 |
+
get_mteb_data,
|
369 |
+
inputs=[task_summarization, metric_summarization],
|
370 |
+
outputs=data_summarization,
|
371 |
+
)
|
|
|
|
|
372 |
# running the function on page load in addition to when the button is clicked
|
373 |
+
#block.load(
|
374 |
+
# get_mteb_data,
|
375 |
+
# inputs=[task_classification_en, metric_classification_en],
|
376 |
+
# outputs=data_classification_en,
|
377 |
+
# show_progress=False,
|
378 |
+
#)
|
379 |
+
block.load(
|
380 |
+
get_mteb_data,
|
381 |
+
inputs=[task_classification, metric_classification],
|
382 |
+
outputs=data_classification,
|
383 |
+
)
|
384 |
block.load(get_mteb_data, inputs=[task_clustering, metric_clustering], outputs=data_clustering)
|
385 |
block.load(get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval)
|
386 |
block.load(get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking)
|
387 |
+
block.load(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
|
388 |
+
block.load(
|
389 |
+
get_mteb_data, inputs=[task_summarization, metric_summarization], outputs=data_summarization
|
390 |
+
)
|
391 |
|
392 |
block.launch()
|
|