Spaces:
Runtime error
Runtime error
Commit
•
d63195a
1
Parent(s):
219c4e7
Update app.py
Browse files
app.py
CHANGED
@@ -39,35 +39,6 @@ TASK_LIST_CLASSIFICATION = [
|
|
39 |
|
40 |
TASK_LIST_CLASSIFICATION_NORM = [x.replace(" (en)", "") for x in TASK_LIST_CLASSIFICATION]
|
41 |
|
42 |
-
TASK_LIST_CLASSIFICATION_DA = [
|
43 |
-
"AngryTweetsClassification",
|
44 |
-
"DanishPoliticalCommentsClassification",
|
45 |
-
"DKHateClassification",
|
46 |
-
"LccSentimentClassification",
|
47 |
-
"MassiveIntentClassification (da)",
|
48 |
-
"MassiveScenarioClassification (da)",
|
49 |
-
"NordicLangClassification",
|
50 |
-
"ScalaDaClassification",
|
51 |
-
]
|
52 |
-
|
53 |
-
TASK_LIST_CLASSIFICATION_NB = [
|
54 |
-
"NoRecClassification",
|
55 |
-
"NordicLangClassification",
|
56 |
-
"NorwegianParliament",
|
57 |
-
"MassiveIntentClassification (nb)",
|
58 |
-
"MassiveScenarioClassification (nb)",
|
59 |
-
"ScalaNbClassification",
|
60 |
-
]
|
61 |
-
|
62 |
-
TASK_LIST_CLASSIFICATION_PL = [
|
63 |
-
"AllegroReviews",
|
64 |
-
"CBD",
|
65 |
-
"MassiveIntentClassification (pl)",
|
66 |
-
"MassiveScenarioClassification (pl)",
|
67 |
-
"PAC",
|
68 |
-
"PolEmo2.0-IN",
|
69 |
-
"PolEmo2.0-OUT",
|
70 |
-
]
|
71 |
|
72 |
TASK_LIST_CLASSIFICATION_SV = [
|
73 |
"DalajClassification",
|
@@ -78,18 +49,6 @@ TASK_LIST_CLASSIFICATION_SV = [
|
|
78 |
"SweRecClassification",
|
79 |
]
|
80 |
|
81 |
-
TASK_LIST_CLASSIFICATION_ZH = [
|
82 |
-
"AmazonReviewsClassification (zh)",
|
83 |
-
"IFlyTek",
|
84 |
-
"JDReview",
|
85 |
-
"MassiveIntentClassification (zh-CN)",
|
86 |
-
"MassiveScenarioClassification (zh-CN)",
|
87 |
-
"MultilingualSentiment",
|
88 |
-
"OnlineShopping",
|
89 |
-
"TNews",
|
90 |
-
"Waimai",
|
91 |
-
]
|
92 |
-
|
93 |
TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)']
|
94 |
|
95 |
TASK_LIST_CLUSTERING = [
|
@@ -106,43 +65,12 @@ TASK_LIST_CLUSTERING = [
|
|
106 |
"TwentyNewsgroupsClustering",
|
107 |
]
|
108 |
|
109 |
-
|
110 |
-
TASK_LIST_CLUSTERING_DE = [
|
111 |
-
"BlurbsClusteringP2P",
|
112 |
-
"BlurbsClusteringS2S",
|
113 |
-
"TenKGnadClusteringP2P",
|
114 |
-
"TenKGnadClusteringS2S",
|
115 |
-
]
|
116 |
-
|
117 |
-
TASK_LIST_CLUSTERING_PL = [
|
118 |
-
"8TagsClustering",
|
119 |
-
]
|
120 |
-
|
121 |
-
TASK_LIST_CLUSTERING_ZH = [
|
122 |
-
"CLSClusteringP2P",
|
123 |
-
"CLSClusteringS2S",
|
124 |
-
"ThuNewsClusteringP2P",
|
125 |
-
"ThuNewsClusteringS2S",
|
126 |
-
]
|
127 |
-
|
128 |
TASK_LIST_PAIR_CLASSIFICATION = [
|
129 |
"SprintDuplicateQuestions",
|
130 |
"TwitterSemEval2015",
|
131 |
"TwitterURLCorpus",
|
132 |
]
|
133 |
|
134 |
-
TASK_LIST_PAIR_CLASSIFICATION_PL = [
|
135 |
-
"CDSC-E",
|
136 |
-
"PPC",
|
137 |
-
"PSC",
|
138 |
-
"SICK-E-PL",
|
139 |
-
]
|
140 |
-
|
141 |
-
TASK_LIST_PAIR_CLASSIFICATION_ZH = [
|
142 |
-
"Cmnli",
|
143 |
-
"Ocnli",
|
144 |
-
]
|
145 |
-
|
146 |
TASK_LIST_RERANKING = [
|
147 |
"AskUbuntuDupQuestions",
|
148 |
"MindSmallReranking",
|
@@ -150,13 +78,6 @@ TASK_LIST_RERANKING = [
|
|
150 |
"StackOverflowDupQuestions",
|
151 |
]
|
152 |
|
153 |
-
TASK_LIST_RERANKING_ZH = [
|
154 |
-
"CMedQAv1",
|
155 |
-
"CMedQAv2",
|
156 |
-
"MMarcoReranking",
|
157 |
-
"T2Reranking",
|
158 |
-
]
|
159 |
-
|
160 |
TASK_LIST_RETRIEVAL = [
|
161 |
"ArguAna",
|
162 |
"ClimateFEVER",
|
@@ -175,31 +96,6 @@ TASK_LIST_RETRIEVAL = [
|
|
175 |
"TRECCOVID",
|
176 |
]
|
177 |
|
178 |
-
TASK_LIST_RETRIEVAL_PL = [
|
179 |
-
"ArguAna-PL",
|
180 |
-
"DBPedia-PL",
|
181 |
-
"FiQA-PL",
|
182 |
-
"HotpotQA-PL",
|
183 |
-
"MSMARCO-PL",
|
184 |
-
"NFCorpus-PL",
|
185 |
-
"NQ-PL",
|
186 |
-
"Quora-PL",
|
187 |
-
"SCIDOCS-PL",
|
188 |
-
"SciFact-PL",
|
189 |
-
"TRECCOVID-PL",
|
190 |
-
]
|
191 |
-
|
192 |
-
TASK_LIST_RETRIEVAL_ZH = [
|
193 |
-
"CmedqaRetrieval",
|
194 |
-
"CovidRetrieval",
|
195 |
-
"DuRetrieval",
|
196 |
-
"EcomRetrieval",
|
197 |
-
"MedicalRetrieval",
|
198 |
-
"MMarcoRetrieval",
|
199 |
-
"T2Retrieval",
|
200 |
-
"VideoRetrieval",
|
201 |
-
]
|
202 |
-
|
203 |
TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
|
204 |
"CQADupstackAndroidRetrieval",
|
205 |
"CQADupstackEnglishRetrieval",
|
@@ -228,31 +124,12 @@ TASK_LIST_STS = [
|
|
228 |
"STSBenchmark",
|
229 |
]
|
230 |
|
231 |
-
TASK_LIST_STS_PL = [
|
232 |
-
"CDSC-R",
|
233 |
-
"SICK-R-PL",
|
234 |
-
"STS22 (pl)",
|
235 |
-
]
|
236 |
-
|
237 |
-
TASK_LIST_STS_ZH = [
|
238 |
-
"AFQMC",
|
239 |
-
"ATEC",
|
240 |
-
"BQ",
|
241 |
-
"LCQMC",
|
242 |
-
"PAWSX",
|
243 |
-
"QBQTC",
|
244 |
-
"STS22 (zh)",
|
245 |
-
"STSB",
|
246 |
-
]
|
247 |
-
|
248 |
TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
|
249 |
TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TASK_LIST_STS]
|
250 |
|
251 |
TASK_LIST_SUMMARIZATION = ["SummEval",]
|
252 |
|
253 |
TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
|
254 |
-
TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
|
255 |
-
TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
|
256 |
|
257 |
TASK_TO_METRIC = {
|
258 |
"BitextMining": "f1",
|
@@ -806,7 +683,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
806 |
df_list.append(res)
|
807 |
|
808 |
for model in models:
|
809 |
-
if model.modelId in MODELS_TO_SKIP:
|
|
|
810 |
print("MODEL", model)
|
811 |
readme_path = hf_hub_download(model.modelId, filename="README.md")
|
812 |
meta = metadata_load(readme_path)
|
@@ -890,7 +768,7 @@ def get_mteb_average():
|
|
890 |
get_mteb_average()
|
891 |
|
892 |
NUM_DATASETS = len(set(DATASETS))
|
893 |
-
|
894 |
NUM_MODELS = len(set(MODELS))
|
895 |
|
896 |
data_overall = gr.components.Dataframe(
|
|
|
39 |
|
40 |
TASK_LIST_CLASSIFICATION_NORM = [x.replace(" (en)", "") for x in TASK_LIST_CLASSIFICATION]
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
TASK_LIST_CLASSIFICATION_SV = [
|
44 |
"DalajClassification",
|
|
|
49 |
"SweRecClassification",
|
50 |
]
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)']
|
53 |
|
54 |
TASK_LIST_CLUSTERING = [
|
|
|
65 |
"TwentyNewsgroupsClustering",
|
66 |
]
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
TASK_LIST_PAIR_CLASSIFICATION = [
|
69 |
"SprintDuplicateQuestions",
|
70 |
"TwitterSemEval2015",
|
71 |
"TwitterURLCorpus",
|
72 |
]
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
TASK_LIST_RERANKING = [
|
75 |
"AskUbuntuDupQuestions",
|
76 |
"MindSmallReranking",
|
|
|
78 |
"StackOverflowDupQuestions",
|
79 |
]
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
TASK_LIST_RETRIEVAL = [
|
82 |
"ArguAna",
|
83 |
"ClimateFEVER",
|
|
|
96 |
"TRECCOVID",
|
97 |
]
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
|
100 |
"CQADupstackAndroidRetrieval",
|
101 |
"CQADupstackEnglishRetrieval",
|
|
|
124 |
"STSBenchmark",
|
125 |
]
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
|
128 |
TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TASK_LIST_STS]
|
129 |
|
130 |
TASK_LIST_SUMMARIZATION = ["SummEval",]
|
131 |
|
132 |
TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
|
|
|
|
|
133 |
|
134 |
TASK_TO_METRIC = {
|
135 |
"BitextMining": "f1",
|
|
|
683 |
df_list.append(res)
|
684 |
|
685 |
for model in models:
|
686 |
+
if model.modelId in MODELS_TO_SKIP:
|
687 |
+
continue
|
688 |
print("MODEL", model)
|
689 |
readme_path = hf_hub_download(model.modelId, filename="README.md")
|
690 |
meta = metadata_load(readme_path)
|
|
|
768 |
get_mteb_average()
|
769 |
|
770 |
NUM_DATASETS = len(set(DATASETS))
|
771 |
+
|
772 |
NUM_MODELS = len(set(MODELS))
|
773 |
|
774 |
data_overall = gr.components.Dataframe(
|