rodrigomasini committed on
Commit
d63195a
1 Parent(s): 219c4e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -125
app.py CHANGED
@@ -39,35 +39,6 @@ TASK_LIST_CLASSIFICATION = [
39
 
40
  TASK_LIST_CLASSIFICATION_NORM = [x.replace(" (en)", "") for x in TASK_LIST_CLASSIFICATION]
41
 
42
- TASK_LIST_CLASSIFICATION_DA = [
43
- "AngryTweetsClassification",
44
- "DanishPoliticalCommentsClassification",
45
- "DKHateClassification",
46
- "LccSentimentClassification",
47
- "MassiveIntentClassification (da)",
48
- "MassiveScenarioClassification (da)",
49
- "NordicLangClassification",
50
- "ScalaDaClassification",
51
- ]
52
-
53
- TASK_LIST_CLASSIFICATION_NB = [
54
- "NoRecClassification",
55
- "NordicLangClassification",
56
- "NorwegianParliament",
57
- "MassiveIntentClassification (nb)",
58
- "MassiveScenarioClassification (nb)",
59
- "ScalaNbClassification",
60
- ]
61
-
62
- TASK_LIST_CLASSIFICATION_PL = [
63
- "AllegroReviews",
64
- "CBD",
65
- "MassiveIntentClassification (pl)",
66
- "MassiveScenarioClassification (pl)",
67
- "PAC",
68
- "PolEmo2.0-IN",
69
- "PolEmo2.0-OUT",
70
- ]
71
 
72
  TASK_LIST_CLASSIFICATION_SV = [
73
  "DalajClassification",
@@ -78,18 +49,6 @@ TASK_LIST_CLASSIFICATION_SV = [
78
  "SweRecClassification",
79
  ]
80
 
81
- TASK_LIST_CLASSIFICATION_ZH = [
82
- "AmazonReviewsClassification (zh)",
83
- "IFlyTek",
84
- "JDReview",
85
- "MassiveIntentClassification (zh-CN)",
86
- "MassiveScenarioClassification (zh-CN)",
87
- "MultilingualSentiment",
88
- "OnlineShopping",
89
- "TNews",
90
- "Waimai",
91
- ]
92
-
93
  TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 
'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)']
94
 
95
  TASK_LIST_CLUSTERING = [
@@ -106,43 +65,12 @@ TASK_LIST_CLUSTERING = [
106
  "TwentyNewsgroupsClustering",
107
  ]
108
 
109
-
110
- TASK_LIST_CLUSTERING_DE = [
111
- "BlurbsClusteringP2P",
112
- "BlurbsClusteringS2S",
113
- "TenKGnadClusteringP2P",
114
- "TenKGnadClusteringS2S",
115
- ]
116
-
117
- TASK_LIST_CLUSTERING_PL = [
118
- "8TagsClustering",
119
- ]
120
-
121
- TASK_LIST_CLUSTERING_ZH = [
122
- "CLSClusteringP2P",
123
- "CLSClusteringS2S",
124
- "ThuNewsClusteringP2P",
125
- "ThuNewsClusteringS2S",
126
- ]
127
-
128
  TASK_LIST_PAIR_CLASSIFICATION = [
129
  "SprintDuplicateQuestions",
130
  "TwitterSemEval2015",
131
  "TwitterURLCorpus",
132
  ]
133
 
134
- TASK_LIST_PAIR_CLASSIFICATION_PL = [
135
- "CDSC-E",
136
- "PPC",
137
- "PSC",
138
- "SICK-E-PL",
139
- ]
140
-
141
- TASK_LIST_PAIR_CLASSIFICATION_ZH = [
142
- "Cmnli",
143
- "Ocnli",
144
- ]
145
-
146
  TASK_LIST_RERANKING = [
147
  "AskUbuntuDupQuestions",
148
  "MindSmallReranking",
@@ -150,13 +78,6 @@ TASK_LIST_RERANKING = [
150
  "StackOverflowDupQuestions",
151
  ]
152
 
153
- TASK_LIST_RERANKING_ZH = [
154
- "CMedQAv1",
155
- "CMedQAv2",
156
- "MMarcoReranking",
157
- "T2Reranking",
158
- ]
159
-
160
  TASK_LIST_RETRIEVAL = [
161
  "ArguAna",
162
  "ClimateFEVER",
@@ -175,31 +96,6 @@ TASK_LIST_RETRIEVAL = [
175
  "TRECCOVID",
176
  ]
177
 
178
- TASK_LIST_RETRIEVAL_PL = [
179
- "ArguAna-PL",
180
- "DBPedia-PL",
181
- "FiQA-PL",
182
- "HotpotQA-PL",
183
- "MSMARCO-PL",
184
- "NFCorpus-PL",
185
- "NQ-PL",
186
- "Quora-PL",
187
- "SCIDOCS-PL",
188
- "SciFact-PL",
189
- "TRECCOVID-PL",
190
- ]
191
-
192
- TASK_LIST_RETRIEVAL_ZH = [
193
- "CmedqaRetrieval",
194
- "CovidRetrieval",
195
- "DuRetrieval",
196
- "EcomRetrieval",
197
- "MedicalRetrieval",
198
- "MMarcoRetrieval",
199
- "T2Retrieval",
200
- "VideoRetrieval",
201
- ]
202
-
203
  TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
204
  "CQADupstackAndroidRetrieval",
205
  "CQADupstackEnglishRetrieval",
@@ -228,31 +124,12 @@ TASK_LIST_STS = [
228
  "STSBenchmark",
229
  ]
230
 
231
- TASK_LIST_STS_PL = [
232
- "CDSC-R",
233
- "SICK-R-PL",
234
- "STS22 (pl)",
235
- ]
236
-
237
- TASK_LIST_STS_ZH = [
238
- "AFQMC",
239
- "ATEC",
240
- "BQ",
241
- "LCQMC",
242
- "PAWSX",
243
- "QBQTC",
244
- "STS22 (zh)",
245
- "STSB",
246
- ]
247
-
248
  TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
249
  TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TASK_LIST_STS]
250
 
251
  TASK_LIST_SUMMARIZATION = ["SummEval",]
252
 
253
  TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
254
- TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
255
- TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
256
 
257
  TASK_TO_METRIC = {
258
  "BitextMining": "f1",
@@ -806,7 +683,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
806
  df_list.append(res)
807
 
808
  for model in models:
809
- if model.modelId in MODELS_TO_SKIP: continue
 
810
  print("MODEL", model)
811
  readme_path = hf_hub_download(model.modelId, filename="README.md")
812
  meta = metadata_load(readme_path)
@@ -890,7 +768,7 @@ def get_mteb_average():
890
  get_mteb_average()
891
 
892
  NUM_DATASETS = len(set(DATASETS))
893
- # NUM_LANGUAGES = len(set(LANGUAGES))
894
  NUM_MODELS = len(set(MODELS))
895
 
896
  data_overall = gr.components.Dataframe(
 
39
 
40
  TASK_LIST_CLASSIFICATION_NORM = [x.replace(" (en)", "") for x in TASK_LIST_CLASSIFICATION]
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  TASK_LIST_CLASSIFICATION_SV = [
44
  "DalajClassification",
 
49
  "SweRecClassification",
50
  ]
51
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 
'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)']
53
 
54
  TASK_LIST_CLUSTERING = [
 
65
  "TwentyNewsgroupsClustering",
66
  ]
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  TASK_LIST_PAIR_CLASSIFICATION = [
69
  "SprintDuplicateQuestions",
70
  "TwitterSemEval2015",
71
  "TwitterURLCorpus",
72
  ]
73
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  TASK_LIST_RERANKING = [
75
  "AskUbuntuDupQuestions",
76
  "MindSmallReranking",
 
78
  "StackOverflowDupQuestions",
79
  ]
80
 
 
 
 
 
 
 
 
81
  TASK_LIST_RETRIEVAL = [
82
  "ArguAna",
83
  "ClimateFEVER",
 
96
  "TRECCOVID",
97
  ]
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [
100
  "CQADupstackAndroidRetrieval",
101
  "CQADupstackEnglishRetrieval",
 
124
  "STSBenchmark",
125
  ]
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
128
  TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TASK_LIST_STS]
129
 
130
  TASK_LIST_SUMMARIZATION = ["SummEval",]
131
 
132
  TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
 
 
133
 
134
  TASK_TO_METRIC = {
135
  "BitextMining": "f1",
 
683
  df_list.append(res)
684
 
685
  for model in models:
686
+ if model.modelId in MODELS_TO_SKIP:
687
+ continue
688
  print("MODEL", model)
689
  readme_path = hf_hub_download(model.modelId, filename="README.md")
690
  meta = metadata_load(readme_path)
 
768
  get_mteb_average()
769
 
770
  NUM_DATASETS = len(set(DATASETS))
771
+
772
  NUM_MODELS = len(set(MODELS))
773
 
774
  data_overall = gr.components.Dataframe(