Spaces:
Runtime error
Runtime error
Santi Diana
committed on
Commit
•
3aae85b
1
Parent(s):
b8e8c93
Uploaded state-of-the-art proprietary models
Browse files- .DS_Store +0 -0
- add_new_model/.DS_Store +0 -0
- add_new_model/README.md +3 -1
- add_new_model/add_new_model.py +3 -4
- add_new_model/mteb_metadata.yaml +114 -0
- app.py +44 -3
- data/classification.csv +17 -8
- data/general.csv +9 -0
- data/sts.csv +17 -8
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
add_new_model/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
add_new_model/README.md
CHANGED
@@ -7,4 +7,6 @@ when evaluating `sentence-transformers/sentence-t5-large`.
|
|
7 |
3. Once evaluated, move that folder to this folder, so it will be inside `add_new_model` folder.
|
8 |
4. Execute the file `MTEB_metadata_to_yaml.py`. That will create a file named `mteb_metadata.yaml` that contains the metadata regarding your evaluation.
|
9 |
5. Execute the file `add_new_model.py`. That file will add your model to the Leaderboard.
|
10 |
-
6. Add, commit and `git push` the changes without uploading the results and the `mteb_metadata.yaml`.
|
|
|
|
|
|
7 |
3. Once evaluated, move that folder to this folder, so it will be inside `add_new_model` folder.
|
8 |
4. Execute the file `MTEB_metadata_to_yaml.py`. That will create a file named `mteb_metadata.yaml` that contains the metadata regarding your evaluation.
|
9 |
5. Execute the file `add_new_model.py`. That file will add your model to the Leaderboard.
|
10 |
+
6. Add, commit and `git push` the changes without uploading the results and the `mteb_metadata.yaml`.
|
11 |
+
7. It is recommended to launch the app by running `python3 app.py` from the parent folder, and confirm that there are no errors in the leaderboard and that we
|
12 |
+
are uploading it as we wanted.
|
add_new_model/add_new_model.py
CHANGED
@@ -54,13 +54,12 @@ def add_model(metadata_archive):
|
|
54 |
## CLASSIFICATION
|
55 |
classification_dataframe = pd.read_csv('../data/classification.csv')
|
56 |
classification_df = df[df['Category']== 'Classification']
|
57 |
-
new_row_data = {'Model name': model_name}
|
58 |
-
|
59 |
for index, row in classification_df.iterrows():
|
60 |
column_name = row['dataset_name']
|
61 |
accuracy_value = row['Accuracy']
|
62 |
new_row_data[column_name] = round(accuracy_value,2)
|
63 |
-
|
64 |
new_row_df = pd.DataFrame(new_row_data,index=[0])
|
65 |
classification_dataframe = pd.concat([classification_dataframe,new_row_df],ignore_index=True)
|
66 |
classification_dataframe.to_csv("../data/classification.csv",index=False)
|
@@ -68,7 +67,7 @@ def add_model(metadata_archive):
|
|
68 |
## STS
|
69 |
sts_dataframe = pd.read_csv('../data/sts.csv')
|
70 |
sts_df = df[df['Category']=='STS']
|
71 |
-
new_row_data = {'Model name': model_name}
|
72 |
|
73 |
for index, row in sts_df.iterrows():
|
74 |
column_name = row['dataset_name']
|
|
|
54 |
## CLASSIFICATION
|
55 |
classification_dataframe = pd.read_csv('../data/classification.csv')
|
56 |
classification_df = df[df['Category']== 'Classification']
|
57 |
+
new_row_data = {'Model name': model_name, 'Average': classification_average}
|
|
|
58 |
for index, row in classification_df.iterrows():
|
59 |
column_name = row['dataset_name']
|
60 |
accuracy_value = row['Accuracy']
|
61 |
new_row_data[column_name] = round(accuracy_value,2)
|
62 |
+
|
63 |
new_row_df = pd.DataFrame(new_row_data,index=[0])
|
64 |
classification_dataframe = pd.concat([classification_dataframe,new_row_df],ignore_index=True)
|
65 |
classification_dataframe.to_csv("../data/classification.csv",index=False)
|
|
|
67 |
## STS
|
68 |
sts_dataframe = pd.read_csv('../data/sts.csv')
|
69 |
sts_df = df[df['Category']=='STS']
|
70 |
+
new_row_data = {'Model name': model_name, 'Average': sts_spearman_average}
|
71 |
|
72 |
for index, row in sts_df.iterrows():
|
73 |
column_name = row['dataset_name']
|
add_new_model/mteb_metadata.yaml
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- mteb
|
4 |
+
model-index:
|
5 |
+
- name: multilingual-e5-large-stsb-tuned-b64-e10
|
6 |
+
results:
|
7 |
+
- task:
|
8 |
+
type: Classification
|
9 |
+
dataset:
|
10 |
+
type: mteb/amazon_reviews_multi
|
11 |
+
name: MTEB AmazonReviewsClassification (es)
|
12 |
+
config: es
|
13 |
+
split: test
|
14 |
+
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
|
15 |
+
metrics:
|
16 |
+
- type: accuracy
|
17 |
+
value: 43.709999999999994
|
18 |
+
- type: f1
|
19 |
+
value: 41.47169623212768
|
20 |
+
- task:
|
21 |
+
type: Classification
|
22 |
+
dataset:
|
23 |
+
type: mteb/mtop_domain
|
24 |
+
name: MTEB MTOPDomainClassification (es)
|
25 |
+
config: es
|
26 |
+
split: test
|
27 |
+
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
|
28 |
+
metrics:
|
29 |
+
- type: accuracy
|
30 |
+
value: 88.83589059372916
|
31 |
+
- type: f1
|
32 |
+
value: 88.28914595398294
|
33 |
+
- task:
|
34 |
+
type: Classification
|
35 |
+
dataset:
|
36 |
+
type: mteb/mtop_intent
|
37 |
+
name: MTEB MTOPIntentClassification (es)
|
38 |
+
config: es
|
39 |
+
split: test
|
40 |
+
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
|
41 |
+
metrics:
|
42 |
+
- type: accuracy
|
43 |
+
value: 60.20346897931954
|
44 |
+
- type: f1
|
45 |
+
value: 41.64439175677159
|
46 |
+
- task:
|
47 |
+
type: Classification
|
48 |
+
dataset:
|
49 |
+
type: mteb/amazon_massive_intent
|
50 |
+
name: MTEB MassiveIntentClassification (es)
|
51 |
+
config: es
|
52 |
+
split: test
|
53 |
+
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
|
54 |
+
metrics:
|
55 |
+
- type: accuracy
|
56 |
+
value: 62.74041694687289
|
57 |
+
- type: f1
|
58 |
+
value: 61.77713703269475
|
59 |
+
- task:
|
60 |
+
type: Classification
|
61 |
+
dataset:
|
62 |
+
type: mteb/amazon_massive_scenario
|
63 |
+
name: MTEB MassiveScenarioClassification (es)
|
64 |
+
config: es
|
65 |
+
split: test
|
66 |
+
revision: 7d571f92784cd94a019292a1f45445077d0ef634
|
67 |
+
metrics:
|
68 |
+
- type: accuracy
|
69 |
+
value: 67.40080699394755
|
70 |
+
- type: f1
|
71 |
+
value: 67.14214912345791
|
72 |
+
- task:
|
73 |
+
type: STS
|
74 |
+
dataset:
|
75 |
+
type: mteb/sts17-crosslingual-sts
|
76 |
+
name: MTEB STS17 (es-es)
|
77 |
+
config: es-es
|
78 |
+
split: test
|
79 |
+
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
|
80 |
+
metrics:
|
81 |
+
- type: cos_sim_pearson
|
82 |
+
value: 88.26778066226262
|
83 |
+
- type: cos_sim_spearman
|
84 |
+
value: 88.03435803600337
|
85 |
+
- type: euclidean_pearson
|
86 |
+
value: 88.31560142002508
|
87 |
+
- type: euclidean_spearman
|
88 |
+
value: 88.03594258414384
|
89 |
+
- type: manhattan_pearson
|
90 |
+
value: 88.3997621988469
|
91 |
+
- type: manhattan_spearman
|
92 |
+
value: 88.17114024743894
|
93 |
+
- task:
|
94 |
+
type: STS
|
95 |
+
dataset:
|
96 |
+
type: mteb/sts22-crosslingual-sts
|
97 |
+
name: MTEB STS22 (es)
|
98 |
+
config: es
|
99 |
+
split: test
|
100 |
+
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
|
101 |
+
metrics:
|
102 |
+
- type: cos_sim_pearson
|
103 |
+
value: 66.49699923699941
|
104 |
+
- type: cos_sim_spearman
|
105 |
+
value: 70.12135103690638
|
106 |
+
- type: euclidean_pearson
|
107 |
+
value: 67.63308096173844
|
108 |
+
- type: euclidean_spearman
|
109 |
+
value: 70.12135103690638
|
110 |
+
- type: manhattan_pearson
|
111 |
+
value: 67.49091236728717
|
112 |
+
- type: manhattan_spearman
|
113 |
+
value: 70.08015881466724
|
114 |
+
---
|
app.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
4 |
-
|
|
|
5 |
NUM_DATASETS = 7
|
6 |
NUM_SCORES = 0
|
7 |
-
NUM_MODELS =
|
8 |
|
9 |
def general_dataframe_update():
|
10 |
"""
|
@@ -19,6 +20,7 @@ def classification_dataframe_update():
|
|
19 |
"""
|
20 |
dataframe = pd.read_csv('data/classification.csv')
|
21 |
return dataframe
|
|
|
22 |
def sts_dataframe_udpate():
|
23 |
"""
|
24 |
Returns sts dataframe for sts table.
|
@@ -26,6 +28,13 @@ def sts_dataframe_udpate():
|
|
26 |
dataframe = pd.read_csv('data/sts.csv')
|
27 |
return dataframe
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
with block:
|
30 |
gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
|
31 |
Massive Text Embedding Benchmark (MTEB) Leaderboard.**
|
@@ -40,7 +49,7 @@ with block:
|
|
40 |
gr.Markdown("""
|
41 |
**Tabla General de Embeddings**
|
42 |
|
43 |
-
- **
|
44 |
- **Idioma:** Español
|
45 |
""")
|
46 |
with gr.Row():
|
@@ -51,6 +60,13 @@ with block:
|
|
51 |
wrap=True,
|
52 |
)
|
53 |
with gr.TabItem("Classification"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
with gr.Row():
|
55 |
# Create and display a sample DataFrame
|
56 |
classification = classification_dataframe_update()
|
@@ -60,6 +76,13 @@ with block:
|
|
60 |
wrap=True,
|
61 |
)
|
62 |
with gr.TabItem("STS"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
with gr.Row():
|
64 |
# Create and display a sample DataFrame
|
65 |
sts = sts_dataframe_udpate()
|
@@ -68,6 +91,24 @@ with block:
|
|
68 |
type="pandas",
|
69 |
wrap=True,
|
70 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
block.launch()
|
73 |
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
|
4 |
+
dataframe = pd.read_csv('data/general.csv')
|
5 |
+
|
6 |
NUM_DATASETS = 7
|
7 |
NUM_SCORES = 0
|
8 |
+
NUM_MODELS = len(dataframe)
|
9 |
|
10 |
def general_dataframe_update():
|
11 |
"""
|
|
|
20 |
"""
|
21 |
dataframe = pd.read_csv('data/classification.csv')
|
22 |
return dataframe
|
23 |
+
|
24 |
def sts_dataframe_udpate():
|
25 |
"""
|
26 |
Returns sts dataframe for sts table.
|
|
|
28 |
dataframe = pd.read_csv('data/sts.csv')
|
29 |
return dataframe
|
30 |
|
31 |
+
def clustering_dataframe_update():
|
32 |
+
pass
|
33 |
+
|
34 |
+
def retrieval_dataframe_update():
|
35 |
+
pass
|
36 |
+
|
37 |
+
block = gr.Blocks()
|
38 |
with block:
|
39 |
gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
|
40 |
Massive Text Embedding Benchmark (MTEB) Leaderboard.**
|
|
|
49 |
gr.Markdown("""
|
50 |
**Tabla General de Embeddings**
|
51 |
|
52 |
+
- **Métricas:** Varias, con sus respectivas medias.
|
53 |
- **Idioma:** Español
|
54 |
""")
|
55 |
with gr.Row():
|
|
|
60 |
wrap=True,
|
61 |
)
|
62 |
with gr.TabItem("Classification"):
|
63 |
+
with gr.Row():
|
64 |
+
gr.Markdown("""
|
65 |
+
**Tabla Classification de Embeddings**
|
66 |
+
|
67 |
+
- **Métricas:** Spearman correlation based on cosine similarity.
|
68 |
+
- **Idioma:** Español
|
69 |
+
""")
|
70 |
with gr.Row():
|
71 |
# Create and display a sample DataFrame
|
72 |
classification = classification_dataframe_update()
|
|
|
76 |
wrap=True,
|
77 |
)
|
78 |
with gr.TabItem("STS"):
|
79 |
+
with gr.Row():
|
80 |
+
gr.Markdown("""
|
81 |
+
**Tabla Classification de Embeddings**
|
82 |
+
|
83 |
+
- **Metricas:** .
|
84 |
+
- **Idioma:** Español
|
85 |
+
""")
|
86 |
with gr.Row():
|
87 |
# Create and display a sample DataFrame
|
88 |
sts = sts_dataframe_udpate()
|
|
|
91 |
type="pandas",
|
92 |
wrap=True,
|
93 |
)
|
94 |
+
with gr.TabItem("Clustering"):
|
95 |
+
with gr.Row():
|
96 |
+
# Create and display a sample DataFrame
|
97 |
+
sts = clustering_dataframe_update()
|
98 |
+
data_overall = gr.components.Dataframe(
|
99 |
+
sts,
|
100 |
+
type="pandas",
|
101 |
+
wrap=True,
|
102 |
+
)
|
103 |
+
with gr.TabItem("Retrieval"):
|
104 |
+
with gr.Row():
|
105 |
+
# Create and display a sample DataFrame
|
106 |
+
sts = retrieval_dataframe_update()
|
107 |
+
data_overall = gr.components.Dataframe(
|
108 |
+
sts,
|
109 |
+
type="pandas",
|
110 |
+
wrap=True,
|
111 |
+
)
|
112 |
|
113 |
block.launch()
|
114 |
|
data/classification.csv
CHANGED
@@ -1,8 +1,17 @@
|
|
1 |
-
Model name,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
|
2 |
-
multilingual-e5-large,42.66,89.95,66.84,64.68,68.85
|
3 |
-
bge-small-en-v1.5,32.03,76.93,52.15,48.77,54.42
|
4 |
-
multilingual-e5-base,42.47,89.62,60.27,60.51,66.52
|
5 |
-
multilingual-e5-small,41.3,87.33,55.87,58.06,63.1
|
6 |
-
paraphrase-multilingual-mpnet-base-v2,39.99,86.96,66.59,64.43,70.42
|
7 |
-
sentence-t5-large,42.89,80.78,52.07,54.1,59.56
|
8 |
-
sentence-t5-xl,45.01,85.32,57.38,57.97,62.52
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
|
2 |
+
multilingual-e5-large,66.59,42.66,89.95,66.84,64.68,68.85
|
3 |
+
bge-small-en-v1.5,52.86,32.03,76.93,52.15,48.77,54.42
|
4 |
+
multilingual-e5-base,63.87,42.47,89.62,60.27,60.51,66.52
|
5 |
+
multilingual-e5-small,61.13,41.3,87.33,55.87,58.06,63.1
|
6 |
+
paraphrase-multilingual-mpnet-base-v2,65.67,39.99,86.96,66.59,64.43,70.42
|
7 |
+
sentence-t5-large,57.87,42.89,80.78,52.07,54.1,59.56
|
8 |
+
sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
|
9 |
+
paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
|
10 |
+
sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
|
11 |
+
paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
|
12 |
+
mstsb-paraphrase-multilingual-mpnet-base-v2,64.47,38.29,86.04,67.06,63.47,67.53
|
13 |
+
multilingual-e5-base-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
|
14 |
+
multilingual-e5-large-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
|
15 |
+
multilingual-e5-large-stsb-tuned-b16-e10,67.1,43.72,90.29,65.51,65.13,70.84
|
16 |
+
multilingual-e5-large-stsb-tuned,66.23,43.62,89.33,62.93,65.11,70.16
|
17 |
+
multilingual-e5-large-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
|
data/general.csv
CHANGED
@@ -6,3 +6,12 @@ multilingual-e5-small,,,68.64,61.13,,76.15,
|
|
6 |
paraphrase-multilingual-mpnet-base-v2,,,69.1,65.68,,72.53,
|
7 |
sentence-t5-large,,,64.04,57.88,,70.21,
|
8 |
sentence-t5-xl,,,66.22,61.64,,70.79,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
paraphrase-multilingual-mpnet-base-v2,,,69.1,65.68,,72.53,
|
7 |
sentence-t5-large,,,64.04,57.88,,70.21,
|
8 |
sentence-t5-xl,,,66.22,61.64,,70.79,
|
9 |
+
paraphrase-spanish-distilroberta,,,69.34,63.98,,74.7,
|
10 |
+
sentence_similarity_spanish_es,,,68.5,61.77,,75.22,
|
11 |
+
paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,,,68.62,64.0,,73.25,
|
12 |
+
mstsb-paraphrase-multilingual-mpnet-base-v2,,,69.39,64.48,,74.29,
|
13 |
+
multilingual-e5-base-b16-e10,,,71.97,65.09,,78.86,
|
14 |
+
multilingual-e5-large-stsb-tuned-b32-e10,,,72.73,66.19,,79.27,
|
15 |
+
multilingual-e5-large-stsb-tuned-b16-e10,,,73.07,67.1,,79.05,
|
16 |
+
multilingual-e5-large-stsb-tuned,,,72.84,66.23,,79.46,
|
17 |
+
multilingual-e5-large-stsb-tuned-b64-e10,,,71.83,64.58,,79.08,
|
data/sts.csv
CHANGED
@@ -1,8 +1,17 @@
|
|
1 |
-
Model name,MTEB STS17 (es-es),MTEB STS22 (es)
|
2 |
-
multilingual-e5-large,87.42,68.23
|
3 |
-
bge-small-en-v1.5,77.73,55.47
|
4 |
-
multilingual-e5-base,87.26,67.79
|
5 |
-
multilingual-e5-small,85.27,67.04
|
6 |
-
paraphrase-multilingual-mpnet-base-v2,85.14,59.91
|
7 |
-
sentence-t5-large,82.74,57.68
|
8 |
-
sentence-t5-xl,83.42,58.16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
|
2 |
+
multilingual-e5-large,77.82,87.42,68.23
|
3 |
+
bge-small-en-v1.5,66.6,77.73,55.47
|
4 |
+
multilingual-e5-base,77.52,87.26,67.79
|
5 |
+
multilingual-e5-small,76.15,85.27,67.04
|
6 |
+
paraphrase-multilingual-mpnet-base-v2,72.52,85.14,59.91
|
7 |
+
sentence-t5-large,70.21,82.74,57.68
|
8 |
+
sentence-t5-xl,70.78,83.42,58.16
|
9 |
+
paraphrase-spanish-distilroberta,74.7,85.79,63.61
|
10 |
+
sentence_similarity_spanish_es,75.22,85.37,65.07
|
11 |
+
paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,73.24,86.89,59.6
|
12 |
+
mstsb-paraphrase-multilingual-mpnet-base-v2,74.28,88.22,60.36
|
13 |
+
multilingual-e5-base-b16-e10,78.86,87.51,70.21
|
14 |
+
multilingual-e5-large-stsb-tuned-b32-e10,79.27,88.1,70.44
|
15 |
+
multilingual-e5-large-stsb-tuned-b16-e10,79.05,88.53,69.58
|
16 |
+
multilingual-e5-large-stsb-tuned,79.46,88.44,70.48
|
17 |
+
multilingual-e5-large-stsb-tuned-b64-e10,79.08,88.03,70.12
|