Santi Diana committed on
Commit
3aae85b
1 Parent(s): b8e8c93

Uploaded state-of-the-art proprietary models

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
add_new_model/.DS_Store ADDED
Binary file (6.15 kB)
 
add_new_model/README.md CHANGED
@@ -7,4 +7,6 @@ when evaluating `sentence-transformers/sentence-t5-large`.
 3. Once evaluated, move that folder to this folder, so it will be inside the `add_new_model` folder.
 4. Execute the file `MTEB_metadata_to_yaml.py`. That will create a file named `mteb_metadata.yaml` that contains the metadata regarding your evaluation.
 5. Execute the file `add_new_model.py`. That file will add your model to the Leaderboard.
-6. Add, commit and `git push` the changes without uploading the results and the `mteb_metadata.yaml`.
+6. Add, commit and `git push` the changes without uploading the results and the `mteb_metadata.yaml`.
+7. It is recommended to launch the app by running `python3 app.py` from the parent folder and to confirm that the leaderboard shows no errors and that the new model appears as intended.
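Steps 6-7 above amount to a local sanity check before pushing. A minimal sketch of that check, assuming the layout the README describes (scripts inside `add_new_model/`, leaderboard CSVs under `../data/`); this snippet is illustrative and not part of the repository:

```python
# Hypothetical pre-push check, run from inside add_new_model/.
import pandas as pd

model_name = "multilingual-e5-large-stsb-tuned-b64-e10"  # the model added in this commit

for csv in ("../data/classification.csv", "../data/sts.csv"):
    table = pd.read_csv(csv)
    # Confirm the new model row made it into each leaderboard table.
    assert (table["Model name"] == model_name).any(), f"{model_name} missing from {csv}"
    print(f"{csv}: {len(table)} models, new row present")
```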
add_new_model/add_new_model.py CHANGED
@@ -54,13 +54,12 @@ def add_model(metadata_archive):
     ## CLASSIFICATION
     classification_dataframe = pd.read_csv('../data/classification.csv')
     classification_df = df[df['Category']== 'Classification']
-    new_row_data = {'Model name': model_name}
-
+    new_row_data = {'Model name': model_name, 'Average': classification_average}
+
     for index, row in classification_df.iterrows():
         column_name = row['dataset_name']
         accuracy_value = row['Accuracy']
         new_row_data[column_name] = round(accuracy_value,2)
-
+
     new_row_df = pd.DataFrame(new_row_data,index=[0])
     classification_dataframe = pd.concat([classification_dataframe,new_row_df],ignore_index=True)
     classification_dataframe.to_csv("../data/classification.csv",index=False)
@@ -68,7 +67,7 @@ def add_model(metadata_archive):
     ## STS
     sts_dataframe = pd.read_csv('../data/sts.csv')
     sts_df = df[df['Category']=='STS']
-    new_row_data = {'Model name': model_name}
+    new_row_data = {'Model name': model_name, 'Average': sts_spearman_average}
 
     for index, row in sts_df.iterrows():
         column_name = row['dataset_name']
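The updated dictionaries reference `classification_average` and `sts_spearman_average`, which are defined outside the hunks shown here. A minimal sketch of how such averages could be derived from the parsed results DataFrame `df`, assuming one row per dataset with `Category`, `Accuracy` and a Spearman-score column (the column name `Spearman` below is an assumption, not taken from the script):

```python
# Sketch only: the real definitions live elsewhere in add_new_model.py.
classification_average = round(
    df.loc[df['Category'] == 'Classification', 'Accuracy'].mean(), 2
)
sts_spearman_average = round(
    df.loc[df['Category'] == 'STS', 'Spearman'].mean(), 2  # 'Spearman' column name is assumed
)
```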
add_new_model/mteb_metadata.yaml ADDED
@@ -0,0 +1,114 @@
+---
+tags:
+- mteb
+model-index:
+- name: multilingual-e5-large-stsb-tuned-b64-e10
+  results:
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_reviews_multi
+      name: MTEB AmazonReviewsClassification (es)
+      config: es
+      split: test
+      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
+    metrics:
+    - type: accuracy
+      value: 43.709999999999994
+    - type: f1
+      value: 41.47169623212768
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/mtop_domain
+      name: MTEB MTOPDomainClassification (es)
+      config: es
+      split: test
+      revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
+    metrics:
+    - type: accuracy
+      value: 88.83589059372916
+    - type: f1
+      value: 88.28914595398294
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/mtop_intent
+      name: MTEB MTOPIntentClassification (es)
+      config: es
+      split: test
+      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
+    metrics:
+    - type: accuracy
+      value: 60.20346897931954
+    - type: f1
+      value: 41.64439175677159
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_massive_intent
+      name: MTEB MassiveIntentClassification (es)
+      config: es
+      split: test
+      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
+    metrics:
+    - type: accuracy
+      value: 62.74041694687289
+    - type: f1
+      value: 61.77713703269475
+  - task:
+      type: Classification
+    dataset:
+      type: mteb/amazon_massive_scenario
+      name: MTEB MassiveScenarioClassification (es)
+      config: es
+      split: test
+      revision: 7d571f92784cd94a019292a1f45445077d0ef634
+    metrics:
+    - type: accuracy
+      value: 67.40080699394755
+    - type: f1
+      value: 67.14214912345791
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts17-crosslingual-sts
+      name: MTEB STS17 (es-es)
+      config: es-es
+      split: test
+      revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
+    metrics:
+    - type: cos_sim_pearson
+      value: 88.26778066226262
+    - type: cos_sim_spearman
+      value: 88.03435803600337
+    - type: euclidean_pearson
+      value: 88.31560142002508
+    - type: euclidean_spearman
+      value: 88.03594258414384
+    - type: manhattan_pearson
+      value: 88.3997621988469
+    - type: manhattan_spearman
+      value: 88.17114024743894
+  - task:
+      type: STS
+    dataset:
+      type: mteb/sts22-crosslingual-sts
+      name: MTEB STS22 (es)
+      config: es
+      split: test
+      revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
+    metrics:
+    - type: cos_sim_pearson
+      value: 66.49699923699941
+    - type: cos_sim_spearman
+      value: 70.12135103690638
+    - type: euclidean_pearson
+      value: 67.63308096173844
+    - type: euclidean_spearman
+      value: 70.12135103690638
+    - type: manhattan_pearson
+      value: 67.49091236728717
+    - type: manhattan_spearman
+      value: 70.08015881466724
+---
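The metadata file added above follows the standard MTEB `model-index` layout, which is what `add_new_model.py` consumes. A hedged sketch of turning it into one row per dataset; this is illustrative parsing code, not the project's own, it assumes PyYAML is available, and the output column names are chosen to echo those used in `add_new_model.py`:

```python
import yaml
import pandas as pd

with open("mteb_metadata.yaml") as f:
    # The file is a front-matter style document; the trailing '---' opens an
    # empty second document, so take the first one.
    meta = list(yaml.safe_load_all(f))[0]

rows = []
for entry in meta["model-index"][0]["results"]:
    metrics = {m["type"]: m["value"] for m in entry["metrics"]}
    rows.append({
        "Category": entry["task"]["type"],
        "dataset_name": entry["dataset"]["name"],
        "Accuracy": metrics.get("accuracy"),
        "cos_sim_spearman": metrics.get("cos_sim_spearman"),
    })

df = pd.DataFrame(rows)
print(df)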
app.py CHANGED
@@ -1,10 +1,11 @@
1
  import gradio as gr
2
  import pandas as pd
3
 
4
- block = gr.Blocks()
 
5
  NUM_DATASETS = 7
6
  NUM_SCORES = 0
7
- NUM_MODELS = 5
8
 
9
  def general_dataframe_update():
10
  """
@@ -19,6 +20,7 @@ def classification_dataframe_update():
19
  """
20
  dataframe = pd.read_csv('data/classification.csv')
21
  return dataframe
 
22
  def sts_dataframe_udpate():
23
  """
24
  Returns sts dataframe for sts table.
@@ -26,6 +28,13 @@ def sts_dataframe_udpate():
26
  dataframe = pd.read_csv('data/sts.csv')
27
  return dataframe
28
 
 
 
 
 
 
 
 
29
  with block:
30
  gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
31
  Massive Text Embedding Benchmark (MTEB) Leaderboard.**
@@ -40,7 +49,7 @@ with block:
40
  gr.Markdown("""
41
  **Tabla General de Embeddings**
42
 
43
- - **Metricas:** Varias, con sus respectivas medias.
44
  - **Idioma:** Español
45
  """)
46
  with gr.Row():
@@ -51,6 +60,13 @@ with block:
51
  wrap=True,
52
  )
53
  with gr.TabItem("Classification"):
 
 
 
 
 
 
 
54
  with gr.Row():
55
  # Create and display a sample DataFrame
56
  classification = classification_dataframe_update()
@@ -60,6 +76,13 @@ with block:
60
  wrap=True,
61
  )
62
  with gr.TabItem("STS"):
 
 
 
 
 
 
 
63
  with gr.Row():
64
  # Create and display a sample DataFrame
65
  sts = sts_dataframe_udpate()
@@ -68,6 +91,24 @@ with block:
68
  type="pandas",
69
  wrap=True,
70
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  block.launch()
73
 
 
1
  import gradio as gr
2
  import pandas as pd
3
 
4
+ dataframe = pd.read_csv('data/general.csv')
5
+
6
  NUM_DATASETS = 7
7
  NUM_SCORES = 0
8
+ NUM_MODELS = len(dataframe)
9
 
10
  def general_dataframe_update():
11
  """
 
20
  """
21
  dataframe = pd.read_csv('data/classification.csv')
22
  return dataframe
23
+
24
  def sts_dataframe_udpate():
25
  """
26
  Returns sts dataframe for sts table.
 
28
  dataframe = pd.read_csv('data/sts.csv')
29
  return dataframe
30
 
31
+ def clustering_dataframe_update():
32
+ pass
33
+
34
+ def retrieval_dataframe_update():
35
+ pass
36
+
37
+ block = gr.Blocks()
38
  with block:
39
  gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
40
  Massive Text Embedding Benchmark (MTEB) Leaderboard.**
 
49
  gr.Markdown("""
50
  **Tabla General de Embeddings**
51
 
52
+ - **Métricas:** Varias, con sus respectivas medias.
53
  - **Idioma:** Español
54
  """)
55
  with gr.Row():
 
60
  wrap=True,
61
  )
62
  with gr.TabItem("Classification"):
63
+ with gr.Row():
64
+ gr.Markdown("""
65
+ **Tabla Classification de Embeddings**
66
+
67
+ - **Métricas:** Spearman correlation based on cosine similarity.
68
+ - **Idioma:** Español
69
+ """)
70
  with gr.Row():
71
  # Create and display a sample DataFrame
72
  classification = classification_dataframe_update()
 
76
  wrap=True,
77
  )
78
  with gr.TabItem("STS"):
79
+ with gr.Row():
80
+ gr.Markdown("""
81
+ **Tabla Classification de Embeddings**
82
+
83
+ - **Metricas:** .
84
+ - **Idioma:** Español
85
+ """)
86
  with gr.Row():
87
  # Create and display a sample DataFrame
88
  sts = sts_dataframe_udpate()
 
91
  type="pandas",
92
  wrap=True,
93
  )
94
+ with gr.TabItem("Clustering"):
95
+ with gr.Row():
96
+ # Create and display a sample DataFrame
97
+ sts = clustering_dataframe_update()
98
+ data_overall = gr.components.Dataframe(
99
+ sts,
100
+ type="pandas",
101
+ wrap=True,
102
+ )
103
+ with gr.TabItem("Retrieval"):
104
+ with gr.Row():
105
+ # Create and display a sample DataFrame
106
+ sts = retrieval_dataframe_update()
107
+ data_overall = gr.components.Dataframe(
108
+ sts,
109
+ type="pandas",
110
+ wrap=True,
111
+ )
112
 
113
  block.launch()
114
 
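The new Clustering and Retrieval tabs are wired to stub loaders that return nothing, so those tables render empty for now. A minimal sketch of how the stubs could later be filled in, assuming hypothetical `data/clustering.csv` and `data/retrieval.csv` files in the same format as the existing tables (neither file exists in this commit):

```python
import pandas as pd

def clustering_dataframe_update():
    """Returns the clustering dataframe for the Clustering tab (assumed CSV path)."""
    return pd.read_csv('data/clustering.csv')

def retrieval_dataframe_update():
    """Returns the retrieval dataframe for the Retrieval tab (assumed CSV path)."""
    return pd.read_csv('data/retrieval.csv')
```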
data/classification.csv CHANGED
@@ -1,8 +1,17 @@
-Model name,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
-multilingual-e5-large,42.66,89.95,66.84,64.68,68.85
-bge-small-en-v1.5,32.03,76.93,52.15,48.77,54.42
-multilingual-e5-base,42.47,89.62,60.27,60.51,66.52
-multilingual-e5-small,41.3,87.33,55.87,58.06,63.1
-paraphrase-multilingual-mpnet-base-v2,39.99,86.96,66.59,64.43,70.42
-sentence-t5-large,42.89,80.78,52.07,54.1,59.56
-sentence-t5-xl,45.01,85.32,57.38,57.97,62.52
+Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
+multilingual-e5-large,66.59,42.66,89.95,66.84,64.68,68.85
+bge-small-en-v1.5,52.86,32.03,76.93,52.15,48.77,54.42
+multilingual-e5-base,63.87,42.47,89.62,60.27,60.51,66.52
+multilingual-e5-small,61.13,41.3,87.33,55.87,58.06,63.1
+paraphrase-multilingual-mpnet-base-v2,65.67,39.99,86.96,66.59,64.43,70.42
+sentence-t5-large,57.87,42.89,80.78,52.07,54.1,59.56
+sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
+paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
+sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
+paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
+mstsb-paraphrase-multilingual-mpnet-base-v2,64.47,38.29,86.04,67.06,63.47,67.53
+multilingual-e5-base-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
+multilingual-e5-large-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
+multilingual-e5-large-stsb-tuned-b16-e10,67.1,43.72,90.29,65.51,65.13,70.84
+multilingual-e5-large-stsb-tuned,66.23,43.62,89.33,62.93,65.11,70.16
+multilingual-e5-large-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
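The new `Average` column added to `data/classification.csv` (and analogously to `data/sts.csv` below) appears to be the mean of that row's per-dataset scores. A quick spot check against the row added in this commit, run from the repository root; note that re-deriving the mean from the rounded CSV values can drift by 0.01 on some rows, since the stored average was presumably computed before rounding:

```python
import pandas as pd

df = pd.read_csv("data/classification.csv")
row = df[df["Model name"] == "multilingual-e5-large-stsb-tuned-b64-e10"].iloc[0]
per_dataset = row.drop(["Model name", "Average"]).astype(float)
print(round(per_dataset.mean(), 2), row["Average"])  # 64.58 64.58
```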
data/general.csv CHANGED
@@ -6,3 +6,12 @@ multilingual-e5-small,,,68.64,61.13,,76.15,
 paraphrase-multilingual-mpnet-base-v2,,,69.1,65.68,,72.53,
 sentence-t5-large,,,64.04,57.88,,70.21,
 sentence-t5-xl,,,66.22,61.64,,70.79,
+paraphrase-spanish-distilroberta,,,69.34,63.98,,74.7,
+sentence_similarity_spanish_es,,,68.5,61.77,,75.22,
+paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,,,68.62,64.0,,73.25,
+mstsb-paraphrase-multilingual-mpnet-base-v2,,,69.39,64.48,,74.29,
+multilingual-e5-base-b16-e10,,,71.97,65.09,,78.86,
+multilingual-e5-large-stsb-tuned-b32-e10,,,72.73,66.19,,79.27,
+multilingual-e5-large-stsb-tuned-b16-e10,,,73.07,67.1,,79.05,
+multilingual-e5-large-stsb-tuned,,,72.84,66.23,,79.46,
+multilingual-e5-large-stsb-tuned-b64-e10,,,71.83,64.58,,79.08,
data/sts.csv CHANGED
@@ -1,8 +1,17 @@
-Model name,MTEB STS17 (es-es),MTEB STS22 (es)
-multilingual-e5-large,87.42,68.23
-bge-small-en-v1.5,77.73,55.47
-multilingual-e5-base,87.26,67.79
-multilingual-e5-small,85.27,67.04
-paraphrase-multilingual-mpnet-base-v2,85.14,59.91
-sentence-t5-large,82.74,57.68
-sentence-t5-xl,83.42,58.16
+Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
+multilingual-e5-large,77.82,87.42,68.23
+bge-small-en-v1.5,66.6,77.73,55.47
+multilingual-e5-base,77.52,87.26,67.79
+multilingual-e5-small,76.15,85.27,67.04
+paraphrase-multilingual-mpnet-base-v2,72.52,85.14,59.91
+sentence-t5-large,70.21,82.74,57.68
+sentence-t5-xl,70.78,83.42,58.16
+paraphrase-spanish-distilroberta,74.7,85.79,63.61
+sentence_similarity_spanish_es,75.22,85.37,65.07
+paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,73.24,86.89,59.6
+mstsb-paraphrase-multilingual-mpnet-base-v2,74.28,88.22,60.36
+multilingual-e5-base-b16-e10,78.86,87.51,70.21
+multilingual-e5-large-stsb-tuned-b32-e10,79.27,88.1,70.44
+multilingual-e5-large-stsb-tuned-b16-e10,79.05,88.53,69.58
+multilingual-e5-large-stsb-tuned,79.46,88.44,70.48
+multilingual-e5-large-stsb-tuned-b64-e10,79.08,88.03,70.12