Santi Diana committed on
Commit
8b42620
1 Parent(s): 743aac4

Added retrieval and re-evaluated every model

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
add_new_model/add_new_model.py CHANGED
@@ -18,7 +18,7 @@ def add_model():
18
 
19
  """
20
  # Initialize an empty DataFrame
21
- df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman','V_measure', 'Category'])
22
 
23
  metadata_archive = 'mteb_metadata.yaml'
24
 
@@ -36,7 +36,7 @@ def add_model():
36
  dataset_name = results_list[i]['dataset']['name']
37
 
38
  # Initialize the row with NaN values
39
- row = {'dataset_name': dataset_name, 'Accuracy': None, 'Spearman': None}
40
 
41
  if task_name == "Classification":
42
  accuracy = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'accuracy'), None)
@@ -50,6 +50,10 @@ def add_model():
50
  v_measure = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'v_measure'), None)
51
  row['V_measure'] = v_measure
52
  row['Category'] = "Clustering"
 
 
 
 
53
  # Append the row to the DataFrame using pd.concat
54
  new_df = pd.DataFrame([row])
55
  df = pd.concat([df, new_df], ignore_index=True)
@@ -66,6 +70,9 @@ def add_model():
66
  df['V_measure'] = pd.to_numeric(df['V_measure'], errors='coerce')
67
  clustering_v_measure_average = round(df.loc[df['Category'] == 'Clustering', 'V_measure'].mean(),2)
68
 
 
 
 
69
 
70
  ## CLASSIFICATION
71
  classification_dataframe = pd.read_csv('../data/classification.csv')
@@ -107,13 +114,27 @@ def add_model():
107
  clustering_dataframe = pd.concat([clustering_dataframe,new_row_df],ignore_index=True)
108
  clustering_dataframe.to_csv('../data/clustering.csv',index=False)
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  ## GENERAL
112
  general_dataframe = pd.read_csv("../data/general.csv")
113
 
114
- average = round(np.mean([classification_average,sts_spearman_average]),2)
115
  ## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
116
- new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': clustering_v_measure_average, 'STS Average': sts_spearman_average, 'Retrieval Average': None}
117
  new_row_df = pd.DataFrame(new_instance, index=[0])
118
  general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
119
  general_dataframe.to_csv("../data/general.csv",index=False)
@@ -215,26 +236,26 @@ def results_to_yaml(results_folder):
215
 
216
 
217
  def main():
218
- if args.execute_eval:
219
- output_folder = evaluate(args.model_id)
220
- #results_to_yaml(output_folder)
221
- add_model()
222
- else:
223
- if args.output_folder == None and args.already_yaml == False:
224
- raise ValueError("You must indicate where your results are located")
225
- else:
226
- #results_to_yaml(args.output_folder)
227
- add_model()
228
- print('Model added')
229
 
230
 
231
  if __name__ == "__main__":
232
 
233
  parser = argparse.ArgumentParser(description="Select the model that you want to add to the Leaderboard.")
234
- parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
235
- parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
236
- parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
237
- parser.add_argument("--already_yaml",default=False, help="Select if you already have the yaml file.")
238
  args = parser.parse_args()
239
  main()
240
 
 
18
 
19
  """
20
  # Initialize an empty DataFrame
21
+ df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman','V_measure', 'ndcg_at_10', 'Category'])
22
 
23
  metadata_archive = 'mteb_metadata.yaml'
24
 
 
36
  dataset_name = results_list[i]['dataset']['name']
37
 
38
  # Initialize the row with NaN values
39
+ row = {'dataset_name': dataset_name, 'Accuracy': None, 'Spearman': None, 'V_measure': None, 'ndcg_at_10': None}
40
 
41
  if task_name == "Classification":
42
  accuracy = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'accuracy'), None)
 
50
  v_measure = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'v_measure'), None)
51
  row['V_measure'] = v_measure
52
  row['Category'] = "Clustering"
53
+ elif task_name == "Retrieval":
54
+ ndcg_at_10 = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'ndcg_at_10'), None)
55
+ row['ndcg_at_10'] = ndcg_at_10
56
+ row['Category'] = "Retrieval"
57
  # Append the row to the DataFrame using pd.concat
58
  new_df = pd.DataFrame([row])
59
  df = pd.concat([df, new_df], ignore_index=True)
 
70
  df['V_measure'] = pd.to_numeric(df['V_measure'], errors='coerce')
71
  clustering_v_measure_average = round(df.loc[df['Category'] == 'Clustering', 'V_measure'].mean(),2)
72
 
73
+ df['ndcg_at_10'] = pd.to_numeric(df['ndcg_at_10'], errors='coerce')
74
+ retrieval_average = round(df.loc[df['Category'] == 'Retrieval', 'ndcg_at_10'].mean(),2)
75
+
76
 
77
  ## CLASSIFICATION
78
  classification_dataframe = pd.read_csv('../data/classification.csv')
 
114
  clustering_dataframe = pd.concat([clustering_dataframe,new_row_df],ignore_index=True)
115
  clustering_dataframe.to_csv('../data/clustering.csv',index=False)
116
 
117
+ ## Retrieval
118
+ retrieval_dataframe = pd.read_csv("../data/retrieval.csv")
119
+ retrieval_df = df[df['Category']=='Retrieval']
120
+ new_row_data = {'Model name': model_name, 'Average': retrieval_average}
121
+ for index, row in retrieval_df.iterrows():
122
+ column_name = row['dataset_name']
123
+ ndcg_at_10_value = row['ndcg_at_10']
124
+ new_row_data[column_name] = round(ndcg_at_10_value,2)
125
+
126
+ new_row_df = pd.DataFrame(new_row_data,index = [0])
127
+ retrieval_dataframe = pd.concat([retrieval_dataframe,new_row_df],ignore_index=True)
128
+ retrieval_dataframe.to_csv('../data/retrieval.csv',index=False)
129
+
130
+
131
 
132
  ## GENERAL
133
  general_dataframe = pd.read_csv("../data/general.csv")
134
 
135
+ average = round(np.mean([classification_average,sts_spearman_average,clustering_v_measure_average,retrieval_average]),2)
136
  ## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
137
+ new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': clustering_v_measure_average, 'STS Average': sts_spearman_average, 'Retrieval Average': retrieval_average}
138
  new_row_df = pd.DataFrame(new_instance, index=[0])
139
  general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
140
  general_dataframe.to_csv("../data/general.csv",index=False)
 
236
 
237
 
238
  def main():
239
+ # if args.execute_eval:
240
+ # output_folder = evaluate(args.model_id)
241
+ # #results_to_yaml(output_folder)
242
+ # add_model()
243
+ # else:
244
+ # if args.output_folder == None and args.already_yaml == False:
245
+ # raise ValueError("You must indicate where your results are located")
246
+ # else:
247
+ #results_to_yaml(args.output_folder)
248
+ add_model()
249
+ print('Model added')
250
 
251
 
252
  if __name__ == "__main__":
253
 
254
  parser = argparse.ArgumentParser(description="Select the model that you want to add to the Leaderboard.")
255
+ #parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
256
+ #parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
257
+ #parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
258
+ #parser.add_argument("--already_yaml",default=False, help="Select if you already have the yaml file.")
259
  args = parser.parse_args()
260
  main()
261
 
app.py CHANGED
@@ -33,9 +33,14 @@ def clustering_dataframe_update():
33
  Returns clustering dataframe for clustering table.
34
  """
35
  dataframe = pd.read_csv("data/clustering.csv")
 
36
 
37
  def retrieval_dataframe_update():
38
- pass
 
 
 
 
39
 
40
  def make_clickable_model(link):
41
  """
@@ -92,7 +97,7 @@ with block:
92
  with gr.TabItem("STS"):
93
  with gr.Row():
94
  gr.Markdown("""
95
- **Tabla Classification de Embeddings**
96
 
97
  - **Metricas:** Spearman correlation basada en cosine similarity.
98
  - **Idioma:** Español
@@ -108,7 +113,7 @@ with block:
108
  with gr.TabItem("Clustering"):
109
  with gr.Row():
110
  gr.Markdown("""
111
- **Tabla Classification de Embeddings**
112
 
113
  - **Metricas:** V_measure.
114
  - **Idioma:** Español
@@ -122,6 +127,13 @@ with block:
122
  wrap=True,
123
  )
124
  with gr.TabItem("Retrieval"):
 
 
 
 
 
 
 
125
  with gr.Row():
126
  # Create and display a sample DataFrame
127
  sts = retrieval_dataframe_update()
 
33
  Returns clustering dataframe for clustering table.
34
  """
35
  dataframe = pd.read_csv("data/clustering.csv")
36
+ return dataframe
37
 
38
  def retrieval_dataframe_update():
39
+ """
40
+ Returns retrieval dataframe for retrieval table.
41
+ """
42
+ dataframe = pd.read_csv('data/retrieval.csv')
43
+ return dataframe
44
 
45
  def make_clickable_model(link):
46
  """
 
97
  with gr.TabItem("STS"):
98
  with gr.Row():
99
  gr.Markdown("""
100
+ **Tabla STS de Embeddings**
101
 
102
  - **Metricas:** Spearman correlation basada en cosine similarity.
103
  - **Idioma:** Español
 
113
  with gr.TabItem("Clustering"):
114
  with gr.Row():
115
  gr.Markdown("""
116
+ **Tabla Clustering de Embeddings**
117
 
118
  - **Metricas:** V_measure.
119
  - **Idioma:** Español
 
127
  wrap=True,
128
  )
129
  with gr.TabItem("Retrieval"):
130
+ with gr.Row():
131
+ gr.Markdown("""
132
+ **Tabla Retrieval de Embeddings**
133
+
134
+ - **Metricas:** ncdg_10.
135
+ - **Idioma:** Español
136
+ """)
137
  with gr.Row():
138
  # Create and display a sample DataFrame
139
  sts = retrieval_dataframe_update()
data/classification.csv CHANGED
@@ -1,30 +1,15 @@
1
  Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
2
- multilingual-e5-LARGE,66.59,42.66,89.95,66.84,64.68,68.85
3
- bge-SMALL-en-v1.5,52.86,32.03,76.93,52.15,48.77,54.42
4
- multilingual-e5-BASE,63.87,42.47,89.62,60.27,60.51,66.52
5
- multilingual-e5-SMALL,61.13,41.3,87.33,55.87,58.06,63.1
6
- paraphrase-multilingual-mpnet-BASE-v2,65.67,39.99,86.96,66.59,64.43,70.42
7
- sentence-t5-LARGE,57.87,42.89,80.78,52.07,54.1,59.56
8
- sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
9
- paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
10
- sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
11
- paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
12
- mstsb-paraphrase-multilingual-mpnet-BASE-v2,64.47,38.29,86.04,67.06,63.47,67.53
13
- multilingual-e5-BASE-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
14
- multilingual-e5-LARGE-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
15
- multilingual-e5-LARGE-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
16
- LaBSE,61.97,39.39,84.07,64.44,58.32,63.61
17
- multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,64.39,43.04,88.81,59.25,63.04,67.8
18
- bge-BASE-tuned-b16-e10,50.83,31.34,74.1,45.63,48.72,54.36
19
- multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,64.93,42.98,87.96,60.91,63.78,69.02
20
- bge-LARGE-tuned-b16-e10,51.67,31.72,76.04,48.01,48.9,53.66
21
- multilingual-e5-LARGE-stsb-tuned-b16-e15,67.56,43.35,90.98,67.27,65.35,70.87
22
- text2vec-base-multilingual,58.64,34.82,79.24,58.69,58.28,62.2
23
- paraphrase-multilingual-MiniLM-L12-v2,61.1,37.49,83.04,60.28,59.66,65.04
24
- text2vec-stsb-tuned-b16-e5,57.19,34.64,77.72,56.46,56.94,60.22
25
- paraphrase-multilingual-MiniLM-L12-v2-stsb-tuned-b16-e5,60.41,37.28,82.83,60.45,58.56,62.96
26
  multilingual-e5-LARGE-tuned-double-dataset-b16-e5,62.65,42.33,85.21,57.01,62.09,66.61
27
- multilingual-e5-LARGE-stsb-tuned-b16-e5,66.33,43.02,89.71,64.4,64.8,69.72
28
- multilingual-e5-LARGE-stsb-tuned-b16-e10,66.77,43.86,89.45,64.87,65.13,70.51
29
- multilingual-e5-LARGE-STSAUGMENTED-b16-e5,64.23,44.25,86.83,59.76,63.08,67.21
30
- multilingual-e5-LARGE-double-finetuning-b16-e5,63.55,44.23,86.21,57.96,62.58,66.75
 
 
 
 
 
 
 
1
  Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
2
+ bge-large-en-v1.5,52.27,34.63,78.8,47.4,45.58,54.95
3
+ e5-large-v2,56.15,40.01,82.74,49.66,49.82,58.5
4
+ multilingual-e5-base,63.88,42.47,89.62,60.27,60.51,66.52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  multilingual-e5-LARGE-tuned-double-dataset-b16-e5,62.65,42.33,85.21,57.01,62.09,66.61
6
+ multilingual-e5-large,66.6,42.66,89.95,66.84,64.68,68.85
7
+ paraphrase-multilingual-mpnet-base-v2,65.68,39.99,86.96,66.59,64.43,70.42
8
+ paraphrase-spanish-distilroberta,63.98,38.25,86.81,65.95,60.51,68.39
9
+ retromae_es_67000,58.27,33.14,84.09,62.75,51.93,59.45
10
+ RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,59.63,29.7,87.64,60.15,55.43,65.22
11
+ sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.49,64.21
12
+ sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
13
+ bert-base-spanish-wwm-uncased,57.35,33.94,79.68,61.96,52.54,58.61
14
+ bertin-roberta-base-spanish,28.57,23.4,39.95,22.77,25.32,31.41
15
+ roberta-base-bne,56.27,34.15,78.91,63.17,50.3,54.82
data/clustering.csv CHANGED
@@ -1,3 +1,15 @@
1
  Model name,Average,MTEB BiorxivClusteringS2S_Spanish,MTEB RedditClusteringSpanish
2
- multilingual-e5-LARGE-STSAUGMENTED-b16-e5,33.86,26.64,41.07
3
- multilingual-e5-LARGE-STSAUGMENTED-b16-e5,33.86,26.64,41.07
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  Model name,Average,MTEB BiorxivClusteringS2S_Spanish,MTEB RedditClusteringSpanish
2
+ bge-large-en-v1.5,34.55,27.41,41.69
3
+ e5-large-v2,30.97,23.39,38.55
4
+ multilingual-e5-base,32.28,25.09,39.47
5
+ multilingual-e5-LARGE-tuned-double-dataset-b16-e5,33.34,26.57,40.11
6
+ multilingual-e5-large,34.37,28.1,40.65
7
+ paraphrase-multilingual-mpnet-base-v2,35.9,28.27,43.53
8
+ paraphrase-spanish-distilroberta,34.82,26.14,43.5
9
+ retromae_es_67000,19.57,19.78,19.36
10
+ RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,32.17,24.06,40.27
11
+ sentence_similarity_spanish_es,28.58,22.84,34.33
12
+ sentence-t5-xl,32.34,21.56,43.12
13
+ bert-base-spanish-wwm-uncased,21.2,18.19,24.2
14
+ bertin-roberta-base-spanish,4.72,4.13,5.32
15
+ roberta-base-bne,21.44,19.22,23.66
data/general.csv CHANGED
@@ -1,30 +1,15 @@
1
  Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
2
- multilingual-e5-LARGE,,,72.22,66.6,,77.83,
3
- bge-SMALL-en-v1.5,,,59.73,52.86,,66.6,
4
- multilingual-e5-BASE,,,70.7,63.88,,77.53,
5
- multilingual-e5-SMALL,,,68.64,61.13,,76.15,
6
- paraphrase-multilingual-mpnet-BASE-v2,,,69.1,65.68,,72.53,
7
- sentence-t5-LARGE,,,64.04,57.88,,70.21,
8
- sentence-t5-xl,,,66.22,61.64,,70.79,
9
- paraphrase-spanish-distilroberta,,,69.34,63.98,,74.7,
10
- sentence_similarity_spanish_es,,,68.5,61.77,,75.22,
11
- paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,,,68.62,64.0,,73.25,
12
- mstsb-paraphrase-multilingual-mpnet-BASE-v2,,,69.39,64.48,,74.29,
13
- multilingual-e5-BASE-b16-e10,,,71.97,65.09,,78.86,
14
- multilingual-e5-LARGE-stsb-tuned-b32-e10,,,72.73,66.19,,79.27,
15
- multilingual-e5-LARGE-stsb-tuned-b64-e10,,,71.83,64.58,,79.08,
16
- LaBSE,,,66.99,61.97,,72.01,
17
- multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,,,71.88,64.39,,79.38,
18
- bge-BASE-tuned-b16-e10,,,59.69,50.83,,68.55,
19
- multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,,,72.22,64.93,,79.5,
20
- bge-LARGE-tuned-b16-e10,,,61.5,51.67,,71.34,
21
- multilingual-e5-LARGE-stsb-tuned-b16-e15,,,73.38,67.56,,79.19,
22
- text2vec-base-multilingual,,,66.04,58.64,,73.43,
23
- paraphrase-multilingual-MiniLM-L12-v2,,,66.08,61.1,,71.06,
24
- text2vec-stsb-tuned-b16-e5,,,66.0,57.19,,74.81,
25
- paraphrase-multilingual-MiniLM-L12-v2-stsb-tuned-b16-e5,,,68.21,60.41,,76.01,
26
- multilingual-e5-LARGE-tuned-double-dataset-b16-e5,,,71.19,62.65,,79.74,
27
- multilingual-e5-LARGE-stsb-tuned-b16-e5,,,72.9,66.33,,79.47,
28
- multilingual-e5-LARGE-stsb-tuned-b16-e10,,,73.03,66.77,,79.29,
29
- multilingual-e5-LARGE-STSAUGMENTED-b16-e5,,,71.84,64.23,,79.44,
30
- multilingual-e5-LARGE-double-finetuning-b16-e5,,,71.53,63.55,,79.5,
 
1
  Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
2
+ bge-large-en-v1.5,,,45.25,52.27,34.55,68.77,25.41
3
+ e5-large-v2,,,45.87,56.15,30.97,71.76,24.6
4
+ multilingual-e5-base,,,51.14,63.88,32.28,77.53,30.85
5
+ multilingual-e5-LARGE-tuned-double-dataset-b16-e5,,,50.87,62.65,33.34,79.74,27.75
6
+ multilingual-e5-large,,,52.58,66.6,34.37,77.83,31.5
7
+ paraphrase-multilingual-mpnet-base-v2,,,50.47,65.68,35.9,72.53,27.78
8
+ paraphrase-spanish-distilroberta,,,50.6,63.98,34.82,74.7,28.92
9
+ retromae_es_67000,,,40.79,58.27,19.57,67.64,17.67
10
+ RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,,,47.71,59.63,32.17,74.3,24.73
11
+ sentence_similarity_spanish_es,,,47.22,61.77,28.58,75.22,23.3
12
+ sentence-t5-xl,,,47.42,61.64,32.34,70.79,24.91
13
+ bert-base-spanish-wwm-uncased,,,37.28,57.35,21.2,57.38,13.17
14
+ bertin-roberta-base-spanish,,,22.13,28.57,4.72,47.67,7.56
15
+ roberta-base-bne,,,37.25,56.27,21.44,59.49,11.79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/retrieval.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Model name,Average,MTEB ArguAna,MTEB SCIDOCS
2
+ bge-large-en-v1.5,25.41,41.27,9.54
3
+ e5-large-v2,24.6,37.67,11.52
4
+ multilingual-e5-base,30.85,50.36,11.34
5
+ multilingual-e5-LARGE-tuned-double-dataset-b16-e5,27.75,41.45,14.05
6
+ multilingual-e5-large,31.5,51.06,11.93
7
+ paraphrase-multilingual-mpnet-base-v2,27.78,43.66,11.89
8
+ paraphrase-spanish-distilroberta,28.92,46.06,11.77
9
+ retromae_es_67000,17.67,28.78,6.56
10
+ RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,24.73,39.47,9.99
11
+ sentence_similarity_spanish_es,23.3,40.07,6.52
12
+ sentence-t5-xl,24.91,38.55,11.27
13
+ bert-base-spanish-wwm-uncased,13.17,23.93,2.42
14
+ bertin-roberta-base-spanish,7.56,14.84,0.29
15
+ roberta-base-bne,11.79,21.24,2.33
data/sts.csv CHANGED
@@ -1,30 +1,15 @@
1
  Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
2
- multilingual-e5-LARGE,77.82,87.42,68.23
3
- bge-SMALL-en-v1.5,66.6,77.73,55.47
4
- multilingual-e5-BASE,77.52,87.26,67.79
5
- multilingual-e5-SMALL,76.15,85.27,67.04
6
- paraphrase-multilingual-mpnet-BASE-v2,72.52,85.14,59.91
7
- sentence-t5-LARGE,70.21,82.74,57.68
8
- sentence-t5-xl,70.78,83.42,58.16
9
  paraphrase-spanish-distilroberta,74.7,85.79,63.61
 
 
10
  sentence_similarity_spanish_es,75.22,85.37,65.07
11
- paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,73.24,86.89,59.6
12
- mstsb-paraphrase-multilingual-mpnet-BASE-v2,74.28,88.22,60.36
13
- multilingual-e5-BASE-b16-e10,78.86,87.51,70.21
14
- multilingual-e5-LARGE-stsb-tuned-b32-e10,79.27,88.1,70.44
15
- multilingual-e5-LARGE-stsb-tuned-b64-e10,79.08,88.03,70.12
16
- LaBSE,72.01,80.83,63.18
17
- multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,79.38,87.77,70.99
18
- bge-BASE-tuned-b16-e10,68.55,82.01,55.08
19
- multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,79.5,88.66,70.35
20
- bge-LARGE-tuned-b16-e10,71.34,81.57,61.12
21
- multilingual-e5-LARGE-stsb-tuned-b16-e15,79.19,89.04,69.34
22
- text2vec-base-multilingual,73.43,87.91,58.94
23
- paraphrase-multilingual-MiniLM-L12-v2,71.06,85.56,56.56
24
- text2vec-stsb-tuned-b16-e5,74.81,88.28,61.34
25
- paraphrase-multilingual-MiniLM-L12-v2-stsb-tuned-b16-e5,76.01,88.03,63.99
26
- multilingual-e5-LARGE-tuned-double-dataset-b16-e5,79.74,88.98,70.5
27
- multilingual-e5-LARGE-stsb-tuned-b16-e5,79.47,88.72,70.21
28
- multilingual-e5-LARGE-stsb-tuned-b16-e10,79.29,89.1,69.48
29
- multilingual-e5-LARGE-STSAUGMENTED-b16-e5,79.44,88.78,70.11
30
- multilingual-e5-LARGE-double-finetuning-b16-e5,79.5,88.63,70.37
 
1
  Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
2
+ bge-large-en-v1.5,68.77,79.62,57.92
3
+ e5-large-v2,71.76,82.16,61.36
4
+ multilingual-e5-base,77.53,87.26,67.79
5
+ multilingual-e5-LARGE-tuned-double-dataset-b16-e5,79.74,88.98,70.5
6
+ multilingual-e5-large,77.83,87.42,68.23
7
+ paraphrase-multilingual-mpnet-base-v2,72.53,85.14,59.91
 
8
  paraphrase-spanish-distilroberta,74.7,85.79,63.61
9
+ retromae_es_67000,67.64,73.01,62.26
10
+ RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,74.3,84.77,63.83
11
  sentence_similarity_spanish_es,75.22,85.37,65.07
12
+ sentence-t5-xl,70.79,83.42,58.16
13
+ bert-base-spanish-wwm-uncased,57.38,63.61,51.14
14
+ bertin-roberta-base-spanish,47.67,41.87,53.46
15
+ roberta-base-bne,59.49,70.75,48.24