Spaces:
Runtime error
Runtime error
Santi Diana
committed on
Commit
•
8b42620
1
Parent(s):
743aac4
Added retrieval and re-evaluated every model
Browse files
- .DS_Store +0 -0
- add_new_model/add_new_model.py +40 -19
- app.py +15 -3
- data/classification.csv +13 -28
- data/clustering.csv +14 -2
- data/general.csv +14 -29
- data/retrieval.csv +15 -0
- data/sts.csv +12 -27
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
add_new_model/add_new_model.py
CHANGED
@@ -18,7 +18,7 @@ def add_model():
|
|
18 |
|
19 |
"""
|
20 |
# Initialize an empty DataFrame
|
21 |
-
df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman','V_measure', 'Category'])
|
22 |
|
23 |
metadata_archive = 'mteb_metadata.yaml'
|
24 |
|
@@ -36,7 +36,7 @@ def add_model():
|
|
36 |
dataset_name = results_list[i]['dataset']['name']
|
37 |
|
38 |
# Initialize the row with NaN values
|
39 |
-
row = {'dataset_name': dataset_name, 'Accuracy': None, 'Spearman': None}
|
40 |
|
41 |
if task_name == "Classification":
|
42 |
accuracy = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'accuracy'), None)
|
@@ -50,6 +50,10 @@ def add_model():
|
|
50 |
v_measure = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'v_measure'), None)
|
51 |
row['V_measure'] = v_measure
|
52 |
row['Category'] = "Clustering"
|
|
|
|
|
|
|
|
|
53 |
# Append the row to the DataFrame using pd.concat
|
54 |
new_df = pd.DataFrame([row])
|
55 |
df = pd.concat([df, new_df], ignore_index=True)
|
@@ -66,6 +70,9 @@ def add_model():
|
|
66 |
df['V_measure'] = pd.to_numeric(df['V_measure'], errors='coerce')
|
67 |
clustering_v_measure_average = round(df.loc[df['Category'] == 'Clustering', 'V_measure'].mean(),2)
|
68 |
|
|
|
|
|
|
|
69 |
|
70 |
## CLASSIFICATION
|
71 |
classification_dataframe = pd.read_csv('../data/classification.csv')
|
@@ -107,13 +114,27 @@ def add_model():
|
|
107 |
clustering_dataframe = pd.concat([clustering_dataframe,new_row_df],ignore_index=True)
|
108 |
clustering_dataframe.to_csv('../data/clustering.csv',index=False)
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
## GENERAL
|
112 |
general_dataframe = pd.read_csv("../data/general.csv")
|
113 |
|
114 |
-
average = round(np.mean([classification_average,sts_spearman_average]),2)
|
115 |
## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
|
116 |
-
new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': clustering_v_measure_average, 'STS Average': sts_spearman_average, 'Retrieval Average':
|
117 |
new_row_df = pd.DataFrame(new_instance, index=[0])
|
118 |
general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
|
119 |
general_dataframe.to_csv("../data/general.csv",index=False)
|
@@ -215,26 +236,26 @@ def results_to_yaml(results_folder):
|
|
215 |
|
216 |
|
217 |
def main():
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
|
230 |
|
231 |
if __name__ == "__main__":
|
232 |
|
233 |
parser = argparse.ArgumentParser(description="Select the model that you want to add to the Leaderboard.")
|
234 |
-
parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
|
235 |
-
parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
|
236 |
-
parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
|
237 |
-
parser.add_argument("--already_yaml",default=False, help="Select if you already have the yaml file.")
|
238 |
args = parser.parse_args()
|
239 |
main()
|
240 |
|
|
|
18 |
|
19 |
"""
|
20 |
# Initialize an empty DataFrame
|
21 |
+
df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman','V_measure', 'ndcg_at_10', 'Category'])
|
22 |
|
23 |
metadata_archive = 'mteb_metadata.yaml'
|
24 |
|
|
|
36 |
dataset_name = results_list[i]['dataset']['name']
|
37 |
|
38 |
# Initialize the row with NaN values
|
39 |
+
row = {'dataset_name': dataset_name, 'Accuracy': None, 'Spearman': None, 'V_measure': None, 'ndcg_at_10': None}
|
40 |
|
41 |
if task_name == "Classification":
|
42 |
accuracy = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'accuracy'), None)
|
|
|
50 |
v_measure = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'v_measure'), None)
|
51 |
row['V_measure'] = v_measure
|
52 |
row['Category'] = "Clustering"
|
53 |
+
elif task_name == "Retrieval":
|
54 |
+
ndcg_at_10 = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'ndcg_at_10'), None)
|
55 |
+
row['ndcg_at_10'] = ndcg_at_10
|
56 |
+
row['Category'] = "Retrieval"
|
57 |
# Append the row to the DataFrame using pd.concat
|
58 |
new_df = pd.DataFrame([row])
|
59 |
df = pd.concat([df, new_df], ignore_index=True)
|
|
|
70 |
df['V_measure'] = pd.to_numeric(df['V_measure'], errors='coerce')
|
71 |
clustering_v_measure_average = round(df.loc[df['Category'] == 'Clustering', 'V_measure'].mean(),2)
|
72 |
|
73 |
+
df['ndcg_at_10'] = pd.to_numeric(df['ndcg_at_10'], errors='coerce')
|
74 |
+
retrieval_average = round(df.loc[df['Category'] == 'Retrieval', 'ndcg_at_10'].mean(),2)
|
75 |
+
|
76 |
|
77 |
## CLASSIFICATION
|
78 |
classification_dataframe = pd.read_csv('../data/classification.csv')
|
|
|
114 |
clustering_dataframe = pd.concat([clustering_dataframe,new_row_df],ignore_index=True)
|
115 |
clustering_dataframe.to_csv('../data/clustering.csv',index=False)
|
116 |
|
117 |
+
## Retrieval
|
118 |
+
retrieval_dataframe = pd.read_csv("../data/retrieval.csv")
|
119 |
+
retrieval_df = df[df['Category']=='Retrieval']
|
120 |
+
new_row_data = {'Model name': model_name, 'Average': retrieval_average}
|
121 |
+
for index, row in retrieval_df.iterrows():
|
122 |
+
column_name = row['dataset_name']
|
123 |
+
ndcg_at_10_value = row['ndcg_at_10']
|
124 |
+
new_row_data[column_name] = round(ndcg_at_10_value,2)
|
125 |
+
|
126 |
+
new_row_df = pd.DataFrame(new_row_data,index = [0])
|
127 |
+
retrieval_dataframe = pd.concat([retrieval_dataframe,new_row_df],ignore_index=True)
|
128 |
+
retrieval_dataframe.to_csv('../data/retrieval.csv',index=False)
|
129 |
+
|
130 |
+
|
131 |
|
132 |
## GENERAL
|
133 |
general_dataframe = pd.read_csv("../data/general.csv")
|
134 |
|
135 |
+
average = round(np.mean([classification_average,sts_spearman_average,clustering_v_measure_average,retrieval_average]),2)
|
136 |
## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
|
137 |
+
new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': clustering_v_measure_average, 'STS Average': sts_spearman_average, 'Retrieval Average': retrieval_average}
|
138 |
new_row_df = pd.DataFrame(new_instance, index=[0])
|
139 |
general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
|
140 |
general_dataframe.to_csv("../data/general.csv",index=False)
|
|
|
236 |
|
237 |
|
238 |
def main():
|
239 |
+
# if args.execute_eval:
|
240 |
+
# output_folder = evaluate(args.model_id)
|
241 |
+
# #results_to_yaml(output_folder)
|
242 |
+
# add_model()
|
243 |
+
# else:
|
244 |
+
# if args.output_folder == None and args.already_yaml == False:
|
245 |
+
# raise ValueError("You must indicate where your results are located")
|
246 |
+
# else:
|
247 |
+
#results_to_yaml(args.output_folder)
|
248 |
+
add_model()
|
249 |
+
print('Model added')
|
250 |
|
251 |
|
252 |
if __name__ == "__main__":
|
253 |
|
254 |
parser = argparse.ArgumentParser(description="Select the model that you want to add to the Leaderboard.")
|
255 |
+
#parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
|
256 |
+
#parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
|
257 |
+
#parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
|
258 |
+
#parser.add_argument("--already_yaml",default=False, help="Select if you already have the yaml file.")
|
259 |
args = parser.parse_args()
|
260 |
main()
|
261 |
|
app.py
CHANGED
@@ -33,9 +33,14 @@ def clustering_dataframe_update():
|
|
33 |
Returns clustering dataframe for clustering table.
|
34 |
"""
|
35 |
dataframe = pd.read_csv("data/clustering.csv")
|
|
|
36 |
|
37 |
def retrieval_dataframe_update():
|
38 |
-
|
|
|
|
|
|
|
|
|
39 |
|
40 |
def make_clickable_model(link):
|
41 |
"""
|
@@ -92,7 +97,7 @@ with block:
|
|
92 |
with gr.TabItem("STS"):
|
93 |
with gr.Row():
|
94 |
gr.Markdown("""
|
95 |
-
**Tabla
|
96 |
|
97 |
- **Metricas:** Spearman correlation basada en cosine similarity.
|
98 |
- **Idioma:** Español
|
@@ -108,7 +113,7 @@ with block:
|
|
108 |
with gr.TabItem("Clustering"):
|
109 |
with gr.Row():
|
110 |
gr.Markdown("""
|
111 |
-
**Tabla
|
112 |
|
113 |
- **Metricas:** V_measure.
|
114 |
- **Idioma:** Español
|
@@ -122,6 +127,13 @@ with block:
|
|
122 |
wrap=True,
|
123 |
)
|
124 |
with gr.TabItem("Retrieval"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
with gr.Row():
|
126 |
# Create and display a sample DataFrame
|
127 |
sts = retrieval_dataframe_update()
|
|
|
33 |
Returns clustering dataframe for clustering table.
|
34 |
"""
|
35 |
dataframe = pd.read_csv("data/clustering.csv")
|
36 |
+
return dataframe
|
37 |
|
38 |
def retrieval_dataframe_update():
|
39 |
+
"""
|
40 |
+
Returns retrieval dataframe for retrieval table.
|
41 |
+
"""
|
42 |
+
dataframe = pd.read_csv('data/retrieval.csv')
|
43 |
+
return dataframe
|
44 |
|
45 |
def make_clickable_model(link):
|
46 |
"""
|
|
|
97 |
with gr.TabItem("STS"):
|
98 |
with gr.Row():
|
99 |
gr.Markdown("""
|
100 |
+
**Tabla STS de Embeddings**
|
101 |
|
102 |
- **Metricas:** Spearman correlation basada en cosine similarity.
|
103 |
- **Idioma:** Español
|
|
|
113 |
with gr.TabItem("Clustering"):
|
114 |
with gr.Row():
|
115 |
gr.Markdown("""
|
116 |
+
**Tabla Clustering de Embeddings**
|
117 |
|
118 |
- **Metricas:** V_measure.
|
119 |
- **Idioma:** Español
|
|
|
127 |
wrap=True,
|
128 |
)
|
129 |
with gr.TabItem("Retrieval"):
|
130 |
+
with gr.Row():
|
131 |
+
gr.Markdown("""
|
132 |
+
**Tabla Retrieval de Embeddings**
|
133 |
+
|
134 |
+
- **Metricas:** ncdg_10.
|
135 |
+
- **Idioma:** Español
|
136 |
+
""")
|
137 |
with gr.Row():
|
138 |
# Create and display a sample DataFrame
|
139 |
sts = retrieval_dataframe_update()
|
data/classification.csv
CHANGED
@@ -1,30 +1,15 @@
|
|
1 |
Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
|
2 |
-
|
3 |
-
|
4 |
-
multilingual-e5-
|
5 |
-
multilingual-e5-SMALL,61.13,41.3,87.33,55.87,58.06,63.1
|
6 |
-
paraphrase-multilingual-mpnet-BASE-v2,65.67,39.99,86.96,66.59,64.43,70.42
|
7 |
-
sentence-t5-LARGE,57.87,42.89,80.78,52.07,54.1,59.56
|
8 |
-
sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
|
9 |
-
paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
|
10 |
-
sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
|
11 |
-
paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
|
12 |
-
mstsb-paraphrase-multilingual-mpnet-BASE-v2,64.47,38.29,86.04,67.06,63.47,67.53
|
13 |
-
multilingual-e5-BASE-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
|
14 |
-
multilingual-e5-LARGE-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
|
15 |
-
multilingual-e5-LARGE-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
|
16 |
-
LaBSE,61.97,39.39,84.07,64.44,58.32,63.61
|
17 |
-
multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,64.39,43.04,88.81,59.25,63.04,67.8
|
18 |
-
bge-BASE-tuned-b16-e10,50.83,31.34,74.1,45.63,48.72,54.36
|
19 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,64.93,42.98,87.96,60.91,63.78,69.02
|
20 |
-
bge-LARGE-tuned-b16-e10,51.67,31.72,76.04,48.01,48.9,53.66
|
21 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e15,67.56,43.35,90.98,67.27,65.35,70.87
|
22 |
-
text2vec-base-multilingual,58.64,34.82,79.24,58.69,58.28,62.2
|
23 |
-
paraphrase-multilingual-MiniLM-L12-v2,61.1,37.49,83.04,60.28,59.66,65.04
|
24 |
-
text2vec-stsb-tuned-b16-e5,57.19,34.64,77.72,56.46,56.94,60.22
|
25 |
-
paraphrase-multilingual-MiniLM-L12-v2-stsb-tuned-b16-e5,60.41,37.28,82.83,60.45,58.56,62.96
|
26 |
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,62.65,42.33,85.21,57.01,62.09,66.61
|
27 |
-
multilingual-e5-
|
28 |
-
multilingual-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
|
2 |
+
bge-large-en-v1.5,52.27,34.63,78.8,47.4,45.58,54.95
|
3 |
+
e5-large-v2,56.15,40.01,82.74,49.66,49.82,58.5
|
4 |
+
multilingual-e5-base,63.88,42.47,89.62,60.27,60.51,66.52
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,62.65,42.33,85.21,57.01,62.09,66.61
|
6 |
+
multilingual-e5-large,66.6,42.66,89.95,66.84,64.68,68.85
|
7 |
+
paraphrase-multilingual-mpnet-base-v2,65.68,39.99,86.96,66.59,64.43,70.42
|
8 |
+
paraphrase-spanish-distilroberta,63.98,38.25,86.81,65.95,60.51,68.39
|
9 |
+
retromae_es_67000,58.27,33.14,84.09,62.75,51.93,59.45
|
10 |
+
RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,59.63,29.7,87.64,60.15,55.43,65.22
|
11 |
+
sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.49,64.21
|
12 |
+
sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
|
13 |
+
bert-base-spanish-wwm-uncased,57.35,33.94,79.68,61.96,52.54,58.61
|
14 |
+
bertin-roberta-base-spanish,28.57,23.4,39.95,22.77,25.32,31.41
|
15 |
+
roberta-base-bne,56.27,34.15,78.91,63.17,50.3,54.82
|
data/clustering.csv
CHANGED
@@ -1,3 +1,15 @@
|
|
1 |
Model name,Average,MTEB BiorxivClusteringS2S_Spanish,MTEB RedditClusteringSpanish
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model name,Average,MTEB BiorxivClusteringS2S_Spanish,MTEB RedditClusteringSpanish
|
2 |
+
bge-large-en-v1.5,34.55,27.41,41.69
|
3 |
+
e5-large-v2,30.97,23.39,38.55
|
4 |
+
multilingual-e5-base,32.28,25.09,39.47
|
5 |
+
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,33.34,26.57,40.11
|
6 |
+
multilingual-e5-large,34.37,28.1,40.65
|
7 |
+
paraphrase-multilingual-mpnet-base-v2,35.9,28.27,43.53
|
8 |
+
paraphrase-spanish-distilroberta,34.82,26.14,43.5
|
9 |
+
retromae_es_67000,19.57,19.78,19.36
|
10 |
+
RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,32.17,24.06,40.27
|
11 |
+
sentence_similarity_spanish_es,28.58,22.84,34.33
|
12 |
+
sentence-t5-xl,32.34,21.56,43.12
|
13 |
+
bert-base-spanish-wwm-uncased,21.2,18.19,24.2
|
14 |
+
bertin-roberta-base-spanish,4.72,4.13,5.32
|
15 |
+
roberta-base-bne,21.44,19.22,23.66
|
data/general.csv
CHANGED
@@ -1,30 +1,15 @@
|
|
1 |
Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
|
2 |
-
|
3 |
-
|
4 |
-
multilingual-e5-
|
5 |
-
multilingual-e5-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
LaBSE,,,66.99,61.97,,72.01,
|
17 |
-
multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,,,71.88,64.39,,79.38,
|
18 |
-
bge-BASE-tuned-b16-e10,,,59.69,50.83,,68.55,
|
19 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,,,72.22,64.93,,79.5,
|
20 |
-
bge-LARGE-tuned-b16-e10,,,61.5,51.67,,71.34,
|
21 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e15,,,73.38,67.56,,79.19,
|
22 |
-
text2vec-base-multilingual,,,66.04,58.64,,73.43,
|
23 |
-
paraphrase-multilingual-MiniLM-L12-v2,,,66.08,61.1,,71.06,
|
24 |
-
text2vec-stsb-tuned-b16-e5,,,66.0,57.19,,74.81,
|
25 |
-
paraphrase-multilingual-MiniLM-L12-v2-stsb-tuned-b16-e5,,,68.21,60.41,,76.01,
|
26 |
-
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,,,71.19,62.65,,79.74,
|
27 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e5,,,72.9,66.33,,79.47,
|
28 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e10,,,73.03,66.77,,79.29,
|
29 |
-
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,,,71.84,64.23,,79.44,
|
30 |
-
multilingual-e5-LARGE-double-finetuning-b16-e5,,,71.53,63.55,,79.5,
|
|
|
1 |
Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
|
2 |
+
bge-large-en-v1.5,,,45.25,52.27,34.55,68.77,25.41
|
3 |
+
e5-large-v2,,,45.87,56.15,30.97,71.76,24.6
|
4 |
+
multilingual-e5-base,,,51.14,63.88,32.28,77.53,30.85
|
5 |
+
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,,,50.87,62.65,33.34,79.74,27.75
|
6 |
+
multilingual-e5-large,,,52.58,66.6,34.37,77.83,31.5
|
7 |
+
paraphrase-multilingual-mpnet-base-v2,,,50.47,65.68,35.9,72.53,27.78
|
8 |
+
paraphrase-spanish-distilroberta,,,50.6,63.98,34.82,74.7,28.92
|
9 |
+
retromae_es_67000,,,40.79,58.27,19.57,67.64,17.67
|
10 |
+
RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,,,47.71,59.63,32.17,74.3,24.73
|
11 |
+
sentence_similarity_spanish_es,,,47.22,61.77,28.58,75.22,23.3
|
12 |
+
sentence-t5-xl,,,47.42,61.64,32.34,70.79,24.91
|
13 |
+
bert-base-spanish-wwm-uncased,,,37.28,57.35,21.2,57.38,13.17
|
14 |
+
bertin-roberta-base-spanish,,,22.13,28.57,4.72,47.67,7.56
|
15 |
+
roberta-base-bne,,,37.25,56.27,21.44,59.49,11.79
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/retrieval.csv
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model name,Average,MTEB ArguAna,MTEB SCIDOCS
|
2 |
+
bge-large-en-v1.5,25.41,41.27,9.54
|
3 |
+
e5-large-v2,24.6,37.67,11.52
|
4 |
+
multilingual-e5-base,30.85,50.36,11.34
|
5 |
+
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,27.75,41.45,14.05
|
6 |
+
multilingual-e5-large,31.5,51.06,11.93
|
7 |
+
paraphrase-multilingual-mpnet-base-v2,27.78,43.66,11.89
|
8 |
+
paraphrase-spanish-distilroberta,28.92,46.06,11.77
|
9 |
+
retromae_es_67000,17.67,28.78,6.56
|
10 |
+
RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,24.73,39.47,9.99
|
11 |
+
sentence_similarity_spanish_es,23.3,40.07,6.52
|
12 |
+
sentence-t5-xl,24.91,38.55,11.27
|
13 |
+
bert-base-spanish-wwm-uncased,13.17,23.93,2.42
|
14 |
+
bertin-roberta-base-spanish,7.56,14.84,0.29
|
15 |
+
roberta-base-bne,11.79,21.24,2.33
|
data/sts.csv
CHANGED
@@ -1,30 +1,15 @@
|
|
1 |
Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
|
2 |
-
|
3 |
-
|
4 |
-
multilingual-e5-
|
5 |
-
multilingual-e5-
|
6 |
-
|
7 |
-
|
8 |
-
sentence-t5-xl,70.78,83.42,58.16
|
9 |
paraphrase-spanish-distilroberta,74.7,85.79,63.61
|
|
|
|
|
10 |
sentence_similarity_spanish_es,75.22,85.37,65.07
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
multilingual-e5-LARGE-stsb-tuned-b64-e10,79.08,88.03,70.12
|
16 |
-
LaBSE,72.01,80.83,63.18
|
17 |
-
multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,79.38,87.77,70.99
|
18 |
-
bge-BASE-tuned-b16-e10,68.55,82.01,55.08
|
19 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,79.5,88.66,70.35
|
20 |
-
bge-LARGE-tuned-b16-e10,71.34,81.57,61.12
|
21 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e15,79.19,89.04,69.34
|
22 |
-
text2vec-base-multilingual,73.43,87.91,58.94
|
23 |
-
paraphrase-multilingual-MiniLM-L12-v2,71.06,85.56,56.56
|
24 |
-
text2vec-stsb-tuned-b16-e5,74.81,88.28,61.34
|
25 |
-
paraphrase-multilingual-MiniLM-L12-v2-stsb-tuned-b16-e5,76.01,88.03,63.99
|
26 |
-
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,79.74,88.98,70.5
|
27 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e5,79.47,88.72,70.21
|
28 |
-
multilingual-e5-LARGE-stsb-tuned-b16-e10,79.29,89.1,69.48
|
29 |
-
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,79.44,88.78,70.11
|
30 |
-
multilingual-e5-LARGE-double-finetuning-b16-e5,79.5,88.63,70.37
|
|
|
1 |
Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
|
2 |
+
bge-large-en-v1.5,68.77,79.62,57.92
|
3 |
+
e5-large-v2,71.76,82.16,61.36
|
4 |
+
multilingual-e5-base,77.53,87.26,67.79
|
5 |
+
multilingual-e5-LARGE-tuned-double-dataset-b16-e5,79.74,88.98,70.5
|
6 |
+
multilingual-e5-large,77.83,87.42,68.23
|
7 |
+
paraphrase-multilingual-mpnet-base-v2,72.53,85.14,59.91
|
|
|
8 |
paraphrase-spanish-distilroberta,74.7,85.79,63.61
|
9 |
+
retromae_es_67000,67.64,73.01,62.26
|
10 |
+
RetroMAE-finetuned-stsb_multi_es_aug_gpt3.5-turbo_v2,74.3,84.77,63.83
|
11 |
sentence_similarity_spanish_es,75.22,85.37,65.07
|
12 |
+
sentence-t5-xl,70.79,83.42,58.16
|
13 |
+
bert-base-spanish-wwm-uncased,57.38,63.61,51.14
|
14 |
+
bertin-roberta-base-spanish,47.67,41.87,53.46
|
15 |
+
roberta-base-bne,59.49,70.75,48.24
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|