Santi Diana committed b8e8c93 (parent: 82d6c9a)

Feat: add new model implemented. Read README.md inside the add_new_model folder for more information.
Changed files:
- .DS_Store +0 -0
- add_new_model/MTEB_results_to_yaml.py +122 -0
- add_new_model/README.md +10 -0
- add_new_model/add_new_model.py +93 -0
- add_new_model/metadata_example/mteb_metadata.yaml +114 -0
- app.py +26 -25
- data/classification.csv +8 -0
- data/general.csv +8 -0
- data/sts.csv +8 -0
.DS_Store
ADDED
Binary file (6.15 kB)
add_new_model/MTEB_results_to_yaml.py
ADDED
@@ -0,0 +1,122 @@
"""
Usage: python MTEB_results_to_yaml.py path_to_results_folder

Creates evaluation results metadata for the model card.
E.g.
---
tags:
- mteb
model-index:
- name: SGPT-5.8B-weightedmean-msmarco-specb-bitfit
  results:
  - task:
      type: classification
    dataset:
      type: mteb/banking77
      name: MTEB Banking77
      config: default
      split: test
      revision: 44fa15921b4c889113cc5df03dd4901b49161ab7
    metrics:
    - type: accuracy
      value: 84.49350649350649
---
"""
import json
import logging
import os
import sys

from mteb import MTEB

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


results_folder = sys.argv[1].rstrip("/")
model_name = results_folder.split("/")[-1]

all_results = {}

for file_name in os.listdir(results_folder):
    if not file_name.endswith(".json"):
        logger.info(f"Skipping non-json {file_name}")
        continue
    with open(os.path.join(results_folder, file_name), "r", encoding="utf-8") as f:
        results = json.load(f)
        all_results = {**all_results, **{file_name.replace(".json", ""): results}}

# Use "train" split instead
TRAIN_SPLIT = ["DanishPoliticalCommentsClassification"]
# Use "validation" split instead
VALIDATION_SPLIT = ["AFQMC", "Cmnli", "IFlyTek", "TNews", "MSMARCO", "MultilingualSentiment", "Ocnli"]
# Use "dev" split instead
DEV_SPLIT = ["CmedqaRetrieval", "CovidRetrieval", "DuRetrieval", "EcomRetrieval", "MedicalRetrieval", "MMarcoReranking", "MMarcoRetrieval", "MSMARCO", "T2Reranking", "T2Retrieval", "VideoRetrieval"]

MARKER = "---"
TAGS = "tags:"
MTEB_TAG = "- mteb"
HEADER = "model-index:"
MODEL = f"- name: {model_name}"
RES = "  results:"

META_STRING = "\n".join([MARKER, TAGS, MTEB_TAG, HEADER, MODEL, RES])


ONE_TASK = "  - task:\n      type: {}\n    dataset:\n      type: {}\n      name: {}\n      config: {}\n      split: {}\n      revision: {}\n    metrics:"
ONE_METRIC = "    - type: {}\n      value: {}"
SKIP_KEYS = ["std", "evaluation_time", "main_score", "threshold"]

for ds_name, res_dict in sorted(all_results.items()):
    mteb_desc = (
        MTEB(tasks=[ds_name.replace("CQADupstackRetrieval", "CQADupstackAndroidRetrieval")]).tasks[0].description
    )
    hf_hub_name = mteb_desc.get("hf_hub_name", mteb_desc.get("beir_name"))
    if "CQADupstack" in ds_name:
        hf_hub_name = "BeIR/cqadupstack"
    mteb_type = mteb_desc["type"]
    revision = res_dict.get("dataset_revision")  # Okay if it's None
    split = "test"
    if (ds_name in TRAIN_SPLIT) and ("train" in res_dict):
        split = "train"
    elif (ds_name in VALIDATION_SPLIT) and ("validation" in res_dict):
        split = "validation"
    elif (ds_name in DEV_SPLIT) and ("dev" in res_dict):
        split = "dev"
    elif "test" not in res_dict:
        logger.info(f"Skipping {ds_name} as split {split} not present.")
        continue
    res_dict = res_dict.get(split)
    for lang in mteb_desc["eval_langs"]:
        mteb_name = f"MTEB {ds_name}"
        mteb_name += f" ({lang})" if len(mteb_desc["eval_langs"]) > 1 else ""
        # For English there is no language key if it's the only language
        test_result_lang = res_dict.get(lang) if len(mteb_desc["eval_langs"]) > 1 else res_dict
        # Skip if the language was not found but it has other languages
        if test_result_lang is None:
            continue
        META_STRING += "\n" + ONE_TASK.format(
            mteb_type, hf_hub_name, mteb_name, lang if len(mteb_desc["eval_langs"]) > 1 else "default", split, revision
        )
        for metric, score in test_result_lang.items():
            if not isinstance(score, dict):
                score = {metric: score}
            for sub_metric, sub_score in score.items():
                if any([x in sub_metric for x in SKIP_KEYS]):
                    continue
                META_STRING += "\n" + ONE_METRIC.format(
                    f"{metric}_{sub_metric}" if metric != sub_metric else metric,
                    # All MTEB scores are 0-1; multiply them by 100 for 3 reasons:
                    # 1) It's easier to visually digest (you need two chars less: "0.1" -> "1")
                    # 2) Others may multiply them by 100 when building on MTEB, making it confusing what the range is.
                    #    This happened with the Text and Code Embeddings paper (OpenAI) vs. the original BEIR paper.
                    # 3) It's accepted practice (SuperGLUE, GLUE are 0-100)
                    sub_score * 100,
                )

META_STRING += "\n" + MARKER
if os.path.exists("./mteb_metadata.yaml"):
    logger.warning("Overwriting ./mteb_metadata.yaml")
with open("./mteb_metadata.yaml", "w") as f:
    f.write(META_STRING)
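
The script walks over every JSON file that MTEB wrote into the results folder and flattens its metrics into model-card YAML. For reference, a hypothetical results file it could consume might look like the sketch below; the structure is inferred from the parsing logic above, and the task, revision and score values are purely illustrative:

# Hypothetical contents of e.g. sentence-t5-xl/STS22.json, shown as a Python dict.
# STS22 reports several languages, so the scores sit under a per-language key ("es").
example_result = {
    "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80",
    "test": {                      # split picked by the script (default: "test")
        "evaluation_time": 12.3,   # ignored by the script
        "es": {
            # Nested metrics become cos_sim_pearson / cos_sim_spearman, multiplied by 100.
            "cos_sim": {"pearson": 0.494, "spearman": 0.582},
        },
    },
}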
add_new_model/README.md
ADDED
@@ -0,0 +1,10 @@
## How to add a new model to the Leaderboard

Here we explain how to add a new model to the Leaderboard. Follow these steps:

1. `git clone` this repository and `cd add_new_model`.
2. Evaluate the new model with the MTEB library. The evaluation produces a results folder: for example, evaluating `sentence-transformers/sentence-t5-large` produces a folder named `sentence-t5-large`. (A minimal evaluation sketch follows this list.)
3. Once evaluated, move that results folder into this `add_new_model` folder.
4. Run `python MTEB_results_to_yaml.py <results_folder>`. This creates a file named `mteb_metadata.yaml` containing the metadata for your evaluation.
5. Run `python add_new_model.py`. This appends your model's scores to the CSVs in `data/`, adding it to the Leaderboard.
6. Add, commit and `git push` the changes, without uploading the results folder or `mteb_metadata.yaml`.
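
A minimal sketch of step 2, assuming the model loads with sentence-transformers; the model name and the task list are illustrative and should be adapted to the model and the Spanish tasks you want on the leaderboard:

from mteb import MTEB
from sentence_transformers import SentenceTransformer

# Illustrative model; replace it with the model you want to add.
model_id = "sentence-transformers/sentence-t5-large"
model = SentenceTransformer(model_id)

# Spanish classification and STS tasks currently shown on the leaderboard.
tasks = [
    "AmazonReviewsClassification",
    "MTOPDomainClassification",
    "MTOPIntentClassification",
    "MassiveIntentClassification",
    "MassiveScenarioClassification",
    "STS17",
    "STS22",
]
evaluation = MTEB(tasks=tasks, task_langs=["es"])
# Writes one JSON results file per task into ./sentence-t5-large/
evaluation.run(model, output_folder=model_id.split("/")[-1])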
add_new_model/add_new_model.py
ADDED
@@ -0,0 +1,93 @@
import pandas as pd
import yaml
import numpy as np

def add_model(metadata_archive):
    """
    Updates the files that app.py reads to build the leaderboard. Whenever
    someone wants to add a new model, they have to run this script.

    1. Read the metadata, extract the scores and simply append a new row to each CSV.
    """
    # Initialize an empty DataFrame
    df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman', "Category"])

    with open(metadata_archive, 'r') as file:
        for index, data in enumerate(yaml.safe_load_all(file)):
            if index == 0:
                model_index_list = data.get('model-index', [])
                model_name = model_index_list[0].get('name')
                results_list = model_index_list[0].get('results', [])

                if results_list:
                    for i in range(len(results_list)):
                        task = results_list[i].get('task', {})
                        task_name = task.get("type")
                        dataset_name = results_list[i]['dataset']['name']

                        # Initialize the row with NaN values
                        row = {'dataset_name': dataset_name, 'Accuracy': None, 'Spearman': None}

                        if task_name == "Classification":
                            accuracy = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'accuracy'), None)
                            row['Accuracy'] = accuracy
                            row['Category'] = "Classification"
                        elif task_name == "STS":
                            spearman = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'cos_sim_spearman'), None)
                            row['Spearman'] = spearman
                            row["Category"] = "STS"

                        # Append the row to the DataFrame using pd.concat
                        new_df = pd.DataFrame([row])
                        df = pd.concat([df, new_df], ignore_index=True)

    df['Accuracy'] = pd.to_numeric(df['Accuracy'], errors='coerce')
    classification_average = round(df.loc[df['Category'] == 'Classification', 'Accuracy'].mean(), 2)

    df['Spearman'] = pd.to_numeric(df['Spearman'], errors='coerce')
    sts_spearman_average = round(df.loc[df['Category'] == 'STS', 'Spearman'].mean(), 2)

    ## CLASSIFICATION
    classification_dataframe = pd.read_csv('../data/classification.csv')
    classification_df = df[df['Category'] == 'Classification']
    new_row_data = {'Model name': model_name}

    for index, row in classification_df.iterrows():
        column_name = row['dataset_name']
        accuracy_value = row['Accuracy']
        new_row_data[column_name] = round(accuracy_value, 2)

    new_row_df = pd.DataFrame(new_row_data, index=[0])
    classification_dataframe = pd.concat([classification_dataframe, new_row_df], ignore_index=True)
    classification_dataframe.to_csv("../data/classification.csv", index=False)

    ## STS
    sts_dataframe = pd.read_csv('../data/sts.csv')
    sts_df = df[df['Category'] == 'STS']
    new_row_data = {'Model name': model_name}

    for index, row in sts_df.iterrows():
        column_name = row['dataset_name']
        spearman_value = row['Spearman']
        new_row_data[column_name] = round(spearman_value, 2)

    new_row_df = pd.DataFrame(new_row_data, index=[0])
    sts_dataframe = pd.concat([sts_dataframe, new_row_df], ignore_index=True)
    sts_dataframe.to_csv('../data/sts.csv', index=False)

    ## GENERAL
    general_dataframe = pd.read_csv("../data/general.csv")

    average = round(np.mean([classification_average, sts_spearman_average]), 2)
    ## TODO: fill in metadata such as Model Size or Embedding Dimensions.
    new_instance = {'Model name': model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average': average, 'Classification Average': classification_average, 'Clustering Average': None, 'STS Average': sts_spearman_average, 'Retrieval Average': None}
    new_row_df = pd.DataFrame(new_instance, index=[0])
    general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
    general_dataframe.to_csv("../data/general.csv", index=False)

add_model('mteb_metadata.yaml')
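
The script assumes it is run from inside add_new_model/, with mteb_metadata.yaml next to it and the leaderboard CSVs one directory up. A quick sanity check after running it could look like this sketch (paths as used above):

import pandas as pd

# Confirm that the new model's row was appended to each leaderboard table.
for csv_path in ("../data/general.csv", "../data/classification.csv", "../data/sts.csv"):
    table = pd.read_csv(csv_path)
    print(csv_path, "->", table["Model name"].iloc[-1])  # last row should be the new model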
add_new_model/metadata_example/mteb_metadata.yaml
ADDED
@@ -0,0 +1,114 @@
---
tags:
- mteb
model-index:
- name: sentence-t5-xl
  results:
  - task:
      type: Classification
    dataset:
      type: mteb/amazon_reviews_multi
      name: MTEB AmazonReviewsClassification (es)
      config: es
      split: test
      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
    metrics:
    - type: accuracy
      value: 45.007999999999996
    - type: f1
      value: 41.6679637623569
  - task:
      type: Classification
    dataset:
      type: mteb/mtop_domain
      name: MTEB MTOPDomainClassification (es)
      config: es
      split: test
      revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
    metrics:
    - type: accuracy
      value: 85.32354903268846
    - type: f1
      value: 85.23439986563692
  - task:
      type: Classification
    dataset:
      type: mteb/mtop_intent
      name: MTEB MTOPIntentClassification (es)
      config: es
      split: test
      revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
    metrics:
    - type: accuracy
      value: 57.384923282188126
    - type: f1
      value: 38.1008046822733
  - task:
      type: Classification
    dataset:
      type: mteb/amazon_massive_intent
      name: MTEB MassiveIntentClassification (es)
      config: es
      split: test
      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
    metrics:
    - type: accuracy
      value: 57.96906523201076
    - type: f1
      value: 57.053434089481605
  - task:
      type: Classification
    dataset:
      type: mteb/amazon_massive_scenario
      name: MTEB MassiveScenarioClassification (es)
      config: es
      split: test
      revision: 7d571f92784cd94a019292a1f45445077d0ef634
    metrics:
    - type: accuracy
      value: 62.51513113651648
    - type: f1
      value: 61.428522227301464
  - task:
      type: STS
    dataset:
      type: mteb/sts17-crosslingual-sts
      name: MTEB STS17 (es-es)
      config: es-es
      split: test
      revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
    metrics:
    - type: cos_sim_pearson
      value: 83.7632102444147
    - type: cos_sim_spearman
      value: 83.41808607885294
    - type: euclidean_pearson
      value: 84.2318059368248
    - type: euclidean_spearman
      value: 83.41874306738518
    - type: manhattan_pearson
      value: 84.31088958713279
    - type: manhattan_spearman
      value: 83.41585915763147
  - task:
      type: STS
    dataset:
      type: mteb/sts22-crosslingual-sts
      name: MTEB STS22 (es)
      config: es
      split: test
      revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
    metrics:
    - type: cos_sim_pearson
      value: 49.40174203847368
    - type: cos_sim_spearman
      value: 58.15600173312334
    - type: euclidean_pearson
      value: 53.967323698454365
    - type: euclidean_spearman
      value: 58.15600173312334
    - type: manhattan_pearson
      value: 53.89976435331337
    - type: manhattan_spearman
      value: 58.187134535671284
---
app.py
CHANGED
@@ -2,9 +2,29 @@ import gradio as gr
 import pandas as pd
 
 block = gr.Blocks()
-NUM_DATASETS =
+NUM_DATASETS = 7
 NUM_SCORES = 0
-NUM_MODELS =
+NUM_MODELS = 5
+
+def general_dataframe_update():
+    """
+    Returns the general dataframe for the general table.
+    """
+    dataframe = pd.read_csv('data/general.csv')
+    return dataframe
+
+def classification_dataframe_update():
+    """
+    Returns the classification dataframe for the classification table.
+    """
+    dataframe = pd.read_csv('data/classification.csv')
+    return dataframe
+def sts_dataframe_update():
+    """
+    Returns the STS dataframe for the STS table.
+    """
+    dataframe = pd.read_csv('data/sts.csv')
+    return dataframe
 
 with block:
     gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
@@ -24,16 +44,7 @@ with block:
     - **Idioma:** Español
     """)
     with gr.Row():
-        overall =
-            'Model name': ['hiiamsid/sentence_similarity_spanish_es', 'clibrain/paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings', 'intfloat/mulilingua-e5-large','hackathon-pln-es/paraphrase-spanish-distilroberta'],
-            'Model Size (GB)': 100,
-            'Embedding Dimensions': 100,
-            'Average': 56,
-            'Classification Average': 55,
-            "Clustering Average": 50,
-            'STS Average': 40,
-            "Retrieval Average": 30
-        })
+        overall = general_dataframe_update()
         data_overall = gr.components.Dataframe(
             overall,
             type="pandas",
@@ -42,14 +53,7 @@ with block:
         with gr.TabItem("Classification"):
             with gr.Row():
                 # Create and display a sample DataFrame
-                classification =
-                    'Model name': ['hiiamsid/sentence_similarity_spanish_es', 'clibrain/paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings', 'intfloat/mulilingua-e5-large','hackathon-pln-es/paraphrase-spanish-distilroberta'],
-                    'AmazonReviewsClassification': 100,
-                    'MTOPDomainClassification': 100,
-                    'MassiveIntentClassification': 56,
-                    'MassiveScenarioClassification': 55,
-                    "MTOPIntentClassification": 50,
-                })
+                classification = classification_dataframe_update()
                 data_overall = gr.components.Dataframe(
                     classification,
                     type="pandas",
@@ -58,11 +62,7 @@ with block:
         with gr.TabItem("STS"):
            with gr.Row():
                 # Create and display a sample DataFrame
-                sts =
-                    'Model name': ['hiiamsid/sentence_similarity_spanish_es', 'clibrain/paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings', 'intfloat/mulilingua-e5-large','hackathon-pln-es/paraphrase-spanish-distilroberta'],
-                    'STS22': 100,
-                    'STS17': 100,
-                })
+                sts = sts_dataframe_update()
                 data_overall = gr.components.Dataframe(
                     sts,
                     type="pandas",
@@ -70,3 +70,4 @@ with block:
     )
 
 block.launch()
+
data/classification.csv
ADDED
@@ -0,0 +1,8 @@
Model name,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
multilingual-e5-large,42.66,89.95,66.84,64.68,68.85
bge-small-en-v1.5,32.03,76.93,52.15,48.77,54.42
multilingual-e5-base,42.47,89.62,60.27,60.51,66.52
multilingual-e5-small,41.3,87.33,55.87,58.06,63.1
paraphrase-multilingual-mpnet-base-v2,39.99,86.96,66.59,64.43,70.42
sentence-t5-large,42.89,80.78,52.07,54.1,59.56
sentence-t5-xl,45.01,85.32,57.38,57.97,62.52
data/general.csv
ADDED
@@ -0,0 +1,8 @@
Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
multilingual-e5-large,,,72.22,66.6,,77.83,
bge-small-en-v1.5,,,59.73,52.86,,66.6,
multilingual-e5-base,,,70.7,63.88,,77.53,
multilingual-e5-small,,,68.64,61.13,,76.15,
paraphrase-multilingual-mpnet-base-v2,,,69.1,65.68,,72.53,
sentence-t5-large,,,64.04,57.88,,70.21,
sentence-t5-xl,,,66.22,61.64,,70.79,
data/sts.csv
ADDED
@@ -0,0 +1,8 @@
Model name,MTEB STS17 (es-es),MTEB STS22 (es)
multilingual-e5-large,87.42,68.23
bge-small-en-v1.5,77.73,55.47
multilingual-e5-base,87.26,67.79
multilingual-e5-small,85.27,67.04
paraphrase-multilingual-mpnet-base-v2,85.14,59.91
sentence-t5-large,82.74,57.68
sentence-t5-xl,83.42,58.16
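
The Average, Classification Average and STS Average columns in data/general.csv follow from the per-dataset scores, mirroring the computation in add_new_model.py. As a worked example, the sentence-t5-xl row derives from the raw metric values in the mteb_metadata.yaml example above:

import numpy as np

# Classification Average: mean accuracy over the five classification tasks.
classification_average = round(np.mean([45.007999999999996, 85.32354903268846,
                                        57.384923282188126, 57.96906523201076,
                                        62.51513113651648]), 2)              # 61.64
# STS Average: mean cos_sim_spearman over the two STS tasks.
sts_spearman_average = round(np.mean([83.41808607885294,
                                      58.15600173312334]), 2)                # 70.79
# Overall Average: mean of the two per-category averages, as in data/general.csv.
average = round(np.mean([classification_average, sts_spearman_average]), 2)  # 66.22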