Spaces:
Runtime error
Runtime error
Santi Diana
committed on
Commit
•
ae9059f
1
Parent(s):
d3cd271
Added clustering task
Browse files
- add_new_model/add_new_model.py +32 -7
- data/clustering.csv +3 -0
add_new_model/add_new_model.py
CHANGED
@@ -18,7 +18,7 @@ def add_model():
|
|
18 |
|
19 |
"""
|
20 |
# Initialize an empty DataFrame
|
21 |
-
df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman', 'Category'])
|
22 |
|
23 |
metadata_archive = 'mteb_metadata.yaml'
|
24 |
|
@@ -46,17 +46,27 @@ def add_model():
|
|
46 |
spearman = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'cos_sim_spearman'), None)
|
47 |
row['Spearman'] = spearman
|
48 |
row["Category"] = "STS"
|
49 |
-
|
|
|
|
|
|
|
50 |
# Append the row to the DataFrame using pd.concat
|
51 |
new_df = pd.DataFrame([row])
|
52 |
df = pd.concat([df, new_df], ignore_index=True)
|
53 |
|
|
|
|
|
|
|
54 |
df['Accuracy'] = pd.to_numeric(df['Accuracy'], errors='coerce')
|
55 |
classification_average = round(df.loc[df['Category'] == 'Classification', 'Accuracy'].mean(),2)
|
56 |
|
57 |
df['Spearman'] = pd.to_numeric(df['Spearman'], errors='coerce')
|
58 |
sts_spearman_average = round(df.loc[df['Category'] == 'STS', 'Spearman'].mean(),2)
|
59 |
|
|
|
|
|
|
|
|
|
60 |
## CLASSIFICATION
|
61 |
classification_dataframe = pd.read_csv('../data/classification.csv')
|
62 |
classification_df = df[df['Category']== 'Classification']
|
@@ -84,12 +94,26 @@ def add_model():
|
|
84 |
sts_dataframe = pd.concat([sts_dataframe,new_row_df],ignore_index=True)
|
85 |
sts_dataframe.to_csv('../data/sts.csv',index=False)
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
## GENERAL
|
88 |
general_dataframe = pd.read_csv("../data/general.csv")
|
89 |
|
90 |
average = round(np.mean([classification_average,sts_spearman_average]),2)
|
91 |
## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
|
92 |
-
new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': None, 'STS Average': sts_spearman_average, 'Retrieval Average': None}
|
93 |
new_row_df = pd.DataFrame(new_instance, index=[0])
|
94 |
general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
|
95 |
general_dataframe.to_csv("../data/general.csv",index=False)
|
@@ -191,17 +215,17 @@ def results_to_yaml(results_folder):
|
|
191 |
|
192 |
|
193 |
def main():
|
194 |
-
|
195 |
if args.execute_eval:
|
196 |
output_folder = evaluate(args.model_id)
|
197 |
-
results_to_yaml(output_folder)
|
198 |
add_model()
|
199 |
else:
|
200 |
-
if args.output_folder == None:
|
201 |
raise ValueError("You must indicate where your results are located")
|
202 |
else:
|
203 |
-
results_to_yaml(args.output_folder)
|
204 |
add_model()
|
|
|
205 |
|
206 |
|
207 |
if __name__ == "__main__":
|
@@ -210,6 +234,7 @@ if __name__ == "__main__":
|
|
210 |
parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
|
211 |
parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
|
212 |
parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
|
|
|
213 |
args = parser.parse_args()
|
214 |
main()
|
215 |
|
|
|
18 |
|
19 |
"""
|
20 |
# Initialize an empty DataFrame
|
21 |
+
df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman','V_measure', 'Category'])
|
22 |
|
23 |
metadata_archive = 'mteb_metadata.yaml'
|
24 |
|
|
|
46 |
spearman = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'cos_sim_spearman'), None)
|
47 |
row['Spearman'] = spearman
|
48 |
row["Category"] = "STS"
|
49 |
+
elif task_name == "Clustering":
|
50 |
+
v_measure = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'v_measure'), None)
|
51 |
+
row['V_measure'] = v_measure
|
52 |
+
row['Category'] = "Clustering"
|
53 |
# Append the row to the DataFrame using pd.concat
|
54 |
new_df = pd.DataFrame([row])
|
55 |
df = pd.concat([df, new_df], ignore_index=True)
|
56 |
|
57 |
+
print(df)
|
58 |
+
|
59 |
+
|
60 |
df['Accuracy'] = pd.to_numeric(df['Accuracy'], errors='coerce')
|
61 |
classification_average = round(df.loc[df['Category'] == 'Classification', 'Accuracy'].mean(),2)
|
62 |
|
63 |
df['Spearman'] = pd.to_numeric(df['Spearman'], errors='coerce')
|
64 |
sts_spearman_average = round(df.loc[df['Category'] == 'STS', 'Spearman'].mean(),2)
|
65 |
|
66 |
+
df['V_measure'] = pd.to_numeric(df['V_measure'], errors='coerce')
|
67 |
+
clustering_v_measure_average = round(df.loc[df['Category'] == 'Clustering', 'V_measure'].mean(),2)
|
68 |
+
|
69 |
+
|
70 |
## CLASSIFICATION
|
71 |
classification_dataframe = pd.read_csv('../data/classification.csv')
|
72 |
classification_df = df[df['Category']== 'Classification']
|
|
|
94 |
sts_dataframe = pd.concat([sts_dataframe,new_row_df],ignore_index=True)
|
95 |
sts_dataframe.to_csv('../data/sts.csv',index=False)
|
96 |
|
97 |
+
## Clustering
|
98 |
+
clustering_dataframe = pd.read_csv("../data/clustering.csv")
|
99 |
+
clustering_df = df[df['Category']=='Clustering']
|
100 |
+
new_row_data = {'Model name': model_name, 'Average': clustering_v_measure_average}
|
101 |
+
for index, row in clustering_df.iterrows():
|
102 |
+
column_name = row['dataset_name']
|
103 |
+
v_measure_value = row['V_measure']
|
104 |
+
new_row_data[column_name] = round(v_measure_value,2)
|
105 |
+
|
106 |
+
new_row_df = pd.DataFrame(new_row_data,index = [0])
|
107 |
+
clustering_dataframe = pd.concat([clustering_dataframe,new_row_df],ignore_index=True)
|
108 |
+
clustering_dataframe.to_csv('../data/clustering.csv',index=False)
|
109 |
+
|
110 |
+
|
111 |
## GENERAL
|
112 |
general_dataframe = pd.read_csv("../data/general.csv")
|
113 |
|
114 |
average = round(np.mean([classification_average,sts_spearman_average]),2)
|
115 |
## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
|
116 |
+
new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': clustering_v_measure_average, 'STS Average': sts_spearman_average, 'Retrieval Average': None}
|
117 |
new_row_df = pd.DataFrame(new_instance, index=[0])
|
118 |
general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
|
119 |
general_dataframe.to_csv("../data/general.csv",index=False)
|
|
|
215 |
|
216 |
|
217 |
def main():
|
|
|
218 |
if args.execute_eval:
|
219 |
output_folder = evaluate(args.model_id)
|
220 |
+
#results_to_yaml(output_folder)
|
221 |
add_model()
|
222 |
else:
|
223 |
+
if args.output_folder == None and args.already_yaml == False:
|
224 |
raise ValueError("You must indicate where your results are located")
|
225 |
else:
|
226 |
+
#results_to_yaml(args.output_folder)
|
227 |
add_model()
|
228 |
+
print('Model added')
|
229 |
|
230 |
|
231 |
if __name__ == "__main__":
|
|
|
234 |
parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
|
235 |
parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
|
236 |
parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
|
237 |
+
parser.add_argument("--already_yaml",default=False, help="Select if you already have the yaml file.")
|
238 |
args = parser.parse_args()
|
239 |
main()
|
240 |
|
data/clustering.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
Model name,Average,MTEB BiorxivClusteringS2S_Spanish,MTEB RedditClusteringSpanish
|
2 |
+
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,33.86,26.64,41.07
|
3 |
+
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,33.86,26.64,41.07
|