Santi Diana committed on
Commit
ae9059f
1 Parent(s): d3cd271

Added clustering task

Browse files
add_new_model/add_new_model.py CHANGED
@@ -18,7 +18,7 @@ def add_model():
18
 
19
  """
20
  # Initialize an empty DataFrame
21
- df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman', "Category"])
22
 
23
  metadata_archive = 'mteb_metadata.yaml'
24
 
@@ -46,17 +46,27 @@ def add_model():
46
  spearman = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'cos_sim_spearman'), None)
47
  row['Spearman'] = spearman
48
  row["Category"] = "STS"
49
-
 
 
 
50
  # Append the row to the DataFrame using pd.concat
51
  new_df = pd.DataFrame([row])
52
  df = pd.concat([df, new_df], ignore_index=True)
53
 
 
 
 
54
  df['Accuracy'] = pd.to_numeric(df['Accuracy'], errors='coerce')
55
  classification_average = round(df.loc[df['Category'] == 'Classification', 'Accuracy'].mean(),2)
56
 
57
  df['Spearman'] = pd.to_numeric(df['Spearman'], errors='coerce')
58
  sts_spearman_average = round(df.loc[df['Category'] == 'STS', 'Spearman'].mean(),2)
59
 
 
 
 
 
60
  ## CLASSIFICATION
61
  classification_dataframe = pd.read_csv('../data/classification.csv')
62
  classification_df = df[df['Category']== 'Classification']
@@ -84,12 +94,26 @@ def add_model():
84
  sts_dataframe = pd.concat([sts_dataframe,new_row_df],ignore_index=True)
85
  sts_dataframe.to_csv('../data/sts.csv',index=False)
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  ## GENERAL
88
  general_dataframe = pd.read_csv("../data/general.csv")
89
 
90
  average = round(np.mean([classification_average,sts_spearman_average]),2)
91
  ## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
92
- new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': None, 'STS Average': sts_spearman_average, 'Retrieval Average': None}
93
  new_row_df = pd.DataFrame(new_instance, index=[0])
94
  general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
95
  general_dataframe.to_csv("../data/general.csv",index=False)
@@ -191,17 +215,17 @@ def results_to_yaml(results_folder):
191
 
192
 
193
  def main():
194
-
195
  if args.execute_eval:
196
  output_folder = evaluate(args.model_id)
197
- results_to_yaml(output_folder)
198
  add_model()
199
  else:
200
- if args.output_folder == None:
201
  raise ValueError("You must indicate where your results are located")
202
  else:
203
- results_to_yaml(args.output_folder)
204
  add_model()
 
205
 
206
 
207
  if __name__ == "__main__":
@@ -210,6 +234,7 @@ if __name__ == "__main__":
210
  parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
211
  parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
212
  parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
 
213
  args = parser.parse_args()
214
  main()
215
 
 
18
 
19
  """
20
  # Initialize an empty DataFrame
21
+ df = pd.DataFrame(columns=['dataset_name', 'Accuracy', 'Spearman','V_measure', 'Category'])
22
 
23
  metadata_archive = 'mteb_metadata.yaml'
24
 
 
46
  spearman = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'cos_sim_spearman'), None)
47
  row['Spearman'] = spearman
48
  row["Category"] = "STS"
49
+ elif task_name == "Clustering":
50
+ v_measure = next((metric.get('value') for metric in results_list[i].get('metrics', []) if metric.get('type') == 'v_measure'), None)
51
+ row['V_measure'] = v_measure
52
+ row['Category'] = "Clustering"
53
  # Append the row to the DataFrame using pd.concat
54
  new_df = pd.DataFrame([row])
55
  df = pd.concat([df, new_df], ignore_index=True)
56
 
57
+ print(df)
58
+
59
+
60
  df['Accuracy'] = pd.to_numeric(df['Accuracy'], errors='coerce')
61
  classification_average = round(df.loc[df['Category'] == 'Classification', 'Accuracy'].mean(),2)
62
 
63
  df['Spearman'] = pd.to_numeric(df['Spearman'], errors='coerce')
64
  sts_spearman_average = round(df.loc[df['Category'] == 'STS', 'Spearman'].mean(),2)
65
 
66
+ df['V_measure'] = pd.to_numeric(df['V_measure'], errors='coerce')
67
+ clustering_v_measure_average = round(df.loc[df['Category'] == 'Clustering', 'V_measure'].mean(),2)
68
+
69
+
70
  ## CLASSIFICATION
71
  classification_dataframe = pd.read_csv('../data/classification.csv')
72
  classification_df = df[df['Category']== 'Classification']
 
94
  sts_dataframe = pd.concat([sts_dataframe,new_row_df],ignore_index=True)
95
  sts_dataframe.to_csv('../data/sts.csv',index=False)
96
 
97
+ ## Clustering
98
+ clustering_dataframe = pd.read_csv("../data/clustering.csv")
99
+ clustering_df = df[df['Category']=='Clustering']
100
+ new_row_data = {'Model name': model_name, 'Average': clustering_v_measure_average}
101
+ for index, row in clustering_df.iterrows():
102
+ column_name = row['dataset_name']
103
+ v_measure_value = row['V_measure']
104
+ new_row_data[column_name] = round(v_measure_value,2)
105
+
106
+ new_row_df = pd.DataFrame(new_row_data,index = [0])
107
+ clustering_dataframe = pd.concat([clustering_dataframe,new_row_df],ignore_index=True)
108
+ clustering_dataframe.to_csv('../data/clustering.csv',index=False)
109
+
110
+
111
  ## GENERAL
112
  general_dataframe = pd.read_csv("../data/general.csv")
113
 
114
  average = round(np.mean([classification_average,sts_spearman_average]),2)
115
  ## TODO: solucionar la meta-data como Model Size o Embedding Dimensions.
116
+ new_instance = {'Model name':model_name, 'Model Size (GB)': None, 'Embedding Dimensions': None, 'Average':average, 'Classification Average': classification_average, 'Clustering Average': clustering_v_measure_average, 'STS Average': sts_spearman_average, 'Retrieval Average': None}
117
  new_row_df = pd.DataFrame(new_instance, index=[0])
118
  general_dataframe = pd.concat([general_dataframe, new_row_df], ignore_index=True)
119
  general_dataframe.to_csv("../data/general.csv",index=False)
 
215
 
216
 
217
  def main():
 
218
  if args.execute_eval:
219
  output_folder = evaluate(args.model_id)
220
+ #results_to_yaml(output_folder)
221
  add_model()
222
  else:
223
+ if args.output_folder == None and args.already_yaml == False:
224
  raise ValueError("You must indicate where your results are located")
225
  else:
226
+ #results_to_yaml(args.output_folder)
227
  add_model()
228
+ print('Model added')
229
 
230
 
231
  if __name__ == "__main__":
 
234
  parser.add_argument("--model_id", type=str, required=True, help="HuggingFace model path that you want to evaluate.")
235
  parser.add_argument("--execute_eval",type=bool, default=False, help="Select if you want to execute evaluation.")
236
  parser.add_argument("--output_folder", type=str, help = "Select the folder in which the results are stored.")
237
+ parser.add_argument("--already_yaml",default=False, help="Select if you already have the yaml file.")
238
  args = parser.parse_args()
239
  main()
240
 
data/clustering.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model name,Average,MTEB BiorxivClusteringS2S_Spanish,MTEB RedditClusteringSpanish
2
+ multilingual-e5-LARGE-STSAUGMENTED-b16-e5,33.86,26.64,41.07
3
+ multilingual-e5-LARGE-STSAUGMENTED-b16-e5,33.86,26.64,41.07