eduagarcia committed
Commit 7d8dd52
1 parent: c43476e

fix f1_score calculation bug
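The commit message does not say what the miscalculation was; every field touched in the diff below is either a per-task F1 score or an aggregate derived from one. As a hedged illustration only (the labels and the choice of averaging mode are assumptions, not necessarily this repository's actual bug or fix), one common way an F1 bug of this shape appears is using the wrong averaging mode in scikit-learn:

```python
# Hedged sketch: none of the values below come from this repository.
# It shows how picking the wrong `average` mode silently changes the F1
# that ends up in a results file such as external_models_results.json.
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 2, 1, 0, 1]  # e.g. a 3-class task like tweetsentbr
y_pred = [0, 1, 1, 2, 1, 0, 2]

# Buggy variant (assumption): micro-F1, which for multi-class equals accuracy.
buggy = f1_score(y_true, y_pred, average="micro")

# Intended variant (assumption): macro-F1, the unweighted mean of per-class F1.
fixed = f1_score(y_true, y_pred, average="macro")

print(f"micro={buggy:.4f}  macro={fixed:.4f}")
```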

Files changed (1): external_models_results.json (+23 -23)
external_models_results.json CHANGED
@@ -14,12 +14,12 @@
  "assin2_sts": 0.7053302344881672,
  "assin2_rte": 0.9121728362223306,
  "faquad_nli": 0.7575848453041435,
- "hatebr_offensive": 0.5025338637870607,
- "portuguese_hate_speech": 0.4650217578860529,
- "tweetsentbr": 0.533977453070735
+ "hatebr_offensive": 0.753800795680591,
+ "portuguese_hate_speech": 0.6975326368290793,
+ "tweetsentbr": 0.7119699374276466
  },
- "result_metrics_average": 0.6428099652929031,
- "result_metrics_npm": 0.43960062672137007
+ "result_metrics_average": 0.7163399980921773,
+ "result_metrics_npm": 0.5744541501392351
  },
  {
  "model": "sabia-2-medium",
@@ -82,10 +82,10 @@
  "faquad_nli": 0.6340996599941455,
  "hatebr_offensive": 0.8023698759439051,
  "portuguese_hate_speech": 0.7342166269560177,
- "tweetsentbr": 0.5477486799750156
+ "tweetsentbr": 0.7303315733000207
  },
- "result_metrics_average": 0.7212271446046878,
- "result_metrics_npm": 0.5735261536314672
+ "result_metrics_average": 0.7415141327519107,
+ "result_metrics_npm": 0.6037151240886439
  },
  {
  "model": "gemini-1.0-pro",
@@ -122,14 +122,14 @@
  "bluex": 0.7719054242002782,
  "oab_exams": 0.6888382687927107,
  "assin2_sts": 0.8159702278408203,
- "assin2_rte": 0.9328989988467518,
- "faquad_nli": 0.7290756302521009,
- "hatebr_offensive": 0.8697698647467024,
- "portuguese_hate_speech": 0.7539414414414414,
- "tweetsentbr": 0.772785080895884
+ "assin2_rte": 0.4651063829787234,
+ "faquad_nli": 0.4114285714285714,
+ "hatebr_offensive": 0.4422336328626444,
+ "portuguese_hate_speech": 0.41441441441441434,
+ "tweetsentbr": 0.7725066133902373
  },
- "result_metrics_average": 0.7984588504001905,
- "result_metrics_npm": 0.6908188311933006
+ "result_metrics_average": 0.6259275836103805,
+ "result_metrics_npm": 0.3545226679161642
  },
  {
  "model": "deepseek-v2-chat",
@@ -256,14 +256,14 @@
  "bluex": 0.8011126564673157,
  "oab_exams": 0.7640091116173121,
  "assin2_sts": 0.7888441732870783,
- "assin2_rte": 0.6317630318610981,
+ "assin2_rte": 0.9476445477916471,
  "faquad_nli": 0.825063276593557,
  "hatebr_offensive": 0.9073940659389119,
  "portuguese_hate_speech": 0.7191480935512969,
  "tweetsentbr": 0.7821434639106575
  },
- "result_metrics_average": 0.7857580188820238,
- "result_metrics_npm": 0.6584973442501938
+ "result_metrics_average": 0.8208559650965292,
+ "result_metrics_npm": 0.7286932366792048
  },
  {
  "model": "sabia-3",
@@ -280,11 +280,11 @@
  "assin2_sts": 0.8253863689009022,
  "assin2_rte": 0.9477034821619312,
  "faquad_nli": 0.8243848812618203,
- "hatebr_offensive": 0.5519158516393349,
- "portuguese_hate_speech": 0.48273809523809524,
- "tweetsentbr": 0.5632959814986498
+ "hatebr_offensive": 0.8278737774590023,
+ "portuguese_hate_speech": 0.7241071428571428,
+ "tweetsentbr": 0.7510613086648664
  },
- "result_metrics_average": 0.744836336226786,
- "result_metrics_npm": 0.5802643096708316
+ "result_metrics_average": 0.8231799251828895,
+ "result_metrics_npm": 0.7241097388486535
  }
  ]
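Because each corrected F1 feeds the model's aggregate fields, every hunk above also rewrites result_metrics_average and result_metrics_npm for the affected entry. The sketch below shows that propagation under stated assumptions: the inner scores object is assumed to be named result_metrics (matching the shape visible in the diff), and the baselines and normalization used for npm are illustrative placeholders, not the leaderboard's actual constants.

```python
import json

# Assumed per-task baselines for the npm normalization; placeholders only.
BASELINES = {
    "hatebr_offensive": 0.5,
    "portuguese_hate_speech": 0.5,
    "tweetsentbr": 1 / 3,
}

def recompute_aggregates(entry: dict) -> None:
    """Refresh the aggregate fields after a per-task metric was corrected.

    Assumes per-task scores live under an inner "result_metrics" object.
    """
    scores = entry["result_metrics"]
    entry["result_metrics_average"] = sum(scores.values()) / len(scores)
    # npm sketch: rescale each score against a task baseline before averaging.
    normalized = [
        (v - BASELINES.get(task, 0.0)) / (1.0 - BASELINES.get(task, 0.0))
        for task, v in scores.items()
    ]
    entry["result_metrics_npm"] = sum(normalized) / len(normalized)

with open("external_models_results.json") as fp:
    for entry in json.load(fp):
        recompute_aggregates(entry)
        print(entry["model"], entry["result_metrics_average"], entry["result_metrics_npm"])
```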