eduagarcia committed • 7d8dd52
Parent(s): c43476e

fix f1_score calculation bug
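The commit message points at an F1-score calculation bug, and the diff below rewrites only fields the leaderboard reports as F1 scores (hatebr_offensive, portuguese_hate_speech, tweetsentbr, and for some models assin2_rte and faquad_nli), plus the two aggregates derived from them; the accuracy- and Pearson-based fields (enem, bluex, oab_exams, assin2_sts) are untouched. The buggy code itself is not part of this commit, so purely as a hedged illustration: one common bug of this shape is mixing up F1 averaging modes, e.g. scikit-learn's `f1_score` computing binary F1 where macro F1 was intended. The sketch below shows that distinction; the labels and task framing are invented for illustration, not taken from the leaderboard's data or evaluation harness.

```python
from sklearn.metrics import f1_score

# Invented toy labels for a binary offensive-language task; illustration
# only, not the leaderboard's data or its actual evaluation code.
y_true = [1, 1, 1, 1, 0, 0]
y_pred = [1, 1, 1, 0, 0, 1]

# average="binary" scores only the positive class.
print(f1_score(y_true, y_pred, average="binary"))  # 0.75

# average="macro" averages the per-class F1 scores; confusing the two
# (or re-averaging already-averaged scores) shifts every reported value.
print(f1_score(y_true, y_pred, average="macro"))   # 0.625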
external_models_results.json  CHANGED  (+23 -23)
@@ -14,12 +14,12 @@
       "assin2_sts": 0.7053302344881672,
       "assin2_rte": 0.9121728362223306,
       "faquad_nli": 0.7575848453041435,
-      "hatebr_offensive": 0.
-      "portuguese_hate_speech": 0.
-      "tweetsentbr": 0.
+      "hatebr_offensive": 0.753800795680591,
+      "portuguese_hate_speech": 0.6975326368290793,
+      "tweetsentbr": 0.7119699374276466
     },
-    "result_metrics_average": 0.
-    "result_metrics_npm": 0.
+    "result_metrics_average": 0.7163399980921773,
+    "result_metrics_npm": 0.5744541501392351
   },
   {
     "model": "sabia-2-medium",
@@ -82,10 +82,10 @@
       "faquad_nli": 0.6340996599941455,
       "hatebr_offensive": 0.8023698759439051,
       "portuguese_hate_speech": 0.7342166269560177,
-      "tweetsentbr": 0.
+      "tweetsentbr": 0.7303315733000207
     },
-    "result_metrics_average": 0.
-    "result_metrics_npm": 0.
+    "result_metrics_average": 0.7415141327519107,
+    "result_metrics_npm": 0.6037151240886439
   },
   {
     "model": "gemini-1.0-pro",
@@ -122,14 +122,14 @@
       "bluex": 0.7719054242002782,
       "oab_exams": 0.6888382687927107,
       "assin2_sts": 0.8159702278408203,
-      "assin2_rte": 0.
-      "faquad_nli": 0.
-      "hatebr_offensive": 0.
-      "portuguese_hate_speech": 0.
-      "tweetsentbr": 0.
+      "assin2_rte": 0.4651063829787234,
+      "faquad_nli": 0.4114285714285714,
+      "hatebr_offensive": 0.4422336328626444,
+      "portuguese_hate_speech": 0.41441441441441434,
+      "tweetsentbr": 0.7725066133902373
     },
-    "result_metrics_average": 0.
-    "result_metrics_npm": 0.
+    "result_metrics_average": 0.6259275836103805,
+    "result_metrics_npm": 0.3545226679161642
   },
   {
     "model": "deepseek-v2-chat",
@@ -256,14 +256,14 @@
       "bluex": 0.8011126564673157,
       "oab_exams": 0.7640091116173121,
       "assin2_sts": 0.7888441732870783,
-      "assin2_rte": 0.
+      "assin2_rte": 0.9476445477916471,
       "faquad_nli": 0.825063276593557,
       "hatebr_offensive": 0.9073940659389119,
       "portuguese_hate_speech": 0.7191480935512969,
       "tweetsentbr": 0.7821434639106575
     },
-    "result_metrics_average": 0.
-    "result_metrics_npm": 0.
+    "result_metrics_average": 0.8208559650965292,
+    "result_metrics_npm": 0.7286932366792048
   },
   {
     "model": "sabia-3",
@@ -280,11 +280,11 @@
       "assin2_sts": 0.8253863689009022,
       "assin2_rte": 0.9477034821619312,
       "faquad_nli": 0.8243848812618203,
-      "hatebr_offensive": 0.
-      "portuguese_hate_speech": 0.
-      "tweetsentbr": 0.
+      "hatebr_offensive": 0.8278737774590023,
+      "portuguese_hate_speech": 0.7241071428571428,
+      "tweetsentbr": 0.7510613086648664
     },
-    "result_metrics_average": 0.
-    "result_metrics_npm": 0.
+    "result_metrics_average": 0.8231799251828895,
+    "result_metrics_npm": 0.7241097388486535
   }
 ]
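Since `result_metrics_average` and `result_metrics_npm` change in every entry whose task scores change, they behave as aggregates derived from the per-task columns. Below is a minimal sketch of recomputing them from the corrected file, under stated assumptions: that the per-task scores sit under a `result_metrics` key, that the average is the plain mean over all tasks, and that npm ("normalized preferred metric") rescales each score against a per-task random baseline. The baseline constants and key names are placeholders for illustration, not the leaderboard's actual code.

```python
import json

# Hypothetical per-task random baselines; the real constants are not
# shown in this diff, so these are placeholders for illustration.
RANDOM_BASELINES = {
    "assin2_rte": 0.5,
    "faquad_nli": 0.5,
    "hatebr_offensive": 0.5,
    # ... one entry per task
}

def recompute_aggregates(result_metrics: dict) -> tuple[float, float]:
    """Recompute the two aggregate fields from the per-task scores."""
    scores = list(result_metrics.values())
    average = sum(scores) / len(scores)  # plain mean over all tasks
    # Normalized score: distance above the random baseline, rescaled
    # so a baseline-level score maps to 0 and a perfect score to 1.
    npm = sum(
        (score - RANDOM_BASELINES.get(task, 0.0))
        / (1.0 - RANDOM_BASELINES.get(task, 0.0))
        for task, score in result_metrics.items()
    ) / len(result_metrics)
    return average, npm

with open("external_models_results.json") as f:
    for entry in json.load(f):
        avg, npm = recompute_aggregates(entry["result_metrics"])
        print(entry["model"], round(avg, 6), round(npm, 6))
```

Running this against the corrected file should reproduce the `+` values in the diff if the assumptions above match the leaderboard's actual aggregation.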