shahrukhx01
committed on
Commit
·
240ea72
1
Parent(s):
0aae977
Update README.md
Browse files
README.md
CHANGED
@@ -1,4 +1,20 @@
|
|
1 |
-
BoolQ Validation dataset Evaluation: <br/>
|
2 |
support => 3270 <br/>
|
3 |
accuracy => 0.73 <br/>
|
4 |
-
macro f1 => 0.71
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## BoolQ Validation dataset Evaluation: <br/>
|
2 |
support => 3270 <br/>
|
3 |
accuracy => 0.73 <br/>
|
4 |
+
macro f1 => 0.71
|
5 |
+
|
6 |
+
## SQuAD Validation dataset Evaluation: <br/>
|
7 |
+
eval_HasAns_exact = 55.9885<br/>
|
8 |
+
eval_HasAns_f1 = 70.4997<br/>
|
9 |
+
eval_HasAns_total = 5928<br/>
|
10 |
+
eval_NoAns_exact = 20.5719<br/>
|
11 |
+
eval_NoAns_f1 = 20.5719<br/>
|
12 |
+
eval_NoAns_total = 5945<br/>
|
13 |
+
eval_best_exact = 50.0969<br/>
|
14 |
+
eval_best_exact_thresh = 0.0<br/>
|
15 |
+
eval_best_f1 = 50.1<br/>
|
16 |
+
eval_best_f1_thresh = 0.0<br/>
|
17 |
+
eval_exact = 38.2549<br/>
|
18 |
+
eval_f1 = 45.5<br/>
|
19 |
+
eval_samples = 12165<br/>
|
20 |
+
eval_total = 11873
|