Update README.md
Browse files
README.md
CHANGED
@@ -93,7 +93,29 @@ BigBench:
|
|
93 |
|bigbench_tracking_shuffled_objects_seven_objects| 0|multiple_choice_grade|0.1509|± |0.0086|
|
94 |
|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|0.3833|± |0.0281|
|
95 |
Average: 0.3367
|
96 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
TruthfulQA:
|
99 |
```
|
|
|
93 |
|bigbench_tracking_shuffled_objects_seven_objects| 0|multiple_choice_grade|0.1509|± |0.0086|
|
94 |
|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|0.3833|± |0.0281|
|
95 |
Average: 0.3367
|
96 |
+
```
|
97 |
+
|
98 |
+
AGI Eval:
|
99 |
+
```
|
100 |
+
| Task |Version| Metric |Value | |Stderr|
|
101 |
+
|------------------------------|------:|--------|-----:|---|-----:|
|
102 |
+
|agieval_aqua_rat | 0|acc |0.2441|± |0.0270|
|
103 |
+
| | |acc_norm|0.2402|± |0.0269|
|
104 |
+
|agieval_logiqa_en | 0|acc |0.2458|± |0.0169|
|
105 |
+
| | |acc_norm|0.2965|± |0.0179|
|
106 |
+
|agieval_lsat_ar | 0|acc |0.2522|± |0.0287|
|
107 |
+
| | |acc_norm|0.2130|± |0.0271|
|
108 |
+
|agieval_lsat_lr | 0|acc |0.2745|± |0.0198|
|
109 |
+
| | |acc_norm|0.2686|± |0.0196|
|
110 |
+
|agieval_lsat_rc | 0|acc |0.2900|± |0.0277|
|
111 |
+
| | |acc_norm|0.2379|± |0.0260|
|
112 |
+
|agieval_sat_en | 0|acc |0.4466|± |0.0347|
|
113 |
+
| | |acc_norm|0.3738|± |0.0338|
|
114 |
+
|agieval_sat_en_without_passage| 0|acc |0.3738|± |0.0338|
|
115 |
+
| | |acc_norm|0.3301|± |0.0328|
|
116 |
+
|agieval_sat_math | 0|acc |0.2318|± |0.0285|
|
117 |
+
| | |acc_norm|0.1864|± |0.0263|
|
118 |
+
Average: 0.2683
```
|
119 |
|
120 |
TruthfulQA:
|
121 |
```
|