|
{ |
|
"results": { |
|
"anli_r1": { |
|
"acc": 0.323, |
|
"acc_stderr": 0.014794927843348633 |
|
}, |
|
"anli_r2": { |
|
"acc": 0.317, |
|
"acc_stderr": 0.014721675438880236 |
|
}, |
|
"anli_r3": { |
|
"acc": 0.3625, |
|
"acc_stderr": 0.013883037874225516 |
|
}, |
|
"cb": { |
|
"acc": 0.35714285714285715, |
|
"acc_stderr": 0.06460957383809221, |
|
"f1": 0.19573820395738203 |
|
}, |
|
"copa": { |
|
"acc": 0.79, |
|
"acc_stderr": 0.040936018074033256 |
|
}, |
|
"hellaswag": { |
|
"acc": 0.4592710615415256, |
|
"acc_stderr": 0.004973199296339971, |
|
"acc_norm": 0.6106353316072496, |
|
"acc_norm_stderr": 0.00486609688094144 |
|
}, |
|
"rte": { |
|
"acc": 0.5126353790613718, |
|
"acc_stderr": 0.030086851767188564 |
|
}, |
|
"winogrande": { |
|
"acc": 0.585635359116022, |
|
"acc_stderr": 0.013844846232268565 |
|
}, |
|
"storycloze_2016": { |
|
"acc": 0.7194013896312133, |
|
"acc_stderr": 0.01038980964728882 |
|
}, |
|
"boolq": { |
|
"acc": 0.5755351681957187, |
|
"acc_stderr": 0.008644688121685498 |
|
}, |
|
"arc_easy": { |
|
"acc": 0.5942760942760943, |
|
"acc_stderr": 0.010075755540128873, |
|
"acc_norm": 0.5757575757575758, |
|
"acc_norm_stderr": 0.010141333654958552 |
|
}, |
|
"arc_challenge": { |
|
"acc": 0.2790102389078498, |
|
"acc_stderr": 0.013106784883601333, |
|
"acc_norm": 0.30802047781569963, |
|
"acc_norm_stderr": 0.013491429517292038 |
|
}, |
|
"sciq": { |
|
"acc": 0.835, |
|
"acc_stderr": 0.01174363286691616, |
|
"acc_norm": 0.788, |
|
"acc_norm_stderr": 0.01293148186493805 |
|
}, |
|
"piqa": { |
|
"acc": 0.7540805223068553, |
|
"acc_stderr": 0.010047331865625194, |
|
"acc_norm": 0.7589771490750816, |
|
"acc_norm_stderr": 0.009979042717267314 |
|
} |
|
}, |
|
"versions": { |
|
"anli_r1": 0, |
|
"anli_r2": 0, |
|
"anli_r3": 0, |
|
"cb": 1, |
|
"copa": 0, |
|
"hellaswag": 0, |
|
"rte": 0, |
|
"winogrande": 0, |
|
"storycloze_2016": 0, |
|
"boolq": 1, |
|
"arc_easy": 0, |
|
"arc_challenge": 0, |
|
"sciq": 0, |
|
"piqa": 0 |
|
} |
|
} |