| task,metric,value,err,version | |
| anli_r1,acc,0.35,0.015090650341444233,0 | |
| anli_r2,acc,0.361,0.015195720118175127,0 | |
| anli_r3,acc,0.31416666666666665,0.013405399314984101,0 | |
| arc_challenge,acc,0.2764505119453925,0.013069662474252425,0 | |
| arc_challenge,acc_norm,0.2977815699658703,0.013363080107244487,0 | |
| arc_easy,acc,0.5942760942760943,0.010075755540128873,0 | |
| arc_easy,acc_norm,0.5728114478114478,0.010150415974210875,0 | |
| boolq,acc,0.5929663608562691,0.00859256288706887,1 | |
| cb,acc,0.39285714285714285,0.0658538889806635,1 | |
| cb,f1,0.3300527326188914,,1 | |
| copa,acc,0.7,0.046056618647183814,0 | |
| hellaswag,acc,0.45180242979486157,0.004966544724452228,0 | |
| hellaswag,acc_norm,0.6045608444532962,0.00487945547466382,0 | |
| piqa,acc,0.7453754080522307,0.01016443223706049,0 | |
| piqa,acc_norm,0.7551686615886833,0.01003230910556881,0 | |
| rte,acc,0.5342960288808665,0.030025579819366426,0 | |
| sciq,acc,0.898,0.009575368801653897,0 | |
| sciq,acc_norm,0.892,0.009820001651345703,0 | |
| storycloze_2016,acc,0.6985569214323891,0.010611646032767588,0 | |
| winogrande,acc,0.5603788476716653,0.013949649776015694,0 | |