|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811483,0
|
|
anli_r2,acc,0.337,0.014955087918653607,0
|
|
anli_r3,acc,0.3516666666666667,0.013789711695404792,0
|
|
arc_challenge,acc,0.24829351535836178,0.012624912868089762,0
|
|
arc_challenge,acc_norm,0.2713310580204778,0.012993807727545796,0
|
|
arc_easy,acc,0.5589225589225589,0.010188293221040569,0
|
|
arc_easy,acc_norm,0.494949494949495,0.010259260102565853,0
|
|
boolq,acc,0.5425076452599389,0.00871339478784262,1
|
|
cb,acc,0.30357142857142855,0.06199938655510753,1
|
|
cb,f1,0.2584656084656085,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4444333798048198,0.004958872288442143,0
|
|
hellaswag,acc_norm,0.5771758613821948,0.00492998369279507,0
|
|
piqa,acc,0.7464635473340587,0.010150090834551791,0
|
|
piqa,acc_norm,0.7540805223068553,0.01004733186562518,0
|
|
rte,acc,0.5306859205776173,0.030039730592197812,0
|
|
sciq,acc,0.816,0.012259457340938577,0
|
|
sciq,acc_norm,0.734,0.01397996564514515,0
|
|
storycloze_2016,acc,0.7156600748262961,0.01043161412866525,0
|
|
winogrande,acc,0.5722178374112076,0.013905134013839953,0
|
|
|