lm1-misc-pile/619m22b22b/evaluation/rankeval/lm1-619m-22b-results_lm-eval_global_step41007_2023-01-22-18-55-49_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.338,0.01496596071022448,0
anli_r2,acc,0.336,0.014944140233795027,0
anli_r3,acc,0.32916666666666666,0.013570806258433618,0
arc_challenge,acc,0.20563139931740615,0.011810745260742567,0
arc_challenge,acc_norm,0.2354948805460751,0.012399451855004759,0
arc_easy,acc,0.45875420875420875,0.01022481573025582,0
arc_easy,acc_norm,0.4166666666666667,0.010116282977781244,0
boolq,acc,0.5602446483180428,0.008681343983423956,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.1940928270042194,,1
copa,acc,0.63,0.04852365870939099,0
hellaswag,acc,0.30013941445927106,0.004573817163007456,0
hellaswag,acc_norm,0.335291774546903,0.004711275408138415,0
piqa,acc,0.64689880304679,0.011150983944502308,0
piqa,acc_norm,0.6452665941240479,0.011162617854280297,0
rte,acc,0.5523465703971119,0.02993107036293953,0
sciq,acc,0.788,0.012931481864938033,0
sciq,acc_norm,0.696,0.014553205687950424,0
storycloze_2016,acc,0.5873864243719936,0.011384472322969047,0
winogrande,acc,0.5074980268350434,0.014050905521228577,0