lm1-misc-pile / 2b246b46b /evaluation /lm1-2b2-46b-results_lm-eval_global_step44073_2023-01-24-13-53-38_2shots.csv
Muennighoff's picture
Add
956e279
raw
history blame contribute delete
No virus
1.05 kB
task,metric,value,err,version
anli_r1,acc,0.328,0.01485384248727033,0
anli_r2,acc,0.328,0.014853842487270333,0
anli_r3,acc,0.325,0.013526454480351025,0
arc_challenge,acc,0.25170648464163825,0.012682496334042961,0
arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0
arc_easy,acc,0.5618686868686869,0.010180937100600076,0
arc_easy,acc_norm,0.5441919191919192,0.010219631763437851,0
boolq,acc,0.4828746177370031,0.008739923994130054,1
cb,acc,0.4107142857142857,0.0663363415035954,1
cb,f1,0.29069767441860467,,1
copa,acc,0.74,0.0440844002276808,0
hellaswag,acc,0.351822346146186,0.004765629263643526,0
hellaswag,acc_norm,0.43537143995220073,0.004947922692688831,0
piqa,acc,0.6822633297062024,0.010863133246569285,0
piqa,acc_norm,0.6806311207834603,0.010877964076613742,0
rte,acc,0.49458483754512633,0.030094698123239966,0
sciq,acc,0.902,0.009406619184621224,0
sciq,acc_norm,0.901,0.009449248027662732,0
storycloze_2016,acc,0.6424371993586317,0.01108334116882779,0
winogrande,acc,0.5438042620363063,0.013998453610924324,0