lm1-2b8-55b-c4-repetitions / evaluation /2b855b55bc4-results_lm-eval_global_step52452_2022-12-28-09-24-40.csv
Muennighoff's picture
Add
6134937
raw
history blame
No virus
1.06 kB
task,metric,value,err,version
anli_r1,acc,0.333,0.014910846164229871,0
anli_r2,acc,0.342,0.01500870618212173,0
anli_r3,acc,0.3383333333333333,0.013664144006618278,0
arc_challenge,acc,0.23890784982935154,0.012461071376316614,0
arc_challenge,acc_norm,0.27986348122866894,0.013119040897725925,0
arc_easy,acc,0.561026936026936,0.010183076012972067,0
arc_easy,acc_norm,0.5084175084175084,0.010258329515226459,0
boolq,acc,0.6119266055045871,0.008523130584760848,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.18803418803418803,,1
copa,acc,0.76,0.04292346959909282,0
hellaswag,acc,0.4357697669786895,0.004948439229523916,0
hellaswag,acc_norm,0.5615415255925115,0.0049518409782196935,0
piqa,acc,0.7328618063112078,0.010323440492612433,0
piqa,acc_norm,0.7426550598476604,0.01019992106479251,0
rte,acc,0.5234657039711191,0.03006330041190266,0
sciq,acc,0.812,0.012361586015103761,0
sciq,acc_norm,0.729,0.014062601350986186,0
storycloze_2016,acc,0.694815606627472,0.010648664383985661,0
winogrande,acc,0.5682715074980268,0.013920872110010713,0