Muennighoff commited on
Commit
9827944
1 Parent(s): 7c781ff
4b284b84bc4/evaluation/rankeval/4b284b84bc4_0.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,metric,value,err,version
2
+ anli_r1,acc,0.328,0.014853842487270334,0
3
+ anli_r2,acc,0.337,0.014955087918653605,0
4
+ anli_r3,acc,0.32416666666666666,0.013517438120881624,0
5
+ arc_challenge,acc,0.20648464163822525,0.011828865619002316,0
6
+ arc_challenge,acc_norm,0.2551194539249147,0.012739038695202109,0
7
+ arc_easy,acc,0.40614478114478114,0.010077409815364048,0
8
+ arc_easy,acc_norm,0.3766835016835017,0.009942848077476172,0
9
+ boolq,acc,0.6256880733944954,0.00846424665644323,1
10
+ cb,acc,0.26785714285714285,0.05971290310957636,1
11
+ cb,f1,0.18656056587091072,,1
12
+ copa,acc,0.76,0.04292346959909283,0
13
+ hellaswag,acc,0.4547898824935272,0.004969341773423513,0
14
+ hellaswag,acc_norm,0.5937064329814777,0.004901368629533419,0
15
+ piqa,acc,0.6561479869423286,0.011082356277961393,0
16
+ piqa,acc_norm,0.6528835690968444,0.011107104993128086,0
17
+ rte,acc,0.5595667870036101,0.029882123363118726,0
18
+ sciq,acc,0.775,0.013211720158614756,0
19
+ sciq,acc_norm,0.709,0.014370995982377933,0
20
+ storycloze_2016,acc,0.694815606627472,0.010648664383985661,0
21
+ winogrande,acc,0.5769534333070244,0.01388505535905647,0
4b284b84bc4/evaluation/rankeval/4b284b84bc4_1.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,metric,value,err,version
2
+ anli_r1,acc,0.303,0.014539683710535264,0
3
+ anli_r2,acc,0.312,0.01465847437050901,0
4
+ anli_r3,acc,0.3491666666666667,0.013767075395077247,0
5
+ arc_challenge,acc,0.23122866894197952,0.012320858834772273,0
6
+ arc_challenge,acc_norm,0.2619453924914676,0.012849054826858115,0
7
+ arc_easy,acc,0.5231481481481481,0.010248782484554473,0
8
+ arc_easy,acc_norm,0.4819023569023569,0.010253060653479177,0
9
+ boolq,acc,0.6134556574923548,0.008516943934341973,1
10
+ cb,acc,0.30357142857142855,0.06199938655510753,1
11
+ cb,f1,0.2927120669056153,,1
12
+ copa,acc,0.79,0.040936018074033256,0
13
+ hellaswag,acc,0.4506074487153953,0.0049653753416431376,0
14
+ hellaswag,acc_norm,0.5834495120493925,0.004919794704673269,0
15
+ piqa,acc,0.6887921653971708,0.010802263878045844,0
16
+ piqa,acc_norm,0.6866158868335147,0.010822829929195489,0
17
+ rte,acc,0.5342960288808665,0.030025579819366422,0
18
+ sciq,acc,0.88,0.010281328012747391,0
19
+ sciq,acc_norm,0.863,0.010878848714333327,0
20
+ storycloze_2016,acc,0.6996258685195083,0.010600915927985033,0
21
+ winogrande,acc,0.6077348066298343,0.013722400462000883,0
4b284b84bc4/evaluation/rankeval/4b284b84bc4_2.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,metric,value,err,version
2
+ anli_r1,acc,0.315,0.014696631960792498,0
3
+ anli_r2,acc,0.341,0.014998131348402702,0
4
+ anli_r3,acc,0.34,0.0136804957257678,0
5
+ arc_challenge,acc,0.2636518771331058,0.01287592915129705,0
6
+ arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0
7
+ arc_easy,acc,0.5593434343434344,0.010187264635711983,0
8
+ arc_easy,acc_norm,0.5298821548821548,0.010241444322886432,0
9
+ boolq,acc,0.6119266055045871,0.008523130584760851,1
10
+ cb,acc,0.14285714285714285,0.04718416136255829,1
11
+ cb,f1,0.143010752688172,,1
12
+ copa,acc,0.79,0.040936018074033256,0
13
+ hellaswag,acc,0.4473212507468632,0.004962010338226347,0
14
+ hellaswag,acc_norm,0.5848436566421031,0.0049174193677660296,0
15
+ piqa,acc,0.7089227421109902,0.010598612490942586,0
16
+ piqa,acc_norm,0.7143634385201306,0.010539303948661916,0
17
+ rte,acc,0.4729241877256318,0.0300523034631437,0
18
+ sciq,acc,0.906,0.009233052000787738,0
19
+ sciq,acc_norm,0.902,0.009406619184621226,0
20
+ storycloze_2016,acc,0.7156600748262961,0.01043161412866526,0
21
+ winogrande,acc,0.601420678768745,0.013760357176873838,0
4b284b84bc4/evaluation/rankeval/4b284b84bc4_3.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,metric,value,err,version
2
+ anli_r1,acc,0.338,0.014965960710224489,0
3
+ anli_r2,acc,0.332,0.014899597242811476,0
4
+ anli_r3,acc,0.3325,0.013605417345710526,0
5
+ arc_challenge,acc,0.27986348122866894,0.01311904089772592,0
6
+ arc_challenge,acc_norm,0.29180887372013653,0.013284525292403506,0
7
+ arc_easy,acc,0.5765993265993266,0.010138671005289047,0
8
+ arc_easy,acc_norm,0.5585016835016835,0.010189314382749929,0
9
+ boolq,acc,0.6079510703363914,0.008538802914911992,1
10
+ cb,acc,0.08928571428571429,0.038450387280282494,1
11
+ cb,f1,0.0871517027863777,,1
12
+ copa,acc,0.83,0.03775251680686371,0
13
+ hellaswag,acc,0.4525990838478391,0.0049673082544257514,0
14
+ hellaswag,acc_norm,0.5948018323043218,0.004899270310557971,0
15
+ piqa,acc,0.7257889009793254,0.010408618664933382,0
16
+ piqa,acc_norm,0.7334058759521219,0.010316749863541365,0
17
+ rte,acc,0.49458483754512633,0.03009469812323996,0
18
+ sciq,acc,0.908,0.009144376393151086,0
19
+ sciq,acc_norm,0.906,0.009233052000787738,0
20
+ storycloze_2016,acc,0.7135221806520577,0.01045510591863303,0
21
+ winogrande,acc,0.585635359116022,0.01384484623226856,0
4b284b84bc4/evaluation/rankeval/4b284b84bc4_4.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,metric,value,err,version
2
+ anli_r1,acc,0.334,0.014922019523732961,0
3
+ anli_r2,acc,0.34,0.014987482264363935,0
4
+ anli_r3,acc,0.35083333333333333,0.013782212417178197,0
5
+ arc_challenge,acc,0.27559726962457337,0.01305716965576184,0
6
+ arc_challenge,acc_norm,0.30802047781569963,0.01349142951729204,0
7
+ arc_easy,acc,0.5808080808080808,0.010124905282491183,0
8
+ arc_easy,acc_norm,0.5711279461279462,0.010155440652900152,0
9
+ boolq,acc,0.6116207951070336,0.008524357307908792,1
10
+ cb,acc,0.17857142857142858,0.051642771820087224,1
11
+ cb,f1,0.18279613107199313,,1
12
+ copa,acc,0.82,0.03861229196653697,0
13
+ hellaswag,acc,0.454690300736905,0.004969251445596333,0
14
+ hellaswag,acc_norm,0.5943039235212109,0.004900227226433378,0
15
+ piqa,acc,0.7285092491838956,0.010376251176596137,0
16
+ piqa,acc_norm,0.7388465723612623,0.010248738649935587,0
17
+ rte,acc,0.4548736462093863,0.029973636495415255,0
18
+ sciq,acc,0.915,0.008823426366942317,0
19
+ sciq,acc_norm,0.919,0.008632121032139993,0
20
+ storycloze_2016,acc,0.7247461250668092,0.010328538400500567,0
21
+ winogrande,acc,0.606156274664562,0.013732114472668741,0
4b284b84bc4/evaluation/rankeval/4b284b84bc4_5.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,metric,value,err,version
2
+ anli_r1,acc,0.348,0.01507060460376841,0
3
+ anli_r2,acc,0.342,0.015008706182121738,0
4
+ anli_r3,acc,0.33,0.013579531277800918,0
5
+ arc_challenge,acc,0.28754266211604096,0.01322671905626613,0
6
+ arc_challenge,acc_norm,0.31313993174061433,0.013552671543623504,0
7
+ arc_easy,acc,0.5900673400673401,0.010091953527506246,0
8
+ arc_easy,acc_norm,0.5791245791245792,0.01013050216406634,0
9
+ boolq,acc,0.6201834862385321,0.008488668235778613,1
10
+ cb,acc,0.26785714285714285,0.05971290310957636,1
11
+ cb,f1,0.2511904761904762,,1
12
+ copa,acc,0.81,0.03942772444036623,0
13
+ hellaswag,acc,0.4565823541127266,0.0049709334202319285,0
14
+ hellaswag,acc_norm,0.6061541525592511,0.0048760280379419405,0
15
+ piqa,acc,0.7317736670293797,0.010336761992404485,0
16
+ piqa,acc_norm,0.7448313384113167,0.010171571592521828,0
17
+ rte,acc,0.5379061371841155,0.030009848912529117,0
18
+ sciq,acc,0.918,0.008680515615523746,0
19
+ sciq,acc_norm,0.917,0.00872852720607479,0
20
+ storycloze_2016,acc,0.7354355959380011,0.01020040054171416,0
21
+ winogrande,acc,0.6037884767166535,0.013746404157154946,0