Step=512 Train={'loss': 0.2522342102420225, 'acc': 0.9353003393916879} Hellaswag/choice={'accuracy': 0.2432782314280024} Hellaswag/no_choice={'accuracy': 0.39533957379008167} obqa/main={'accuracy': 0.352} winogrande/no_choice={'accuracy': 0.5288082083662194} arc/ARC-Challenge={'accuracy': 0.28762541806020064} arc/ARC-Easy={'accuracy': 0.23684210526315788} super_glue/boolq={'accuracy': 0.6207951070336392} piqa/no_choice={'accuracy': 0.6702937976060935} GLUE/cola={'matthews_correlation': 0.07366254903146852} GLUE/mnli_matched={'accuracy': 0.50412633723892} GLUE/mnli_mismatched={'accuracy': 0.5191212367778681} GLUE/mrpc={'accuracy': 0.5955882352941176, 'f1': 0.6224256292906178} GLUE/qnli={'accuracy': 0.638843126487278} GLUE/qqp={'accuracy': 0.6608953747217413, 'f1': 0.1716012084592145} GLUE/rte={'accuracy': 0.5451263537906137} GLUE/sst2={'accuracy': 0.7981651376146789} GLUE/stsb={'pearson': 0.40257920134653435, 'spearmanr': 0.4067145869932732} GLUE/wnli={'accuracy': 0.43661971830985913} race/middle={'accuracy': 0.28690807799442897} race/high={'accuracy': 0.27325412923790204} Step=1024 Train={'loss': 0.2315280896382319, 'acc': 0.93988147865457} Hellaswag/choice={'accuracy': 0.25652260505875324} Hellaswag/no_choice={'accuracy': 0.3986257717586138} obqa/main={'accuracy': 0.304} winogrande/no_choice={'accuracy': 0.5477505919494869} arc/ARC-Challenge={'accuracy': 0.27424749163879597} arc/ARC-Easy={'accuracy': 0.24912280701754386} super_glue/boolq={'accuracy': 0.6993883792048929} piqa/no_choice={'accuracy': 0.6708378672470077} GLUE/cola={'matthews_correlation': 0.018148342420931135} GLUE/mnli_matched={'accuracy': 0.6188487009679062} GLUE/mnli_mismatched={'accuracy': 0.6425956061838893} GLUE/mrpc={'accuracy': 0.5122549019607843, 'f1': 0.48311688311688306} GLUE/qnli={'accuracy': 0.7206663005674537} GLUE/qqp={'accuracy': 0.7263665594855305, 'f1': 0.46589098633708304} GLUE/rte={'accuracy': 0.6389891696750902} GLUE/sst2={'accuracy': 0.8532110091743119} GLUE/stsb={'pearson': 0.7411057905516302, 'spearmanr': 0.7461350353633797} GLUE/wnli={'accuracy': 0.4507042253521127} race/middle={'accuracy': 0.3050139275766017} race/high={'accuracy': 0.2984642132715155} Step=1536 Train={'loss': 0.22321232529452573, 'acc': 0.9419659655977739} Hellaswag/choice={'accuracy': 0.26687910774746065} Hellaswag/no_choice={'accuracy': 0.40599482174865564} obqa/main={'accuracy': 0.41} winogrande/no_choice={'accuracy': 0.5374901341752171} arc/ARC-Challenge={'accuracy': 0.34448160535117056} arc/ARC-Easy={'accuracy': 0.38421052631578945} super_glue/boolq={'accuracy': 0.7318042813455657} piqa/no_choice={'accuracy': 0.6887921653971708} GLUE/cola={'matthews_correlation': 0.0592680243795702} GLUE/mnli_matched={'accuracy': 0.692002037697402} GLUE/mnli_mismatched={'accuracy': 0.7192839707078926} GLUE/mrpc={'accuracy': 0.75, 'f1': 0.8152173913043477} GLUE/qnli={'accuracy': 0.7305509793153945} GLUE/qqp={'accuracy': 0.7809794706900817, 'f1': 0.7399641734942589} GLUE/rte={'accuracy': 0.7292418772563177} GLUE/sst2={'accuracy': 0.8956422018348624} GLUE/stsb={'pearson': 0.8148224387760193, 'spearmanr': 0.8217037925795925} GLUE/wnli={'accuracy': 0.4507042253521127} race/middle={'accuracy': 0.44846796657381616} race/high={'accuracy': 0.3790205737467401} Step=2048 Train={'loss': 0.21945939150191407, 'acc': 0.9429137157858349} Hellaswag/choice={'accuracy': 0.31069508066122287} Hellaswag/no_choice={'accuracy': 0.41147181836287594} obqa/main={'accuracy': 0.458} winogrande/no_choice={'accuracy': 0.5501183898973955} arc/ARC-Challenge={'accuracy': 0.33444816053511706} arc/ARC-Easy={'accuracy': 0.43859649122807015} super_glue/boolq={'accuracy': 0.7168195718654434} piqa/no_choice={'accuracy': 0.6985854189336235} GLUE/cola={'matthews_correlation': 0.16848752002152778} GLUE/mnli_matched={'accuracy': 0.7270504330106979} GLUE/mnli_mismatched={'accuracy': 0.7459316517493898} GLUE/mrpc={'accuracy': 0.6740196078431373, 'f1': 0.7200000000000001} GLUE/qnli={'accuracy': 0.7700896943071572} GLUE/qqp={'accuracy': 0.7724214692060352, 'f1': 0.6149725907017618} GLUE/rte={'accuracy': 0.7148014440433214} GLUE/sst2={'accuracy': 0.9025229357798165} GLUE/stsb={'pearson': 0.8246185257017313, 'spearmanr': 0.8350095448603486} GLUE/wnli={'accuracy': 0.4507042253521127} race/middle={'accuracy': 0.5125348189415042} race/high={'accuracy': 0.47029846421327154} Step=2560 Train={'loss': 0.21488739675896795, 'acc': 0.9439892922091531} Hellaswag/choice={'accuracy': 0.3314080860386377} Hellaswag/no_choice={'accuracy': 0.41724756024696275} obqa/main={'accuracy': 0.438} winogrande/no_choice={'accuracy': 0.5588003157063931} arc/ARC-Challenge={'accuracy': 0.36789297658862874} arc/ARC-Easy={'accuracy': 0.45263157894736844} super_glue/boolq={'accuracy': 0.7394495412844037} piqa/no_choice={'accuracy': 0.691512513601741} GLUE/cola={'matthews_correlation': 0.13283318224051427} GLUE/mnli_matched={'accuracy': 0.6886398369842078} GLUE/mnli_mismatched={'accuracy': 0.7149104963384866} GLUE/mrpc={'accuracy': 0.7279411764705882, 'f1': 0.7948243992606283} GLUE/qnli={'accuracy': 0.7918725974739155} GLUE/qqp={'accuracy': 0.7937175364828098, 'f1': 0.7043391945547363} GLUE/rte={'accuracy': 0.7256317689530686} GLUE/sst2={'accuracy': 0.9071100917431193} GLUE/stsb={'pearson': 0.8263292509858257, 'spearmanr': 0.8273984673778166} GLUE/wnli={'accuracy': 0.4225352112676056} race/middle={'accuracy': 0.532033426183844} race/high={'accuracy': 0.48246884960880904} Step=3072 Train={'loss': 0.21078881736502808, 'acc': 0.9449855978600681} Hellaswag/choice={'accuracy': 0.36586337382991435} Hellaswag/no_choice={'accuracy': 0.41884086835291773} obqa/main={'accuracy': 0.434} winogrande/no_choice={'accuracy': 0.5509076558800315} arc/ARC-Challenge={'accuracy': 0.38461538461538464} arc/ARC-Easy={'accuracy': 0.4473684210526316} super_glue/boolq={'accuracy': 0.7593272171253823} piqa/no_choice={'accuracy': 0.6893362350380848} GLUE/cola={'matthews_correlation': 0.10831649799274126} GLUE/mnli_matched={'accuracy': 0.7113601630157922} GLUE/mnli_mismatched={'accuracy': 0.726606997558991} GLUE/mrpc={'accuracy': 0.7230392156862745, 'f1': 0.7871939736346516} GLUE/qnli={'accuracy': 0.8142046494600037} GLUE/qqp={'accuracy': 0.8000247341083354, 'f1': 0.7119803355776424} GLUE/rte={'accuracy': 0.7256317689530686} GLUE/sst2={'accuracy': 0.9025229357798165} GLUE/stsb={'pearson': 0.82582592808152, 'spearmanr': 0.8296324719979368} GLUE/wnli={'accuracy': 0.43661971830985913} race/middle={'accuracy': 0.5466573816155988} race/high={'accuracy': 0.5123152709359606} Step=3584 Train={'loss': 0.20607954572187737, 'acc': 0.9461158117628656} Hellaswag/choice={'accuracy': 0.3822943636725752} Hellaswag/no_choice={'accuracy': 0.4244174467237602} obqa/main={'accuracy': 0.45} winogrande/no_choice={'accuracy': 0.5453827940015785} arc/ARC-Challenge={'accuracy': 0.36789297658862874} arc/ARC-Easy={'accuracy': 0.45263157894736844} super_glue/boolq={'accuracy': 0.7623853211009174} piqa/no_choice={'accuracy': 0.6871599564744287} GLUE/cola={'matthews_correlation': 0.21406100184418933} GLUE/mnli_matched={'accuracy': 0.7315333672949567} GLUE/mnli_mismatched={'accuracy': 0.7505085435313262} GLUE/mrpc={'accuracy': 0.7156862745098039, 'f1': 0.7827715355805244} GLUE/qnli={'accuracy': 0.8222588321435109} GLUE/qqp={'accuracy': 0.8101162503091763, 'f1': 0.7367374232708069} GLUE/rte={'accuracy': 0.7364620938628159} GLUE/sst2={'accuracy': 0.8990825688073395} GLUE/stsb={'pearson': 0.8320267135296606, 'spearmanr': 0.8362641064475601} GLUE/wnli={'accuracy': 0.43661971830985913} race/middle={'accuracy': 0.5675487465181058} race/high={'accuracy': 0.5224572587655752} Step=4096 Train={'loss': 0.20388431550509267, 'acc': 0.9466150429943809} Hellaswag/choice={'accuracy': 0.38129854610635333} Hellaswag/no_choice={'accuracy': 0.42561242780322645} obqa/main={'accuracy': 0.444} winogrande/no_choice={'accuracy': 0.5430149960536701} arc/ARC-Challenge={'accuracy': 0.3712374581939799} arc/ARC-Easy={'accuracy': 0.443859649122807} super_glue/boolq={'accuracy': 0.7611620795107034} piqa/no_choice={'accuracy': 0.6958650707290533} GLUE/cola={'matthews_correlation': 0.21838662331923692} GLUE/mnli_matched={'accuracy': 0.7395822720326032} GLUE/mnli_mismatched={'accuracy': 0.7588486574450773} GLUE/mrpc={'accuracy': 0.7279411764705882, 'f1': 0.7819253438113949} GLUE/qnli={'accuracy': 0.8215266337177375} GLUE/qqp={'accuracy': 0.8023992085085333, 'f1': 0.7011335153941117} GLUE/rte={'accuracy': 0.7436823104693141} GLUE/sst2={'accuracy': 0.9071100917431193} GLUE/stsb={'pearson': 0.832917529289115, 'spearmanr': 0.8366141733875496} GLUE/wnli={'accuracy': 0.43661971830985913} race/middle={'accuracy': 0.5682451253481894} race/high={'accuracy': 0.5169516082294987} Step=4608 Train={'loss': 0.20215968282400354, 'acc': 0.9470157118194038} Hellaswag/choice={'accuracy': 0.39603664608643696} Hellaswag/no_choice={'accuracy': 0.4281019717187811} obqa/main={'accuracy': 0.43} winogrande/no_choice={'accuracy': 0.5509076558800315} arc/ARC-Challenge={'accuracy': 0.38127090301003347} arc/ARC-Easy={'accuracy': 0.4614035087719298} super_glue/boolq={'accuracy': 0.7605504587155963} piqa/no_choice={'accuracy': 0.6964091403699674} GLUE/cola={'matthews_correlation': 0.2308810545585645} GLUE/mnli_matched={'accuracy': 0.7256240448293428} GLUE/mnli_mismatched={'accuracy': 0.741253051261188} GLUE/mrpc={'accuracy': 0.7426470588235294, 'f1': 0.8} GLUE/qnli={'accuracy': 0.8218927329306241} GLUE/qqp={'accuracy': 0.7993569131832797, 'f1': 0.693701857725419} GLUE/rte={'accuracy': 0.7184115523465704} GLUE/sst2={'accuracy': 0.8979357798165137} GLUE/stsb={'pearson': 0.8335554394972398, 'spearmanr': 0.8359066615989392} GLUE/wnli={'accuracy': 0.4225352112676056} race/middle={'accuracy': 0.5682451253481894} race/high={'accuracy': 0.5273833671399595} Step=5120 Train={'loss': 0.20209107419077554, 'acc': 0.9470347743335878} Hellaswag/choice={'accuracy': 0.3852818163712408} Hellaswag/no_choice={'accuracy': 0.425911173073093} obqa/main={'accuracy': 0.44} winogrande/no_choice={'accuracy': 0.5469613259668509} arc/ARC-Challenge={'accuracy': 0.4013377926421405} arc/ARC-Easy={'accuracy': 0.4649122807017544} super_glue/boolq={'accuracy': 0.7636085626911315} piqa/no_choice={'accuracy': 0.6958650707290533} GLUE/cola={'matthews_correlation': 0.23868889984712757} GLUE/mnli_matched={'accuracy': 0.745491594498217} GLUE/mnli_mismatched={'accuracy': 0.761899918633035} GLUE/mrpc={'accuracy': 0.7352941176470589, 'f1': 0.8014705882352942} GLUE/qnli={'accuracy': 0.8176825919824272} GLUE/qqp={'accuracy': 0.8082364580756863, 'f1': 0.737284402426214} GLUE/rte={'accuracy': 0.7111913357400722} GLUE/sst2={'accuracy': 0.8979357798165137} GLUE/stsb={'pearson': 0.8370202575390767, 'spearmanr': 0.8390825240948236} GLUE/wnli={'accuracy': 0.4647887323943662} race/middle={'accuracy': 0.5731197771587744} race/high={'accuracy': 0.530860620110113} Step=5632 Train={'loss': 0.2028822331549236, 'acc': 0.9469025877478998} Hellaswag/choice={'accuracy': 0.38966341366261703} Hellaswag/no_choice={'accuracy': 0.425911173073093} obqa/main={'accuracy': 0.438} winogrande/no_choice={'accuracy': 0.5516969218626677} arc/ARC-Challenge={'accuracy': 0.39464882943143814} arc/ARC-Easy={'accuracy': 0.4614035087719298} super_glue/boolq={'accuracy': 0.7617737003058104} piqa/no_choice={'accuracy': 0.6936887921653971} GLUE/cola={'matthews_correlation': 0.24499744431737797} GLUE/mnli_matched={'accuracy': 0.7410086602139583} GLUE/mnli_mismatched={'accuracy': 0.7594589096826688} GLUE/mrpc={'accuracy': 0.7352941176470589, 'f1': 0.8014705882352942} GLUE/qnli={'accuracy': 0.818597840014644} GLUE/qqp={'accuracy': 0.8090526836507544, 'f1': 0.7393124873370702} GLUE/rte={'accuracy': 0.7184115523465704} GLUE/sst2={'accuracy': 0.8990825688073395} GLUE/stsb={'pearson': 0.8406728176670001, 'spearmanr': 0.8427300768224941} GLUE/wnli={'accuracy': 0.43661971830985913} race/middle={'accuracy': 0.5738161559888579} race/high={'accuracy': 0.5299913068675746} Step=6144 Train={'loss': 0.20155814645704595, 'acc': 0.9471458137704758} Hellaswag/choice={'accuracy': 0.3899621589324836} Hellaswag/no_choice={'accuracy': 0.4255128460466043} obqa/main={'accuracy': 0.438} winogrande/no_choice={'accuracy': 0.5501183898973955} arc/ARC-Challenge={'accuracy': 0.391304347826087} arc/ARC-Easy={'accuracy': 0.45964912280701753} super_glue/boolq={'accuracy': 0.7620795107033639} piqa/no_choice={'accuracy': 0.6931447225244831} GLUE/cola={'matthews_correlation': 0.24066818407079058} GLUE/mnli_matched={'accuracy': 0.741110545084055} GLUE/mnli_mismatched={'accuracy': 0.7594589096826688} GLUE/mrpc={'accuracy': 0.7352941176470589, 'f1': 0.8014705882352942} GLUE/qnli={'accuracy': 0.818597840014644} GLUE/qqp={'accuracy': 0.8092010883007668, 'f1': 0.7396030245746693} GLUE/rte={'accuracy': 0.7148014440433214} GLUE/sst2={'accuracy': 0.8990825688073395} GLUE/stsb={'pearson': 0.8406896598512256, 'spearmanr': 0.8428307063668854} GLUE/wnli={'accuracy': 0.43661971830985913} race/middle={'accuracy': 0.5745125348189415} race/high={'accuracy': 0.5282526803824978}