diff --git "a/outputs.log" "b/outputs.log" new file mode 100644--- /dev/null +++ "b/outputs.log" @@ -0,0 +1,1035 @@ +Step=512 +Train={'loss_dpo': 0.6697518632863648, 'loss_dph': 1.2595687323337188, 'dpo/chosen': 0.08446986632669606, 'dpo/rejected': -0.542165306231464, 'dpo/accuracy': 0.75439453125, 'dpo/margin': 0.6266351726200874, 'dph/chosen': 0.6259329497261206, 'dph/rejected': 0.6336357829568442, 'dph/accuracy': 0.67608642578125, 'dph/margin': 0.6889255843580031} +Hellaswag/choice={'log_accuracy': 0.4291973710416252, 'dph_accuracy': 0.32154949213304124} +Hellaswag/no_choice={'log_accuracy': 0.4373630750846445, 'dph_accuracy': 0.3407687711611233} +obqa/main={'log_accuracy': 0.49, 'dph_accuracy': 0.468} +winogrande/no_choice={'log_accuracy': 0.5643251775848461, 'dph_accuracy': 0.5177584846093133} +arc/ARC-Challenge={'log_accuracy': 0.3745819397993311, 'dph_accuracy': 0.34448160535117056} +arc/ARC-Easy={'log_accuracy': 0.5035087719298246, 'dph_accuracy': 0.4263157894736842} +super_glue/boolq={'log_accuracy': 0.7678899082568807, 'dph_accuracy': 0.7458715596330275} +piqa/no_choice={'log_accuracy': 0.6942328618063112, 'dph_accuracy': 0.5652883569096845} +GLUE/cola={'log_matthews_correlation': 0.3408529915520382, 'dph_matthews_correlation': 0.3674294957074128} +GLUE/mnli_matched={'log_accuracy': 0.7666836474783495, 'dph_accuracy': 0.7457972491085074} +GLUE/mnli_mismatched={'log_accuracy': 0.7818348250610252, 'dph_accuracy': 0.7496948738812043} +GLUE/mrpc={'log_accuracy': 0.7647058823529411, 'log_f1': 0.8344827586206897, 'dph_accuracy': 0.7598039215686274, 'dph_f1': 0.8361204013377925} +GLUE/qnli={'log_accuracy': 0.8383671975105254, 'dph_accuracy': 0.8392824455427421} +GLUE/qqp={'log_accuracy': 0.8242888943853575, 'log_f1': 0.7685691946833464, 'dph_accuracy': 0.8211229285184269, 'dph_f1': 0.7689456869009584} +GLUE/rte={'log_accuracy': 0.7653429602888087, 'dph_accuracy': 0.7725631768953068} +GLUE/sst2={'log_accuracy': 0.9059633027522935, 'dph_accuracy': 0.8922018348623854} +GLUE/stsb={'log_pearson': 0.859265367501987, 'log_spearmanr': 0.8604002387131556, 'dph_pearson': 0.8273616183204963, 'dph_spearmanr': 0.8361956417024791} +GLUE/wnli={'log_accuracy': 0.4507042253521127, 'dph_accuracy': 0.49295774647887325} +race/middle={'log_accuracy': 0.5870473537604457, 'dph_accuracy': 0.4561281337047354} +race/high={'log_accuracy': 0.5320197044334976, 'dph_accuracy': 0.40973630831643004} + +Step=1024 +Train={'loss_dpo': 0.6996576679957798, 'loss_dph': 1.1412990734097548, 'dpo/chosen': 0.17809494912387436, 'dpo/rejected': -0.6068244920388679, 'dpo/accuracy': 0.757110595703125, 'dpo/margin': 0.784919441834063, 'dph/chosen': 0.5698942335075117, 'dph/rejected': 0.5714048408408416, 'dph/accuracy': 0.77679443359375, 'dph/margin': 1.3975966284779133} +Hellaswag/choice={'log_accuracy': 0.45140410276837284, 'dph_accuracy': 0.3496315475004979} +Hellaswag/no_choice={'log_accuracy': 0.4380601473809998, 'dph_accuracy': 0.36427006572395937} +obqa/main={'log_accuracy': 0.506, 'dph_accuracy': 0.468} +winogrande/no_choice={'log_accuracy': 0.5643251775848461, 'dph_accuracy': 0.5217048145224941} +arc/ARC-Challenge={'log_accuracy': 0.4080267558528428, 'dph_accuracy': 0.4080267558528428} +arc/ARC-Easy={'log_accuracy': 0.531578947368421, 'dph_accuracy': 0.4982456140350877} +super_glue/boolq={'log_accuracy': 0.7697247706422018, 'dph_accuracy': 0.7547400611620795} +piqa/no_choice={'log_accuracy': 0.705658324265506, 'dph_accuracy': 0.5854189336235038} +GLUE/cola={'log_matthews_correlation': 0.3584533515079486, 
'dph_matthews_correlation': 0.391243912243602} +GLUE/mnli_matched={'log_accuracy': 0.7718797758532858, 'dph_accuracy': 0.7210392256749872} +GLUE/mnli_mismatched={'log_accuracy': 0.7870219690805533, 'dph_accuracy': 0.7329129373474369} +GLUE/mrpc={'log_accuracy': 0.7965686274509803, 'log_f1': 0.8614357262103506, 'dph_accuracy': 0.7745098039215687, 'dph_f1': 0.8486842105263158} +GLUE/qnli={'log_accuracy': 0.8412959912136189, 'dph_accuracy': 0.8264689730917079} +GLUE/qqp={'log_accuracy': 0.828716299777393, 'log_f1': 0.7810692042616422, 'dph_accuracy': 0.8221370269601781, 'dph_f1': 0.7823481340234268} +GLUE/rte={'log_accuracy': 0.7581227436823105, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9128440366972477, 'dph_accuracy': 0.9059633027522935} +GLUE/stsb={'log_pearson': 0.8534073683550283, 'log_spearmanr': 0.8548573564585142, 'dph_pearson': 0.8368583463533272, 'dph_spearmanr': 0.8383042130131589} +GLUE/wnli={'log_accuracy': 0.4225352112676056, 'dph_accuracy': 0.43661971830985913} +race/middle={'log_accuracy': 0.5863509749303621, 'dph_accuracy': 0.5139275766016713} +race/high={'log_accuracy': 0.5421616922631122, 'dph_accuracy': 0.4534917415241959} + +Step=1536 +Train={'loss_dpo': 0.7023084723914508, 'loss_dph': 1.09693936612166, 'dpo/chosen': 0.20840250718867992, 'dpo/rejected': -0.6329127724711725, 'dpo/accuracy': 0.765350341796875, 'dpo/margin': 0.8413152804964739, 'dph/chosen': 0.5468104102546931, 'dph/rejected': 0.5501289558160352, 'dph/accuracy': 0.79644775390625, 'dph/margin': 1.6855457446727087} +Hellaswag/choice={'log_accuracy': 0.46116311491734713, 'dph_accuracy': 0.36217884883489343} +Hellaswag/no_choice={'log_accuracy': 0.4362676757618004, 'dph_accuracy': 0.3822943636725752} +obqa/main={'log_accuracy': 0.52, 'dph_accuracy': 0.522} +winogrande/no_choice={'log_accuracy': 0.5714285714285714, 'dph_accuracy': 0.5327545382794001} +arc/ARC-Challenge={'log_accuracy': 0.4180602006688963, 'dph_accuracy': 0.3879598662207358} +arc/ARC-Easy={'log_accuracy': 0.531578947368421, 'dph_accuracy': 0.5403508771929825} +super_glue/boolq={'log_accuracy': 0.7782874617737003, 'dph_accuracy': 0.7746177370030581} +piqa/no_choice={'log_accuracy': 0.6991294885745375, 'dph_accuracy': 0.6099020674646355} +GLUE/cola={'log_matthews_correlation': 0.3109940583469735, 'dph_matthews_correlation': 0.3611442182230235} +GLUE/mnli_matched={'log_accuracy': 0.7777890983188996, 'dph_accuracy': 0.7779928680590932} +GLUE/mnli_mismatched={'log_accuracy': 0.7891578519121237, 'dph_accuracy': 0.7863100081366965} +GLUE/mrpc={'log_accuracy': 0.7720588235294118, 'log_f1': 0.8404802744425386, 'dph_accuracy': 0.7720588235294118, 'dph_f1': 0.8436974789915965} +GLUE/qnli={'log_accuracy': 0.8110928061504667, 'dph_accuracy': 0.8240893282079443} +GLUE/qqp={'log_accuracy': 0.8268859757605739, 'log_f1': 0.7802580766694924, 'dph_accuracy': 0.829779866435815, 'dph_f1': 0.7794230769230769} +GLUE/rte={'log_accuracy': 0.7545126353790613, 'dph_accuracy': 0.7581227436823105} +GLUE/sst2={'log_accuracy': 0.9071100917431193, 'dph_accuracy': 0.9048165137614679} +GLUE/stsb={'log_pearson': 0.860173737827279, 'log_spearmanr': 0.8629505211848604, 'dph_pearson': 0.8710048066167739, 'dph_spearmanr': 0.8698941756698273} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.38028169014084506} +race/middle={'log_accuracy': 0.5919220055710307, 'dph_accuracy': 0.5355153203342619} +race/high={'log_accuracy': 0.5473775717183426, 'dph_accuracy': 0.4804404520428861} + +Step=2048 +Train={'loss_dpo': 0.7014972065226175, 'loss_dph': 
1.0713350278820144, 'dpo/chosen': 0.20473940583349304, 'dpo/rejected': -0.6514070515340791, 'dpo/accuracy': 0.768829345703125, 'dpo/margin': 0.8561464592094126, 'dph/chosen': 0.5355548256993643, 'dph/rejected': 0.5357802019934752, 'dph/accuracy': 0.808990478515625, 'dph/margin': 1.8552048823185032} +Hellaswag/choice={'log_accuracy': 0.45598486357299345, 'dph_accuracy': 0.3875721967735511} +Hellaswag/no_choice={'log_accuracy': 0.4386576379207329, 'dph_accuracy': 0.40529774945230035} +obqa/main={'log_accuracy': 0.506, 'dph_accuracy': 0.532} +winogrande/no_choice={'log_accuracy': 0.5682715074980268, 'dph_accuracy': 0.5185477505919495} +arc/ARC-Challenge={'log_accuracy': 0.431438127090301, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.531578947368421, 'dph_accuracy': 0.5543859649122806} +super_glue/boolq={'log_accuracy': 0.7795107033639144, 'dph_accuracy': 0.7712538226299694} +piqa/no_choice={'log_accuracy': 0.70620239390642, 'dph_accuracy': 0.6022850924918389} +GLUE/cola={'log_matthews_correlation': 0.3823796811639837, 'dph_matthews_correlation': 0.39194045968570046} +GLUE/mnli_matched={'log_accuracy': 0.7778909831889964, 'dph_accuracy': 0.779521141110545} +GLUE/mnli_mismatched={'log_accuracy': 0.7919039869812856, 'dph_accuracy': 0.7874288039056143} +GLUE/mrpc={'log_accuracy': 0.7671568627450981, 'log_f1': 0.8183556405353728, 'dph_accuracy': 0.7598039215686274, 'dph_f1': 0.8218181818181818} +GLUE/qnli={'log_accuracy': 0.8548416620904266, 'dph_accuracy': 0.8515467691744463} +GLUE/qqp={'log_accuracy': 0.8184516448182043, 'log_f1': 0.7207214062856709, 'dph_accuracy': 0.8280484788523373, 'dph_f1': 0.7485896137711558} +GLUE/rte={'log_accuracy': 0.7617328519855595, 'dph_accuracy': 0.7545126353790613} +GLUE/sst2={'log_accuracy': 0.911697247706422, 'dph_accuracy': 0.9036697247706422} +GLUE/stsb={'log_pearson': 0.8549936449147798, 'log_spearmanr': 0.8580339086071898, 'dph_pearson': 0.869524648504877, 'dph_spearmanr': 0.8679400572683136} +GLUE/wnli={'log_accuracy': 0.4225352112676056, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.5891364902506964, 'dph_accuracy': 0.5341225626740947} +race/high={'log_accuracy': 0.5523036800927268, 'dph_accuracy': 0.504491451753115} + +Step=2560 +Train={'loss_dpo': 0.6995194201444974, 'loss_dph': 1.0480180184385972, 'dpo/chosen': 0.18932902458237777, 'dpo/rejected': -0.682265745399036, 'dpo/accuracy': 0.77471923828125, 'dpo/margin': 0.8715947690716348, 'dph/chosen': 0.5231242153895437, 'dph/rejected': 0.524893803310988, 'dph/accuracy': 0.818695068359375, 'dph/margin': 1.9979895569267683} +Hellaswag/choice={'log_accuracy': 0.4446325433180641, 'dph_accuracy': 0.4018123879705238} +Hellaswag/no_choice={'log_accuracy': 0.43995220075682134, 'dph_accuracy': 0.41435968930491934} +obqa/main={'log_accuracy': 0.52, 'dph_accuracy': 0.556} +winogrande/no_choice={'log_accuracy': 0.5714285714285714, 'dph_accuracy': 0.5295974743488555} +arc/ARC-Challenge={'log_accuracy': 0.411371237458194, 'dph_accuracy': 0.39464882943143814} +arc/ARC-Easy={'log_accuracy': 0.5543859649122806, 'dph_accuracy': 0.5543859649122806} +super_glue/boolq={'log_accuracy': 0.7844036697247706, 'dph_accuracy': 0.7877675840978593} +piqa/no_choice={'log_accuracy': 0.7121871599564744, 'dph_accuracy': 0.6278563656147987} +GLUE/cola={'log_matthews_correlation': 0.44638923908954203, 'dph_matthews_correlation': 0.4719110859010941} +GLUE/mnli_matched={'log_accuracy': 0.7716760061130922, 'dph_accuracy': 0.7765664798777382} +GLUE/mnli_mismatched={'log_accuracy': 0.7902766476810414, 
'dph_accuracy': 0.7845809601301872} +GLUE/mrpc={'log_accuracy': 0.7696078431372549, 'log_f1': 0.8412162162162162, 'dph_accuracy': 0.7671568627450981, 'dph_f1': 0.8408710217755444} +GLUE/qnli={'log_accuracy': 0.8522789676002197, 'dph_accuracy': 0.8528281164195497} +GLUE/qqp={'log_accuracy': 0.8333910462527826, 'log_f1': 0.7910929165115991, 'dph_accuracy': 0.8338609943111551, 'dph_f1': 0.7930747666430485} +GLUE/rte={'log_accuracy': 0.7942238267148014, 'dph_accuracy': 0.7725631768953068} +GLUE/sst2={'log_accuracy': 0.9139908256880734, 'dph_accuracy': 0.9139908256880734} +GLUE/stsb={'log_pearson': 0.8651506908191392, 'log_spearmanr': 0.8662402544325684, 'dph_pearson': 0.8758249283279701, 'dph_spearmanr': 0.8724782664030489} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.5940111420612814, 'dph_accuracy': 0.5557103064066853} +race/high={'log_accuracy': 0.5491161982034193, 'dph_accuracy': 0.513184584178499} + +Step=3072 +Train={'loss_dpo': 0.6977253113873303, 'loss_dph': 1.028073468216462, 'dpo/chosen': 0.17685046318825925, 'dpo/rejected': -0.7108122393101439, 'dpo/accuracy': 0.7796630859375, 'dpo/margin': 0.8876627017471037, 'dph/chosen': 0.5132615282273036, 'dph/rejected': 0.5148119412042433, 'dph/accuracy': 0.829437255859375, 'dph/margin': 2.116757177951513} +Hellaswag/choice={'log_accuracy': 0.46195976897032465, 'dph_accuracy': 0.4133638717386975} +Hellaswag/no_choice={'log_accuracy': 0.43815972913762197, 'dph_accuracy': 0.4343756223859789} +obqa/main={'log_accuracy': 0.518, 'dph_accuracy': 0.556} +winogrande/no_choice={'log_accuracy': 0.5674822415153907, 'dph_accuracy': 0.5374901341752171} +arc/ARC-Challenge={'log_accuracy': 0.40468227424749165, 'dph_accuracy': 0.41471571906354515} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5526315789473685} +super_glue/boolq={'log_accuracy': 0.7798165137614679, 'dph_accuracy': 0.7743119266055046} +piqa/no_choice={'log_accuracy': 0.7083786724700761, 'dph_accuracy': 0.6251360174102285} +GLUE/cola={'log_matthews_correlation': 0.3736520730249099, 'dph_matthews_correlation': 0.41870797137315424} +GLUE/mnli_matched={'log_accuracy': 0.7804381049414162, 'dph_accuracy': 0.7839021905247071} +GLUE/mnli_mismatched={'log_accuracy': 0.7919039869812856, 'dph_accuracy': 0.7928193653376729} +GLUE/mrpc={'log_accuracy': 0.7720588235294118, 'log_f1': 0.8241965973534972, 'dph_accuracy': 0.7671568627450981, 'dph_f1': 0.8237476808905381} +GLUE/qnli={'log_accuracy': 0.8522789676002197, 'dph_accuracy': 0.8500823723228995} +GLUE/qqp={'log_accuracy': 0.834232005936186, 'log_f1': 0.7675660678365817, 'dph_accuracy': 0.8368290873114024, 'dph_f1': 0.7790615894705114} +GLUE/rte={'log_accuracy': 0.7617328519855595, 'dph_accuracy': 0.7581227436823105} +GLUE/sst2={'log_accuracy': 0.9208715596330275, 'dph_accuracy': 0.9094036697247706} +GLUE/stsb={'log_pearson': 0.8509246487460939, 'log_spearmanr': 0.853088185852893, 'dph_pearson': 0.8639281252460753, 'dph_spearmanr': 0.8678218114483459} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.5988857938718662, 'dph_accuracy': 0.5779944289693594} +race/high={'log_accuracy': 0.5566502463054187, 'dph_accuracy': 0.5181106925528832} + +Step=3584 +Train={'loss_dpo': 0.6943014134740224, 'loss_dph': 1.01206607282802, 'dpo/chosen': 0.1900959078265032, 'dpo/rejected': -0.7124356037556936, 'dpo/accuracy': 0.789520263671875, 'dpo/margin': 0.902531511017969, 'dph/chosen': 0.5049284644410363, 
'dph/rejected': 0.5071376085325028, 'dph/accuracy': 0.835296630859375, 'dph/margin': 2.213982010485779} +Hellaswag/choice={'log_accuracy': 0.4616610237004581, 'dph_accuracy': 0.4495120493925513} +Hellaswag/no_choice={'log_accuracy': 0.44234216291575384, 'dph_accuracy': 0.44921330412268473} +obqa/main={'log_accuracy': 0.54, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.5706393054459353, 'dph_accuracy': 0.5232833464877664} +arc/ARC-Challenge={'log_accuracy': 0.431438127090301, 'dph_accuracy': 0.4280936454849498} +arc/ARC-Easy={'log_accuracy': 0.5298245614035088, 'dph_accuracy': 0.5684210526315789} +super_glue/boolq={'log_accuracy': 0.7813455657492355, 'dph_accuracy': 0.77217125382263} +piqa/no_choice={'log_accuracy': 0.7040261153427638, 'dph_accuracy': 0.6191512513601741} +GLUE/cola={'log_matthews_correlation': 0.4158457682606511, 'dph_matthews_correlation': 0.46878933606826273} +GLUE/mnli_matched={'log_accuracy': 0.773204279164544, 'dph_accuracy': 0.7747325522159959} +GLUE/mnli_mismatched={'log_accuracy': 0.7876322213181448, 'dph_accuracy': 0.7855980471928397} +GLUE/mrpc={'log_accuracy': 0.7867647058823529, 'log_f1': 0.8438061041292639, 'dph_accuracy': 0.7696078431372549, 'dph_f1': 0.838487972508591} +GLUE/qnli={'log_accuracy': 0.85612300933553, 'dph_accuracy': 0.8550247116968699} +GLUE/qqp={'log_accuracy': 0.832352213702696, 'log_f1': 0.7584806157354617, 'dph_accuracy': 0.8409844175117487, 'dph_f1': 0.7841386025585065} +GLUE/rte={'log_accuracy': 0.7617328519855595, 'dph_accuracy': 0.7545126353790613} +GLUE/sst2={'log_accuracy': 0.9197247706422018, 'dph_accuracy': 0.9105504587155964} +GLUE/stsb={'log_pearson': 0.8595125547739438, 'log_spearmanr': 0.860004104415131, 'dph_pearson': 0.8751367268990637, 'dph_spearmanr': 0.8733046868130417} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.43661971830985913} +race/middle={'log_accuracy': 0.6016713091922006, 'dph_accuracy': 0.5898328690807799} +race/high={'log_accuracy': 0.5543320776586497, 'dph_accuracy': 0.5354969574036511} + +Step=4096 +Train={'loss_dpo': 0.6910005195823032, 'loss_dph': 0.9915480361523805, 'dpo/chosen': 0.1853578156570279, 'dpo/rejected': -0.7303965972068909, 'dpo/accuracy': 0.799163818359375, 'dpo/margin': 0.915754412681963, 'dph/chosen': 0.49565251288731815, 'dph/rejected': 0.4958955234105815, 'dph/accuracy': 0.847564697265625, 'dph/margin': 2.3386987789999694} +Hellaswag/choice={'log_accuracy': 0.4761999601672974, 'dph_accuracy': 0.4718183628759211} +Hellaswag/no_choice={'log_accuracy': 0.44084843656642103, 'dph_accuracy': 0.4715196176060546} +obqa/main={'log_accuracy': 0.514, 'dph_accuracy': 0.548} +winogrande/no_choice={'log_accuracy': 0.5761641673243884, 'dph_accuracy': 0.5248618784530387} +arc/ARC-Challenge={'log_accuracy': 0.431438127090301, 'dph_accuracy': 0.40468227424749165} +arc/ARC-Easy={'log_accuracy': 0.5526315789473685, 'dph_accuracy': 0.5789473684210527} +super_glue/boolq={'log_accuracy': 0.7792048929663609, 'dph_accuracy': 0.7764525993883792} +piqa/no_choice={'log_accuracy': 0.7094668117519043, 'dph_accuracy': 0.6191512513601741} +GLUE/cola={'log_matthews_correlation': 0.42956981984553666, 'dph_matthews_correlation': 0.43942052933004133} +GLUE/mnli_matched={'log_accuracy': 0.780030565461029, 'dph_accuracy': 0.7768721344880285} +GLUE/mnli_mismatched={'log_accuracy': 0.7915988608624899, 'dph_accuracy': 0.7858014646053703} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.8525179856115108, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8461538461538461} 
+GLUE/qnli={'log_accuracy': 0.8502654219293428, 'dph_accuracy': 0.8480688266520227} +GLUE/qqp={'log_accuracy': 0.8379421221864952, 'log_f1': 0.7798239128973721, 'dph_accuracy': 0.8428642097452387, 'dph_f1': 0.7941948232854967} +GLUE/rte={'log_accuracy': 0.7689530685920578, 'dph_accuracy': 0.7545126353790613} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.908256880733945} +GLUE/stsb={'log_pearson': 0.8636696083713923, 'log_spearmanr': 0.8643591942382002, 'dph_pearson': 0.8796754436064546, 'dph_spearmanr': 0.8782942569331182} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4225352112676056} +race/middle={'log_accuracy': 0.5995821727019499, 'dph_accuracy': 0.6002785515320335} +race/high={'log_accuracy': 0.5598377281947262, 'dph_accuracy': 0.563894523326572} + +Step=4608 +Train={'loss_dpo': 0.7569483757106354, 'loss_dph': 0.9798464818159118, 'dpo/chosen': 0.24358833144680148, 'dpo/rejected': -0.7697398028458338, 'dpo/accuracy': 0.795745849609375, 'dpo/margin': 1.013328130253285, 'dph/chosen': 0.4902365370435291, 'dph/rejected': 0.48960994501248933, 'dph/accuracy': 0.852935791015625, 'dph/margin': 2.4057807390927337} +Hellaswag/choice={'log_accuracy': 0.49482174865564627, 'dph_accuracy': 0.5006970722963553} +Hellaswag/no_choice={'log_accuracy': 0.44284007169886475, 'dph_accuracy': 0.48645688109938257} +obqa/main={'log_accuracy': 0.52, 'dph_accuracy': 0.56} +winogrande/no_choice={'log_accuracy': 0.5761641673243884, 'dph_accuracy': 0.5311760063141279} +arc/ARC-Challenge={'log_accuracy': 0.43478260869565216, 'dph_accuracy': 0.4180602006688963} +arc/ARC-Easy={'log_accuracy': 0.5350877192982456, 'dph_accuracy': 0.5649122807017544} +super_glue/boolq={'log_accuracy': 0.7868501529051988, 'dph_accuracy': 0.7831804281345566} +piqa/no_choice={'log_accuracy': 0.7116430903155604, 'dph_accuracy': 0.6376496191512514} +GLUE/cola={'log_matthews_correlation': 0.43649620213642604, 'dph_matthews_correlation': 0.4781508152355689} +GLUE/mnli_matched={'log_accuracy': 0.7881813550687723, 'dph_accuracy': 0.7844116148751911} +GLUE/mnli_mismatched={'log_accuracy': 0.7960740439381611, 'dph_accuracy': 0.7875305126118796} +GLUE/mrpc={'log_accuracy': 0.7916666666666666, 'log_f1': 0.8542024013722128, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8472222222222223} +GLUE/qnli={'log_accuracy': 0.8520959179937763, 'dph_accuracy': 0.8586857038257367} +GLUE/qqp={'log_accuracy': 0.842072718278506, 'log_f1': 0.7875844173126184, 'dph_accuracy': 0.843952510511996, 'dph_f1': 0.7871816495193119} +GLUE/rte={'log_accuracy': 0.779783393501805, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9197247706422018, 'dph_accuracy': 0.9208715596330275} +GLUE/stsb={'log_pearson': 0.8629154283038692, 'log_spearmanr': 0.863234247692643, 'dph_pearson': 0.8806577765814934, 'dph_spearmanr': 0.8799841622449389} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.43661971830985913} +race/middle={'log_accuracy': 0.6023676880222841, 'dph_accuracy': 0.6072423398328691} +race/high={'log_accuracy': 0.5685308606201102, 'dph_accuracy': 0.5636047522457259} + +Step=5120 +Train={'loss_dpo': 0.8066111446969444, 'loss_dph': 0.9751120967557654, 'dpo/chosen': 0.2443163182722401, 'dpo/rejected': -0.9165236386720608, 'dpo/accuracy': 0.7994384765625, 'dpo/margin': 1.1608399571687187, 'dph/chosen': 0.48841167718637735, 'dph/rejected': 0.48670041994046187, 'dph/accuracy': 0.855926513671875, 'dph/margin': 2.431234525574837} +Hellaswag/choice={'log_accuracy': 0.5064728141804421, 'dph_accuracy': 
0.49900418243377814} +Hellaswag/no_choice={'log_accuracy': 0.44074885480979886, 'dph_accuracy': 0.49661422027484564} +obqa/main={'log_accuracy': 0.55, 'dph_accuracy': 0.554} +winogrande/no_choice={'log_accuracy': 0.5706393054459353, 'dph_accuracy': 0.5438042620363063} +arc/ARC-Challenge={'log_accuracy': 0.41471571906354515, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.5456140350877193, 'dph_accuracy': 0.5736842105263158} +super_glue/boolq={'log_accuracy': 0.7862385321100918, 'dph_accuracy': 0.7865443425076453} +piqa/no_choice={'log_accuracy': 0.7094668117519043, 'dph_accuracy': 0.6409140369967355} +GLUE/cola={'log_matthews_correlation': 0.44788444107137326, 'dph_matthews_correlation': 0.48991730597142985} +GLUE/mnli_matched={'log_accuracy': 0.7879775853285788, 'dph_accuracy': 0.7906265919510953} +GLUE/mnli_mismatched={'log_accuracy': 0.7986167615947926, 'dph_accuracy': 0.7983116354759967} +GLUE/mrpc={'log_accuracy': 0.7843137254901961, 'log_f1': 0.8434163701067616, 'dph_accuracy': 0.7916666666666666, 'dph_f1': 0.8506151142355007} +GLUE/qnli={'log_accuracy': 0.861980596741717, 'dph_accuracy': 0.8623466959546037} +GLUE/qqp={'log_accuracy': 0.8441751174870146, 'log_f1': 0.7947214076246333, 'dph_accuracy': 0.845139747712095, 'dph_f1': 0.8021363334702778} +GLUE/rte={'log_accuracy': 0.7581227436823105, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.9162844036697247} +GLUE/stsb={'log_pearson': 0.8568331758144762, 'log_spearmanr': 0.8581435495927164, 'dph_pearson': 0.8731767860417348, 'dph_spearmanr': 0.872449651167133} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4225352112676056} +race/middle={'log_accuracy': 0.6183844011142061, 'dph_accuracy': 0.6149025069637883} +race/high={'log_accuracy': 0.5763546798029556, 'dph_accuracy': 0.5728774268328021} + +Step=5632 +Train={'loss_dpo': 0.8032515685481485, 'loss_dph': 0.9635078349820105, 'dpo/chosen': 0.23829626143822225, 'dpo/rejected': -0.9572907404199213, 'dpo/accuracy': 0.805023193359375, 'dpo/margin': 1.1955870059673543, 'dph/chosen': 0.48209915233019274, 'dph/rejected': 0.48140868311020313, 'dph/accuracy': 0.86505126953125, 'dph/margin': 2.5008548987389077} +Hellaswag/choice={'log_accuracy': 0.5030870344552878, 'dph_accuracy': 0.5103565026887075} +Hellaswag/no_choice={'log_accuracy': 0.44433379804819756, 'dph_accuracy': 0.4938259310894244} +obqa/main={'log_accuracy': 0.528, 'dph_accuracy': 0.572} +winogrande/no_choice={'log_accuracy': 0.5666929755327546, 'dph_accuracy': 0.5564325177584846} +arc/ARC-Challenge={'log_accuracy': 0.411371237458194, 'dph_accuracy': 0.451505016722408} +arc/ARC-Easy={'log_accuracy': 0.5526315789473685, 'dph_accuracy': 0.5842105263157895} +super_glue/boolq={'log_accuracy': 0.789296636085627, 'dph_accuracy': 0.789908256880734} +piqa/no_choice={'log_accuracy': 0.7083786724700761, 'dph_accuracy': 0.6523394994559304} +GLUE/cola={'log_matthews_correlation': 0.44887349757367434, 'dph_matthews_correlation': 0.4758828120362657} +GLUE/mnli_matched={'log_accuracy': 0.7692307692307693, 'dph_accuracy': 0.7765664798777382} +GLUE/mnli_mismatched={'log_accuracy': 0.7891578519121237, 'dph_accuracy': 0.7897681041497152} +GLUE/mrpc={'log_accuracy': 0.7818627450980392, 'log_f1': 0.8402154398563735, 'dph_accuracy': 0.7794117647058824, 'dph_f1': 0.846938775510204} +GLUE/qnli={'log_accuracy': 0.8616144975288303, 'dph_accuracy': 0.861431447922387} +GLUE/qqp={'log_accuracy': 0.8426663368785555, 'log_f1': 0.7820903703196191, 'dph_accuracy': 
0.8465990601038833, 'dph_f1': 0.7963887065003283} +GLUE/rte={'log_accuracy': 0.7653429602888087, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.9151376146788991} +GLUE/stsb={'log_pearson': 0.8618353816212836, 'log_spearmanr': 0.8633237956265487, 'dph_pearson': 0.8770369370028187, 'dph_spearmanr': 0.8770361500907172} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4084507042253521} +race/middle={'log_accuracy': 0.628133704735376, 'dph_accuracy': 0.6162952646239555} +race/high={'log_accuracy': 0.5757751376412634, 'dph_accuracy': 0.5685308606201102} + +Step=6144 +Train={'loss_dpo': 0.8091267695999704, 'loss_dph': 0.9582598769193282, 'dpo/chosen': 0.2719304892996206, 'dpo/rejected': -0.9305797159668145, 'dpo/accuracy': 0.80963134765625, 'dpo/margin': 1.2025102057423283, 'dph/chosen': 0.4795740483823465, 'dph/rejected': 0.4786858284205664, 'dph/accuracy': 0.866851806640625, 'dph/margin': 2.5331677835201845} +Hellaswag/choice={'log_accuracy': 0.5033857797251543, 'dph_accuracy': 0.5167297351125274} +Hellaswag/no_choice={'log_accuracy': 0.44284007169886475, 'dph_accuracy': 0.5060744871539534} +obqa/main={'log_accuracy': 0.538, 'dph_accuracy': 0.564} +winogrande/no_choice={'log_accuracy': 0.5659037095501184, 'dph_accuracy': 0.5651144435674822} +arc/ARC-Challenge={'log_accuracy': 0.4013377926421405, 'dph_accuracy': 0.4214046822742475} +arc/ARC-Easy={'log_accuracy': 0.5421052631578948, 'dph_accuracy': 0.5701754385964912} +super_glue/boolq={'log_accuracy': 0.7865443425076453, 'dph_accuracy': 0.7828746177370031} +piqa/no_choice={'log_accuracy': 0.7165397170837867, 'dph_accuracy': 0.6572361262241567} +GLUE/cola={'log_matthews_correlation': 0.4155100874459552, 'dph_matthews_correlation': 0.4668770386077432} +GLUE/mnli_matched={'log_accuracy': 0.7713703515028019, 'dph_accuracy': 0.7749363219561896} +GLUE/mnli_mismatched={'log_accuracy': 0.7902766476810414, 'dph_accuracy': 0.7880390561432059} +GLUE/mrpc={'log_accuracy': 0.7941176470588235, 'log_f1': 0.8472727272727272, 'dph_accuracy': 0.7867647058823529, 'dph_f1': 0.8481675392670157} +GLUE/qnli={'log_accuracy': 0.8652754896576972, 'dph_accuracy': 0.866739886509244} +GLUE/qqp={'log_accuracy': 0.8452386841454366, 'log_f1': 0.7971733281467795, 'dph_accuracy': 0.8417759089784813, 'dph_f1': 0.8032721345757603} +GLUE/rte={'log_accuracy': 0.7581227436823105, 'dph_accuracy': 0.7581227436823105} +GLUE/sst2={'log_accuracy': 0.9208715596330275, 'dph_accuracy': 0.9185779816513762} +GLUE/stsb={'log_pearson': 0.8608639069469103, 'log_spearmanr': 0.861055201351945, 'dph_pearson': 0.8790003928995509, 'dph_spearmanr': 0.8785301317578216} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6183844011142061, 'dph_accuracy': 0.6246518105849582} +race/high={'log_accuracy': 0.5754853665604173, 'dph_accuracy': 0.5830194146624167} + +Step=6656 +Train={'loss_dpo': 0.8028534228214994, 'loss_dph': 0.9435320580087136, 'dpo/chosen': 0.3105322076975767, 'dpo/rejected': -0.896422611827802, 'dpo/accuracy': 0.814788818359375, 'dpo/margin': 1.2069548206527543, 'dph/chosen': 0.47239862076821737, 'dph/rejected': 0.47113343722594436, 'dph/accuracy': 0.87457275390625, 'dph/margin': 2.6209525984013453} +Hellaswag/choice={'log_accuracy': 0.49661422027484564, 'dph_accuracy': 0.5108544114718183} +Hellaswag/no_choice={'log_accuracy': 0.4439354710217088, 'dph_accuracy': 0.5146385182234615} +obqa/main={'log_accuracy': 0.526, 'dph_accuracy': 0.552} 
+winogrande/no_choice={'log_accuracy': 0.574585635359116, 'dph_accuracy': 0.55327545382794} +arc/ARC-Challenge={'log_accuracy': 0.40468227424749165, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5561403508771929, 'dph_accuracy': 0.5807017543859649} +super_glue/boolq={'log_accuracy': 0.7865443425076453, 'dph_accuracy': 0.7837920489296636} +piqa/no_choice={'log_accuracy': 0.7154515778019587, 'dph_accuracy': 0.6583242655059848} +GLUE/cola={'log_matthews_correlation': 0.4354313415465737, 'dph_matthews_correlation': 0.5254536736633585} +GLUE/mnli_matched={'log_accuracy': 0.789302088639837, 'dph_accuracy': 0.7970453387671931} +GLUE/mnli_mismatched={'log_accuracy': 0.8036004882017901, 'dph_accuracy': 0.8015663140764849} +GLUE/mrpc={'log_accuracy': 0.8112745098039216, 'log_f1': 0.863716814159292, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.847750865051903} +GLUE/qnli={'log_accuracy': 0.8704008786381109, 'dph_accuracy': 0.8652754896576972} +GLUE/qqp={'log_accuracy': 0.8412812268117734, 'log_f1': 0.7738661592134474, 'dph_accuracy': 0.8404155330200346, 'dph_f1': 0.7682804194799597} +GLUE/rte={'log_accuracy': 0.7545126353790613, 'dph_accuracy': 0.776173285198556} +GLUE/sst2={'log_accuracy': 0.9151376146788991, 'dph_accuracy': 0.9128440366972477} +GLUE/stsb={'log_pearson': 0.8606786355311589, 'log_spearmanr': 0.861791451277489, 'dph_pearson': 0.8780838485274596, 'dph_spearmanr': 0.8779996187376041} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.5070422535211268} +race/middle={'log_accuracy': 0.6267409470752089, 'dph_accuracy': 0.6239554317548747} +race/high={'log_accuracy': 0.5818603303390322, 'dph_accuracy': 0.5801217038539553} + +Step=7168 +Train={'loss_dpo': 0.7831187903502723, 'loss_dph': 0.9438100070256041, 'dpo/chosen': 0.29278807562968723, 'dpo/rejected': -0.8826393843996811, 'dpo/accuracy': 0.8172607421875, 'dpo/margin': 1.1754274584973246, 'dph/chosen': 0.47318637008720543, 'dph/rejected': 0.4706236371130217, 'dph/accuracy': 0.872222900390625, 'dph/margin': 2.616220823052572} +Hellaswag/choice={'log_accuracy': 0.5029874526986656, 'dph_accuracy': 0.5191196972714599} +Hellaswag/no_choice={'log_accuracy': 0.44343756223859787, 'dph_accuracy': 0.5211113324039036} +obqa/main={'log_accuracy': 0.54, 'dph_accuracy': 0.56} +winogrande/no_choice={'log_accuracy': 0.5706393054459353, 'dph_accuracy': 0.5588003157063931} +arc/ARC-Challenge={'log_accuracy': 0.411371237458194, 'dph_accuracy': 0.43478260869565216} +arc/ARC-Easy={'log_accuracy': 0.5578947368421052, 'dph_accuracy': 0.5912280701754385} +super_glue/boolq={'log_accuracy': 0.7929663608562691, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.720348204570185, 'dph_accuracy': 0.6599564744287268} +GLUE/cola={'log_matthews_correlation': 0.412472313849393, 'dph_matthews_correlation': 0.46458665505165037} +GLUE/mnli_matched={'log_accuracy': 0.7954151808456444, 'dph_accuracy': 0.7968415690269995} +GLUE/mnli_mismatched={'log_accuracy': 0.807160292921074, 'dph_accuracy': 0.7964808787632222} +GLUE/mrpc={'log_accuracy': 0.8235294117647058, 'log_f1': 0.8727915194346291, 'dph_accuracy': 0.7916666666666666, 'dph_f1': 0.8521739130434783} +GLUE/qnli={'log_accuracy': 0.866739886509244, 'dph_accuracy': 0.8718652754896576} +GLUE/qqp={'log_accuracy': 0.8449171407370765, 'log_f1': 0.7984052472509807, 'dph_accuracy': 0.8490230027207519, 'dph_f1': 0.8072502210433246} +GLUE/rte={'log_accuracy': 0.7653429602888087, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.911697247706422, 
'dph_accuracy': 0.9128440366972477} +GLUE/stsb={'log_pearson': 0.8618875810841379, 'log_spearmanr': 0.86132288781534, 'dph_pearson': 0.8679917966482774, 'dph_spearmanr': 0.8718922786378344} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6274373259052924, 'dph_accuracy': 0.621866295264624} +race/high={'log_accuracy': 0.5821501014198783, 'dph_accuracy': 0.5754853665604173} + +Step=7680 +Train={'loss_dpo': 0.7925912706414238, 'loss_dph': 0.9376877181057353, 'dpo/chosen': 0.19569723543696682, 'dpo/rejected': -0.9970300289623992, 'dpo/accuracy': 0.816375732421875, 'dpo/margin': 1.1927272670018283, 'dph/chosen': 0.46895182636217214, 'dph/rejected': 0.46873589145980077, 'dph/accuracy': 0.875457763671875, 'dph/margin': 2.656967250222806} +Hellaswag/choice={'log_accuracy': 0.5027882891854212, 'dph_accuracy': 0.5266879107747461} +Hellaswag/no_choice={'log_accuracy': 0.444035052778331, 'dph_accuracy': 0.5219079864568811} +obqa/main={'log_accuracy': 0.538, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.5722178374112076, 'dph_accuracy': 0.5659037095501184} +arc/ARC-Challenge={'log_accuracy': 0.4080267558528428, 'dph_accuracy': 0.42474916387959866} +arc/ARC-Easy={'log_accuracy': 0.5508771929824562, 'dph_accuracy': 0.5771929824561404} +super_glue/boolq={'log_accuracy': 0.7877675840978593, 'dph_accuracy': 0.7828746177370031} +piqa/no_choice={'log_accuracy': 0.7181719260065288, 'dph_accuracy': 0.6583242655059848} +GLUE/cola={'log_matthews_correlation': 0.4007160945968504, 'dph_matthews_correlation': 0.4603816765153755} +GLUE/mnli_matched={'log_accuracy': 0.7822720326031585, 'dph_accuracy': 0.7931737137035151} +GLUE/mnli_mismatched={'log_accuracy': 0.7991253051261188, 'dph_accuracy': 0.8022782750203418} +GLUE/mrpc={'log_accuracy': 0.7965686274509803, 'log_f1': 0.8546409807355517, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8503401360544217} +GLUE/qnli={'log_accuracy': 0.8696686802123376, 'dph_accuracy': 0.8724144243089877} +GLUE/qqp={'log_accuracy': 0.8451150136037596, 'log_f1': 0.7864110785183163, 'dph_accuracy': 0.8491961414790997, 'dph_f1': 0.7938740322526117} +GLUE/rte={'log_accuracy': 0.7617328519855595, 'dph_accuracy': 0.7581227436823105} +GLUE/sst2={'log_accuracy': 0.9185779816513762, 'dph_accuracy': 0.9174311926605505} +GLUE/stsb={'log_pearson': 0.862385199291965, 'log_spearmanr': 0.8638037450526963, 'dph_pearson': 0.8800436003020474, 'dph_spearmanr': 0.8798616197942368} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.38028169014084506} +race/middle={'log_accuracy': 0.6316155988857939, 'dph_accuracy': 0.6434540389972145} +race/high={'log_accuracy': 0.592581860330339, 'dph_accuracy': 0.5934511735728775} + +Step=8192 +Train={'loss_dpo': 0.8001744251087075, 'loss_dph': 0.9345945720997406, 'dpo/chosen': 0.2738910221062838, 'dpo/rejected': -0.9147949151965804, 'dpo/accuracy': 0.8192138671875, 'dpo/margin': 1.1886859362557516, 'dph/chosen': 0.46818528808944393, 'dph/rejected': 0.4664092838502256, 'dph/accuracy': 0.876129150390625, 'dph/margin': 2.6778990509628784} +Hellaswag/choice={'log_accuracy': 0.5171280621390162, 'dph_accuracy': 0.5359490141406095} +Hellaswag/no_choice={'log_accuracy': 0.4448317068313085, 'dph_accuracy': 0.5332603067118104} +obqa/main={'log_accuracy': 0.518, 'dph_accuracy': 0.546} +winogrande/no_choice={'log_accuracy': 0.574585635359116, 'dph_accuracy': 0.5580110497237569} +arc/ARC-Challenge={'log_accuracy': 0.42474916387959866, 'dph_accuracy': 0.44481605351170567} 
+arc/ARC-Easy={'log_accuracy': 0.5684210526315789, 'dph_accuracy': 0.5807017543859649} +super_glue/boolq={'log_accuracy': 0.7920489296636085, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.720892274211099, 'dph_accuracy': 0.6632208922742111} +GLUE/cola={'log_matthews_correlation': 0.4709720622741428, 'dph_matthews_correlation': 0.5152625931494} +GLUE/mnli_matched={'log_accuracy': 0.776158940397351, 'dph_accuracy': 0.7855323484462557} +GLUE/mnli_mismatched={'log_accuracy': 0.7942432872253865, 'dph_accuracy': 0.798006509357201} +GLUE/mrpc={'log_accuracy': 0.7867647058823529, 'log_f1': 0.8443649373881931, 'dph_accuracy': 0.7941176470588235, 'dph_f1': 0.8541666666666666} +GLUE/qnli={'log_accuracy': 0.8725974739154311, 'dph_accuracy': 0.8725974739154311} +GLUE/qqp={'log_accuracy': 0.8467227306455603, 'log_f1': 0.7915503380537522, 'dph_accuracy': 0.8483551817956962, 'dph_f1': 0.7920496557338127} +GLUE/rte={'log_accuracy': 0.7581227436823105, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.9162844036697247} +GLUE/stsb={'log_pearson': 0.8606254690598769, 'log_spearmanr': 0.8623091708927394, 'dph_pearson': 0.8786603032928515, 'dph_spearmanr': 0.8783346604080788} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6420612813370473, 'dph_accuracy': 0.6302228412256268} +race/high={'log_accuracy': 0.5893943784410316, 'dph_accuracy': 0.5905534627644161} + +Step=8704 +Train={'loss_dpo': 0.7765085079881828, 'loss_dph': 0.9308028246450704, 'dpo/chosen': 0.3282755390167722, 'dpo/rejected': -0.8493277809088795, 'dpo/accuracy': 0.820281982421875, 'dpo/margin': 1.1776033176956844, 'dph/chosen': 0.4666375823071576, 'dph/rejected': 0.4641652422287734, 'dph/accuracy': 0.881256103515625, 'dph/margin': 2.6918290476023685} +Hellaswag/choice={'log_accuracy': 0.5143397729535949, 'dph_accuracy': 0.5339573790081658} +Hellaswag/no_choice={'log_accuracy': 0.44503087034455285, 'dph_accuracy': 0.5361481776538538} +obqa/main={'log_accuracy': 0.542, 'dph_accuracy': 0.564} +winogrande/no_choice={'log_accuracy': 0.579321231254933, 'dph_accuracy': 0.56353591160221} +arc/ARC-Challenge={'log_accuracy': 0.4180602006688963, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5543859649122806, 'dph_accuracy': 0.5877192982456141} +super_glue/boolq={'log_accuracy': 0.7874617737003058, 'dph_accuracy': 0.7862385321100918} +piqa/no_choice={'log_accuracy': 0.7176278563656148, 'dph_accuracy': 0.6751904243743199} +GLUE/cola={'log_matthews_correlation': 0.4637527852072172, 'dph_matthews_correlation': 0.4978941920618971} +GLUE/mnli_matched={'log_accuracy': 0.7816607233825776, 'dph_accuracy': 0.7909322465613856} +GLUE/mnli_mismatched={'log_accuracy': 0.7982099267697315, 'dph_accuracy': 0.8007526444263628} +GLUE/mrpc={'log_accuracy': 0.7965686274509803, 'log_f1': 0.8482632541133455, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8434163701067616} +GLUE/qnli={'log_accuracy': 0.8680212337543475, 'dph_accuracy': 0.8691195313930075} +GLUE/qqp={'log_accuracy': 0.845584961662132, 'log_f1': 0.7923499085315151, 'dph_accuracy': 0.8511006678209251, 'dph_f1': 0.8060192047431849} +GLUE/rte={'log_accuracy': 0.7545126353790613, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.908256880733945, 'dph_accuracy': 0.9174311926605505} +GLUE/stsb={'log_pearson': 0.8619761295684436, 'log_spearmanr': 0.8635733940413887, 'dph_pearson': 0.8743982079721565, 'dph_spearmanr': 0.8754015942195833} 
+GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6469359331476323, 'dph_accuracy': 0.6427576601671309} +race/high={'log_accuracy': 0.5975079687047232, 'dph_accuracy': 0.5983772819472617} + +Step=9216 +Train={'loss_dpo': 0.7582050216587959, 'loss_dph': 0.92387313155632, 'dpo/chosen': 0.26634070124958953, 'dpo/rejected': -0.8677257980461945, 'dpo/accuracy': 0.8260498046875, 'dpo/margin': 1.1340664987656055, 'dph/chosen': 0.4626133128622314, 'dph/rejected': 0.4612598184176022, 'dph/accuracy': 0.884735107421875, 'dph/margin': 2.736708053620532} +Hellaswag/choice={'log_accuracy': 0.5277833100975902, 'dph_accuracy': 0.527185819557857} +Hellaswag/no_choice={'log_accuracy': 0.44602668791077477, 'dph_accuracy': 0.5399322844054969} +obqa/main={'log_accuracy': 0.516, 'dph_accuracy': 0.552} +winogrande/no_choice={'log_accuracy': 0.5761641673243884, 'dph_accuracy': 0.580110497237569} +arc/ARC-Challenge={'log_accuracy': 0.40468227424749165, 'dph_accuracy': 0.42474916387959866} +arc/ARC-Easy={'log_accuracy': 0.5701754385964912, 'dph_accuracy': 0.5877192982456141} +super_glue/boolq={'log_accuracy': 0.7862385321100918, 'dph_accuracy': 0.7868501529051988} +piqa/no_choice={'log_accuracy': 0.7127312295973884, 'dph_accuracy': 0.6713819368879217} +GLUE/cola={'log_matthews_correlation': 0.42727410372514657, 'dph_matthews_correlation': 0.4879260254793011} +GLUE/mnli_matched={'log_accuracy': 0.7890983188996434, 'dph_accuracy': 0.7960264900662252} +GLUE/mnli_mismatched={'log_accuracy': 0.8003458096013019, 'dph_accuracy': 0.8006509357200976} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.861111111111111, 'dph_accuracy': 0.8014705882352942, 'dph_f1': 0.8615384615384616} +GLUE/qnli={'log_accuracy': 0.8663737872963573, 'dph_accuracy': 0.8742449203734212} +GLUE/qqp={'log_accuracy': 0.846895869403908, 'log_f1': 0.801118108212312, 'dph_accuracy': 0.851348008904279, 'dph_f1': 0.8047179620483492} +GLUE/rte={'log_accuracy': 0.7617328519855595, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9208715596330275, 'dph_accuracy': 0.9162844036697247} +GLUE/stsb={'log_pearson': 0.8681492318925769, 'log_spearmanr': 0.8686811217438519, 'dph_pearson': 0.8808030721180241, 'dph_spearmanr': 0.879616576623286} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6344011142061281, 'dph_accuracy': 0.6455431754874652} +race/high={'log_accuracy': 0.592581860330339, 'dph_accuracy': 0.6012749927557229} + +Step=9728 +Train={'loss_dpo': 0.754898228609818, 'loss_dph': 0.9198042459756834, 'dpo/chosen': 0.18741152847258036, 'dpo/rejected': -0.9094864508992941, 'dpo/accuracy': 0.827728271484375, 'dpo/margin': 1.096897980229187, 'dph/chosen': 0.461053960796562, 'dph/rejected': 0.45875028474256396, 'dph/accuracy': 0.8848876953125, 'dph/margin': 2.760382961947471} +Hellaswag/choice={'log_accuracy': 0.522405895239992, 'dph_accuracy': 0.5465046803425613} +Hellaswag/no_choice={'log_accuracy': 0.44632543318064133, 'dph_accuracy': 0.5471021708822944} +obqa/main={'log_accuracy': 0.524, 'dph_accuracy': 0.552} +winogrande/no_choice={'log_accuracy': 0.5706393054459353, 'dph_accuracy': 0.5769534333070244} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.44481605351170567} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5824561403508772} +super_glue/boolq={'log_accuracy': 0.791131498470948, 'dph_accuracy': 0.7883792048929663} +piqa/no_choice={'log_accuracy': 
0.7154515778019587, 'dph_accuracy': 0.6648531011969532} +GLUE/cola={'log_matthews_correlation': 0.428353373191974, 'dph_matthews_correlation': 0.4863630663948402} +GLUE/mnli_matched={'log_accuracy': 0.7890983188996434, 'dph_accuracy': 0.7966377992868059} +GLUE/mnli_mismatched={'log_accuracy': 0.8039056143205858, 'dph_accuracy': 0.8064483319772172} +GLUE/mrpc={'log_accuracy': 0.7867647058823529, 'log_f1': 0.8367729831144465, 'dph_accuracy': 0.7622549019607843, 'dph_f1': 0.8213627992633518} +GLUE/qnli={'log_accuracy': 0.8702178290316676, 'dph_accuracy': 0.8753432180120813} +GLUE/qqp={'log_accuracy': 0.8474152856789513, 'log_f1': 0.7909734693186052, 'dph_accuracy': 0.8501855058125155, 'dph_f1': 0.7944898720863162} +GLUE/rte={'log_accuracy': 0.7617328519855595, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9277522935779816, 'dph_accuracy': 0.9220183486238532} +GLUE/stsb={'log_pearson': 0.8597632868561446, 'log_spearmanr': 0.8613003457771866, 'dph_pearson': 0.8754364573332784, 'dph_spearmanr': 0.8758307707682753} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6420612813370473, 'dph_accuracy': 0.6392757660167131} +race/high={'log_accuracy': 0.5972181976238772, 'dph_accuracy': 0.6033033903216459} + +Step=10240 +Train={'loss_dpo': 0.7374241108627757, 'loss_dph': 0.921672235999722, 'dpo/chosen': 0.14391024177160716, 'dpo/rejected': -0.9341468750717468, 'dpo/accuracy': 0.83465576171875, 'dpo/margin': 1.0780571173509088, 'dph/chosen': 0.4619145220203791, 'dph/rejected': 0.45975771332450677, 'dph/accuracy': 0.88275146484375, 'dph/margin': 2.7469292909809155} +Hellaswag/choice={'log_accuracy': 0.5170284803823939, 'dph_accuracy': 0.5481975702051384} +Hellaswag/no_choice={'log_accuracy': 0.4455287791276638, 'dph_accuracy': 0.5473013343955387} +obqa/main={'log_accuracy': 0.522, 'dph_accuracy': 0.562} +winogrande/no_choice={'log_accuracy': 0.5730071033938438, 'dph_accuracy': 0.579321231254933} +arc/ARC-Challenge={'log_accuracy': 0.40468227424749165, 'dph_accuracy': 0.43812709030100333} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5736842105263158} +super_glue/boolq={'log_accuracy': 0.7902140672782875, 'dph_accuracy': 0.7831804281345566} +piqa/no_choice={'log_accuracy': 0.7159956474428727, 'dph_accuracy': 0.6626768226332971} +GLUE/cola={'log_matthews_correlation': 0.40629511747030483, 'dph_matthews_correlation': 0.49844602227653045} +GLUE/mnli_matched={'log_accuracy': 0.7833927661742232, 'dph_accuracy': 0.7959246051961284} +GLUE/mnli_mismatched={'log_accuracy': 0.7997355573637104, 'dph_accuracy': 0.8069568755085436} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.8551236749116607, 'dph_accuracy': 0.7794117647058824, 'dph_f1': 0.841549295774648} +GLUE/qnli={'log_accuracy': 0.8665568369028006, 'dph_accuracy': 0.8746110195863079} +GLUE/qqp={'log_accuracy': 0.8484788523373732, 'log_f1': 0.7948288565878492, 'dph_accuracy': 0.8509027949542419, 'dph_f1': 0.7949799333378682} +GLUE/rte={'log_accuracy': 0.7653429602888087, 'dph_accuracy': 0.7725631768953068} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.9185779816513762} +GLUE/stsb={'log_pearson': 0.8622043464072165, 'log_spearmanr': 0.8621732463411537, 'dph_pearson': 0.8764532871797044, 'dph_spearmanr': 0.8759012760579783} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6420612813370473, 'dph_accuracy': 0.6483286908077994} +race/high={'log_accuracy': 
0.5975079687047232, 'dph_accuracy': 0.6085192697768763} + +Step=10752 +Train={'loss_dpo': 0.7452446562820114, 'loss_dph': 0.9156605976604624, 'dpo/chosen': 0.1450789462444959, 'dpo/rejected': -0.9333578770147142, 'dpo/accuracy': 0.8321533203125, 'dpo/margin': 1.0784368245640508, 'dph/chosen': 0.4585961289849365, 'dph/rejected': 0.4570644686173182, 'dph/accuracy': 0.886810302734375, 'dph/margin': 2.781100612715818} +Hellaswag/choice={'log_accuracy': 0.5389364668392751, 'dph_accuracy': 0.5329615614419438} +Hellaswag/no_choice={'log_accuracy': 0.44612626966739694, 'dph_accuracy': 0.5584544911372237} +obqa/main={'log_accuracy': 0.526, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.5816890292028414, 'dph_accuracy': 0.5706393054459353} +arc/ARC-Challenge={'log_accuracy': 0.41471571906354515, 'dph_accuracy': 0.43812709030100333} +arc/ARC-Easy={'log_accuracy': 0.5368421052631579, 'dph_accuracy': 0.5701754385964912} +super_glue/boolq={'log_accuracy': 0.791131498470948, 'dph_accuracy': 0.7926605504587156} +piqa/no_choice={'log_accuracy': 0.7138193688792165, 'dph_accuracy': 0.6708378672470077} +GLUE/cola={'log_matthews_correlation': 0.44229349485124575, 'dph_matthews_correlation': 0.49354349027315053} +GLUE/mnli_matched={'log_accuracy': 0.7958227203260316, 'dph_accuracy': 0.8050942435048395} +GLUE/mnli_mismatched={'log_accuracy': 0.8099064279902359, 'dph_accuracy': 0.8091944670463792} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.86013986013986, 'dph_accuracy': 0.8014705882352942, 'dph_f1': 0.8624787775891342} +GLUE/qnli={'log_accuracy': 0.8546586124839831, 'dph_accuracy': 0.8596009518579535} +GLUE/qqp={'log_accuracy': 0.849468216670789, 'log_f1': 0.8011630946157867, 'dph_accuracy': 0.8513727430126143, 'dph_f1': 0.8104476199488974} +GLUE/rte={'log_accuracy': 0.776173285198556, 'dph_accuracy': 0.7725631768953068} +GLUE/sst2={'log_accuracy': 0.9208715596330275, 'dph_accuracy': 0.9185779816513762} +GLUE/stsb={'log_pearson': 0.8573240687930389, 'log_spearmanr': 0.8585299721004369, 'dph_pearson': 0.8776241502838167, 'dph_spearmanr': 0.8781930350011881} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6448467966573816, 'dph_accuracy': 0.6497214484679665} +race/high={'log_accuracy': 0.5972181976238772, 'dph_accuracy': 0.6009852216748769} + +Step=11264 +Train={'loss_dpo': 0.7381142137892311, 'loss_dph': 0.9176163975498639, 'dpo/chosen': 0.17663252297012377, 'dpo/rejected': -0.9085602695467969, 'dpo/accuracy': 0.83380126953125, 'dpo/margin': 1.0851927923777112, 'dph/chosen': 0.4600481659945217, 'dph/rejected': 0.45756823167175753, 'dph/accuracy': 0.889373779296875, 'dph/margin': 2.764136515223072} +Hellaswag/choice={'log_accuracy': 0.5399322844054969, 'dph_accuracy': 0.5454092810197172} +Hellaswag/no_choice={'log_accuracy': 0.44602668791077477, 'dph_accuracy': 0.5593507269468233} +obqa/main={'log_accuracy': 0.534, 'dph_accuracy': 0.562} +winogrande/no_choice={'log_accuracy': 0.5769534333070244, 'dph_accuracy': 0.5824782951854776} +arc/ARC-Challenge={'log_accuracy': 0.41471571906354515, 'dph_accuracy': 0.44481605351170567} +arc/ARC-Easy={'log_accuracy': 0.5280701754385965, 'dph_accuracy': 0.5807017543859649} +super_glue/boolq={'log_accuracy': 0.7932721712538227, 'dph_accuracy': 0.7874617737003058} +piqa/no_choice={'log_accuracy': 0.7170837867247007, 'dph_accuracy': 0.6773667029379761} +GLUE/cola={'log_matthews_correlation': 0.39465924214904374, 'dph_matthews_correlation': 0.45331260928311407} 
+GLUE/mnli_matched={'log_accuracy': 0.7970453387671931, 'dph_accuracy': 0.801018848700968} +GLUE/mnli_mismatched={'log_accuracy': 0.8078722538649309, 'dph_accuracy': 0.8050244100895037} +GLUE/mrpc={'log_accuracy': 0.7965686274509803, 'log_f1': 0.8499095840867993, 'dph_accuracy': 0.7769607843137255, 'dph_f1': 0.8389380530973451} +GLUE/qnli={'log_accuracy': 0.865641588870584, 'dph_accuracy': 0.867289035328574} +GLUE/qqp={'log_accuracy': 0.8505812515458818, 'log_f1': 0.7977095402337342, 'dph_accuracy': 0.8536730150878061, 'dph_f1': 0.8071833648393194} +GLUE/rte={'log_accuracy': 0.776173285198556, 'dph_accuracy': 0.7545126353790613} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.9139908256880734} +GLUE/stsb={'log_pearson': 0.8591736944111781, 'log_spearmanr': 0.8598794033597081, 'dph_pearson': 0.8761374764232328, 'dph_spearmanr': 0.8765831812012714} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6392757660167131, 'dph_accuracy': 0.6559888579387186} +race/high={'log_accuracy': 0.5989568241089539, 'dph_accuracy': 0.6021443059982614} + +Step=11776 +Train={'loss_dpo': 0.7244887221313547, 'loss_dph': 0.9086774949973915, 'dpo/chosen': 0.14853365463454793, 'dpo/rejected': -0.9187837448371283, 'dpo/accuracy': 0.840087890625, 'dpo/margin': 1.0673174008461501, 'dph/chosen': 0.45528175501385704, 'dph/rejected': 0.45339573936507804, 'dph/accuracy': 0.893035888671875, 'dph/margin': 2.8206882230297197} +Hellaswag/choice={'log_accuracy': 0.5355506871141207, 'dph_accuracy': 0.5522804222266481} +Hellaswag/no_choice={'log_accuracy': 0.4448317068313085, 'dph_accuracy': 0.5595498904600678} +obqa/main={'log_accuracy': 0.542, 'dph_accuracy': 0.564} +winogrande/no_choice={'log_accuracy': 0.5722178374112076, 'dph_accuracy': 0.584846093133386} +arc/ARC-Challenge={'log_accuracy': 0.4080267558528428, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.5350877192982456, 'dph_accuracy': 0.5719298245614035} +super_glue/boolq={'log_accuracy': 0.7877675840978593, 'dph_accuracy': 0.7896024464831805} +piqa/no_choice={'log_accuracy': 0.720892274211099, 'dph_accuracy': 0.6719260065288357} +GLUE/cola={'log_matthews_correlation': 0.43591443951045983, 'dph_matthews_correlation': 0.4755590351385187} +GLUE/mnli_matched={'log_accuracy': 0.7960264900662252, 'dph_accuracy': 0.7928680590932247} +GLUE/mnli_mismatched={'log_accuracy': 0.8085842148087876, 'dph_accuracy': 0.798413344182262} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.850909090909091, 'dph_accuracy': 0.7818627450980392, 'dph_f1': 0.8407871198568874} +GLUE/qnli={'log_accuracy': 0.8740618707669778, 'dph_accuracy': 0.8749771187991946} +GLUE/qqp={'log_accuracy': 0.8481573089290131, 'log_f1': 0.7986619002328555, 'dph_accuracy': 0.8542418995795201, 'dph_f1': 0.8055693028473391} +GLUE/rte={'log_accuracy': 0.7689530685920578, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9220183486238532, 'dph_accuracy': 0.9185779816513762} +GLUE/stsb={'log_pearson': 0.8601702720057525, 'log_spearmanr': 0.8598605307852526, 'dph_pearson': 0.8707387014341804, 'dph_spearmanr': 0.8738822739438563} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6441504178272981, 'dph_accuracy': 0.6497214484679665} +race/high={'log_accuracy': 0.6024340770791075, 'dph_accuracy': 0.6117067516661837} + +Step=12288 +Train={'loss_dpo': 0.7170963930984726, 'loss_dph': 0.9078048044611933, 'dpo/chosen': 0.13217101945593868, 
'dpo/rejected': -0.9235607889258972, 'dpo/accuracy': 0.842132568359375, 'dpo/margin': 1.055731808022756, 'dph/chosen': 0.4556393689563265, 'dph/rejected': 0.4521654361014953, 'dph/accuracy': 0.891754150390625, 'dph/margin': 2.8246371439890936} +Hellaswag/choice={'log_accuracy': 0.5397331208922526, 'dph_accuracy': 0.5593507269468233} +Hellaswag/no_choice={'log_accuracy': 0.4457279426409082, 'dph_accuracy': 0.5651264688309102} +obqa/main={'log_accuracy': 0.554, 'dph_accuracy': 0.568} +winogrande/no_choice={'log_accuracy': 0.5682715074980268, 'dph_accuracy': 0.574585635359116} +arc/ARC-Challenge={'log_accuracy': 0.41471571906354515, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5421052631578948, 'dph_accuracy': 0.5649122807017544} +super_glue/boolq={'log_accuracy': 0.7831804281345566, 'dph_accuracy': 0.7877675840978593} +piqa/no_choice={'log_accuracy': 0.7187159956474428, 'dph_accuracy': 0.6773667029379761} +GLUE/cola={'log_matthews_correlation': 0.45705343141128263, 'dph_matthews_correlation': 0.5148368022957431} +GLUE/mnli_matched={'log_accuracy': 0.7913397860417728, 'dph_accuracy': 0.7990830361691289} +GLUE/mnli_mismatched={'log_accuracy': 0.8048209926769732, 'dph_accuracy': 0.8033970707892596} +GLUE/mrpc={'log_accuracy': 0.8063725490196079, 'log_f1': 0.8576576576576576, 'dph_accuracy': 0.7990196078431373, 'dph_f1': 0.8561403508771931} +GLUE/qnli={'log_accuracy': 0.8736957715540912, 'dph_accuracy': 0.8744279699798645} +GLUE/qqp={'log_accuracy': 0.843952510511996, 'log_f1': 0.7763162559829817, 'dph_accuracy': 0.8433588919119466, 'dph_f1': 0.7682681400709869} +GLUE/rte={'log_accuracy': 0.7653429602888087, 'dph_accuracy': 0.7545126353790613} +GLUE/sst2={'log_accuracy': 0.9105504587155964, 'dph_accuracy': 0.9048165137614679} +GLUE/stsb={'log_pearson': 0.862746693138591, 'log_spearmanr': 0.864273230293835, 'dph_pearson': 0.8782334252579084, 'dph_spearmanr': 0.878953066067983} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6399721448467967, 'dph_accuracy': 0.6566852367688022} +race/high={'log_accuracy': 0.5957693422196465, 'dph_accuracy': 0.6050420168067226} + +Step=12800 +Train={'loss_dpo': 0.7217660752212396, 'loss_dph': 0.9049211823148653, 'dpo/chosen': 0.1601009600137786, 'dpo/rejected': -0.905707640028595, 'dpo/accuracy': 0.844146728515625, 'dpo/margin': 1.0658085999457398, 'dph/chosen': 0.45394796747132204, 'dph/rejected': 0.45097321499633836, 'dph/accuracy': 0.892364501953125, 'dph/margin': 2.8445376286108512} +Hellaswag/choice={'log_accuracy': 0.5359490141406095, 'dph_accuracy': 0.5567616012746465} +Hellaswag/no_choice={'log_accuracy': 0.4442342162915754, 'dph_accuracy': 0.5655247958573989} +obqa/main={'log_accuracy': 0.542, 'dph_accuracy': 0.564} +winogrande/no_choice={'log_accuracy': 0.5706393054459353, 'dph_accuracy': 0.5722178374112076} +arc/ARC-Challenge={'log_accuracy': 0.4214046822742475, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5456140350877193, 'dph_accuracy': 0.5666666666666667} +super_glue/boolq={'log_accuracy': 0.791743119266055, 'dph_accuracy': 0.7896024464831805} +piqa/no_choice={'log_accuracy': 0.7225244831338411, 'dph_accuracy': 0.6708378672470077} +GLUE/cola={'log_matthews_correlation': 0.4210344655831915, 'dph_matthews_correlation': 0.4620473859715981} +GLUE/mnli_matched={'log_accuracy': 0.7925624044829342, 'dph_accuracy': 0.8030565461029037} +GLUE/mnli_mismatched={'log_accuracy': 0.8078722538649309, 'dph_accuracy': 0.8074654190398698} 
+GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.85663082437276, 'dph_accuracy': 0.7818627450980392, 'dph_f1': 0.8452173913043478} +GLUE/qnli={'log_accuracy': 0.865641588870584, 'dph_accuracy': 0.867838184147904} +GLUE/qqp={'log_accuracy': 0.8502102399208509, 'log_f1': 0.7998678122934568, 'dph_accuracy': 0.85392035617116, 'dph_f1': 0.8102672834746851} +GLUE/rte={'log_accuracy': 0.7689530685920578, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.911697247706422, 'dph_accuracy': 0.9128440366972477} +GLUE/stsb={'log_pearson': 0.8621088726583224, 'log_spearmanr': 0.862748323223195, 'dph_pearson': 0.8776707270405448, 'dph_spearmanr': 0.878615810311836} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6504178272980501, 'dph_accuracy': 0.6664345403899722} +race/high={'log_accuracy': 0.6070704143726456, 'dph_accuracy': 0.610257896261953} + +Step=13312 +Train={'loss_dpo': 0.6986713696533116, 'loss_dph': 0.9057046670641284, 'dpo/chosen': 0.23849661203328765, 'dpo/rejected': -0.813284079229561, 'dpo/accuracy': 0.84820556640625, 'dpo/margin': 1.0517806915840993, 'dph/chosen': 0.4544883792186738, 'dph/rejected': 0.45121628740162123, 'dph/accuracy': 0.890533447265625, 'dph/margin': 2.84740367813356} +Hellaswag/choice={'log_accuracy': 0.5365465046803426, 'dph_accuracy': 0.5578570005974905} +Hellaswag/no_choice={'log_accuracy': 0.44343756223859787, 'dph_accuracy': 0.5697072296355308} +obqa/main={'log_accuracy': 0.526, 'dph_accuracy': 0.56} +winogrande/no_choice={'log_accuracy': 0.5706393054459353, 'dph_accuracy': 0.5761641673243884} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.43478260869565216} +arc/ARC-Easy={'log_accuracy': 0.543859649122807, 'dph_accuracy': 0.5719298245614035} +super_glue/boolq={'log_accuracy': 0.7883792048929663, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.719804134929271, 'dph_accuracy': 0.675734494015234} +GLUE/cola={'log_matthews_correlation': 0.38541257567946086, 'dph_matthews_correlation': 0.47042279469851556} +GLUE/mnli_matched={'log_accuracy': 0.7916454406520632, 'dph_accuracy': 0.8076413652572593} +GLUE/mnli_mismatched={'log_accuracy': 0.8064483319772172, 'dph_accuracy': 0.8112286411716843} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.8530465949820788, 'dph_accuracy': 0.7941176470588235, 'dph_f1': 0.8536585365853658} +GLUE/qnli={'log_accuracy': 0.8557569101226432, 'dph_accuracy': 0.8660076880834706} +GLUE/qqp={'log_accuracy': 0.8506554538708879, 'log_f1': 0.8041136776537763, 'dph_accuracy': 0.8538708879544893, 'dph_f1': 0.8108471537427162} +GLUE/rte={'log_accuracy': 0.7581227436823105, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9243119266055045, 'dph_accuracy': 0.9231651376146789} +GLUE/stsb={'log_pearson': 0.8646954162145868, 'log_spearmanr': 0.8654324673163962, 'dph_pearson': 0.8810615544770161, 'dph_spearmanr': 0.8800037444295318} +GLUE/wnli={'log_accuracy': 0.4084507042253521, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6469359331476323, 'dph_accuracy': 0.6580779944289693} +race/high={'log_accuracy': 0.6096783541002608, 'dph_accuracy': 0.6172124022022603} + +Step=13824 +Train={'loss_dpo': 0.6971107885765377, 'loss_dph': 0.9000581898872042, 'dpo/chosen': 0.23899577761119417, 'dpo/rejected': -0.8323897029367799, 'dpo/accuracy': 0.8553466796875, 'dpo/margin': 1.0713854785954027, 'dph/chosen': 0.451553307044378, 'dph/rejected': 0.4485048827918945, 'dph/accuracy': 
0.895477294921875, 'dph/margin': 2.8719428094336763} +Hellaswag/choice={'log_accuracy': 0.5465046803425613, 'dph_accuracy': 0.5629356701852221} +Hellaswag/no_choice={'log_accuracy': 0.4464250149372635, 'dph_accuracy': 0.5770762796255726} +obqa/main={'log_accuracy': 0.534, 'dph_accuracy': 0.562} +winogrande/no_choice={'log_accuracy': 0.579321231254933, 'dph_accuracy': 0.5808997632202052} +arc/ARC-Challenge={'log_accuracy': 0.4080267558528428, 'dph_accuracy': 0.44816053511705684} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5719298245614035} +super_glue/boolq={'log_accuracy': 0.7914373088685015, 'dph_accuracy': 0.7920489296636085} +piqa/no_choice={'log_accuracy': 0.720348204570185, 'dph_accuracy': 0.6800870511425462} +GLUE/cola={'log_matthews_correlation': 0.4401936795964168, 'dph_matthews_correlation': 0.5030480107607602} +GLUE/mnli_matched={'log_accuracy': 0.7958227203260316, 'dph_accuracy': 0.804381049414162} +GLUE/mnli_mismatched={'log_accuracy': 0.8089910496338486, 'dph_accuracy': 0.8105166802278275} +GLUE/mrpc={'log_accuracy': 0.7965686274509803, 'log_f1': 0.853615520282187, 'dph_accuracy': 0.7867647058823529, 'dph_f1': 0.8507718696397941} +GLUE/qnli={'log_accuracy': 0.8650924400512539, 'dph_accuracy': 0.8731466227347611} +GLUE/qqp={'log_accuracy': 0.8515211476626268, 'log_f1': 0.8005581580783415, 'dph_accuracy': 0.8563442987880286, 'dph_f1': 0.8083927157561362} +GLUE/rte={'log_accuracy': 0.7833935018050542, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9288990825688074, 'dph_accuracy': 0.9254587155963303} +GLUE/stsb={'log_pearson': 0.8601183699770651, 'log_spearmanr': 0.8625935910564285, 'dph_pearson': 0.8773253860925497, 'dph_spearmanr': 0.8789486196012476} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6504178272980501, 'dph_accuracy': 0.6671309192200557} +race/high={'log_accuracy': 0.6067806432917995, 'dph_accuracy': 0.6140249203129527} + +Step=14336 +Train={'loss_dpo': 0.6900135688047158, 'loss_dph': 0.9042224360746332, 'dpo/chosen': 0.2468563363074736, 'dpo/rejected': -0.7916532023905347, 'dpo/accuracy': 0.8546142578125, 'dpo/margin': 1.0385095387809997, 'dph/chosen': 0.4529036905951216, 'dph/rejected': 0.45131874546495965, 'dph/accuracy': 0.89410400390625, 'dph/margin': 2.842120578759932} +Hellaswag/choice={'log_accuracy': 0.5386377215694085, 'dph_accuracy': 0.5637323242381995} +Hellaswag/no_choice={'log_accuracy': 0.44811790479984065, 'dph_accuracy': 0.5770762796255726} +obqa/main={'log_accuracy': 0.524, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.5753749013417522, 'dph_accuracy': 0.5832675611681136} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.43478260869565216} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5719298245614035} +super_glue/boolq={'log_accuracy': 0.7883792048929663, 'dph_accuracy': 0.789296636085627} +piqa/no_choice={'log_accuracy': 0.720348204570185, 'dph_accuracy': 0.6860718171926007} +GLUE/cola={'log_matthews_correlation': 0.4460777593165835, 'dph_matthews_correlation': 0.4811688648514395} +GLUE/mnli_matched={'log_accuracy': 0.7942944472745798, 'dph_accuracy': 0.8075394803871625} +GLUE/mnli_mismatched={'log_accuracy': 0.8096013018714402, 'dph_accuracy': 0.8163140764849471} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.8535714285714285, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8456140350877194} +GLUE/qnli={'log_accuracy': 0.8744279699798645, 'dph_accuracy': 
0.8788211605345049} +GLUE/qqp={'log_accuracy': 0.8514716794459559, 'log_f1': 0.8052158681760679, 'dph_accuracy': 0.8566905763047242, 'dph_f1': 0.8115649798360869} +GLUE/rte={'log_accuracy': 0.7870036101083032, 'dph_accuracy': 0.7689530685920578} +GLUE/sst2={'log_accuracy': 0.9162844036697247, 'dph_accuracy': 0.9197247706422018} +GLUE/stsb={'log_pearson': 0.8598506379927939, 'log_spearmanr': 0.8609015837577771, 'dph_pearson': 0.8776363156331531, 'dph_spearmanr': 0.8781945801986244} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6532033426183844, 'dph_accuracy': 0.6622562674094707} +race/high={'log_accuracy': 0.6082294986960302, 'dph_accuracy': 0.6154737757171834} + +Step=14848 +Train={'loss_dpo': 0.6909049053792842, 'loss_dph': 0.8959365366026759, 'dpo/chosen': 0.2323296107074384, 'dpo/rejected': -0.8280890735550201, 'dpo/accuracy': 0.856719970703125, 'dpo/margin': 1.0604186852215207, 'dph/chosen': 0.4497795119023067, 'dph/rejected': 0.44615702479495667, 'dph/accuracy': 0.898529052734375, 'dph/margin': 2.89460952358786} +Hellaswag/choice={'log_accuracy': 0.5379406492730532, 'dph_accuracy': 0.5601473809998009} +Hellaswag/no_choice={'log_accuracy': 0.4486158135829516, 'dph_accuracy': 0.5812587134037045} +obqa/main={'log_accuracy': 0.526, 'dph_accuracy': 0.562} +winogrande/no_choice={'log_accuracy': 0.5808997632202052, 'dph_accuracy': 0.5753749013417522} +arc/ARC-Challenge={'log_accuracy': 0.39464882943143814, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.5456140350877193, 'dph_accuracy': 0.5789473684210527} +super_glue/boolq={'log_accuracy': 0.7865443425076453, 'dph_accuracy': 0.7902140672782875} +piqa/no_choice={'log_accuracy': 0.720348204570185, 'dph_accuracy': 0.6828073993471164} +GLUE/cola={'log_matthews_correlation': 0.4803292331534077, 'dph_matthews_correlation': 0.5193606740748844} +GLUE/mnli_matched={'log_accuracy': 0.7920529801324503, 'dph_accuracy': 0.804381049414162} +GLUE/mnli_mismatched={'log_accuracy': 0.8079739625711961, 'dph_accuracy': 0.8141781936533767} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.854014598540146, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8434163701067616} +GLUE/qnli={'log_accuracy': 0.8694856306058942, 'dph_accuracy': 0.8736957715540912} +GLUE/qqp={'log_accuracy': 0.8506059856542172, 'log_f1': 0.7941376959781867, 'dph_accuracy': 0.854291367796191, 'dph_f1': 0.7964057369967169} +GLUE/rte={'log_accuracy': 0.7689530685920578, 'dph_accuracy': 0.776173285198556} +GLUE/sst2={'log_accuracy': 0.9288990825688074, 'dph_accuracy': 0.9197247706422018} +GLUE/stsb={'log_pearson': 0.8672652236885505, 'log_spearmanr': 0.868262616135475, 'dph_pearson': 0.8807658231557323, 'dph_spearmanr': 0.8805954139761193} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6552924791086351, 'dph_accuracy': 0.6601671309192201} +race/high={'log_accuracy': 0.6093885830194147, 'dph_accuracy': 0.6134453781512605} + +Step=15360 +Train={'loss_dpo': 0.6855171182833146, 'loss_dph': 0.8946912774554221, 'dpo/chosen': 0.25823435614041657, 'dpo/rejected': -0.8080729935441013, 'dpo/accuracy': 0.85955810546875, 'dpo/margin': 1.0663073470150266, 'dph/chosen': 0.4487547505268594, 'dph/rejected': 0.44593652615731116, 'dph/accuracy': 0.89794921875, 'dph/margin': 2.9098178637650562} +Hellaswag/choice={'log_accuracy': 0.5386377215694085, 'dph_accuracy': 0.5649273053176658} +Hellaswag/no_choice={'log_accuracy': 0.4486158135829516, 
'dph_accuracy': 0.5812587134037045} +obqa/main={'log_accuracy': 0.528, 'dph_accuracy': 0.564} +winogrande/no_choice={'log_accuracy': 0.5769534333070244, 'dph_accuracy': 0.5777426992896606} +arc/ARC-Challenge={'log_accuracy': 0.3879598662207358, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5508771929824562, 'dph_accuracy': 0.5666666666666667} +super_glue/boolq={'log_accuracy': 0.7859327217125383, 'dph_accuracy': 0.7868501529051988} +piqa/no_choice={'log_accuracy': 0.7225244831338411, 'dph_accuracy': 0.6784548422198041} +GLUE/cola={'log_matthews_correlation': 0.4127412863004641, 'dph_matthews_correlation': 0.4827499680048533} +GLUE/mnli_matched={'log_accuracy': 0.7984717269485482, 'dph_accuracy': 0.8076413652572593} +GLUE/mnli_mismatched={'log_accuracy': 0.8112286411716843, 'dph_accuracy': 0.8137713588283157} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.8503649635036497, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8450704225352113} +GLUE/qnli={'log_accuracy': 0.8764415156507414, 'dph_accuracy': 0.8795533589602782} +GLUE/qqp={'log_accuracy': 0.8433836260202819, 'log_f1': 0.7716058288847207, 'dph_accuracy': 0.8517437546376453, 'dph_f1': 0.7881978798586573} +GLUE/rte={'log_accuracy': 0.7689530685920578, 'dph_accuracy': 0.7689530685920578} +GLUE/sst2={'log_accuracy': 0.9220183486238532, 'dph_accuracy': 0.9208715596330275} +GLUE/stsb={'log_pearson': 0.864355066892048, 'log_spearmanr': 0.8653949842575509, 'dph_pearson': 0.883261255822212, 'dph_spearmanr': 0.88138083166449} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6511142061281338, 'dph_accuracy': 0.6608635097493036} +race/high={'log_accuracy': 0.6128658359895682, 'dph_accuracy': 0.6215589684149522} + +Step=15872 +Train={'loss_dpo': 0.6774998442124343, 'loss_dph': 0.8912709092255682, 'dpo/chosen': 0.2360078220754076, 'dpo/rejected': -0.827214043533786, 'dpo/accuracy': 0.862884521484375, 'dpo/margin': 1.0632218668015412, 'dph/chosen': 0.4470431267254753, 'dph/rejected': 0.44422778169973753, 'dph/accuracy': 0.900299072265625, 'dph/margin': 2.9218123423925135} +Hellaswag/choice={'log_accuracy': 0.54052977494523, 'dph_accuracy': 0.5703047201752639} +Hellaswag/no_choice={'log_accuracy': 0.4495120493925513, 'dph_accuracy': 0.5794662417845051} +obqa/main={'log_accuracy': 0.528, 'dph_accuracy': 0.564} +winogrande/no_choice={'log_accuracy': 0.5816890292028414, 'dph_accuracy': 0.574585635359116} +arc/ARC-Challenge={'log_accuracy': 0.4080267558528428, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5719298245614035} +super_glue/boolq={'log_accuracy': 0.789908256880734, 'dph_accuracy': 0.7871559633027523} +piqa/no_choice={'log_accuracy': 0.720892274211099, 'dph_accuracy': 0.6784548422198041} +GLUE/cola={'log_matthews_correlation': 0.46084815662204015, 'dph_matthews_correlation': 0.49656613880470907} +GLUE/mnli_matched={'log_accuracy': 0.7935812531839022, 'dph_accuracy': 0.8036678553234845} +GLUE/mnli_mismatched={'log_accuracy': 0.8076688364524003, 'dph_accuracy': 0.810720097640358} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.851985559566787, 'dph_accuracy': 0.7916666666666666, 'dph_f1': 0.8484848484848485} +GLUE/qnli={'log_accuracy': 0.8766245652571847, 'dph_accuracy': 0.8815669046311551} +GLUE/qqp={'log_accuracy': 0.8523373732376948, 'log_f1': 0.8001071452487779, 'dph_accuracy': 0.8563690328963641, 'dph_f1': 0.8065816207574193} +GLUE/rte={'log_accuracy': 
0.7689530685920578, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.930045871559633, 'dph_accuracy': 0.9220183486238532} +GLUE/stsb={'log_pearson': 0.8652252568598294, 'log_spearmanr': 0.8669694619389462, 'dph_pearson': 0.8822734101708452, 'dph_spearmanr': 0.8822373508493991} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6511142061281338, 'dph_accuracy': 0.6678272980501393} +race/high={'log_accuracy': 0.6117067516661837, 'dph_accuracy': 0.6154737757171834} + +Step=16384 +Train={'loss_dpo': 0.6766264436737401, 'loss_dph': 0.891623449351755, 'dpo/chosen': 0.22112714854529258, 'dpo/rejected': -0.8333338279903728, 'dpo/accuracy': 0.862762451171875, 'dpo/margin': 1.0544609779990424, 'dph/chosen': 0.4463256937742699, 'dph/rejected': 0.4452977558976272, 'dph/accuracy': 0.898406982421875, 'dph/margin': 2.9261320825025905} +Hellaswag/choice={'log_accuracy': 0.5391356303525194, 'dph_accuracy': 0.5729934276040629} +Hellaswag/no_choice={'log_accuracy': 0.4464250149372635, 'dph_accuracy': 0.5818562039434375} +obqa/main={'log_accuracy': 0.538, 'dph_accuracy': 0.568} +winogrande/no_choice={'log_accuracy': 0.5769534333070244, 'dph_accuracy': 0.5730071033938438} +arc/ARC-Challenge={'log_accuracy': 0.4180602006688963, 'dph_accuracy': 0.4280936454849498} +arc/ARC-Easy={'log_accuracy': 0.5543859649122806, 'dph_accuracy': 0.5719298245614035} +super_glue/boolq={'log_accuracy': 0.789296636085627, 'dph_accuracy': 0.790519877675841} +piqa/no_choice={'log_accuracy': 0.721436343852013, 'dph_accuracy': 0.6828073993471164} +GLUE/cola={'log_matthews_correlation': 0.4463638901728815, 'dph_matthews_correlation': 0.49708380450230455} +GLUE/mnli_matched={'log_accuracy': 0.795109526235354, 'dph_accuracy': 0.8018339276617422} +GLUE/mnli_mismatched={'log_accuracy': 0.809092758340114, 'dph_accuracy': 0.8101098454027664} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.8514492753623188, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8450704225352113} +GLUE/qnli={'log_accuracy': 0.8766245652571847, 'dph_accuracy': 0.8780889621087314} +GLUE/qqp={'log_accuracy': 0.8527578530793964, 'log_f1': 0.803796842556277, 'dph_accuracy': 0.8547860499628989, 'dph_f1': 0.815649825729268} +GLUE/rte={'log_accuracy': 0.7653429602888087, 'dph_accuracy': 0.7689530685920578} +GLUE/sst2={'log_accuracy': 0.9243119266055045, 'dph_accuracy': 0.9254587155963303} +GLUE/stsb={'log_pearson': 0.8645713820573616, 'log_spearmanr': 0.8655826143430518, 'dph_pearson': 0.8784289678675842, 'dph_spearmanr': 0.8786133087551145} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6566852367688022, 'dph_accuracy': 0.6720055710306406} +race/high={'log_accuracy': 0.6143146913937989, 'dph_accuracy': 0.6201101130107215} + +Step=16896 +Train={'loss_dpo': 0.6723573630297324, 'loss_dph': 0.8859960495610721, 'dpo/chosen': 0.22495903943217854, 'dpo/rejected': -0.8365608866379262, 'dpo/accuracy': 0.868682861328125, 'dpo/margin': 1.061519928602138, 'dph/chosen': 0.44456734288542066, 'dph/rejected': 0.4414287069084821, 'dph/accuracy': 0.903961181640625, 'dph/margin': 2.956418908725027} +Hellaswag/choice={'log_accuracy': 0.5450109539932284, 'dph_accuracy': 0.5710017924716192} +Hellaswag/no_choice={'log_accuracy': 0.44692292372037445, 'dph_accuracy': 0.5849432383987253} +obqa/main={'log_accuracy': 0.534, 'dph_accuracy': 0.56} +winogrande/no_choice={'log_accuracy': 0.5753749013417522, 'dph_accuracy': 0.5753749013417522} 
+arc/ARC-Challenge={'log_accuracy': 0.3879598662207358, 'dph_accuracy': 0.431438127090301} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5631578947368421} +super_glue/boolq={'log_accuracy': 0.790519877675841, 'dph_accuracy': 0.7908256880733945} +piqa/no_choice={'log_accuracy': 0.7225244831338411, 'dph_accuracy': 0.6822633297062024} +GLUE/cola={'log_matthews_correlation': 0.45194005217306377, 'dph_matthews_correlation': 0.5059059940111488} +GLUE/mnli_matched={'log_accuracy': 0.7905247070809984, 'dph_accuracy': 0.8055017829852267} +GLUE/mnli_mismatched={'log_accuracy': 0.80553295362083, 'dph_accuracy': 0.815500406834825} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.85663082437276, 'dph_accuracy': 0.7867647058823529, 'dph_f1': 0.8486956521739131} +GLUE/qnli={'log_accuracy': 0.8735127219476478, 'dph_accuracy': 0.8746110195863079} +GLUE/qqp={'log_accuracy': 0.8537966856294831, 'log_f1': 0.8065202448365029, 'dph_accuracy': 0.8565669057630473, 'dph_f1': 0.8139019928757102} +GLUE/rte={'log_accuracy': 0.776173285198556, 'dph_accuracy': 0.7689530685920578} +GLUE/sst2={'log_accuracy': 0.9231651376146789, 'dph_accuracy': 0.9243119266055045} +GLUE/stsb={'log_pearson': 0.8660940145258471, 'log_spearmanr': 0.8674954150483171, 'dph_pearson': 0.8817727649353422, 'dph_spearmanr': 0.8813723309440674} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6608635097493036, 'dph_accuracy': 0.6692200557103064} +race/high={'log_accuracy': 0.6157635467980296, 'dph_accuracy': 0.6203998840915677} + +Step=17408 +Train={'loss_dpo': 0.664124875649577, 'loss_dph': 0.8855088593554683, 'dpo/chosen': 0.26259544904843324, 'dpo/rejected': -0.7783801829364165, 'dpo/accuracy': 0.870635986328125, 'dpo/margin': 1.0409756332865072, 'dph/chosen': 0.444399799045641, 'dph/rejected': 0.441109060018789, 'dph/accuracy': 0.902374267578125, 'dph/margin': 2.961787544307299} +Hellaswag/choice={'log_accuracy': 0.545807608046206, 'dph_accuracy': 0.5734913363871739} +Hellaswag/no_choice={'log_accuracy': 0.44682334196375223, 'dph_accuracy': 0.5872336188010356} +obqa/main={'log_accuracy': 0.518, 'dph_accuracy': 0.554} +winogrande/no_choice={'log_accuracy': 0.5730071033938438, 'dph_accuracy': 0.580110497237569} +arc/ARC-Challenge={'log_accuracy': 0.39464882943143814, 'dph_accuracy': 0.42474916387959866} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5736842105263158} +super_glue/boolq={'log_accuracy': 0.7896024464831805, 'dph_accuracy': 0.7877675840978593} +piqa/no_choice={'log_accuracy': 0.7230685527747551, 'dph_accuracy': 0.6784548422198041} +GLUE/cola={'log_matthews_correlation': 0.4627562379635548, 'dph_matthews_correlation': 0.4997839337066287} +GLUE/mnli_matched={'log_accuracy': 0.7983698420784513, 'dph_accuracy': 0.8096790626591951} +GLUE/mnli_mismatched={'log_accuracy': 0.811126932465419, 'dph_accuracy': 0.8164157851912124} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.85663082437276, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8461538461538461} +GLUE/qnli={'log_accuracy': 0.8744279699798645, 'dph_accuracy': 0.8801025077796083} +GLUE/qqp={'log_accuracy': 0.8528320554044027, 'log_f1': 0.8004427153206333, 'dph_accuracy': 0.8580756863715063, 'dph_f1': 0.810288963829928} +GLUE/rte={'log_accuracy': 0.7725631768953068, 'dph_accuracy': 0.7581227436823105} +GLUE/sst2={'log_accuracy': 0.9243119266055045, 'dph_accuracy': 0.9220183486238532} +GLUE/stsb={'log_pearson': 0.8634136031003787, 'log_spearmanr': 
0.8652414693256705, 'dph_pearson': 0.8811011146509332, 'dph_spearmanr': 0.8818357819743232} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6552924791086351, 'dph_accuracy': 0.6796657381615598} +race/high={'log_accuracy': 0.6177919443639525, 'dph_accuracy': 0.6230078238191828} + +Step=17920 +Train={'loss_dpo': 0.6583122847368941, 'loss_dph': 0.8863598067691782, 'dpo/chosen': 0.27282268357214434, 'dpo/rejected': -0.7548155797435356, 'dpo/accuracy': 0.870086669921875, 'dpo/margin': 1.027638263348308, 'dph/chosen': 0.444506544386968, 'dph/rejected': 0.44185326287697535, 'dph/accuracy': 0.901885986328125, 'dph/margin': 2.9608035474957433} +Hellaswag/choice={'log_accuracy': 0.5499900418243377, 'dph_accuracy': 0.5784704242182832} +Hellaswag/no_choice={'log_accuracy': 0.4479187412865963, 'dph_accuracy': 0.5908185620394344} +obqa/main={'log_accuracy': 0.526, 'dph_accuracy': 0.55} +winogrande/no_choice={'log_accuracy': 0.5785319652722968, 'dph_accuracy': 0.5824782951854776} +arc/ARC-Challenge={'log_accuracy': 0.391304347826087, 'dph_accuracy': 0.43478260869565216} +arc/ARC-Easy={'log_accuracy': 0.5491228070175439, 'dph_accuracy': 0.5701754385964912} +super_glue/boolq={'log_accuracy': 0.7914373088685015, 'dph_accuracy': 0.7886850152905199} +piqa/no_choice={'log_accuracy': 0.720892274211099, 'dph_accuracy': 0.6811751904243744} +GLUE/cola={'log_matthews_correlation': 0.4679712798937498, 'dph_matthews_correlation': 0.5088644953584825} +GLUE/mnli_matched={'log_accuracy': 0.7954151808456444, 'dph_accuracy': 0.8116148751910341} +GLUE/mnli_mismatched={'log_accuracy': 0.8108218063466233, 'dph_accuracy': 0.8136696501220505} +GLUE/mrpc={'log_accuracy': 0.8014705882352942, 'log_f1': 0.8566371681415929, 'dph_accuracy': 0.7941176470588235, 'dph_f1': 0.8546712802768167} +GLUE/qnli={'log_accuracy': 0.8720483250961011, 'dph_accuracy': 0.8782720117151748} +GLUE/qqp={'log_accuracy': 0.8540934949295078, 'log_f1': 0.8075555410563403, 'dph_accuracy': 0.856740044521395, 'dph_f1': 0.814513546403638} +GLUE/rte={'log_accuracy': 0.779783393501805, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9288990825688074, 'dph_accuracy': 0.9208715596330275} +GLUE/stsb={'log_pearson': 0.8641665180541915, 'log_spearmanr': 0.8654571855789003, 'dph_pearson': 0.8825993219790554, 'dph_spearmanr': 0.8820932402621728} +GLUE/wnli={'log_accuracy': 0.39436619718309857, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.658774373259053, 'dph_accuracy': 0.6678272980501393} +race/high={'log_accuracy': 0.6151840046363373, 'dph_accuracy': 0.6241669081425674} + +Step=18432 +Train={'loss_dpo': 0.658576072048163, 'loss_dph': 0.8866614753787871, 'dpo/chosen': 0.23191315842063887, 'dpo/rejected': -0.7967832262747834, 'dpo/accuracy': 0.87017822265625, 'dpo/margin': 1.028696386363663, 'dph/chosen': 0.44502880903019104, 'dph/rejected': 0.4416326663194923, 'dph/accuracy': 0.90093994140625, 'dph/margin': 2.9561701819475275} +Hellaswag/choice={'log_accuracy': 0.5563632742481578, 'dph_accuracy': 0.5797649870543716} +Hellaswag/no_choice={'log_accuracy': 0.44901414060944034, 'dph_accuracy': 0.5909181437960566} +obqa/main={'log_accuracy': 0.532, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.5777426992896606, 'dph_accuracy': 0.5808997632202052} +arc/ARC-Challenge={'log_accuracy': 0.40468227424749165, 'dph_accuracy': 0.44481605351170567} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5701754385964912} 
+super_glue/boolq={'log_accuracy': 0.7908256880733945, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.720892274211099, 'dph_accuracy': 0.6806311207834603} +GLUE/cola={'log_matthews_correlation': 0.41511377999074933, 'dph_matthews_correlation': 0.47053580111209176} +GLUE/mnli_matched={'log_accuracy': 0.7981660723382578, 'dph_accuracy': 0.8123280692817116} +GLUE/mnli_mismatched={'log_accuracy': 0.8097030105777054, 'dph_accuracy': 0.8151952807160293} +GLUE/mrpc={'log_accuracy': 0.8088235294117647, 'log_f1': 0.8617021276595744, 'dph_accuracy': 0.7916666666666666, 'dph_f1': 0.8537005163511187} +GLUE/qnli={'log_accuracy': 0.8742449203734212, 'dph_accuracy': 0.8822991030569284} +GLUE/qqp={'log_accuracy': 0.8550333910462528, 'log_f1': 0.8096892554469592, 'dph_accuracy': 0.856863715063072, 'dph_f1': 0.8156712852365027} +GLUE/rte={'log_accuracy': 0.7833935018050542, 'dph_accuracy': 0.7581227436823105} +GLUE/sst2={'log_accuracy': 0.9220183486238532, 'dph_accuracy': 0.9243119266055045} +GLUE/stsb={'log_pearson': 0.866870247996846, 'log_spearmanr': 0.8680217509682356, 'dph_pearson': 0.8829554808355286, 'dph_spearmanr': 0.8820551153266347} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6601671309192201, 'dph_accuracy': 0.6733983286908078} +race/high={'log_accuracy': 0.6140249203129527, 'dph_accuracy': 0.6221385105766445} + +Step=18944 +Train={'loss_dpo': 0.6548128774302313, 'loss_dph': 0.8862247159995604, 'dpo/chosen': 0.24321239308380882, 'dpo/rejected': -0.7828571538391316, 'dpo/accuracy': 0.869384765625, 'dpo/margin': 1.026069547433508, 'dph/chosen': 0.4447461806994397, 'dph/rejected': 0.44147853514004964, 'dph/accuracy': 0.90087890625, 'dph/margin': 2.958801340602804} +Hellaswag/choice={'log_accuracy': 0.5549691296554471, 'dph_accuracy': 0.5808603863772157} +Hellaswag/no_choice={'log_accuracy': 0.4487153953395738, 'dph_accuracy': 0.589523999203346} +obqa/main={'log_accuracy': 0.536, 'dph_accuracy': 0.556} +winogrande/no_choice={'log_accuracy': 0.574585635359116, 'dph_accuracy': 0.585635359116022} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.44481605351170567} +arc/ARC-Easy={'log_accuracy': 0.5385964912280702, 'dph_accuracy': 0.5596491228070175} +super_glue/boolq={'log_accuracy': 0.790519877675841, 'dph_accuracy': 0.7859327217125383} +piqa/no_choice={'log_accuracy': 0.720892274211099, 'dph_accuracy': 0.6828073993471164} +GLUE/cola={'log_matthews_correlation': 0.45418624291790166, 'dph_matthews_correlation': 0.4906666052000313} +GLUE/mnli_matched={'log_accuracy': 0.7982679572083545, 'dph_accuracy': 0.8109016811003565} +GLUE/mnli_mismatched={'log_accuracy': 0.8118388934092758, 'dph_accuracy': 0.8157038242473555} +GLUE/mrpc={'log_accuracy': 0.8088235294117647, 'log_f1': 0.8607142857142858, 'dph_accuracy': 0.7941176470588235, 'dph_f1': 0.8556701030927835} +GLUE/qnli={'log_accuracy': 0.8733296723412045, 'dph_accuracy': 0.881017755811825} +GLUE/qqp={'log_accuracy': 0.8555033391046253, 'log_f1': 0.807537721552349, 'dph_accuracy': 0.8582240910215186, 'dph_f1': 0.8148100284311192} +GLUE/rte={'log_accuracy': 0.779783393501805, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9254587155963303, 'dph_accuracy': 0.9197247706422018} +GLUE/stsb={'log_pearson': 0.8632506871456544, 'log_spearmanr': 0.8640390114098447, 'dph_pearson': 0.8820216737495873, 'dph_spearmanr': 0.8812202766868213} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4788732394366197} 
+race/middle={'log_accuracy': 0.6559888579387186, 'dph_accuracy': 0.6685236768802229} +race/high={'log_accuracy': 0.6128658359895682, 'dph_accuracy': 0.6279339321935671} + +Step=19456 +Train={'loss_dpo': 0.6517377040290739, 'loss_dph': 0.8784639651566977, 'dpo/chosen': 0.24363172495907293, 'dpo/rejected': -0.777479250878514, 'dpo/accuracy': 0.873779296875, 'dpo/margin': 1.0211109759220562, 'dph/chosen': 0.4402039164197049, 'dph/rejected': 0.4382600487297168, 'dph/accuracy': 0.90594482421875, 'dph/margin': 3.0017086548323277} +Hellaswag/choice={'log_accuracy': 0.5556662019518024, 'dph_accuracy': 0.5770762796255726} +Hellaswag/no_choice={'log_accuracy': 0.44722166899024096, 'dph_accuracy': 0.589523999203346} +obqa/main={'log_accuracy': 0.536, 'dph_accuracy': 0.56} +winogrande/no_choice={'log_accuracy': 0.5808997632202052, 'dph_accuracy': 0.5824782951854776} +arc/ARC-Challenge={'log_accuracy': 0.4013377926421405, 'dph_accuracy': 0.44481605351170567} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5631578947368421} +super_glue/boolq={'log_accuracy': 0.791743119266055, 'dph_accuracy': 0.789908256880734} +piqa/no_choice={'log_accuracy': 0.7187159956474428, 'dph_accuracy': 0.6849836779107725} +GLUE/cola={'log_matthews_correlation': 0.46973490818289354, 'dph_matthews_correlation': 0.49301987111948775} +GLUE/mnli_matched={'log_accuracy': 0.7982679572083545, 'dph_accuracy': 0.8096790626591951} +GLUE/mnli_mismatched={'log_accuracy': 0.8127542717656632, 'dph_accuracy': 0.8162123677786819} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.855595667870036, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8461538461538461} +GLUE/qnli={'log_accuracy': 0.8744279699798645, 'dph_accuracy': 0.8824821526633718} +GLUE/qqp={'log_accuracy': 0.8546129112045511, 'log_f1': 0.8048343183478319, 'dph_accuracy': 0.8581498886965125, 'dph_f1': 0.813489869589255} +GLUE/rte={'log_accuracy': 0.7725631768953068, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9254587155963303, 'dph_accuracy': 0.9243119266055045} +GLUE/stsb={'log_pearson': 0.8602414360304704, 'log_spearmanr': 0.8621373551011773, 'dph_pearson': 0.8790680816847011, 'dph_spearmanr': 0.8794207872318629} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6629526462395543, 'dph_accuracy': 0.6706128133704735} +race/high={'log_accuracy': 0.6169226311214141, 'dph_accuracy': 0.6227180527383367} + +Step=19968 +Train={'loss_dpo': 0.6457119183469331, 'loss_dph': 0.884433316430659, 'dpo/chosen': 0.24245759178973003, 'dpo/rejected': -0.7701535203195817, 'dpo/accuracy': 0.873077392578125, 'dpo/margin': 1.012611114361789, 'dph/chosen': 0.4435405313197407, 'dph/rejected': 0.4408927847252926, 'dph/accuracy': 0.901702880859375, 'dph/margin': 2.969079527509166} +Hellaswag/choice={'log_accuracy': 0.5561641107349133, 'dph_accuracy': 0.5777733519219279} +Hellaswag/no_choice={'log_accuracy': 0.4473212507468632, 'dph_accuracy': 0.5892252539334794} +obqa/main={'log_accuracy': 0.536, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.574585635359116, 'dph_accuracy': 0.5824782951854776} +arc/ARC-Challenge={'log_accuracy': 0.4080267558528428, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5666666666666667} +super_glue/boolq={'log_accuracy': 0.7929663608562691, 'dph_accuracy': 0.789908256880734} +piqa/no_choice={'log_accuracy': 0.719804134929271, 'dph_accuracy': 0.6877040261153428} 
+GLUE/cola={'log_matthews_correlation': 0.4399369569903031, 'dph_matthews_correlation': 0.49890831682355663} +GLUE/mnli_matched={'log_accuracy': 0.8007131940906775, 'dph_accuracy': 0.8103922567498727} +GLUE/mnli_mismatched={'log_accuracy': 0.8134662327095199, 'dph_accuracy': 0.8140764849471115} +GLUE/mrpc={'log_accuracy': 0.8014705882352942, 'log_f1': 0.854054054054054, 'dph_accuracy': 0.7867647058823529, 'dph_f1': 0.8492201039861352} +GLUE/qnli={'log_accuracy': 0.8780889621087314, 'dph_accuracy': 0.8793703093538349} +GLUE/qqp={'log_accuracy': 0.8548849863962404, 'log_f1': 0.8048301786367718, 'dph_accuracy': 0.8580756863715063, 'dph_f1': 0.8128994391548194} +GLUE/rte={'log_accuracy': 0.779783393501805, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.9277522935779816, 'dph_accuracy': 0.9197247706422018} +GLUE/stsb={'log_pearson': 0.8603161720918971, 'log_spearmanr': 0.8614358960825199, 'dph_pearson': 0.8780301613250111, 'dph_spearmanr': 0.878495282771374} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6608635097493036, 'dph_accuracy': 0.6775766016713092} +race/high={'log_accuracy': 0.6192407997681831, 'dph_accuracy': 0.6224282816574905} + +Step=20480 +Train={'loss_dpo': 0.6450484140223125, 'loss_dph': 0.8890220208704704, 'dpo/chosen': 0.22636930366206798, 'dpo/rejected': -0.780926240937788, 'dpo/accuracy': 0.87042236328125, 'dpo/margin': 1.007295546682144, 'dph/chosen': 0.44527264442149317, 'dph/rejected': 0.4437493767181877, 'dph/accuracy': 0.902313232421875, 'dph/margin': 2.935067816535593} +Hellaswag/choice={'log_accuracy': 0.5568611830312686, 'dph_accuracy': 0.5785700059749054} +Hellaswag/no_choice={'log_accuracy': 0.4482174865564629, 'dph_accuracy': 0.5914160525791675} +obqa/main={'log_accuracy': 0.532, 'dph_accuracy': 0.566} +winogrande/no_choice={'log_accuracy': 0.5761641673243884, 'dph_accuracy': 0.579321231254933} +arc/ARC-Challenge={'log_accuracy': 0.39464882943143814, 'dph_accuracy': 0.43812709030100333} +arc/ARC-Easy={'log_accuracy': 0.5385964912280702, 'dph_accuracy': 0.5649122807017544} +super_glue/boolq={'log_accuracy': 0.791131498470948, 'dph_accuracy': 0.7896024464831805} +piqa/no_choice={'log_accuracy': 0.7225244831338411, 'dph_accuracy': 0.6871599564744287} +GLUE/cola={'log_matthews_correlation': 0.46424552118769546, 'dph_matthews_correlation': 0.49234924434654215} +GLUE/mnli_matched={'log_accuracy': 0.7961283749363219, 'dph_accuracy': 0.8085583290881304} +GLUE/mnli_mismatched={'log_accuracy': 0.8087876322213181, 'dph_accuracy': 0.8149918633034988} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.8561151079136691, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8466898954703833} +GLUE/qnli={'log_accuracy': 0.8760754164378547, 'dph_accuracy': 0.8824821526633718} +GLUE/qqp={'log_accuracy': 0.8541924313628494, 'log_f1': 0.8003522200020319, 'dph_accuracy': 0.8577046747464754, 'dph_f1': 0.8054118044985625} +GLUE/rte={'log_accuracy': 0.779783393501805, 'dph_accuracy': 0.7653429602888087} +GLUE/sst2={'log_accuracy': 0.926605504587156, 'dph_accuracy': 0.9231651376146789} +GLUE/stsb={'log_pearson': 0.8626621377592061, 'log_spearmanr': 0.8633341379687846, 'dph_pearson': 0.8770299209132116, 'dph_spearmanr': 0.8772271676866188} +GLUE/wnli={'log_accuracy': 0.38028169014084506, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6594707520891365, 'dph_accuracy': 0.6761838440111421} +race/high={'log_accuracy': 0.6195305708490293, 'dph_accuracy': 0.6244566792234135} + 
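A note on reading the Train entries: dpo/margin appears to track dpo/chosen - dpo/rejected (the gap between the mean implicit rewards of chosen and rejected completions), with dpo/accuracy the fraction of pairs whose chosen reward exceeds the rejected one. That interpretation is inferred from the logged numbers, not stated anywhere in the log. A minimal Python check against the Step=20480 entry above:

    # Sanity-check dpo/margin ~ dpo/chosen - dpo/rejected for one Train entry.
    # The field interpretation is an assumption inferred from the logged values.
    import ast
    train = ast.literal_eval(
        "{'dpo/chosen': 0.22636930366206798, 'dpo/rejected': -0.780926240937788, "
        "'dpo/margin': 1.007295546682144}"
    )
    gap = train["dpo/chosen"] - train["dpo/rejected"]
    print(gap, train["dpo/margin"])  # agree to ~2e-9; the tiny drift would be
    # consistent with margins averaged per-sample rather than taken from means.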
+Step=20992 +Train={'loss_dpo': 0.6407839304883964, 'loss_dph': 0.8811599718901562, 'dpo/chosen': 0.23515735188685483, 'dpo/rejected': -0.763739437748427, 'dpo/accuracy': 0.87445068359375, 'dpo/margin': 0.9988967898334522, 'dph/chosen': 0.44194202753715217, 'dph/rejected': 0.43921794404741377, 'dph/accuracy': 0.90057373046875, 'dph/margin': 2.98597111334675} +Hellaswag/choice={'log_accuracy': 0.5531766580362477, 'dph_accuracy': 0.5791674965146385} +Hellaswag/no_choice={'log_accuracy': 0.4478191595299741, 'dph_accuracy': 0.5942043417645887} +obqa/main={'log_accuracy': 0.538, 'dph_accuracy': 0.554} +winogrande/no_choice={'log_accuracy': 0.5753749013417522, 'dph_accuracy': 0.5872138910812944} +arc/ARC-Challenge={'log_accuracy': 0.40468227424749165, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.5473684210526316, 'dph_accuracy': 0.5684210526315789} +super_glue/boolq={'log_accuracy': 0.791131498470948, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.7236126224156693, 'dph_accuracy': 0.6828073993471164} +GLUE/cola={'log_matthews_correlation': 0.4309672444395815, 'dph_matthews_correlation': 0.5003634450667074} +GLUE/mnli_matched={'log_accuracy': 0.7971472236372898, 'dph_accuracy': 0.8058074375955171} +GLUE/mnli_mismatched={'log_accuracy': 0.809499593165175, 'dph_accuracy': 0.8109235150528885} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.85663082437276, 'dph_accuracy': 0.7867647058823529, 'dph_f1': 0.8486956521739131} +GLUE/qnli={'log_accuracy': 0.8780889621087314, 'dph_accuracy': 0.8813838550247117} +GLUE/qqp={'log_accuracy': 0.8557754142963147, 'log_f1': 0.806298375577185, 'dph_accuracy': 0.8582735592381895, 'dph_f1': 0.8115255575291099} +GLUE/rte={'log_accuracy': 0.776173285198556, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9254587155963303, 'dph_accuracy': 0.9220183486238532} +GLUE/stsb={'log_pearson': 0.8601552416775728, 'log_spearmanr': 0.8608547214876128, 'dph_pearson': 0.878718511799762, 'dph_spearmanr': 0.8777637380483234} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6532033426183844, 'dph_accuracy': 0.6845403899721448} +race/high={'log_accuracy': 0.6206896551724138, 'dph_accuracy': 0.6230078238191828} + +Step=21504 +Train={'loss_dpo': 0.6380187210452277, 'loss_dph': 0.8783133569959318, 'dpo/chosen': 0.25222859967311706, 'dpo/rejected': -0.7432450793517091, 'dpo/accuracy': 0.8763427734375, 'dpo/margin': 0.9954736772178876, 'dph/chosen': 0.4404897607892053, 'dph/rejected': 0.4378235967378714, 'dph/accuracy': 0.904052734375, 'dph/margin': 2.9919944874418434} +Hellaswag/choice={'log_accuracy': 0.5566620195180243, 'dph_accuracy': 0.5791674965146385} +Hellaswag/no_choice={'log_accuracy': 0.4487153953395738, 'dph_accuracy': 0.5889265086636128} +obqa/main={'log_accuracy': 0.53, 'dph_accuracy': 0.556} +winogrande/no_choice={'log_accuracy': 0.5769534333070244, 'dph_accuracy': 0.5808997632202052} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.43478260869565216} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5631578947368421} +super_glue/boolq={'log_accuracy': 0.7886850152905199, 'dph_accuracy': 0.7883792048929663} +piqa/no_choice={'log_accuracy': 0.7236126224156693, 'dph_accuracy': 0.6811751904243744} +GLUE/cola={'log_matthews_correlation': 0.44581075604527515, 'dph_matthews_correlation': 0.5008706778439593} +GLUE/mnli_matched={'log_accuracy': 0.8018339276617422, 'dph_accuracy': 0.8115129903209374} 
+GLUE/mnli_mismatched={'log_accuracy': 0.8120423108218063, 'dph_accuracy': 0.8156021155410903} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.85663082437276, 'dph_accuracy': 0.7916666666666666, 'dph_f1': 0.853195164075993} +GLUE/qnli={'log_accuracy': 0.8757093172249679, 'dph_accuracy': 0.8819330038440417} +GLUE/qqp={'log_accuracy': 0.8548602522879051, 'log_f1': 0.8097523019063676, 'dph_accuracy': 0.8571852584714321, 'dph_f1': 0.8172553487783264} +GLUE/rte={'log_accuracy': 0.7725631768953068, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9243119266055045, 'dph_accuracy': 0.9220183486238532} +GLUE/stsb={'log_pearson': 0.8620314293832986, 'log_spearmanr': 0.8635614306628673, 'dph_pearson': 0.8807554391411834, 'dph_spearmanr': 0.8809608002024231} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4788732394366197} +race/middle={'log_accuracy': 0.6601671309192201, 'dph_accuracy': 0.6824512534818942} +race/high={'log_accuracy': 0.6169226311214141, 'dph_accuracy': 0.6244566792234135} + +Step=22016 +Train={'loss_dpo': 0.6322139124240493, 'loss_dph': 0.8736600039410405, 'dpo/chosen': 0.2296403573380985, 'dpo/rejected': -0.766866327407115, 'dpo/accuracy': 0.88037109375, 'dpo/margin': 0.9965066846343689, 'dph/chosen': 0.4379458367911866, 'dph/rejected': 0.4357141668369877, 'dph/accuracy': 0.90936279296875, 'dph/margin': 3.0285963225760497} +Hellaswag/choice={'log_accuracy': 0.5468034256124278, 'dph_accuracy': 0.576777534355706} +Hellaswag/no_choice={'log_accuracy': 0.4484166500697072, 'dph_accuracy': 0.5915156343357897} +obqa/main={'log_accuracy': 0.524, 'dph_accuracy': 0.558} +winogrande/no_choice={'log_accuracy': 0.5777426992896606, 'dph_accuracy': 0.584846093133386} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.44481605351170567} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5649122807017544} +super_glue/boolq={'log_accuracy': 0.7914373088685015, 'dph_accuracy': 0.7877675840978593} +piqa/no_choice={'log_accuracy': 0.720348204570185, 'dph_accuracy': 0.6806311207834603} +GLUE/cola={'log_matthews_correlation': 0.4522326299878879, 'dph_matthews_correlation': 0.4955431536029328} +GLUE/mnli_matched={'log_accuracy': 0.7944982170147733, 'dph_accuracy': 0.8095771777890983} +GLUE/mnli_mismatched={'log_accuracy': 0.8104149715215623, 'dph_accuracy': 0.8158055329536208} +GLUE/mrpc={'log_accuracy': 0.8014705882352942, 'log_f1': 0.854054054054054, 'dph_accuracy': 0.7843137254901961, 'dph_f1': 0.8466898954703833} +GLUE/qnli={'log_accuracy': 0.8802855573860516, 'dph_accuracy': 0.8806516565989383} +GLUE/qqp={'log_accuracy': 0.855107593371259, 'log_f1': 0.8050063244790626, 'dph_accuracy': 0.8589166460549097, 'dph_f1': 0.8146969007861737} +GLUE/rte={'log_accuracy': 0.7725631768953068, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.926605504587156, 'dph_accuracy': 0.9254587155963303} +GLUE/stsb={'log_pearson': 0.8597869032416874, 'log_spearmanr': 0.8615300959701581, 'dph_pearson': 0.8781187562463446, 'dph_spearmanr': 0.8779595773300083} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.49295774647887325} +race/middle={'log_accuracy': 0.6552924791086351, 'dph_accuracy': 0.674791086350975} +race/high={'log_accuracy': 0.6186612576064908, 'dph_accuracy': 0.6290930165169516} + +Step=22528 +Train={'loss_dpo': 0.6334172590868548, 'loss_dph': 0.875478174421005, 'dpo/chosen': 0.21187867486708, 'dpo/rejected': -0.7755621188198347, 'dpo/accuracy': 0.879119873046875, 'dpo/margin': 
0.9874407933966722, 'dph/chosen': 0.43946526075160364, 'dph/rejected': 0.436012913894956, 'dph/accuracy': 0.907470703125, 'dph/margin': 3.0173863329109736} +Hellaswag/choice={'log_accuracy': 0.5546703843855806, 'dph_accuracy': 0.5809599681338379} +Hellaswag/no_choice={'log_accuracy': 0.4487153953395738, 'dph_accuracy': 0.5912168890659231} +obqa/main={'log_accuracy': 0.538, 'dph_accuracy': 0.566} +winogrande/no_choice={'log_accuracy': 0.574585635359116, 'dph_accuracy': 0.5864246250986582} +arc/ARC-Challenge={'log_accuracy': 0.3979933110367893, 'dph_accuracy': 0.4414715719063545} +arc/ARC-Easy={'log_accuracy': 0.5403508771929825, 'dph_accuracy': 0.5666666666666667} +super_glue/boolq={'log_accuracy': 0.790519877675841, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.720348204570185, 'dph_accuracy': 0.690424374319913} +GLUE/cola={'log_matthews_correlation': 0.4539429381148277, 'dph_matthews_correlation': 0.48745245386575137} +GLUE/mnli_matched={'log_accuracy': 0.7965359144167091, 'dph_accuracy': 0.8105960264900662} +GLUE/mnli_mismatched={'log_accuracy': 0.8102115541090318, 'dph_accuracy': 0.8153986981285598} +GLUE/mrpc={'log_accuracy': 0.7990196078431373, 'log_f1': 0.851985559566787, 'dph_accuracy': 0.7867647058823529, 'dph_f1': 0.8492201039861352} +GLUE/qnli={'log_accuracy': 0.8755262676185246, 'dph_accuracy': 0.8780889621087314} +GLUE/qqp={'log_accuracy': 0.8548602522879051, 'log_f1': 0.8013406459475929, 'dph_accuracy': 0.8595349987632945, 'dph_f1': 0.8124566559889039} +GLUE/rte={'log_accuracy': 0.776173285198556, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.926605504587156, 'dph_accuracy': 0.9243119266055045} +GLUE/stsb={'log_pearson': 0.863831245595338, 'log_spearmanr': 0.8654182372212466, 'dph_pearson': 0.8803080155376567, 'dph_spearmanr': 0.880919440516378} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4647887323943662} +race/middle={'log_accuracy': 0.6608635097493036, 'dph_accuracy': 0.6817548746518106} +race/high={'log_accuracy': 0.6235873659808752, 'dph_accuracy': 0.6253259924659519} + +Step=23040 +Train={'loss_dpo': 0.6311605380760739, 'loss_dph': 0.8796286850847537, 'dpo/chosen': 0.2072451079548614, 'dpo/rejected': -0.7832649466899966, 'dpo/accuracy': 0.876312255859375, 'dpo/margin': 0.9905100523901638, 'dph/chosen': 0.4418665583798429, 'dph/rejected': 0.4377621266830829, 'dph/accuracy': 0.904296875, 'dph/margin': 2.9959044574352447} +Hellaswag/choice={'log_accuracy': 0.5528779127663812, 'dph_accuracy': 0.5803624775941048} +Hellaswag/no_choice={'log_accuracy': 0.4486158135829516, 'dph_accuracy': 0.5935072694682334} +obqa/main={'log_accuracy': 0.534, 'dph_accuracy': 0.556} +winogrande/no_choice={'log_accuracy': 0.5737963693764798, 'dph_accuracy': 0.590370955011839} +arc/ARC-Challenge={'log_accuracy': 0.4013377926421405, 'dph_accuracy': 0.43478260869565216} +arc/ARC-Easy={'log_accuracy': 0.5350877192982456, 'dph_accuracy': 0.5631578947368421} +super_glue/boolq={'log_accuracy': 0.7896024464831805, 'dph_accuracy': 0.7880733944954128} +piqa/no_choice={'log_accuracy': 0.7236126224156693, 'dph_accuracy': 0.6877040261153428} +GLUE/cola={'log_matthews_correlation': 0.44836079619347136, 'dph_matthews_correlation': 0.48864095826977055} +GLUE/mnli_matched={'log_accuracy': 0.7927661742231279, 'dph_accuracy': 0.8029546612328069} +GLUE/mnli_mismatched={'log_accuracy': 0.8050244100895037, 'dph_accuracy': 0.811126932465419} +GLUE/mrpc={'log_accuracy': 0.803921568627451, 'log_f1': 0.8561151079136691, 'dph_accuracy': 0.7892156862745098, 
'dph_f1': 0.8512110726643599} +GLUE/qnli={'log_accuracy': 0.8775398132894014, 'dph_accuracy': 0.8817499542375984} +GLUE/qqp={'log_accuracy': 0.8556270096463022, 'log_f1': 0.8074423514663676, 'dph_accuracy': 0.8590155824882513, 'dph_f1': 0.815342749773228} +GLUE/rte={'log_accuracy': 0.7725631768953068, 'dph_accuracy': 0.7617328519855595} +GLUE/sst2={'log_accuracy': 0.9254587155963303, 'dph_accuracy': 0.9231651376146789} +GLUE/stsb={'log_pearson': 0.863737403575195, 'log_spearmanr': 0.8648810559078739, 'dph_pearson': 0.8800226409638593, 'dph_spearmanr': 0.8798361605898767} +GLUE/wnli={'log_accuracy': 0.36619718309859156, 'dph_accuracy': 0.4507042253521127} +race/middle={'log_accuracy': 0.6622562674094707, 'dph_accuracy': 0.6740947075208914} +race/high={'log_accuracy': 0.6172124022022603, 'dph_accuracy': 0.6282237032744132} +
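To chart these curves rather than eyeball them, the log's regular shape (a Step=N line followed by Name={...} entries whose payloads are plain Python dict literals) makes it straightforward to flatten. A minimal loader sketch, assuming one entry per line; parse_outputs_log is a hypothetical helper name, and the '+' handling merely tolerates reading straight from this patch instead of the bare log file:

    # Flatten outputs.log into (step, task, metric, value) records.
    # Assumes one "Name={...}" entry per line; dict payloads are plain
    # Python literals, so ast.literal_eval handles them directly.
    import ast

    def parse_outputs_log(path):
        records, step = [], None
        with open(path) as f:
            for raw in f:
                line = raw.strip().lstrip("+").strip()  # drop diff '+' prefixes
                if not line:
                    continue
                if line.startswith("Step="):
                    step = int(line.split("=", 1)[1])
                    continue
                task, _, payload = line.partition("=")
                for metric, value in ast.literal_eval(payload).items():
                    records.append((step, task, metric, value))
        return records

    # e.g. the boolq curve over training:
    # [(s, v) for s, t, m, v in parse_outputs_log("outputs.log")
    #  if t == "super_glue/boolq" and m == "log_accuracy"]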