{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998854993048172, "eval_steps": 100, "global_step": 7641, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.535947712418301e-09, "logits/chosen": -2.896247386932373, "logits/rejected": -2.8002498149871826, "logps/chosen": -240.16311645507812, "logps/rejected": -260.468994140625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.535947712418302e-08, "logits/chosen": -2.7081668376922607, "logits/rejected": -2.7046549320220947, "logps/chosen": -287.52117919921875, "logps/rejected": -263.2371520996094, "loss": 0.6935, "rewards/accuracies": 0.3194444477558136, "rewards/chosen": -0.00040327783790417016, "rewards/margins": -0.0007606567232869565, "rewards/rejected": 0.00035737885627895594, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3071895424836603e-07, "logits/chosen": -2.7486777305603027, "logits/rejected": -2.735520839691162, "logps/chosen": -281.30926513671875, "logps/rejected": -285.0191955566406, "loss": 0.6925, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0007181967375800014, "rewards/margins": 0.0012460026191547513, "rewards/rejected": -0.0005278057651594281, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9607843137254904e-07, "logits/chosen": -2.735398769378662, "logits/rejected": -2.6820080280303955, "logps/chosen": -251.34585571289062, "logps/rejected": -225.1924285888672, "loss": 0.693, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.00032737970468588173, "rewards/margins": 0.0003834707895293832, "rewards/rejected": -5.609105573967099e-05, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.6143790849673207e-07, "logits/chosen": -2.8098397254943848, "logits/rejected": -2.707000494003296, "logps/chosen": -281.877197265625, "logps/rejected": -259.3277587890625, "loss": 0.6937, "rewards/accuracies": 0.375, "rewards/chosen": -0.0002893812779802829, "rewards/margins": -0.0011306366650387645, "rewards/rejected": 0.000841255416162312, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.267973856209151e-07, "logits/chosen": -2.8089194297790527, "logits/rejected": -2.823747396469116, "logps/chosen": -242.1394805908203, "logps/rejected": -241.009521484375, "loss": 0.6932, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.00046527199447155, "rewards/margins": -0.0001682665169937536, "rewards/rejected": 0.000633538409601897, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.921568627450981e-07, "logits/chosen": -2.7848849296569824, "logits/rejected": -2.7923500537872314, "logps/chosen": -283.9176330566406, "logps/rejected": -254.57522583007812, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0006139302277006209, "rewards/margins": 0.00016295790555886924, "rewards/rejected": 0.00045097232214175165, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.5751633986928105e-07, "logits/chosen": -2.775444746017456, "logits/rejected": -2.773970127105713, "logps/chosen": -264.4169616699219, "logps/rejected": -220.95669555664062, "loss": 0.6931, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0009447624906897545, "rewards/margins": 2.7601974579738453e-05, "rewards/rejected": 0.0009171604178845882, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.228758169934641e-07, "logits/chosen": -2.7369792461395264, "logits/rejected": -2.719099760055542, "logps/chosen": -305.594482421875, "logps/rejected": -296.13848876953125, "loss": 0.693, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.00130544847343117, "rewards/margins": 0.0002942857681773603, "rewards/rejected": 0.0010111627634614706, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "logits/chosen": -2.8638012409210205, "logits/rejected": -2.7611491680145264, "logps/chosen": -269.5434265136719, "logps/rejected": -238.3434295654297, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.0009442205773666501, "rewards/margins": 0.00013440940529108047, "rewards/rejected": 0.0008098110556602478, "step": 90 }, { "epoch": 0.01, "learning_rate": 6.535947712418302e-07, "logits/chosen": -2.756626605987549, "logits/rejected": -2.762937068939209, "logps/chosen": -247.02676391601562, "logps/rejected": -251.1614990234375, "loss": 0.6934, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0012332749320194125, "rewards/margins": -0.00041856098687276244, "rewards/rejected": 0.001651835860684514, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.608107089996338, "eval_logits/rejected": -2.576101303100586, "eval_logps/chosen": -285.0846862792969, "eval_logps/rejected": -263.6873474121094, "eval_loss": 0.6930220723152161, "eval_rewards/accuracies": 0.5120000243186951, "eval_rewards/chosen": 0.002080138074234128, "eval_rewards/margins": 0.00025725935120135546, "eval_rewards/rejected": 0.0018228788394480944, "eval_runtime": 1173.2958, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.189542483660131e-07, "logits/chosen": -2.756783962249756, "logits/rejected": -2.710714101791382, "logps/chosen": -289.2729797363281, "logps/rejected": -269.6139221191406, "loss": 0.6928, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.002697740215808153, "rewards/margins": 0.0007278420380316675, "rewards/rejected": 0.0019698983523994684, "step": 110 }, { "epoch": 0.02, "learning_rate": 7.843137254901962e-07, "logits/chosen": -2.792996406555176, "logits/rejected": -2.717787504196167, "logps/chosen": -303.8896789550781, "logps/rejected": -243.03982543945312, "loss": 0.6927, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.001842958852648735, "rewards/margins": 0.0008399001089856029, "rewards/rejected": 0.0010030587436631322, "step": 120 }, { "epoch": 0.02, "learning_rate": 8.496732026143792e-07, "logits/chosen": -2.7940175533294678, "logits/rejected": -2.771402359008789, "logps/chosen": -265.6170349121094, "logps/rejected": -236.84207153320312, "loss": 0.693, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.00197789934463799, "rewards/margins": 0.00039560170262120664, "rewards/rejected": 0.0015822972636669874, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.150326797385621e-07, "logits/chosen": -2.7395403385162354, "logits/rejected": -2.6864776611328125, "logps/chosen": -279.4371032714844, "logps/rejected": -249.57913208007812, "loss": 0.6924, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.003136344952508807, "rewards/margins": 0.0014974649529904127, "rewards/rejected": 0.0016388805815950036, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.80392156862745e-07, "logits/chosen": -2.8249659538269043, "logits/rejected": -2.7408270835876465, "logps/chosen": -335.9043884277344, "logps/rejected": -297.19586181640625, "loss": 0.6926, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0033657816238701344, "rewards/margins": 0.0010477075120434165, "rewards/rejected": 0.002318073995411396, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.0457516339869283e-06, "logits/chosen": -2.8694610595703125, "logits/rejected": -2.738370656967163, "logps/chosen": -262.63018798828125, "logps/rejected": -240.7825469970703, "loss": 0.6931, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.003096712287515402, "rewards/margins": 0.00016818303265608847, "rewards/rejected": 0.0029285294003784657, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.111111111111111e-06, "logits/chosen": -2.7636828422546387, "logits/rejected": -2.7123844623565674, "logps/chosen": -245.6793975830078, "logps/rejected": -231.6672821044922, "loss": 0.6927, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0035004685632884502, "rewards/margins": 0.0009301775135099888, "rewards/rejected": 0.0025702915154397488, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -2.8375208377838135, "logits/rejected": -2.639805316925049, "logps/chosen": -346.8587341308594, "logps/rejected": -268.3854675292969, "loss": 0.6917, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.005277971271425486, "rewards/margins": 0.0029592241626232862, "rewards/rejected": 0.0023187468759715557, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.2418300653594772e-06, "logits/chosen": -2.6728103160858154, "logits/rejected": -2.654719829559326, "logps/chosen": -266.8128662109375, "logps/rejected": -227.26626586914062, "loss": 0.6927, "rewards/accuracies": 0.5625, "rewards/chosen": 0.006007979158312082, "rewards/margins": 0.0008784201927483082, "rewards/rejected": 0.005129558499902487, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.3071895424836604e-06, "logits/chosen": -2.8150954246520996, "logits/rejected": -2.7260398864746094, "logps/chosen": -261.29449462890625, "logps/rejected": -262.1349182128906, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.006078018341213465, "rewards/margins": 0.0021574501879513264, "rewards/rejected": 0.003920567687600851, "step": 200 }, { "epoch": 0.03, "eval_logits/chosen": -2.6088781356811523, "eval_logits/rejected": -2.576608657836914, "eval_logps/chosen": -284.6488037109375, "eval_logps/rejected": -263.39703369140625, "eval_loss": 0.6922996640205383, "eval_rewards/accuracies": 0.5820000171661377, "eval_rewards/chosen": 0.00643900316208601, "eval_rewards/margins": 0.0017128386534750462, "eval_rewards/rejected": 0.0047261640429496765, "eval_runtime": 1173.3945, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.3725490196078434e-06, "logits/chosen": -2.8238754272460938, "logits/rejected": -2.802713632583618, "logps/chosen": -283.5692443847656, "logps/rejected": -258.04443359375, "loss": 0.6918, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.00797777995467186, "rewards/margins": 0.0026797554455697536, "rewards/rejected": 0.005298024974763393, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.4379084967320261e-06, "logits/chosen": -2.720637083053589, "logits/rejected": -2.683905839920044, "logps/chosen": -285.0937194824219, "logps/rejected": -259.00048828125, "loss": 0.6916, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.007448518183082342, "rewards/margins": 0.003169125411659479, "rewards/rejected": 0.004279392305761576, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.5032679738562091e-06, "logits/chosen": -2.6920247077941895, "logits/rejected": -2.685770034790039, "logps/chosen": -236.8917694091797, "logps/rejected": -278.4407958984375, "loss": 0.6929, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006391332484781742, "rewards/margins": 0.0004684348532464355, "rewards/rejected": 0.005922897718846798, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.5686274509803923e-06, "logits/chosen": -2.7400903701782227, "logits/rejected": -2.697659969329834, "logps/chosen": -250.9820098876953, "logps/rejected": -268.54437255859375, "loss": 0.6931, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.005527496337890625, "rewards/margins": 8.923767745727673e-05, "rewards/rejected": 0.0054382579401135445, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.6339869281045753e-06, "logits/chosen": -2.7918593883514404, "logits/rejected": -2.662835121154785, "logps/chosen": -315.0617980957031, "logps/rejected": -275.05499267578125, "loss": 0.6917, "rewards/accuracies": 0.6875, "rewards/chosen": 0.007765919901430607, "rewards/margins": 0.002958547091111541, "rewards/rejected": 0.00480737304314971, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.6993464052287585e-06, "logits/chosen": -2.8143398761749268, "logits/rejected": -2.777773857116699, "logps/chosen": -297.04248046875, "logps/rejected": -261.8209228515625, "loss": 0.691, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.008783089928328991, "rewards/margins": 0.004354340024292469, "rewards/rejected": 0.0044287508353590965, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -2.741014003753662, "logits/rejected": -2.662790060043335, "logps/chosen": -254.10281372070312, "logps/rejected": -228.9877166748047, "loss": 0.6912, "rewards/accuracies": 0.625, "rewards/chosen": 0.007920559495687485, "rewards/margins": 0.0039781928062438965, "rewards/rejected": 0.003942367620766163, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.8300653594771242e-06, "logits/chosen": -2.8170313835144043, "logits/rejected": -2.7090656757354736, "logps/chosen": -317.5547790527344, "logps/rejected": -283.766845703125, "loss": 0.6911, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.009623361751437187, "rewards/margins": 0.004096606746315956, "rewards/rejected": 0.005526755005121231, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.8954248366013072e-06, "logits/chosen": -2.7824151515960693, "logits/rejected": -2.7288193702697754, "logps/chosen": -291.642333984375, "logps/rejected": -274.235107421875, "loss": 0.6902, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.011028922162950039, "rewards/margins": 0.005957460962235928, "rewards/rejected": 0.005071460269391537, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.96078431372549e-06, "logits/chosen": -2.8855767250061035, "logits/rejected": -2.769399642944336, "logps/chosen": -304.3223876953125, "logps/rejected": -249.46762084960938, "loss": 0.6913, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.01247901190072298, "rewards/margins": 0.0038229345809668303, "rewards/rejected": 0.008656077086925507, "step": 300 }, { "epoch": 0.04, "eval_logits/chosen": -2.6104612350463867, "eval_logits/rejected": -2.5774478912353516, "eval_logps/chosen": -284.02532958984375, "eval_logps/rejected": -263.038330078125, "eval_loss": 0.6910020709037781, "eval_rewards/accuracies": 0.6194999814033508, "eval_rewards/chosen": 0.012673730961978436, "eval_rewards/margins": 0.004360521212220192, "eval_rewards/rejected": 0.008313210681080818, "eval_runtime": 1173.212, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.0261437908496734e-06, "logits/chosen": -2.726591110229492, "logits/rejected": -2.7057387828826904, "logps/chosen": -290.943359375, "logps/rejected": -283.51690673828125, "loss": 0.6918, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.013583977706730366, "rewards/margins": 0.002739064861088991, "rewards/rejected": 0.010844913311302662, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.0915032679738565e-06, "logits/chosen": -2.77579927444458, "logits/rejected": -2.6547343730926514, "logps/chosen": -264.1826477050781, "logps/rejected": -255.95779418945312, "loss": 0.6916, "rewards/accuracies": 0.5625, "rewards/chosen": 0.013339856639504433, "rewards/margins": 0.0032375603914260864, "rewards/rejected": 0.010102294385433197, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.1568627450980393e-06, "logits/chosen": -2.8468410968780518, "logits/rejected": -2.7400074005126953, "logps/chosen": -303.87384033203125, "logps/rejected": -253.5267791748047, "loss": 0.6912, "rewards/accuracies": 0.5625, "rewards/chosen": 0.014395073056221008, "rewards/margins": 0.003921784460544586, "rewards/rejected": 0.010473288595676422, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.222222222222222e-06, "logits/chosen": -2.795454978942871, "logits/rejected": -2.681230306625366, "logps/chosen": -272.3973693847656, "logps/rejected": -230.4618682861328, "loss": 0.6904, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.015390843152999878, "rewards/margins": 0.0054797702468931675, "rewards/rejected": 0.009911073371767998, "step": 340 }, { "epoch": 0.05, "learning_rate": 2.2875816993464053e-06, "logits/chosen": -2.806203603744507, "logits/rejected": -2.747692108154297, "logps/chosen": -321.1043395996094, "logps/rejected": -270.4984130859375, "loss": 0.6891, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.018319781869649887, "rewards/margins": 0.008197757415473461, "rewards/rejected": 0.010122022591531277, "step": 350 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -2.84110426902771, "logits/rejected": -2.7785239219665527, "logps/chosen": -265.24749755859375, "logps/rejected": -260.6332092285156, "loss": 0.6908, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.018875570967793465, "rewards/margins": 0.004776433110237122, "rewards/rejected": 0.014099137857556343, "step": 360 }, { "epoch": 0.05, "learning_rate": 2.4183006535947716e-06, "logits/chosen": -2.7692885398864746, "logits/rejected": -2.760307788848877, "logps/chosen": -269.5130920410156, "logps/rejected": -244.1840362548828, "loss": 0.6889, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.01895751990377903, "rewards/margins": 0.008716282434761524, "rewards/rejected": 0.010241237469017506, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.4836601307189544e-06, "logits/chosen": -2.8237204551696777, "logits/rejected": -2.781487464904785, "logps/chosen": -289.22412109375, "logps/rejected": -254.5786590576172, "loss": 0.6886, "rewards/accuracies": 0.625, "rewards/chosen": 0.020854527130723, "rewards/margins": 0.00919763371348381, "rewards/rejected": 0.01165689341723919, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.549019607843137e-06, "logits/chosen": -2.7682974338531494, "logits/rejected": -2.6853373050689697, "logps/chosen": -305.08367919921875, "logps/rejected": -246.4702606201172, "loss": 0.6882, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.023305263370275497, "rewards/margins": 0.010024776682257652, "rewards/rejected": 0.013280488550662994, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.6143790849673208e-06, "logits/chosen": -2.813638925552368, "logits/rejected": -2.840217113494873, "logps/chosen": -289.2650451660156, "logps/rejected": -301.24688720703125, "loss": 0.6888, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.026052230969071388, "rewards/margins": 0.008794544264674187, "rewards/rejected": 0.0172576867043972, "step": 400 }, { "epoch": 0.05, "eval_logits/chosen": -2.6114237308502197, "eval_logits/rejected": -2.5777738094329834, "eval_logps/chosen": -282.9473876953125, "eval_logps/rejected": -262.29913330078125, "eval_loss": 0.6893764734268188, "eval_rewards/accuracies": 0.6209999918937683, "eval_rewards/chosen": 0.02345350943505764, "eval_rewards/margins": 0.007748373784124851, "eval_rewards/rejected": 0.015705134719610214, "eval_runtime": 1173.1663, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.6797385620915036e-06, "logits/chosen": -2.747955083847046, "logits/rejected": -2.6892735958099365, "logps/chosen": -263.50299072265625, "logps/rejected": -218.99850463867188, "loss": 0.69, "rewards/accuracies": 0.5625, "rewards/chosen": 0.022609690204262733, "rewards/margins": 0.006450247950851917, "rewards/rejected": 0.01615944132208824, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.7450980392156867e-06, "logits/chosen": -2.7311861515045166, "logits/rejected": -2.6678895950317383, "logps/chosen": -280.86883544921875, "logps/rejected": -280.25823974609375, "loss": 0.6894, "rewards/accuracies": 0.5625, "rewards/chosen": 0.022379932925105095, "rewards/margins": 0.007762663997709751, "rewards/rejected": 0.014617268927395344, "step": 420 }, { "epoch": 0.06, "learning_rate": 2.8104575163398695e-06, "logits/chosen": -2.8040578365325928, "logits/rejected": -2.7093348503112793, "logps/chosen": -287.013671875, "logps/rejected": -269.5040588378906, "loss": 0.6896, "rewards/accuracies": 0.625, "rewards/chosen": 0.024479230865836143, "rewards/margins": 0.0074189514853060246, "rewards/rejected": 0.017060281708836555, "step": 430 }, { "epoch": 0.06, "learning_rate": 2.8758169934640523e-06, "logits/chosen": -2.806948661804199, "logits/rejected": -2.7780017852783203, "logps/chosen": -272.99945068359375, "logps/rejected": -245.48623657226562, "loss": 0.6872, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.026299938559532166, "rewards/margins": 0.012182589620351791, "rewards/rejected": 0.014117350801825523, "step": 440 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.7656569480895996, "logits/rejected": -2.783059597015381, "logps/chosen": -290.8000183105469, "logps/rejected": -304.024169921875, "loss": 0.6886, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.027255898341536522, "rewards/margins": 0.009453834965825081, "rewards/rejected": 0.01780206523835659, "step": 450 }, { "epoch": 0.06, "learning_rate": 3.0065359477124182e-06, "logits/chosen": -2.6999731063842773, "logits/rejected": -2.675001859664917, "logps/chosen": -257.7070007324219, "logps/rejected": -248.4289093017578, "loss": 0.6869, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.032838813960552216, "rewards/margins": 0.012781324796378613, "rewards/rejected": 0.02005748823285103, "step": 460 }, { "epoch": 0.06, "learning_rate": 3.071895424836602e-06, "logits/chosen": -2.782094717025757, "logits/rejected": -2.7665746212005615, "logps/chosen": -279.497314453125, "logps/rejected": -260.2268981933594, "loss": 0.6857, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.037720657885074615, "rewards/margins": 0.01530242245644331, "rewards/rejected": 0.02241823635995388, "step": 470 }, { "epoch": 0.06, "learning_rate": 3.1372549019607846e-06, "logits/chosen": -2.7836313247680664, "logits/rejected": -2.7629621028900146, "logps/chosen": -286.4947204589844, "logps/rejected": -240.4879608154297, "loss": 0.6843, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.03812272474169731, "rewards/margins": 0.018193546682596207, "rewards/rejected": 0.019929179921746254, "step": 480 }, { "epoch": 0.06, "learning_rate": 3.2026143790849674e-06, "logits/chosen": -2.810976266860962, "logits/rejected": -2.720935821533203, "logps/chosen": -276.6731262207031, "logps/rejected": -224.75808715820312, "loss": 0.686, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.03245459124445915, "rewards/margins": 0.014776689000427723, "rewards/rejected": 0.017677903175354004, "step": 490 }, { "epoch": 0.07, "learning_rate": 3.2679738562091506e-06, "logits/chosen": -2.761012315750122, "logits/rejected": -2.6800663471221924, "logps/chosen": -254.37905883789062, "logps/rejected": -234.1659698486328, "loss": 0.6881, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.025483015924692154, "rewards/margins": 0.01065473910421133, "rewards/rejected": 0.014828277751803398, "step": 500 }, { "epoch": 0.07, "eval_logits/chosen": -2.601078987121582, "eval_logits/rejected": -2.5648090839385986, "eval_logps/chosen": -282.0685119628906, "eval_logps/rejected": -262.00579833984375, "eval_loss": 0.6866379380226135, "eval_rewards/accuracies": 0.621999979019165, "eval_rewards/chosen": 0.03224240243434906, "eval_rewards/margins": 0.013603860512375832, "eval_rewards/rejected": 0.01863854192197323, "eval_runtime": 1173.0502, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 500 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.649364709854126, "logits/rejected": -2.695103645324707, "logps/chosen": -263.63897705078125, "logps/rejected": -262.0106201171875, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03161431476473808, "rewards/margins": 0.010357612743973732, "rewards/rejected": 0.0212567001581192, "step": 510 }, { "epoch": 0.07, "learning_rate": 3.398692810457517e-06, "logits/chosen": -2.7090542316436768, "logits/rejected": -2.6017651557922363, "logps/chosen": -256.1787109375, "logps/rejected": -256.99066162109375, "loss": 0.6846, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03309843689203262, "rewards/margins": 0.017676886171102524, "rewards/rejected": 0.015421552583575249, "step": 520 }, { "epoch": 0.07, "learning_rate": 3.4640522875816997e-06, "logits/chosen": -2.7257823944091797, "logits/rejected": -2.676396369934082, "logps/chosen": -262.1148681640625, "logps/rejected": -240.3489990234375, "loss": 0.685, "rewards/accuracies": 0.6875, "rewards/chosen": 0.031875304877758026, "rewards/margins": 0.016705092042684555, "rewards/rejected": 0.01517021656036377, "step": 530 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "logits/chosen": -2.7843520641326904, "logits/rejected": -2.6656417846679688, "logps/chosen": -286.06494140625, "logps/rejected": -263.2212829589844, "loss": 0.6845, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.03619309142231941, "rewards/margins": 0.01780475489795208, "rewards/rejected": 0.018388336524367332, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.5947712418300657e-06, "logits/chosen": -2.7776148319244385, "logits/rejected": -2.716484546661377, "logps/chosen": -255.5173797607422, "logps/rejected": -238.3636474609375, "loss": 0.6779, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04090049862861633, "rewards/margins": 0.031408317387104034, "rewards/rejected": 0.00949217937886715, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.6601307189542484e-06, "logits/chosen": -2.7813169956207275, "logits/rejected": -2.696591854095459, "logps/chosen": -288.11907958984375, "logps/rejected": -301.1776428222656, "loss": 0.6838, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02972649596631527, "rewards/margins": 0.019590891897678375, "rewards/rejected": 0.010135604068636894, "step": 560 }, { "epoch": 0.07, "learning_rate": 3.7254901960784316e-06, "logits/chosen": -2.866082191467285, "logits/rejected": -2.8021016120910645, "logps/chosen": -271.49017333984375, "logps/rejected": -257.3333435058594, "loss": 0.6859, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02512175962328911, "rewards/margins": 0.015149513259530067, "rewards/rejected": 0.009972251020371914, "step": 570 }, { "epoch": 0.08, "learning_rate": 3.7908496732026144e-06, "logits/chosen": -2.798537015914917, "logits/rejected": -2.652083396911621, "logps/chosen": -316.0807189941406, "logps/rejected": -284.3338623046875, "loss": 0.683, "rewards/accuracies": 0.625, "rewards/chosen": 0.03139640763401985, "rewards/margins": 0.021447105333209038, "rewards/rejected": 0.009949302300810814, "step": 580 }, { "epoch": 0.08, "learning_rate": 3.856209150326798e-06, "logits/chosen": -2.79559588432312, "logits/rejected": -2.657886028289795, "logps/chosen": -294.9044189453125, "logps/rejected": -254.8910675048828, "loss": 0.6833, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03273575007915497, "rewards/margins": 0.020848657935857773, "rewards/rejected": 0.011887092143297195, "step": 590 }, { "epoch": 0.08, "learning_rate": 3.92156862745098e-06, "logits/chosen": -2.746609687805176, "logits/rejected": -2.6399028301239014, "logps/chosen": -243.6225128173828, "logps/rejected": -247.07846069335938, "loss": 0.6848, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.028709720820188522, "rewards/margins": 0.01827172189950943, "rewards/rejected": 0.010438000783324242, "step": 600 }, { "epoch": 0.08, "eval_logits/chosen": -2.600626230239868, "eval_logits/rejected": -2.5620691776275635, "eval_logps/chosen": -281.3836364746094, "eval_logps/rejected": -262.1441955566406, "eval_loss": 0.6829456090927124, "eval_rewards/accuracies": 0.6230000257492065, "eval_rewards/chosen": 0.039090972393751144, "eval_rewards/margins": 0.021836327388882637, "eval_rewards/rejected": 0.017254654318094254, "eval_runtime": 1173.2709, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 600 }, { "epoch": 0.08, "learning_rate": 3.986928104575164e-06, "logits/chosen": -2.7640035152435303, "logits/rejected": -2.650482654571533, "logps/chosen": -239.72201538085938, "logps/rejected": -207.37960815429688, "loss": 0.6815, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.05101596564054489, "rewards/margins": 0.02423090860247612, "rewards/rejected": 0.02678506076335907, "step": 610 }, { "epoch": 0.08, "learning_rate": 4.052287581699347e-06, "logits/chosen": -2.7376561164855957, "logits/rejected": -2.6810410022735596, "logps/chosen": -288.5543212890625, "logps/rejected": -267.516845703125, "loss": 0.6799, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0445859432220459, "rewards/margins": 0.027907002717256546, "rewards/rejected": 0.016678940504789352, "step": 620 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "logits/chosen": -2.727701425552368, "logits/rejected": -2.6993868350982666, "logps/chosen": -274.822509765625, "logps/rejected": -256.6311340332031, "loss": 0.6745, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0639568492770195, "rewards/margins": 0.03929334878921509, "rewards/rejected": 0.02466350421309471, "step": 630 }, { "epoch": 0.08, "learning_rate": 4.183006535947713e-06, "logits/chosen": -2.822457790374756, "logits/rejected": -2.756417751312256, "logps/chosen": -271.39703369140625, "logps/rejected": -258.93212890625, "loss": 0.6856, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.04032561182975769, "rewards/margins": 0.01741691306233406, "rewards/rejected": 0.02290869876742363, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.2483660130718954e-06, "logits/chosen": -2.7424185276031494, "logits/rejected": -2.7166240215301514, "logps/chosen": -269.97467041015625, "logps/rejected": -256.1397399902344, "loss": 0.6842, "rewards/accuracies": 0.625, "rewards/chosen": 0.03450140729546547, "rewards/margins": 0.021146176382899284, "rewards/rejected": 0.013355230912566185, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.313725490196079e-06, "logits/chosen": -2.773705005645752, "logits/rejected": -2.6584715843200684, "logps/chosen": -306.5790100097656, "logps/rejected": -254.39697265625, "loss": 0.6757, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.05402520298957825, "rewards/margins": 0.03739168122410774, "rewards/rejected": 0.016633519902825356, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.379084967320262e-06, "logits/chosen": -2.8280460834503174, "logits/rejected": -2.7472095489501953, "logps/chosen": -293.5730285644531, "logps/rejected": -309.07196044921875, "loss": 0.685, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0446760393679142, "rewards/margins": 0.019286952912807465, "rewards/rejected": 0.025389084592461586, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.444444444444444e-06, "logits/chosen": -2.8368771076202393, "logits/rejected": -2.781674861907959, "logps/chosen": -280.62738037109375, "logps/rejected": -264.23553466796875, "loss": 0.6849, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.053720176219940186, "rewards/margins": 0.01869060844182968, "rewards/rejected": 0.035029567778110504, "step": 680 }, { "epoch": 0.09, "learning_rate": 4.509803921568628e-06, "logits/chosen": -2.809265613555908, "logits/rejected": -2.7491776943206787, "logps/chosen": -304.00579833984375, "logps/rejected": -263.46368408203125, "loss": 0.6759, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.05731926113367081, "rewards/margins": 0.03783116117119789, "rewards/rejected": 0.01948809251189232, "step": 690 }, { "epoch": 0.09, "learning_rate": 4.5751633986928105e-06, "logits/chosen": -2.769301414489746, "logits/rejected": -2.728238105773926, "logps/chosen": -280.30340576171875, "logps/rejected": -268.40081787109375, "loss": 0.6706, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.07635831832885742, "rewards/margins": 0.0484110489487648, "rewards/rejected": 0.027947265654802322, "step": 700 }, { "epoch": 0.09, "eval_logits/chosen": -2.5860841274261475, "eval_logits/rejected": -2.543696403503418, "eval_logps/chosen": -280.1424560546875, "eval_logps/rejected": -262.17578125, "eval_loss": 0.6776183843612671, "eval_rewards/accuracies": 0.6134999990463257, "eval_rewards/chosen": 0.05150264874100685, "eval_rewards/margins": 0.03456386178731918, "eval_rewards/rejected": 0.01693878136575222, "eval_runtime": 1173.3642, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.640522875816994e-06, "logits/chosen": -2.8206183910369873, "logits/rejected": -2.7675373554229736, "logps/chosen": -285.945068359375, "logps/rejected": -273.6590576171875, "loss": 0.6718, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.06139975041151047, "rewards/margins": 0.047468554228544235, "rewards/rejected": 0.013931198045611382, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.705882352941177e-06, "logits/chosen": -2.7912356853485107, "logits/rejected": -2.746445655822754, "logps/chosen": -325.3434753417969, "logps/rejected": -304.920654296875, "loss": 0.6697, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.047881126403808594, "rewards/margins": 0.05247528478503227, "rewards/rejected": -0.004594164900481701, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.77124183006536e-06, "logits/chosen": -2.786642551422119, "logits/rejected": -2.7573294639587402, "logps/chosen": -289.21734619140625, "logps/rejected": -281.2444152832031, "loss": 0.6692, "rewards/accuracies": 0.5625, "rewards/chosen": 0.052878547459840775, "rewards/margins": 0.05296853929758072, "rewards/rejected": -8.999630517791957e-05, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.836601307189543e-06, "logits/chosen": -2.795292377471924, "logits/rejected": -2.666905641555786, "logps/chosen": -305.7247314453125, "logps/rejected": -264.77825927734375, "loss": 0.6652, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.05517961457371712, "rewards/margins": 0.06262197345495224, "rewards/rejected": -0.007442360278218985, "step": 740 }, { "epoch": 0.1, "learning_rate": 4.901960784313726e-06, "logits/chosen": -2.666839361190796, "logits/rejected": -2.665914297103882, "logps/chosen": -250.14108276367188, "logps/rejected": -241.32608032226562, "loss": 0.656, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03899381682276726, "rewards/margins": 0.08067650347948074, "rewards/rejected": -0.04168267175555229, "step": 750 }, { "epoch": 0.1, "learning_rate": 4.967320261437909e-06, "logits/chosen": -2.7957959175109863, "logits/rejected": -2.714207172393799, "logps/chosen": -285.6178894042969, "logps/rejected": -246.8248291015625, "loss": 0.6672, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.007369598839432001, "rewards/margins": 0.05723084881901741, "rewards/rejected": -0.04986124485731125, "step": 760 }, { "epoch": 0.1, "learning_rate": 4.999993476542427e-06, "logits/chosen": -2.751208782196045, "logits/rejected": -2.791194438934326, "logps/chosen": -294.9483642578125, "logps/rejected": -281.4031066894531, "loss": 0.6549, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.021667104214429855, "rewards/margins": 0.08953282982110977, "rewards/rejected": -0.06786571443080902, "step": 770 }, { "epoch": 0.1, "learning_rate": 4.999941289086112e-06, "logits/chosen": -2.803582191467285, "logits/rejected": -2.6576297283172607, "logps/chosen": -308.5997619628906, "logps/rejected": -281.09771728515625, "loss": 0.6482, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.021102851256728172, "rewards/margins": 0.10389794409275055, "rewards/rejected": -0.12500080466270447, "step": 780 }, { "epoch": 0.1, "learning_rate": 4.999836915262896e-06, "logits/chosen": -2.6909666061401367, "logits/rejected": -2.71191668510437, "logps/chosen": -300.32122802734375, "logps/rejected": -303.9979553222656, "loss": 0.6562, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08398447185754776, "rewards/margins": 0.09393363445997238, "rewards/rejected": -0.17791810631752014, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.999680357251587e-06, "logits/chosen": -2.566818952560425, "logits/rejected": -2.565382242202759, "logps/chosen": -273.0006103515625, "logps/rejected": -289.88714599609375, "loss": 0.6544, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.060392219573259354, "rewards/margins": 0.09415189921855927, "rewards/rejected": -0.15454411506652832, "step": 800 }, { "epoch": 0.1, "eval_logits/chosen": -2.5676047801971436, "eval_logits/rejected": -2.5207531452178955, "eval_logps/chosen": -293.7215576171875, "eval_logps/rejected": -279.89556884765625, "eval_loss": 0.6649993658065796, "eval_rewards/accuracies": 0.6065000295639038, "eval_rewards/chosen": -0.0842883288860321, "eval_rewards/margins": 0.0759705975651741, "eval_rewards/rejected": -0.160258948802948, "eval_runtime": 1173.6251, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.999471618320339e-06, "logits/chosen": -2.781917095184326, "logits/rejected": -2.65069317817688, "logps/chosen": -305.3197326660156, "logps/rejected": -281.01947021484375, "loss": 0.6586, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10152751207351685, "rewards/margins": 0.0869065374135971, "rewards/rejected": -0.18843403458595276, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.999210702826586e-06, "logits/chosen": -2.884624719619751, "logits/rejected": -2.808931827545166, "logps/chosen": -329.1521301269531, "logps/rejected": -292.8260498046875, "loss": 0.6533, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09741269052028656, "rewards/margins": 0.09646307677030563, "rewards/rejected": -0.1938757747411728, "step": 820 }, { "epoch": 0.11, "learning_rate": 4.998897616216947e-06, "logits/chosen": -2.705528974533081, "logits/rejected": -2.759491205215454, "logps/chosen": -258.15032958984375, "logps/rejected": -292.4070129394531, "loss": 0.6459, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11559978872537613, "rewards/margins": 0.11973357200622559, "rewards/rejected": -0.23533335328102112, "step": 830 }, { "epoch": 0.11, "learning_rate": 4.998532365027117e-06, "logits/chosen": -2.665297269821167, "logits/rejected": -2.577087879180908, "logps/chosen": -315.3207092285156, "logps/rejected": -264.4286804199219, "loss": 0.654, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14258244633674622, "rewards/margins": 0.10882475227117538, "rewards/rejected": -0.2514071762561798, "step": 840 }, { "epoch": 0.11, "learning_rate": 4.9981149568817275e-06, "logits/chosen": -2.703603506088257, "logits/rejected": -2.700209140777588, "logps/chosen": -307.2548522949219, "logps/rejected": -332.7950439453125, "loss": 0.6573, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.19128301739692688, "rewards/margins": 0.09529605507850647, "rewards/rejected": -0.28657907247543335, "step": 850 }, { "epoch": 0.11, "learning_rate": 4.997645400494192e-06, "logits/chosen": -2.7560465335845947, "logits/rejected": -2.7364706993103027, "logps/chosen": -284.1697692871094, "logps/rejected": -289.9046325683594, "loss": 0.6815, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31733840703964233, "rewards/margins": 0.0750778540968895, "rewards/rejected": -0.3924162685871124, "step": 860 }, { "epoch": 0.11, "learning_rate": 4.997123705666514e-06, "logits/chosen": -2.727036237716675, "logits/rejected": -2.6650333404541016, "logps/chosen": -322.28765869140625, "logps/rejected": -320.51031494140625, "loss": 0.6651, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.2925903797149658, "rewards/margins": 0.10269337892532349, "rewards/rejected": -0.3952837586402893, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.996549883289093e-06, "logits/chosen": -2.7099690437316895, "logits/rejected": -2.6788439750671387, "logps/chosen": -287.4695129394531, "logps/rejected": -311.8743591308594, "loss": 0.6602, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.24767592549324036, "rewards/margins": 0.09456877410411835, "rewards/rejected": -0.3422446846961975, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.995923945340495e-06, "logits/chosen": -2.7683117389678955, "logits/rejected": -2.7528557777404785, "logps/chosen": -283.42852783203125, "logps/rejected": -299.6573486328125, "loss": 0.6573, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15794335305690765, "rewards/margins": 0.10221956670284271, "rewards/rejected": -0.26016291975975037, "step": 890 }, { "epoch": 0.12, "learning_rate": 4.995245904887195e-06, "logits/chosen": -2.7562127113342285, "logits/rejected": -2.6816959381103516, "logps/chosen": -278.16796875, "logps/rejected": -251.93893432617188, "loss": 0.668, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.21272382140159607, "rewards/margins": 0.08459267765283585, "rewards/rejected": -0.2973165512084961, "step": 900 }, { "epoch": 0.12, "eval_logits/chosen": -2.56126070022583, "eval_logits/rejected": -2.5179994106292725, "eval_logps/chosen": -302.1818542480469, "eval_logps/rejected": -291.8527526855469, "eval_loss": 0.655211329460144, "eval_rewards/accuracies": 0.6169999837875366, "eval_rewards/chosen": -0.16889113187789917, "eval_rewards/margins": 0.11093967407941818, "eval_rewards/rejected": -0.27983081340789795, "eval_runtime": 1173.1095, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 900 }, { "epoch": 0.12, "learning_rate": 4.994515776083313e-06, "logits/chosen": -2.6447596549987793, "logits/rejected": -2.7301154136657715, "logps/chosen": -299.1502685546875, "logps/rejected": -334.7135925292969, "loss": 0.6481, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1342255175113678, "rewards/margins": 0.13406458497047424, "rewards/rejected": -0.26829007267951965, "step": 910 }, { "epoch": 0.12, "learning_rate": 4.993733574170316e-06, "logits/chosen": -2.722386360168457, "logits/rejected": -2.6634514331817627, "logps/chosen": -253.35986328125, "logps/rejected": -266.76409912109375, "loss": 0.6321, "rewards/accuracies": 0.6875, "rewards/chosen": -0.11939827352762222, "rewards/margins": 0.1619001179933548, "rewards/rejected": -0.28129833936691284, "step": 920 }, { "epoch": 0.12, "learning_rate": 4.992899315476696e-06, "logits/chosen": -2.738288402557373, "logits/rejected": -2.7077269554138184, "logps/chosen": -335.08160400390625, "logps/rejected": -319.16583251953125, "loss": 0.6579, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18058130145072937, "rewards/margins": 0.11753213405609131, "rewards/rejected": -0.2981134355068207, "step": 930 }, { "epoch": 0.12, "learning_rate": 4.9920130174176354e-06, "logits/chosen": -2.735929489135742, "logits/rejected": -2.6511847972869873, "logps/chosen": -314.0692443847656, "logps/rejected": -319.5186462402344, "loss": 0.6098, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20987017452716827, "rewards/margins": 0.21007618308067322, "rewards/rejected": -0.4199463725090027, "step": 940 }, { "epoch": 0.12, "learning_rate": 4.991074698494638e-06, "logits/chosen": -2.791724681854248, "logits/rejected": -2.652005434036255, "logps/chosen": -311.9523620605469, "logps/rejected": -291.4970703125, "loss": 0.6306, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2346021831035614, "rewards/margins": 0.16210028529167175, "rewards/rejected": -0.39670246839523315, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.990084378295148e-06, "logits/chosen": -2.7419540882110596, "logits/rejected": -2.7098803520202637, "logps/chosen": -277.81427001953125, "logps/rejected": -267.3054504394531, "loss": 0.6452, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23132053017616272, "rewards/margins": 0.14752307534217834, "rewards/rejected": -0.37884360551834106, "step": 960 }, { "epoch": 0.13, "learning_rate": 4.989042077492135e-06, "logits/chosen": -2.708400249481201, "logits/rejected": -2.6803619861602783, "logps/chosen": -316.9068298339844, "logps/rejected": -321.396728515625, "loss": 0.6019, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2604824900627136, "rewards/margins": 0.22696319222450256, "rewards/rejected": -0.4874456822872162, "step": 970 }, { "epoch": 0.13, "learning_rate": 4.987947817843665e-06, "logits/chosen": -2.5916519165039062, "logits/rejected": -2.6380982398986816, "logps/chosen": -310.9526672363281, "logps/rejected": -296.34222412109375, "loss": 0.6496, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.4472068250179291, "rewards/margins": 0.1646738201379776, "rewards/rejected": -0.6118806600570679, "step": 980 }, { "epoch": 0.13, "learning_rate": 4.986801622192453e-06, "logits/chosen": -2.671600580215454, "logits/rejected": -2.6104674339294434, "logps/chosen": -275.8227233886719, "logps/rejected": -276.8636474609375, "loss": 0.6118, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3716748356819153, "rewards/margins": 0.22761638462543488, "rewards/rejected": -0.599291205406189, "step": 990 }, { "epoch": 0.13, "learning_rate": 4.985603514465372e-06, "logits/chosen": -2.6798784732818604, "logits/rejected": -2.6702020168304443, "logps/chosen": -330.8289489746094, "logps/rejected": -341.70623779296875, "loss": 0.6285, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4649590849876404, "rewards/margins": 0.22645731270313263, "rewards/rejected": -0.6914163827896118, "step": 1000 }, { "epoch": 0.13, "eval_logits/chosen": -2.4939186573028564, "eval_logits/rejected": -2.4563186168670654, "eval_logps/chosen": -338.1634521484375, "eval_logps/rejected": -335.08056640625, "eval_loss": 0.6456961035728455, "eval_rewards/accuracies": 0.6290000081062317, "eval_rewards/chosen": -0.5287072062492371, "eval_rewards/margins": 0.18340161442756653, "eval_rewards/rejected": -0.7121089100837708, "eval_runtime": 1172.976, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.984353519672966e-06, "logits/chosen": -2.6857194900512695, "logits/rejected": -2.5498063564300537, "logps/chosen": -330.83270263671875, "logps/rejected": -315.6231994628906, "loss": 0.6602, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5528852939605713, "rewards/margins": 0.13348236680030823, "rewards/rejected": -0.6863676309585571, "step": 1010 }, { "epoch": 0.13, "learning_rate": 4.9830516639089226e-06, "logits/chosen": -2.660613536834717, "logits/rejected": -2.6461730003356934, "logps/chosen": -377.82635498046875, "logps/rejected": -328.1082458496094, "loss": 0.592, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5226212739944458, "rewards/margins": 0.2594411373138428, "rewards/rejected": -0.7820624113082886, "step": 1020 }, { "epoch": 0.13, "learning_rate": 4.9816979743495296e-06, "logits/chosen": -2.6877143383026123, "logits/rejected": -2.6519954204559326, "logps/chosen": -380.0502624511719, "logps/rejected": -382.59869384765625, "loss": 0.5997, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6410335302352905, "rewards/margins": 0.30625757575035095, "rewards/rejected": -0.9472910761833191, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.980292479253105e-06, "logits/chosen": -2.7361416816711426, "logits/rejected": -2.6469337940216064, "logps/chosen": -386.8782958984375, "logps/rejected": -373.96405029296875, "loss": 0.5811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6625531911849976, "rewards/margins": 0.34235090017318726, "rewards/rejected": -1.00490403175354, "step": 1040 }, { "epoch": 0.14, "learning_rate": 4.978835207959414e-06, "logits/chosen": -2.579270839691162, "logits/rejected": -2.5607879161834717, "logps/chosen": -343.03533935546875, "logps/rejected": -352.88916015625, "loss": 0.6196, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.702850878238678, "rewards/margins": 0.24091625213623047, "rewards/rejected": -0.9437670707702637, "step": 1050 }, { "epoch": 0.14, "learning_rate": 4.977326190889046e-06, "logits/chosen": -2.577333927154541, "logits/rejected": -2.3452649116516113, "logps/chosen": -359.04669189453125, "logps/rejected": -328.72467041015625, "loss": 0.6402, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7241743803024292, "rewards/margins": 0.2433900088071823, "rewards/rejected": -0.9675644040107727, "step": 1060 }, { "epoch": 0.14, "learning_rate": 4.975765459542788e-06, "logits/chosen": -2.4829294681549072, "logits/rejected": -2.470693826675415, "logps/chosen": -329.3133239746094, "logps/rejected": -346.67462158203125, "loss": 0.5942, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5526672601699829, "rewards/margins": 0.3274114727973938, "rewards/rejected": -0.8800787925720215, "step": 1070 }, { "epoch": 0.14, "learning_rate": 4.9741530465009665e-06, "logits/chosen": -2.445230007171631, "logits/rejected": -2.4143424034118652, "logps/chosen": -319.7929382324219, "logps/rejected": -325.70654296875, "loss": 0.6218, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5758613348007202, "rewards/margins": 0.2517636716365814, "rewards/rejected": -0.8276249766349792, "step": 1080 }, { "epoch": 0.14, "learning_rate": 4.972488985422763e-06, "logits/chosen": -2.496365547180176, "logits/rejected": -2.4725289344787598, "logps/chosen": -324.9794006347656, "logps/rejected": -326.0638122558594, "loss": 0.5809, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.608519971370697, "rewards/margins": 0.37370020151138306, "rewards/rejected": -0.9822202920913696, "step": 1090 }, { "epoch": 0.14, "learning_rate": 4.970773311045514e-06, "logits/chosen": -2.509723663330078, "logits/rejected": -2.404963970184326, "logps/chosen": -345.8732604980469, "logps/rejected": -341.27020263671875, "loss": 0.6741, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6360703110694885, "rewards/margins": 0.16143734753131866, "rewards/rejected": -0.7975075840950012, "step": 1100 }, { "epoch": 0.14, "eval_logits/chosen": -2.322707414627075, "eval_logits/rejected": -2.2815117835998535, "eval_logps/chosen": -355.5893249511719, "eval_logps/rejected": -358.6846618652344, "eval_loss": 0.6395881772041321, "eval_rewards/accuracies": 0.6305000185966492, "eval_rewards/chosen": -0.7029657959938049, "eval_rewards/margins": 0.24518389999866486, "eval_rewards/rejected": -0.9481497406959534, "eval_runtime": 1173.8672, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.969006059183984e-06, "logits/chosen": -2.5231308937072754, "logits/rejected": -2.448873996734619, "logps/chosen": -348.31427001953125, "logps/rejected": -335.48345947265625, "loss": 0.69, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.6456478238105774, "rewards/margins": 0.12208940088748932, "rewards/rejected": -0.7677371501922607, "step": 1110 }, { "epoch": 0.15, "learning_rate": 4.967187266729623e-06, "logits/chosen": -2.678138256072998, "logits/rejected": -2.5693306922912598, "logps/chosen": -338.0236511230469, "logps/rejected": -344.2812194824219, "loss": 0.6281, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.43570685386657715, "rewards/margins": 0.19579991698265076, "rewards/rejected": -0.6315068006515503, "step": 1120 }, { "epoch": 0.15, "learning_rate": 4.965316971649791e-06, "logits/chosen": -2.6538443565368652, "logits/rejected": -2.5499930381774902, "logps/chosen": -344.49639892578125, "logps/rejected": -328.1519470214844, "loss": 0.5859, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3683456480503082, "rewards/margins": 0.3128353953361511, "rewards/rejected": -0.6811810731887817, "step": 1130 }, { "epoch": 0.15, "learning_rate": 4.963395212986964e-06, "logits/chosen": -2.6395363807678223, "logits/rejected": -2.521418333053589, "logps/chosen": -297.9776916503906, "logps/rejected": -293.7052917480469, "loss": 0.6067, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4740704894065857, "rewards/margins": 0.2535014748573303, "rewards/rejected": -0.7275720238685608, "step": 1140 }, { "epoch": 0.15, "learning_rate": 4.9614220308579285e-06, "logits/chosen": -2.5091240406036377, "logits/rejected": -2.5847342014312744, "logps/chosen": -349.41888427734375, "logps/rejected": -358.28790283203125, "loss": 0.6277, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5796940922737122, "rewards/margins": 0.2202250212430954, "rewards/rejected": -0.799919068813324, "step": 1150 }, { "epoch": 0.15, "learning_rate": 4.9593974664529325e-06, "logits/chosen": -2.5793509483337402, "logits/rejected": -2.458042621612549, "logps/chosen": -345.2643127441406, "logps/rejected": -371.79388427734375, "loss": 0.6192, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6415513753890991, "rewards/margins": 0.27423813939094543, "rewards/rejected": -0.9157896041870117, "step": 1160 }, { "epoch": 0.15, "learning_rate": 4.957321562034833e-06, "logits/chosen": -2.5987672805786133, "logits/rejected": -2.5602588653564453, "logps/chosen": -367.10552978515625, "logps/rejected": -363.56866455078125, "loss": 0.5996, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6459072828292847, "rewards/margins": 0.30040204524993896, "rewards/rejected": -0.9463094472885132, "step": 1170 }, { "epoch": 0.15, "learning_rate": 4.955194360938214e-06, "logits/chosen": -2.5837607383728027, "logits/rejected": -2.5810608863830566, "logps/chosen": -349.5048828125, "logps/rejected": -340.4737243652344, "loss": 0.6318, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.7418169975280762, "rewards/margins": 0.25447434186935425, "rewards/rejected": -0.9962912797927856, "step": 1180 }, { "epoch": 0.16, "learning_rate": 4.9530159075684735e-06, "logits/chosen": -2.541612148284912, "logits/rejected": -2.5287868976593018, "logps/chosen": -328.6512451171875, "logps/rejected": -410.9795837402344, "loss": 0.6613, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8162468671798706, "rewards/margins": 0.17309090495109558, "rewards/rejected": -0.9893378019332886, "step": 1190 }, { "epoch": 0.16, "learning_rate": 4.950786247400908e-06, "logits/chosen": -2.456754446029663, "logits/rejected": -2.5195212364196777, "logps/chosen": -329.7606201171875, "logps/rejected": -354.2069091796875, "loss": 0.605, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7051799893379211, "rewards/margins": 0.27783241868019104, "rewards/rejected": -0.983012318611145, "step": 1200 }, { "epoch": 0.16, "eval_logits/chosen": -2.2607674598693848, "eval_logits/rejected": -2.2197513580322266, "eval_logps/chosen": -356.06011962890625, "eval_logps/rejected": -360.99627685546875, "eval_loss": 0.6279371380805969, "eval_rewards/accuracies": 0.637499988079071, "eval_rewards/chosen": -0.7076742053031921, "eval_rewards/margins": 0.26359233260154724, "eval_rewards/rejected": -0.971266508102417, "eval_runtime": 1173.3612, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 1200 }, { "epoch": 0.16, "learning_rate": 4.948505426979756e-06, "logits/chosen": -2.5039706230163574, "logits/rejected": -2.4758121967315674, "logps/chosen": -349.3768615722656, "logps/rejected": -365.09967041015625, "loss": 0.5967, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7029610872268677, "rewards/margins": 0.3289002776145935, "rewards/rejected": -1.031861424446106, "step": 1210 }, { "epoch": 0.16, "learning_rate": 4.946173493917228e-06, "logits/chosen": -2.492128849029541, "logits/rejected": -2.4254136085510254, "logps/chosen": -362.53436279296875, "logps/rejected": -328.02490234375, "loss": 0.7295, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.8030228614807129, "rewards/margins": 0.019256453961133957, "rewards/rejected": -0.822279155254364, "step": 1220 }, { "epoch": 0.16, "learning_rate": 4.943790496892513e-06, "logits/chosen": -2.600513458251953, "logits/rejected": -2.4696056842803955, "logps/chosen": -316.8155212402344, "logps/rejected": -307.95892333984375, "loss": 0.5983, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.43885356187820435, "rewards/margins": 0.3040347695350647, "rewards/rejected": -0.7428882718086243, "step": 1230 }, { "epoch": 0.16, "learning_rate": 4.941356485650762e-06, "logits/chosen": -2.6881508827209473, "logits/rejected": -2.6177926063537598, "logps/chosen": -367.1802062988281, "logps/rejected": -365.7328186035156, "loss": 0.6168, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4106849133968353, "rewards/margins": 0.2384069859981537, "rewards/rejected": -0.649091899394989, "step": 1240 }, { "epoch": 0.16, "learning_rate": 4.93887151100205e-06, "logits/chosen": -2.733797550201416, "logits/rejected": -2.6489291191101074, "logps/chosen": -366.79193115234375, "logps/rejected": -345.00640869140625, "loss": 0.6567, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.35443398356437683, "rewards/margins": 0.1330409049987793, "rewards/rejected": -0.48747485876083374, "step": 1250 }, { "epoch": 0.16, "learning_rate": 4.936335624820313e-06, "logits/chosen": -2.7169675827026367, "logits/rejected": -2.6466755867004395, "logps/chosen": -302.01953125, "logps/rejected": -282.923828125, "loss": 0.6326, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.24352756142616272, "rewards/margins": 0.18360894918441772, "rewards/rejected": -0.42713651061058044, "step": 1260 }, { "epoch": 0.17, "learning_rate": 4.933748880042271e-06, "logits/chosen": -2.7060999870300293, "logits/rejected": -2.638232469558716, "logps/chosen": -310.6798400878906, "logps/rejected": -311.3609619140625, "loss": 0.6077, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.3422698378562927, "rewards/margins": 0.24755015969276428, "rewards/rejected": -0.5898200273513794, "step": 1270 }, { "epoch": 0.17, "learning_rate": 4.931111330666317e-06, "logits/chosen": -2.575082540512085, "logits/rejected": -2.4545376300811768, "logps/chosen": -317.52264404296875, "logps/rejected": -297.6214294433594, "loss": 0.6191, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.47772637009620667, "rewards/margins": 0.21179227530956268, "rewards/rejected": -0.6895186901092529, "step": 1280 }, { "epoch": 0.17, "learning_rate": 4.9284230317513906e-06, "logits/chosen": -2.5893354415893555, "logits/rejected": -2.5187082290649414, "logps/chosen": -379.6683654785156, "logps/rejected": -363.97015380859375, "loss": 0.6043, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6415658593177795, "rewards/margins": 0.3069884777069092, "rewards/rejected": -0.9485543370246887, "step": 1290 }, { "epoch": 0.17, "learning_rate": 4.9256840394158325e-06, "logits/chosen": -2.4693050384521484, "logits/rejected": -2.432520627975464, "logps/chosen": -361.9473571777344, "logps/rejected": -426.77685546875, "loss": 0.5844, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6800118684768677, "rewards/margins": 0.3689848780632019, "rewards/rejected": -1.0489966869354248, "step": 1300 }, { "epoch": 0.17, "eval_logits/chosen": -2.0743157863616943, "eval_logits/rejected": -2.0337142944335938, "eval_logps/chosen": -370.3147277832031, "eval_logps/rejected": -378.0120544433594, "eval_loss": 0.6228457093238831, "eval_rewards/accuracies": 0.640999972820282, "eval_rewards/chosen": -0.8502199649810791, "eval_rewards/margins": 0.29120388627052307, "eval_rewards/rejected": -1.1414238214492798, "eval_runtime": 1172.7082, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 1300 }, { "epoch": 0.17, "learning_rate": 4.922894410836207e-06, "logits/chosen": -2.494260787963867, "logits/rejected": -2.3333301544189453, "logps/chosen": -394.7021789550781, "logps/rejected": -361.4925842285156, "loss": 0.635, "rewards/accuracies": 0.625, "rewards/chosen": -0.8935394287109375, "rewards/margins": 0.2934940457344055, "rewards/rejected": -1.1870336532592773, "step": 1310 }, { "epoch": 0.17, "learning_rate": 4.920054204246116e-06, "logits/chosen": -2.525690793991089, "logits/rejected": -2.4263322353363037, "logps/chosen": -370.66790771484375, "logps/rejected": -344.0418395996094, "loss": 0.634, "rewards/accuracies": 0.625, "rewards/chosen": -0.7464545965194702, "rewards/margins": 0.2129075974225998, "rewards/rejected": -0.9593623280525208, "step": 1320 }, { "epoch": 0.17, "learning_rate": 4.9171634789349744e-06, "logits/chosen": -2.4831061363220215, "logits/rejected": -2.4301085472106934, "logps/chosen": -344.4041748046875, "logps/rejected": -388.18572998046875, "loss": 0.5445, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6461294293403625, "rewards/margins": 0.44957056641578674, "rewards/rejected": -1.0957000255584717, "step": 1330 }, { "epoch": 0.18, "learning_rate": 4.914222295246782e-06, "logits/chosen": -2.527456521987915, "logits/rejected": -2.4752743244171143, "logps/chosen": -345.05767822265625, "logps/rejected": -359.32391357421875, "loss": 0.6608, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.6638450622558594, "rewards/margins": 0.15090003609657288, "rewards/rejected": -0.8147451281547546, "step": 1340 }, { "epoch": 0.18, "learning_rate": 4.911230714578858e-06, "logits/chosen": -2.441866636276245, "logits/rejected": -2.462878704071045, "logps/chosen": -284.39227294921875, "logps/rejected": -344.8713073730469, "loss": 0.5629, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.500754177570343, "rewards/margins": 0.40834903717041016, "rewards/rejected": -0.9091032147407532, "step": 1350 }, { "epoch": 0.18, "learning_rate": 4.908188799380558e-06, "logits/chosen": -2.4615304470062256, "logits/rejected": -2.425814390182495, "logps/chosen": -331.05267333984375, "logps/rejected": -332.3231201171875, "loss": 0.5829, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6858211755752563, "rewards/margins": 0.3280574083328247, "rewards/rejected": -1.013878583908081, "step": 1360 }, { "epoch": 0.18, "learning_rate": 4.905096613151975e-06, "logits/chosen": -2.3768835067749023, "logits/rejected": -2.288085460662842, "logps/chosen": -415.037109375, "logps/rejected": -410.47821044921875, "loss": 0.6679, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.0296709537506104, "rewards/margins": 0.2172650396823883, "rewards/rejected": -1.2469360828399658, "step": 1370 }, { "epoch": 0.18, "learning_rate": 4.90195422044261e-06, "logits/chosen": -2.4731457233428955, "logits/rejected": -2.4283947944641113, "logps/chosen": -387.6423645019531, "logps/rejected": -397.9414367675781, "loss": 0.5595, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.753621518611908, "rewards/margins": 0.45417460799217224, "rewards/rejected": -1.2077960968017578, "step": 1380 }, { "epoch": 0.18, "learning_rate": 4.898761686850028e-06, "logits/chosen": -2.353015422821045, "logits/rejected": -2.2464370727539062, "logps/chosen": -369.61572265625, "logps/rejected": -382.53082275390625, "loss": 0.6584, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8926342725753784, "rewards/margins": 0.2712360918521881, "rewards/rejected": -1.1638704538345337, "step": 1390 }, { "epoch": 0.18, "learning_rate": 4.895519079018485e-06, "logits/chosen": -2.401740312576294, "logits/rejected": -2.185915470123291, "logps/chosen": -335.4225158691406, "logps/rejected": -341.16680908203125, "loss": 0.6085, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6165248155593872, "rewards/margins": 0.3225622773170471, "rewards/rejected": -0.9390870332717896, "step": 1400 }, { "epoch": 0.18, "eval_logits/chosen": -1.9741628170013428, "eval_logits/rejected": -1.927571415901184, "eval_logps/chosen": -346.9267883300781, "eval_logps/rejected": -353.4969787597656, "eval_loss": 0.6157093644142151, "eval_rewards/accuracies": 0.656499981880188, "eval_rewards/chosen": -0.6163406372070312, "eval_rewards/margins": 0.27993249893188477, "eval_rewards/rejected": -0.896273136138916, "eval_runtime": 1173.3158, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 1400 }, { "epoch": 0.18, "learning_rate": 4.89222646463754e-06, "logits/chosen": -2.4668169021606445, "logits/rejected": -2.419532060623169, "logps/chosen": -343.31878662109375, "logps/rejected": -360.8763427734375, "loss": 0.63, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6849584579467773, "rewards/margins": 0.2665075957775116, "rewards/rejected": -0.9514660835266113, "step": 1410 }, { "epoch": 0.19, "learning_rate": 4.888883912440642e-06, "logits/chosen": -2.4510014057159424, "logits/rejected": -2.3843305110931396, "logps/chosen": -395.0271911621094, "logps/rejected": -407.986572265625, "loss": 0.6226, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.68365079164505, "rewards/margins": 0.29661715030670166, "rewards/rejected": -0.9802680015563965, "step": 1420 }, { "epoch": 0.19, "learning_rate": 4.885491492203688e-06, "logits/chosen": -2.329678773880005, "logits/rejected": -2.3171207904815674, "logps/chosen": -349.471435546875, "logps/rejected": -345.58251953125, "loss": 0.6077, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6296406388282776, "rewards/margins": 0.24890565872192383, "rewards/rejected": -0.8785463571548462, "step": 1430 }, { "epoch": 0.19, "learning_rate": 4.882049274743578e-06, "logits/chosen": -2.446080446243286, "logits/rejected": -2.4080348014831543, "logps/chosen": -392.8956298828125, "logps/rejected": -388.02960205078125, "loss": 0.5971, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6403361558914185, "rewards/margins": 0.33641594648361206, "rewards/rejected": -0.9767521023750305, "step": 1440 }, { "epoch": 0.19, "learning_rate": 4.878557331916729e-06, "logits/chosen": -2.3418636322021484, "logits/rejected": -2.364252805709839, "logps/chosen": -342.7167663574219, "logps/rejected": -350.45538330078125, "loss": 0.5703, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7152738571166992, "rewards/margins": 0.36363348364830017, "rewards/rejected": -1.0789072513580322, "step": 1450 }, { "epoch": 0.19, "learning_rate": 4.875015736617576e-06, "logits/chosen": -2.3957436084747314, "logits/rejected": -2.3168387413024902, "logps/chosen": -424.3904724121094, "logps/rejected": -406.2117614746094, "loss": 0.5892, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7364000082015991, "rewards/margins": 0.3339517116546631, "rewards/rejected": -1.0703517198562622, "step": 1460 }, { "epoch": 0.19, "learning_rate": 4.8714245627770515e-06, "logits/chosen": -2.3729703426361084, "logits/rejected": -2.184260129928589, "logps/chosen": -353.2168273925781, "logps/rejected": -339.24261474609375, "loss": 0.6548, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.8984875679016113, "rewards/margins": 0.20593862235546112, "rewards/rejected": -1.1044261455535889, "step": 1470 }, { "epoch": 0.19, "learning_rate": 4.8677838853610445e-06, "logits/chosen": -2.2903103828430176, "logits/rejected": -2.1878256797790527, "logps/chosen": -370.137451171875, "logps/rejected": -356.63104248046875, "loss": 0.6219, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9454523324966431, "rewards/margins": 0.3196515440940857, "rewards/rejected": -1.265103816986084, "step": 1480 }, { "epoch": 0.19, "learning_rate": 4.864093780368828e-06, "logits/chosen": -2.435291290283203, "logits/rejected": -2.2713615894317627, "logps/chosen": -396.46807861328125, "logps/rejected": -377.0063781738281, "loss": 0.5697, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8792732357978821, "rewards/margins": 0.3991313874721527, "rewards/rejected": -1.2784045934677124, "step": 1490 }, { "epoch": 0.2, "learning_rate": 4.860354324831482e-06, "logits/chosen": -2.388760566711426, "logits/rejected": -2.385594129562378, "logps/chosen": -379.97344970703125, "logps/rejected": -420.51934814453125, "loss": 0.5887, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9489212036132812, "rewards/margins": 0.3839309811592102, "rewards/rejected": -1.3328521251678467, "step": 1500 }, { "epoch": 0.2, "eval_logits/chosen": -1.54761803150177, "eval_logits/rejected": -1.5129750967025757, "eval_logps/chosen": -390.6337890625, "eval_logps/rejected": -404.723388671875, "eval_loss": 0.6093257665634155, "eval_rewards/accuracies": 0.6585000157356262, "eval_rewards/chosen": -1.0534104108810425, "eval_rewards/margins": 0.35512715578079224, "eval_rewards/rejected": -1.40853750705719, "eval_runtime": 1173.7666, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 1500 }, { "epoch": 0.2, "learning_rate": 4.856565596810279e-06, "logits/chosen": -2.269038438796997, "logits/rejected": -2.240347385406494, "logps/chosen": -350.50933837890625, "logps/rejected": -381.20684814453125, "loss": 0.6657, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.1637290716171265, "rewards/margins": 0.2023475170135498, "rewards/rejected": -1.3660767078399658, "step": 1510 }, { "epoch": 0.2, "learning_rate": 4.852727675395056e-06, "logits/chosen": -2.311117172241211, "logits/rejected": -2.18780517578125, "logps/chosen": -383.8539123535156, "logps/rejected": -395.37872314453125, "loss": 0.5408, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0786935091018677, "rewards/margins": 0.4756636619567871, "rewards/rejected": -1.5543569326400757, "step": 1520 }, { "epoch": 0.2, "learning_rate": 4.848840640702565e-06, "logits/chosen": -2.2907707691192627, "logits/rejected": -2.2663180828094482, "logps/chosen": -388.94317626953125, "logps/rejected": -380.520263671875, "loss": 0.67, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2756744623184204, "rewards/margins": 0.2061425894498825, "rewards/rejected": -1.4818170070648193, "step": 1530 }, { "epoch": 0.2, "learning_rate": 4.844904573874798e-06, "logits/chosen": -2.2316396236419678, "logits/rejected": -2.1963393688201904, "logps/chosen": -392.61492919921875, "logps/rejected": -379.75390625, "loss": 0.6004, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9927800297737122, "rewards/margins": 0.39371055364608765, "rewards/rejected": -1.3864905834197998, "step": 1540 }, { "epoch": 0.2, "learning_rate": 4.840919557077297e-06, "logits/chosen": -2.321735382080078, "logits/rejected": -2.1763479709625244, "logps/chosen": -388.51239013671875, "logps/rejected": -379.34027099609375, "loss": 0.6354, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0403954982757568, "rewards/margins": 0.2822137475013733, "rewards/rejected": -1.3226091861724854, "step": 1550 }, { "epoch": 0.2, "learning_rate": 4.836885673497435e-06, "logits/chosen": -2.3030014038085938, "logits/rejected": -2.185213565826416, "logps/chosen": -373.89276123046875, "logps/rejected": -391.1092529296875, "loss": 0.5617, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8299756050109863, "rewards/margins": 0.43793243169784546, "rewards/rejected": -1.267907977104187, "step": 1560 }, { "epoch": 0.21, "learning_rate": 4.832803007342679e-06, "logits/chosen": -2.268263339996338, "logits/rejected": -2.30707049369812, "logps/chosen": -338.6516418457031, "logps/rejected": -377.26324462890625, "loss": 0.6173, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.8366347551345825, "rewards/margins": 0.321827232837677, "rewards/rejected": -1.1584619283676147, "step": 1570 }, { "epoch": 0.21, "learning_rate": 4.828671643838839e-06, "logits/chosen": -2.2386622428894043, "logits/rejected": -2.1988167762756348, "logps/chosen": -355.00958251953125, "logps/rejected": -341.5582580566406, "loss": 0.6171, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7135469913482666, "rewards/margins": 0.30205848813056946, "rewards/rejected": -1.0156054496765137, "step": 1580 }, { "epoch": 0.21, "learning_rate": 4.824491669228279e-06, "logits/chosen": -2.232062816619873, "logits/rejected": -2.1673014163970947, "logps/chosen": -351.5841369628906, "logps/rejected": -344.68939208984375, "loss": 0.6685, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8179405331611633, "rewards/margins": 0.19087883830070496, "rewards/rejected": -1.008819341659546, "step": 1590 }, { "epoch": 0.21, "learning_rate": 4.8202631707681245e-06, "logits/chosen": -2.2642502784729004, "logits/rejected": -2.138714551925659, "logps/chosen": -342.4280700683594, "logps/rejected": -366.55609130859375, "loss": 0.5585, "rewards/accuracies": 0.75, "rewards/chosen": -0.8875049352645874, "rewards/margins": 0.4244559407234192, "rewards/rejected": -1.3119609355926514, "step": 1600 }, { "epoch": 0.21, "eval_logits/chosen": -1.4652458429336548, "eval_logits/rejected": -1.4216023683547974, "eval_logps/chosen": -370.8767395019531, "eval_logps/rejected": -387.5893249511719, "eval_loss": 0.6019625663757324, "eval_rewards/accuracies": 0.6644999980926514, "eval_rewards/chosen": -0.8558406829833984, "eval_rewards/margins": 0.3813556730747223, "eval_rewards/rejected": -1.2371965646743774, "eval_runtime": 1173.7745, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 1600 }, { "epoch": 0.21, "learning_rate": 4.815986236728437e-06, "logits/chosen": -2.184940814971924, "logits/rejected": -2.126659870147705, "logps/chosen": -356.41668701171875, "logps/rejected": -386.04779052734375, "loss": 0.6132, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8487674593925476, "rewards/margins": 0.3206823468208313, "rewards/rejected": -1.1694496870040894, "step": 1610 }, { "epoch": 0.21, "learning_rate": 4.811660956390372e-06, "logits/chosen": -2.3122637271881104, "logits/rejected": -2.2805025577545166, "logps/chosen": -394.89727783203125, "logps/rejected": -388.9953918457031, "loss": 0.6314, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.7255429029464722, "rewards/margins": 0.28325191140174866, "rewards/rejected": -1.0087947845458984, "step": 1620 }, { "epoch": 0.21, "learning_rate": 4.807287420044319e-06, "logits/chosen": -2.3865654468536377, "logits/rejected": -2.369770050048828, "logps/chosen": -314.7702331542969, "logps/rejected": -348.518310546875, "loss": 0.5655, "rewards/accuracies": 0.75, "rewards/chosen": -0.6648892164230347, "rewards/margins": 0.48127803206443787, "rewards/rejected": -1.1461671590805054, "step": 1630 }, { "epoch": 0.21, "learning_rate": 4.802865718988008e-06, "logits/chosen": -2.205326795578003, "logits/rejected": -2.1454052925109863, "logps/chosen": -348.91082763671875, "logps/rejected": -410.8165588378906, "loss": 0.6301, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0370396375656128, "rewards/margins": 0.2929023504257202, "rewards/rejected": -1.329941987991333, "step": 1640 }, { "epoch": 0.22, "learning_rate": 4.798395945524615e-06, "logits/chosen": -2.2590596675872803, "logits/rejected": -2.1765456199645996, "logps/chosen": -394.92132568359375, "logps/rejected": -415.81707763671875, "loss": 0.5635, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1871960163116455, "rewards/margins": 0.4527507424354553, "rewards/rejected": -1.6399469375610352, "step": 1650 }, { "epoch": 0.22, "learning_rate": 4.793878192960823e-06, "logits/chosen": -2.2939252853393555, "logits/rejected": -2.1937241554260254, "logps/chosen": -469.12811279296875, "logps/rejected": -500.58837890625, "loss": 0.6277, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3709437847137451, "rewards/margins": 0.4458431303501129, "rewards/rejected": -1.8167870044708252, "step": 1660 }, { "epoch": 0.22, "learning_rate": 4.789312555604887e-06, "logits/chosen": -2.2132697105407715, "logits/rejected": -2.1623330116271973, "logps/chosen": -370.47039794921875, "logps/rejected": -391.04534912109375, "loss": 0.5848, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0416100025177002, "rewards/margins": 0.44926396012306213, "rewards/rejected": -1.49087393283844, "step": 1670 }, { "epoch": 0.22, "learning_rate": 4.784699128764654e-06, "logits/chosen": -2.3140299320220947, "logits/rejected": -2.226590394973755, "logps/chosen": -355.25970458984375, "logps/rejected": -377.73211669921875, "loss": 0.5963, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9214965105056763, "rewards/margins": 0.4505564272403717, "rewards/rejected": -1.3720529079437256, "step": 1680 }, { "epoch": 0.22, "learning_rate": 4.780038008745581e-06, "logits/chosen": -2.3037924766540527, "logits/rejected": -2.20536732673645, "logps/chosen": -400.2337646484375, "logps/rejected": -388.82733154296875, "loss": 0.6463, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9450837969779968, "rewards/margins": 0.2872735857963562, "rewards/rejected": -1.2323572635650635, "step": 1690 }, { "epoch": 0.22, "learning_rate": 4.775329292848721e-06, "logits/chosen": -2.296189069747925, "logits/rejected": -2.2350804805755615, "logps/chosen": -370.3669738769531, "logps/rejected": -406.692626953125, "loss": 0.5417, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7108799815177917, "rewards/margins": 0.5038576126098633, "rewards/rejected": -1.2147375345230103, "step": 1700 }, { "epoch": 0.22, "eval_logits/chosen": -1.3614152669906616, "eval_logits/rejected": -1.3190244436264038, "eval_logps/chosen": -363.167236328125, "eval_logps/rejected": -380.348876953125, "eval_loss": 0.5937426686286926, "eval_rewards/accuracies": 0.6639999747276306, "eval_rewards/chosen": -0.7787453532218933, "eval_rewards/margins": 0.38604700565338135, "eval_rewards/rejected": -1.1647922992706299, "eval_runtime": 1173.3172, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 1700 }, { "epoch": 0.22, "learning_rate": 4.770573079368691e-06, "logits/chosen": -2.308875560760498, "logits/rejected": -2.3057169914245605, "logps/chosen": -360.26898193359375, "logps/rejected": -362.5707702636719, "loss": 0.6155, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.765152633190155, "rewards/margins": 0.3400669991970062, "rewards/rejected": -1.1052196025848389, "step": 1710 }, { "epoch": 0.23, "learning_rate": 4.765769467591626e-06, "logits/chosen": -2.417853832244873, "logits/rejected": -2.3791589736938477, "logps/chosen": -382.55157470703125, "logps/rejected": -393.84710693359375, "loss": 0.5753, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7473410367965698, "rewards/margins": 0.3952387273311615, "rewards/rejected": -1.1425797939300537, "step": 1720 }, { "epoch": 0.23, "learning_rate": 4.760918557793096e-06, "logits/chosen": -2.3585808277130127, "logits/rejected": -2.375034809112549, "logps/chosen": -345.61383056640625, "logps/rejected": -390.66583251953125, "loss": 0.5851, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7573005557060242, "rewards/margins": 0.37787926197052, "rewards/rejected": -1.1351797580718994, "step": 1730 }, { "epoch": 0.23, "learning_rate": 4.756020451236025e-06, "logits/chosen": -2.3663887977600098, "logits/rejected": -2.2452588081359863, "logps/chosen": -395.936767578125, "logps/rejected": -402.03155517578125, "loss": 0.6093, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7393473386764526, "rewards/margins": 0.37452763319015503, "rewards/rejected": -1.113874912261963, "step": 1740 }, { "epoch": 0.23, "learning_rate": 4.751075250168569e-06, "logits/chosen": -2.3883743286132812, "logits/rejected": -2.1911792755126953, "logps/chosen": -386.3553161621094, "logps/rejected": -410.297119140625, "loss": 0.5629, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0602279901504517, "rewards/margins": 0.5666171908378601, "rewards/rejected": -1.626845121383667, "step": 1750 }, { "epoch": 0.23, "learning_rate": 4.746083057821981e-06, "logits/chosen": -2.1893460750579834, "logits/rejected": -2.053621530532837, "logps/chosen": -363.1799621582031, "logps/rejected": -383.117431640625, "loss": 0.5724, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9518973231315613, "rewards/margins": 0.5612205266952515, "rewards/rejected": -1.513117790222168, "step": 1760 }, { "epoch": 0.23, "learning_rate": 4.741043978408463e-06, "logits/chosen": -2.1911866664886475, "logits/rejected": -2.0612337589263916, "logps/chosen": -371.76446533203125, "logps/rejected": -427.1809997558594, "loss": 0.5255, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9364175796508789, "rewards/margins": 0.6334894895553589, "rewards/rejected": -1.5699070692062378, "step": 1770 }, { "epoch": 0.23, "learning_rate": 4.735958117118983e-06, "logits/chosen": -2.347450017929077, "logits/rejected": -2.147848129272461, "logps/chosen": -394.55975341796875, "logps/rejected": -418.2250061035156, "loss": 0.5623, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8392922282218933, "rewards/margins": 0.5399720072746277, "rewards/rejected": -1.3792643547058105, "step": 1780 }, { "epoch": 0.23, "learning_rate": 4.730825580121084e-06, "logits/chosen": -2.3108162879943848, "logits/rejected": -2.1813712120056152, "logps/chosen": -350.3599853515625, "logps/rejected": -411.48944091796875, "loss": 0.545, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8638598322868347, "rewards/margins": 0.5645357966423035, "rewards/rejected": -1.4283956289291382, "step": 1790 }, { "epoch": 0.24, "learning_rate": 4.725646474556666e-06, "logits/chosen": -2.17897367477417, "logits/rejected": -2.144803285598755, "logps/chosen": -335.24713134765625, "logps/rejected": -410.5194396972656, "loss": 0.5691, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9080871343612671, "rewards/margins": 0.6468304395675659, "rewards/rejected": -1.554917573928833, "step": 1800 }, { "epoch": 0.24, "eval_logits/chosen": -0.7890856862068176, "eval_logits/rejected": -0.7432680726051331, "eval_logps/chosen": -392.1944885253906, "eval_logps/rejected": -420.14715576171875, "eval_loss": 0.5964349508285522, "eval_rewards/accuracies": 0.6704999804496765, "eval_rewards/chosen": -1.0690178871154785, "eval_rewards/margins": 0.49375709891319275, "eval_rewards/rejected": -1.5627750158309937, "eval_runtime": 1172.7485, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 1800 }, { "epoch": 0.24, "learning_rate": 4.720420908539748e-06, "logits/chosen": -2.155564785003662, "logits/rejected": -2.0796492099761963, "logps/chosen": -371.0989685058594, "logps/rejected": -411.58270263671875, "loss": 0.6435, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.069402813911438, "rewards/margins": 0.37863826751708984, "rewards/rejected": -1.4480410814285278, "step": 1810 }, { "epoch": 0.24, "learning_rate": 4.715148991154216e-06, "logits/chosen": -2.2921173572540283, "logits/rejected": -2.2819952964782715, "logps/chosen": -461.7276916503906, "logps/rejected": -498.3104553222656, "loss": 0.6099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0293859243392944, "rewards/margins": 0.44142985343933105, "rewards/rejected": -1.470815896987915, "step": 1820 }, { "epoch": 0.24, "learning_rate": 4.709830832451538e-06, "logits/chosen": -2.1879446506500244, "logits/rejected": -2.137216091156006, "logps/chosen": -426.2359924316406, "logps/rejected": -461.81439208984375, "loss": 0.597, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1083770990371704, "rewards/margins": 0.5203989148139954, "rewards/rejected": -1.6287761926651, "step": 1830 }, { "epoch": 0.24, "learning_rate": 4.704466543448477e-06, "logits/chosen": -2.1278553009033203, "logits/rejected": -1.9929275512695312, "logps/chosen": -473.5394592285156, "logps/rejected": -482.0958557128906, "loss": 0.5253, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1090824604034424, "rewards/margins": 0.6798272132873535, "rewards/rejected": -1.788909673690796, "step": 1840 }, { "epoch": 0.24, "learning_rate": 4.699056236124762e-06, "logits/chosen": -2.1284070014953613, "logits/rejected": -2.0913150310516357, "logps/chosen": -384.767333984375, "logps/rejected": -431.7093811035156, "loss": 0.592, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0932021141052246, "rewards/margins": 0.46525582671165466, "rewards/rejected": -1.5584577322006226, "step": 1850 }, { "epoch": 0.24, "learning_rate": 4.693600023420758e-06, "logits/chosen": -2.176835536956787, "logits/rejected": -2.0453624725341797, "logps/chosen": -439.25469970703125, "logps/rejected": -430.12017822265625, "loss": 0.5293, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2489891052246094, "rewards/margins": 0.7063229084014893, "rewards/rejected": -1.9553120136260986, "step": 1860 }, { "epoch": 0.24, "learning_rate": 4.688098019235108e-06, "logits/chosen": -2.1578125953674316, "logits/rejected": -2.0287883281707764, "logps/chosen": -433.19635009765625, "logps/rejected": -473.2142028808594, "loss": 0.5775, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2516090869903564, "rewards/margins": 0.5367294549942017, "rewards/rejected": -1.7883384227752686, "step": 1870 }, { "epoch": 0.25, "learning_rate": 4.682550338422353e-06, "logits/chosen": -2.096961498260498, "logits/rejected": -1.9933475255966187, "logps/chosen": -422.03643798828125, "logps/rejected": -432.7709045410156, "loss": 0.5831, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3931233882904053, "rewards/margins": 0.4907165467739105, "rewards/rejected": -1.8838398456573486, "step": 1880 }, { "epoch": 0.25, "learning_rate": 4.676957096790536e-06, "logits/chosen": -2.0234227180480957, "logits/rejected": -1.9713634252548218, "logps/chosen": -406.87890625, "logps/rejected": -406.08074951171875, "loss": 0.6289, "rewards/accuracies": 0.625, "rewards/chosen": -1.2355217933654785, "rewards/margins": 0.37370064854621887, "rewards/rejected": -1.609222650527954, "step": 1890 }, { "epoch": 0.25, "learning_rate": 4.671318411098782e-06, "logits/chosen": -2.08906626701355, "logits/rejected": -2.1200289726257324, "logps/chosen": -437.5709533691406, "logps/rejected": -503.25091552734375, "loss": 0.5869, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3465156555175781, "rewards/margins": 0.6033599376678467, "rewards/rejected": -1.9498755931854248, "step": 1900 }, { "epoch": 0.25, "eval_logits/chosen": -0.5963050127029419, "eval_logits/rejected": -0.575740396976471, "eval_logps/chosen": -427.6318054199219, "eval_logps/rejected": -450.0478210449219, "eval_loss": 0.5930874347686768, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -1.4233906269073486, "eval_rewards/margins": 0.4383908212184906, "eval_rewards/rejected": -1.8617814779281616, "eval_runtime": 1173.2085, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 1900 }, { "epoch": 0.25, "learning_rate": 4.665634399054864e-06, "logits/chosen": -2.063363552093506, "logits/rejected": -2.0153086185455322, "logps/chosen": -403.9264831542969, "logps/rejected": -435.4012756347656, "loss": 0.636, "rewards/accuracies": 0.625, "rewards/chosen": -1.5465749502182007, "rewards/margins": 0.37581175565719604, "rewards/rejected": -1.9223867654800415, "step": 1910 }, { "epoch": 0.25, "learning_rate": 4.659905179312743e-06, "logits/chosen": -2.292100429534912, "logits/rejected": -2.212569236755371, "logps/chosen": -456.89447021484375, "logps/rejected": -429.1956481933594, "loss": 0.5992, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.3993576765060425, "rewards/margins": 0.3659273087978363, "rewards/rejected": -1.7652848958969116, "step": 1920 }, { "epoch": 0.25, "learning_rate": 4.654130871470093e-06, "logits/chosen": -2.2021963596343994, "logits/rejected": -2.076413869857788, "logps/chosen": -391.838134765625, "logps/rejected": -380.90032958984375, "loss": 0.6364, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1989585161209106, "rewards/margins": 0.24625608325004578, "rewards/rejected": -1.4452146291732788, "step": 1930 }, { "epoch": 0.25, "learning_rate": 4.6483115960658045e-06, "logits/chosen": -2.333247423171997, "logits/rejected": -2.182128429412842, "logps/chosen": -402.03070068359375, "logps/rejected": -364.1950988769531, "loss": 0.5966, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1187512874603271, "rewards/margins": 0.3161240220069885, "rewards/rejected": -1.434875249862671, "step": 1940 }, { "epoch": 0.26, "learning_rate": 4.642447474577466e-06, "logits/chosen": -2.1036577224731445, "logits/rejected": -2.1021203994750977, "logps/chosen": -362.9808349609375, "logps/rejected": -400.50506591796875, "loss": 0.5646, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1177865266799927, "rewards/margins": 0.47713446617126465, "rewards/rejected": -1.5949208736419678, "step": 1950 }, { "epoch": 0.26, "learning_rate": 4.636538629418832e-06, "logits/chosen": -2.1588752269744873, "logits/rejected": -2.1076531410217285, "logps/chosen": -413.4895935058594, "logps/rejected": -445.3883361816406, "loss": 0.5649, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1384721994400024, "rewards/margins": 0.5193332433700562, "rewards/rejected": -1.6578052043914795, "step": 1960 }, { "epoch": 0.26, "learning_rate": 4.630585183937263e-06, "logits/chosen": -2.1655097007751465, "logits/rejected": -2.0573620796203613, "logps/chosen": -405.8871765136719, "logps/rejected": -407.2527770996094, "loss": 0.6204, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0278955698013306, "rewards/margins": 0.3120744824409485, "rewards/rejected": -1.3399698734283447, "step": 1970 }, { "epoch": 0.26, "learning_rate": 4.6245872624111535e-06, "logits/chosen": -2.13643217086792, "logits/rejected": -2.1128616333007812, "logps/chosen": -335.54150390625, "logps/rejected": -354.4369812011719, "loss": 0.6132, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9324430227279663, "rewards/margins": 0.3190438747406006, "rewards/rejected": -1.2514870166778564, "step": 1980 }, { "epoch": 0.26, "learning_rate": 4.618544990047336e-06, "logits/chosen": -2.114036798477173, "logits/rejected": -2.04970121383667, "logps/chosen": -415.504638671875, "logps/rejected": -439.928955078125, "loss": 0.6247, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.0088666677474976, "rewards/margins": 0.38996371626853943, "rewards/rejected": -1.3988304138183594, "step": 1990 }, { "epoch": 0.26, "learning_rate": 4.612458492978473e-06, "logits/chosen": -2.297130823135376, "logits/rejected": -2.232883930206299, "logps/chosen": -365.77850341796875, "logps/rejected": -401.78826904296875, "loss": 0.6732, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.0754987001419067, "rewards/margins": 0.30849093198776245, "rewards/rejected": -1.383989691734314, "step": 2000 }, { "epoch": 0.26, "eval_logits/chosen": -0.959629237651825, "eval_logits/rejected": -0.892835795879364, "eval_logps/chosen": -358.4945068359375, "eval_logps/rejected": -377.0960693359375, "eval_loss": 0.5928361415863037, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -0.7320181131362915, "eval_rewards/margins": 0.4002459943294525, "eval_rewards/rejected": -1.1322641372680664, "eval_runtime": 1173.4466, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.606327898260413e-06, "logits/chosen": -2.1415932178497314, "logits/rejected": -2.069800615310669, "logps/chosen": -369.90728759765625, "logps/rejected": -384.2721862792969, "loss": 0.6042, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.7192890048027039, "rewards/margins": 0.40543490648269653, "rewards/rejected": -1.1247237920761108, "step": 2010 }, { "epoch": 0.26, "learning_rate": 4.600153333869549e-06, "logits/chosen": -2.299431324005127, "logits/rejected": -2.22072172164917, "logps/chosen": -354.39117431640625, "logps/rejected": -355.1102294921875, "loss": 0.5805, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6067346930503845, "rewards/margins": 0.40614938735961914, "rewards/rejected": -1.0128841400146484, "step": 2020 }, { "epoch": 0.27, "learning_rate": 4.593934928700141e-06, "logits/chosen": -2.2791316509246826, "logits/rejected": -2.1022727489471436, "logps/chosen": -375.39031982421875, "logps/rejected": -386.8539733886719, "loss": 0.6022, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.8547514081001282, "rewards/margins": 0.4911293089389801, "rewards/rejected": -1.3458807468414307, "step": 2030 }, { "epoch": 0.27, "learning_rate": 4.587672812561626e-06, "logits/chosen": -2.088589668273926, "logits/rejected": -2.056277275085449, "logps/chosen": -356.75152587890625, "logps/rejected": -447.05322265625, "loss": 0.529, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.013441801071167, "rewards/margins": 0.5791908502578735, "rewards/rejected": -1.592632532119751, "step": 2040 }, { "epoch": 0.27, "learning_rate": 4.581367116175911e-06, "logits/chosen": -2.0325567722320557, "logits/rejected": -1.9162721633911133, "logps/chosen": -429.95745849609375, "logps/rejected": -434.22979736328125, "loss": 0.5999, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1933940649032593, "rewards/margins": 0.44283047318458557, "rewards/rejected": -1.6362245082855225, "step": 2050 }, { "epoch": 0.27, "learning_rate": 4.5750179711746416e-06, "logits/chosen": -2.023871898651123, "logits/rejected": -1.9521923065185547, "logps/chosen": -420.52508544921875, "logps/rejected": -465.43798828125, "loss": 0.5524, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5283615589141846, "rewards/margins": 0.5009020566940308, "rewards/rejected": -2.029263496398926, "step": 2060 }, { "epoch": 0.27, "learning_rate": 4.5686255100964535e-06, "logits/chosen": -2.0695993900299072, "logits/rejected": -1.992722749710083, "logps/chosen": -445.06683349609375, "logps/rejected": -456.1000061035156, "loss": 0.5776, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6490529775619507, "rewards/margins": 0.45439139008522034, "rewards/rejected": -2.1034445762634277, "step": 2070 }, { "epoch": 0.27, "learning_rate": 4.562189866384209e-06, "logits/chosen": -1.94949209690094, "logits/rejected": -1.9335947036743164, "logps/chosen": -411.20379638671875, "logps/rejected": -485.4537048339844, "loss": 0.5876, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.5798475742340088, "rewards/margins": 0.5429893136024475, "rewards/rejected": -2.1228365898132324, "step": 2080 }, { "epoch": 0.27, "learning_rate": 4.555711174382209e-06, "logits/chosen": -2.0153870582580566, "logits/rejected": -1.9438400268554688, "logps/chosen": -401.353271484375, "logps/rejected": -415.864501953125, "loss": 0.6126, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.529844045639038, "rewards/margins": 0.366629958152771, "rewards/rejected": -1.8964741230010986, "step": 2090 }, { "epoch": 0.27, "learning_rate": 4.549189569333387e-06, "logits/chosen": -2.0757155418395996, "logits/rejected": -1.9299837350845337, "logps/chosen": -366.15838623046875, "logps/rejected": -385.17474365234375, "loss": 0.5453, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1265515089035034, "rewards/margins": 0.49831241369247437, "rewards/rejected": -1.6248639822006226, "step": 2100 }, { "epoch": 0.27, "eval_logits/chosen": -0.33254069089889526, "eval_logits/rejected": -0.3057098388671875, "eval_logps/chosen": -407.44610595703125, "eval_logps/rejected": -431.1004943847656, "eval_loss": 0.5811684131622314, "eval_rewards/accuracies": 0.6769999861717224, "eval_rewards/chosen": -1.2215332984924316, "eval_rewards/margins": 0.450775146484375, "eval_rewards/rejected": -1.6723084449768066, "eval_runtime": 1173.1346, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 2100 }, { "epoch": 0.28, "learning_rate": 4.542625187376491e-06, "logits/chosen": -2.260960578918457, "logits/rejected": -2.1369497776031494, "logps/chosen": -435.5047912597656, "logps/rejected": -424.6029357910156, "loss": 0.6178, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2315149307250977, "rewards/margins": 0.343548059463501, "rewards/rejected": -1.5750629901885986, "step": 2110 }, { "epoch": 0.28, "learning_rate": 4.536018165543239e-06, "logits/chosen": -2.270226240158081, "logits/rejected": -2.1414380073547363, "logps/chosen": -436.93499755859375, "logps/rejected": -462.13232421875, "loss": 0.6148, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2013145685195923, "rewards/margins": 0.37624213099479675, "rewards/rejected": -1.5775566101074219, "step": 2120 }, { "epoch": 0.28, "learning_rate": 4.529368641755453e-06, "logits/chosen": -2.187873125076294, "logits/rejected": -2.1502745151519775, "logps/chosen": -373.24639892578125, "logps/rejected": -407.1181640625, "loss": 0.6436, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.308226466178894, "rewards/margins": 0.40959835052490234, "rewards/rejected": -1.7178246974945068, "step": 2130 }, { "epoch": 0.28, "learning_rate": 4.522676754822189e-06, "logits/chosen": -2.077741861343384, "logits/rejected": -1.9873619079589844, "logps/chosen": -417.1329650878906, "logps/rejected": -381.8856201171875, "loss": 0.5885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2676500082015991, "rewards/margins": 0.38986071944236755, "rewards/rejected": -1.65751051902771, "step": 2140 }, { "epoch": 0.28, "learning_rate": 4.515942644436836e-06, "logits/chosen": -2.181087017059326, "logits/rejected": -2.0420756340026855, "logps/chosen": -428.8570251464844, "logps/rejected": -457.95330810546875, "loss": 0.5893, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3216568231582642, "rewards/margins": 0.5574394464492798, "rewards/rejected": -1.879096269607544, "step": 2150 }, { "epoch": 0.28, "learning_rate": 4.509166451174194e-06, "logits/chosen": -2.1448304653167725, "logits/rejected": -2.118425130844116, "logps/chosen": -446.98272705078125, "logps/rejected": -475.9813537597656, "loss": 0.5674, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2321898937225342, "rewards/margins": 0.5356115698814392, "rewards/rejected": -1.7678015232086182, "step": 2160 }, { "epoch": 0.28, "learning_rate": 4.502348316487552e-06, "logits/chosen": -2.1516335010528564, "logits/rejected": -1.9779102802276611, "logps/chosen": -437.4042053222656, "logps/rejected": -434.9273376464844, "loss": 0.6678, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4476085901260376, "rewards/margins": 0.2950034439563751, "rewards/rejected": -1.7426121234893799, "step": 2170 }, { "epoch": 0.29, "learning_rate": 4.495488382705722e-06, "logits/chosen": -2.2639286518096924, "logits/rejected": -2.0572140216827393, "logps/chosen": -442.319580078125, "logps/rejected": -407.50311279296875, "loss": 0.5469, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8552709817886353, "rewards/margins": 0.5348329544067383, "rewards/rejected": -1.390103816986084, "step": 2180 }, { "epoch": 0.29, "learning_rate": 4.488586793030075e-06, "logits/chosen": -2.1855459213256836, "logits/rejected": -2.064626455307007, "logps/chosen": -321.4360046386719, "logps/rejected": -400.38995361328125, "loss": 0.5179, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8480280041694641, "rewards/margins": 0.6384379863739014, "rewards/rejected": -1.4864661693572998, "step": 2190 }, { "epoch": 0.29, "learning_rate": 4.481643691531551e-06, "logits/chosen": -2.194056272506714, "logits/rejected": -2.1267337799072266, "logps/chosen": -382.1920166015625, "logps/rejected": -397.70758056640625, "loss": 0.5521, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9316880106925964, "rewards/margins": 0.5789372324943542, "rewards/rejected": -1.5106254816055298, "step": 2200 }, { "epoch": 0.29, "eval_logits/chosen": -0.10590755194425583, "eval_logits/rejected": -0.08353219926357269, "eval_logps/chosen": -383.8439025878906, "eval_logps/rejected": -412.94171142578125, "eval_loss": 0.5773423314094543, "eval_rewards/accuracies": 0.6775000095367432, "eval_rewards/chosen": -0.9855120778083801, "eval_rewards/margins": 0.5052086114883423, "eval_rewards/rejected": -1.4907207489013672, "eval_runtime": 1173.8424, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 2200 }, { "epoch": 0.29, "learning_rate": 4.474659223147652e-06, "logits/chosen": -2.1210954189300537, "logits/rejected": -2.0790343284606934, "logps/chosen": -388.6036682128906, "logps/rejected": -412.49755859375, "loss": 0.6111, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0008533000946045, "rewards/margins": 0.5308166742324829, "rewards/rejected": -1.5316702127456665, "step": 2210 }, { "epoch": 0.29, "learning_rate": 4.4676335336794125e-06, "logits/chosen": -2.1499674320220947, "logits/rejected": -2.042468786239624, "logps/chosen": -427.4583435058594, "logps/rejected": -433.139404296875, "loss": 0.6276, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9962937235832214, "rewards/margins": 0.33339327573776245, "rewards/rejected": -1.3296869993209839, "step": 2220 }, { "epoch": 0.29, "learning_rate": 4.46056676978836e-06, "logits/chosen": -2.1695902347564697, "logits/rejected": -2.1840529441833496, "logps/chosen": -363.1017150878906, "logps/rejected": -432.8349609375, "loss": 0.6198, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.8799940347671509, "rewards/margins": 0.37018856406211853, "rewards/rejected": -1.2501827478408813, "step": 2230 }, { "epoch": 0.29, "learning_rate": 4.453459078993453e-06, "logits/chosen": -2.105926036834717, "logits/rejected": -2.1373894214630127, "logps/chosen": -356.4091796875, "logps/rejected": -404.40716552734375, "loss": 0.516, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8220658302307129, "rewards/margins": 0.567711353302002, "rewards/rejected": -1.3897771835327148, "step": 2240 }, { "epoch": 0.29, "learning_rate": 4.446310609668001e-06, "logits/chosen": -2.105323553085327, "logits/rejected": -2.037275791168213, "logps/chosen": -361.72833251953125, "logps/rejected": -442.4640197753906, "loss": 0.5989, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1395691633224487, "rewards/margins": 0.46317243576049805, "rewards/rejected": -1.6027415990829468, "step": 2250 }, { "epoch": 0.3, "learning_rate": 4.439121511036562e-06, "logits/chosen": -2.156818389892578, "logits/rejected": -2.057016611099243, "logps/chosen": -418.86090087890625, "logps/rejected": -436.59210205078125, "loss": 0.5761, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1772918701171875, "rewards/margins": 0.5623257160186768, "rewards/rejected": -1.739617943763733, "step": 2260 }, { "epoch": 0.3, "learning_rate": 4.431891933171839e-06, "logits/chosen": -2.0695672035217285, "logits/rejected": -1.941693663597107, "logps/chosen": -397.0233154296875, "logps/rejected": -432.2896423339844, "loss": 0.6226, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.2106586694717407, "rewards/margins": 0.47111576795578003, "rewards/rejected": -1.681774377822876, "step": 2270 }, { "epoch": 0.3, "learning_rate": 4.424622026991536e-06, "logits/chosen": -2.049593925476074, "logits/rejected": -1.9610483646392822, "logps/chosen": -415.3938903808594, "logps/rejected": -431.11236572265625, "loss": 0.6462, "rewards/accuracies": 0.625, "rewards/chosen": -1.3287193775177002, "rewards/margins": 0.3927326202392578, "rewards/rejected": -1.721451759338379, "step": 2280 }, { "epoch": 0.3, "learning_rate": 4.417311944255215e-06, "logits/chosen": -2.1346962451934814, "logits/rejected": -2.1600146293640137, "logps/chosen": -380.8770446777344, "logps/rejected": -421.3860778808594, "loss": 0.7034, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.2351090908050537, "rewards/margins": 0.2262163609266281, "rewards/rejected": -1.4613254070281982, "step": 2290 }, { "epoch": 0.3, "learning_rate": 4.409961837561122e-06, "logits/chosen": -2.099057912826538, "logits/rejected": -2.1106457710266113, "logps/chosen": -447.09356689453125, "logps/rejected": -506.8944396972656, "loss": 0.5352, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1697063446044922, "rewards/margins": 0.6214213371276855, "rewards/rejected": -1.7911275625228882, "step": 2300 }, { "epoch": 0.3, "eval_logits/chosen": -0.24320349097251892, "eval_logits/rejected": -0.21170783042907715, "eval_logps/chosen": -393.0879821777344, "eval_logps/rejected": -416.6599426269531, "eval_loss": 0.5820898413658142, "eval_rewards/accuracies": 0.6884999871253967, "eval_rewards/chosen": -1.07795250415802, "eval_rewards/margins": 0.449950248003006, "eval_rewards/rejected": -1.5279029607772827, "eval_runtime": 1172.9381, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 2300 }, { "epoch": 0.3, "learning_rate": 4.402571860343006e-06, "logits/chosen": -2.2147767543792725, "logits/rejected": -2.037105083465576, "logps/chosen": -402.79998779296875, "logps/rejected": -373.8688659667969, "loss": 0.591, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0390770435333252, "rewards/margins": 0.3865988552570343, "rewards/rejected": -1.4256759881973267, "step": 2310 }, { "epoch": 0.3, "learning_rate": 4.3951421668669165e-06, "logits/chosen": -2.2005527019500732, "logits/rejected": -2.104384183883667, "logps/chosen": -391.08050537109375, "logps/rejected": -424.8851623535156, "loss": 0.5589, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9733511209487915, "rewards/margins": 0.4935951828956604, "rewards/rejected": -1.4669463634490967, "step": 2320 }, { "epoch": 0.3, "learning_rate": 4.3876729122279784e-06, "logits/chosen": -2.1870036125183105, "logits/rejected": -2.1664469242095947, "logps/chosen": -309.8889465332031, "logps/rejected": -373.26312255859375, "loss": 0.5234, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9488767385482788, "rewards/margins": 0.6144660115242004, "rewards/rejected": -1.563342809677124, "step": 2330 }, { "epoch": 0.31, "learning_rate": 4.3801642523471585e-06, "logits/chosen": -2.2839043140411377, "logits/rejected": -2.0562546253204346, "logps/chosen": -403.57757568359375, "logps/rejected": -419.24639892578125, "loss": 0.526, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.102151870727539, "rewards/margins": 0.5933542251586914, "rewards/rejected": -1.6955060958862305, "step": 2340 }, { "epoch": 0.31, "learning_rate": 4.37261634396801e-06, "logits/chosen": -2.0778768062591553, "logits/rejected": -1.971123456954956, "logps/chosen": -422.4281311035156, "logps/rejected": -454.2814025878906, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": -1.4297865629196167, "rewards/margins": 0.5288795232772827, "rewards/rejected": -1.958666205406189, "step": 2350 }, { "epoch": 0.31, "learning_rate": 4.365029344653401e-06, "logits/chosen": -2.1634223461151123, "logits/rejected": -2.0877110958099365, "logps/chosen": -486.320068359375, "logps/rejected": -487.45159912109375, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -1.3157932758331299, "rewards/margins": 0.8328288197517395, "rewards/rejected": -2.1486220359802246, "step": 2360 }, { "epoch": 0.31, "learning_rate": 4.35740341278222e-06, "logits/chosen": -2.2238452434539795, "logits/rejected": -2.188713550567627, "logps/chosen": -474.1236267089844, "logps/rejected": -508.4130859375, "loss": 0.6364, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3115034103393555, "rewards/margins": 0.4928676187992096, "rewards/rejected": -1.8043708801269531, "step": 2370 }, { "epoch": 0.31, "learning_rate": 4.349738707546079e-06, "logits/chosen": -2.0244901180267334, "logits/rejected": -1.984893798828125, "logps/chosen": -433.35693359375, "logps/rejected": -430.234130859375, "loss": 0.6442, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4878586530685425, "rewards/margins": 0.4103633761405945, "rewards/rejected": -1.8982219696044922, "step": 2380 }, { "epoch": 0.31, "learning_rate": 4.3420353889459835e-06, "logits/chosen": -2.2906548976898193, "logits/rejected": -2.105910539627075, "logps/chosen": -462.30682373046875, "logps/rejected": -467.44293212890625, "loss": 0.5388, "rewards/accuracies": 0.75, "rewards/chosen": -1.3121368885040283, "rewards/margins": 0.6499633193016052, "rewards/rejected": -1.9621002674102783, "step": 2390 }, { "epoch": 0.31, "learning_rate": 4.334293617788992e-06, "logits/chosen": -2.157517433166504, "logits/rejected": -1.953137755393982, "logps/chosen": -398.0602111816406, "logps/rejected": -423.69854736328125, "loss": 0.4291, "rewards/accuracies": 0.75, "rewards/chosen": -1.269965410232544, "rewards/margins": 1.029221534729004, "rewards/rejected": -2.2991867065429688, "step": 2400 }, { "epoch": 0.31, "eval_logits/chosen": 0.1741051822900772, "eval_logits/rejected": 0.18015538156032562, "eval_logps/chosen": -423.0900573730469, "eval_logps/rejected": -462.5804748535156, "eval_loss": 0.5799766778945923, "eval_rewards/accuracies": 0.6784999966621399, "eval_rewards/chosen": -1.3779734373092651, "eval_rewards/margins": 0.6091340780258179, "eval_rewards/rejected": -1.9871076345443726, "eval_runtime": 1173.1683, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 2400 }, { "epoch": 0.32, "learning_rate": 4.326513555684867e-06, "logits/chosen": -2.1554830074310303, "logits/rejected": -1.9456875324249268, "logps/chosen": -452.01861572265625, "logps/rejected": -421.59033203125, "loss": 0.644, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4219211339950562, "rewards/margins": 0.4033273160457611, "rewards/rejected": -1.82524836063385, "step": 2410 }, { "epoch": 0.32, "learning_rate": 4.31869536504269e-06, "logits/chosen": -2.084606647491455, "logits/rejected": -2.057734966278076, "logps/chosen": -394.4380187988281, "logps/rejected": -445.02484130859375, "loss": 0.5474, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1819837093353271, "rewards/margins": 0.6775622367858887, "rewards/rejected": -1.8595459461212158, "step": 2420 }, { "epoch": 0.32, "learning_rate": 4.310839209067482e-06, "logits/chosen": -2.264970302581787, "logits/rejected": -2.110339879989624, "logps/chosen": -405.0823669433594, "logps/rejected": -430.5166015625, "loss": 0.5699, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1619272232055664, "rewards/margins": 0.5353389978408813, "rewards/rejected": -1.6972662210464478, "step": 2430 }, { "epoch": 0.32, "learning_rate": 4.302945251756788e-06, "logits/chosen": -2.028538227081299, "logits/rejected": -2.029587507247925, "logps/chosen": -395.78619384765625, "logps/rejected": -437.18133544921875, "loss": 0.5098, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.175262212753296, "rewards/margins": 0.7358410954475403, "rewards/rejected": -1.9111032485961914, "step": 2440 }, { "epoch": 0.32, "learning_rate": 4.29501365789726e-06, "logits/chosen": -2.0059101581573486, "logits/rejected": -1.9005285501480103, "logps/chosen": -351.9297180175781, "logps/rejected": -391.07647705078125, "loss": 0.5735, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0925757884979248, "rewards/margins": 0.6294921040534973, "rewards/rejected": -1.7220680713653564, "step": 2450 }, { "epoch": 0.32, "learning_rate": 4.2870445930612135e-06, "logits/chosen": -2.1147594451904297, "logits/rejected": -2.009192705154419, "logps/chosen": -425.51934814453125, "logps/rejected": -462.2431640625, "loss": 0.5077, "rewards/accuracies": 0.75, "rewards/chosen": -0.9345242381095886, "rewards/margins": 0.8426654934883118, "rewards/rejected": -1.77718985080719, "step": 2460 }, { "epoch": 0.32, "learning_rate": 4.279038223603171e-06, "logits/chosen": -2.177354097366333, "logits/rejected": -2.006502866744995, "logps/chosen": -370.07952880859375, "logps/rejected": -402.7530822753906, "loss": 0.5482, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9241195917129517, "rewards/margins": 0.6823036670684814, "rewards/rejected": -1.6064231395721436, "step": 2470 }, { "epoch": 0.32, "learning_rate": 4.2709947166563906e-06, "logits/chosen": -1.9542591571807861, "logits/rejected": -1.957780122756958, "logps/chosen": -391.3414611816406, "logps/rejected": -465.82244873046875, "loss": 0.5407, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1169626712799072, "rewards/margins": 0.6581896543502808, "rewards/rejected": -1.7751522064208984, "step": 2480 }, { "epoch": 0.33, "learning_rate": 4.262914240129379e-06, "logits/chosen": -2.082587242126465, "logits/rejected": -1.9763103723526, "logps/chosen": -411.59649658203125, "logps/rejected": -450.15313720703125, "loss": 0.5524, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0453674793243408, "rewards/margins": 0.8182929754257202, "rewards/rejected": -1.8636605739593506, "step": 2490 }, { "epoch": 0.33, "learning_rate": 4.254796962702382e-06, "logits/chosen": -2.1096723079681396, "logits/rejected": -2.033006191253662, "logps/chosen": -407.79656982421875, "logps/rejected": -441.3284606933594, "loss": 0.5324, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9739041328430176, "rewards/margins": 0.6023324728012085, "rewards/rejected": -1.5762364864349365, "step": 2500 }, { "epoch": 0.33, "eval_logits/chosen": 0.07505005598068237, "eval_logits/rejected": 0.09037268161773682, "eval_logps/chosen": -388.1980285644531, "eval_logps/rejected": -422.61712646484375, "eval_loss": 0.5709013938903809, "eval_rewards/accuracies": 0.6765000224113464, "eval_rewards/chosen": -1.0290535688400269, "eval_rewards/margins": 0.5584208965301514, "eval_rewards/rejected": -1.5874744653701782, "eval_runtime": 1173.0849, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 2500 }, { "epoch": 0.33, "learning_rate": 4.246643053823864e-06, "logits/chosen": -2.1814770698547363, "logits/rejected": -2.039039134979248, "logps/chosen": -309.3426513671875, "logps/rejected": -398.8958740234375, "loss": 0.5441, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8469230532646179, "rewards/margins": 0.632138729095459, "rewards/rejected": -1.4790617227554321, "step": 2510 }, { "epoch": 0.33, "learning_rate": 4.238452683706979e-06, "logits/chosen": -2.0768649578094482, "logits/rejected": -2.069648027420044, "logps/chosen": -340.1851501464844, "logps/rejected": -352.81805419921875, "loss": 0.5393, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.98432856798172, "rewards/margins": 0.5381032228469849, "rewards/rejected": -1.52243173122406, "step": 2520 }, { "epoch": 0.33, "learning_rate": 4.2302260233260025e-06, "logits/chosen": -2.135117292404175, "logits/rejected": -2.150930404663086, "logps/chosen": -380.9312438964844, "logps/rejected": -437.70672607421875, "loss": 0.5252, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8901141285896301, "rewards/margins": 0.7313874363899231, "rewards/rejected": -1.6215015649795532, "step": 2530 }, { "epoch": 0.33, "learning_rate": 4.2219632444127766e-06, "logits/chosen": -1.9879133701324463, "logits/rejected": -1.93001389503479, "logps/chosen": -401.8074035644531, "logps/rejected": -424.042724609375, "loss": 0.6447, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.190601110458374, "rewards/margins": 0.33566969633102417, "rewards/rejected": -1.5262706279754639, "step": 2540 }, { "epoch": 0.33, "learning_rate": 4.213664519453115e-06, "logits/chosen": -2.1339848041534424, "logits/rejected": -2.0666513442993164, "logps/chosen": -367.1988525390625, "logps/rejected": -413.453369140625, "loss": 0.5879, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.175606608390808, "rewards/margins": 0.463453471660614, "rewards/rejected": -1.6390600204467773, "step": 2550 }, { "epoch": 0.33, "learning_rate": 4.205330021683208e-06, "logits/chosen": -1.9215284585952759, "logits/rejected": -1.9162276983261108, "logps/chosen": -332.72454833984375, "logps/rejected": -360.2316589355469, "loss": 0.627, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0344467163085938, "rewards/margins": 0.313593327999115, "rewards/rejected": -1.348039984703064, "step": 2560 }, { "epoch": 0.34, "learning_rate": 4.196959925086008e-06, "logits/chosen": -2.032893419265747, "logits/rejected": -2.0280256271362305, "logps/chosen": -393.7264709472656, "logps/rejected": -445.7867126464844, "loss": 0.6322, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.13155198097229, "rewards/margins": 0.30951356887817383, "rewards/rejected": -1.4410655498504639, "step": 2570 }, { "epoch": 0.34, "learning_rate": 4.188554404387588e-06, "logits/chosen": -2.1735775470733643, "logits/rejected": -2.09834623336792, "logps/chosen": -401.140625, "logps/rejected": -413.3356018066406, "loss": 0.5872, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.045619249343872, "rewards/margins": 0.4095796048641205, "rewards/rejected": -1.4551990032196045, "step": 2580 }, { "epoch": 0.34, "learning_rate": 4.180113635053504e-06, "logits/chosen": -2.1379377841949463, "logits/rejected": -2.1078503131866455, "logps/chosen": -378.58740234375, "logps/rejected": -440.969482421875, "loss": 0.5702, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0751326084136963, "rewards/margins": 0.5110452175140381, "rewards/rejected": -1.5861778259277344, "step": 2590 }, { "epoch": 0.34, "learning_rate": 4.17163779328513e-06, "logits/chosen": -2.0438780784606934, "logits/rejected": -1.9390064477920532, "logps/chosen": -394.52532958984375, "logps/rejected": -429.409423828125, "loss": 0.5659, "rewards/accuracies": 0.75, "rewards/chosen": -1.087465763092041, "rewards/margins": 0.590351939201355, "rewards/rejected": -1.677817702293396, "step": 2600 }, { "epoch": 0.34, "eval_logits/chosen": 0.3240658640861511, "eval_logits/rejected": 0.3280923366546631, "eval_logps/chosen": -410.6242980957031, "eval_logps/rejected": -446.1897888183594, "eval_loss": 0.5639599561691284, "eval_rewards/accuracies": 0.6984999775886536, "eval_rewards/chosen": -1.2533156871795654, "eval_rewards/margins": 0.5698856115341187, "eval_rewards/rejected": -1.823201298713684, "eval_runtime": 1173.6285, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 2600 }, { "epoch": 0.34, "learning_rate": 4.163127056015975e-06, "logits/chosen": -2.0112125873565674, "logits/rejected": -1.8717600107192993, "logps/chosen": -421.8485412597656, "logps/rejected": -459.86590576171875, "loss": 0.6169, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2108803987503052, "rewards/margins": 0.524616539478302, "rewards/rejected": -1.7354971170425415, "step": 2610 }, { "epoch": 0.34, "learning_rate": 4.154581600907994e-06, "logits/chosen": -2.0410170555114746, "logits/rejected": -1.8681037425994873, "logps/chosen": -370.8006286621094, "logps/rejected": -413.893310546875, "loss": 0.4689, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.0415531396865845, "rewards/margins": 0.7762254476547241, "rewards/rejected": -1.8177785873413086, "step": 2620 }, { "epoch": 0.34, "learning_rate": 4.14600160634788e-06, "logits/chosen": -1.9654664993286133, "logits/rejected": -1.8224735260009766, "logps/chosen": -377.9241943359375, "logps/rejected": -453.4291076660156, "loss": 0.5093, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.213224172592163, "rewards/margins": 0.7400587797164917, "rewards/rejected": -1.9532829523086548, "step": 2630 }, { "epoch": 0.35, "learning_rate": 4.137387251443335e-06, "logits/chosen": -2.0160083770751953, "logits/rejected": -1.8280494213104248, "logps/chosen": -392.6662902832031, "logps/rejected": -413.31341552734375, "loss": 0.5341, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2335208654403687, "rewards/margins": 0.6282380223274231, "rewards/rejected": -1.8617589473724365, "step": 2640 }, { "epoch": 0.35, "learning_rate": 4.128738716019338e-06, "logits/chosen": -1.9816757440567017, "logits/rejected": -1.965356469154358, "logps/chosen": -434.535400390625, "logps/rejected": -474.57421875, "loss": 0.5544, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2539236545562744, "rewards/margins": 0.6901840567588806, "rewards/rejected": -1.9441077709197998, "step": 2650 }, { "epoch": 0.35, "learning_rate": 4.120056180614386e-06, "logits/chosen": -1.8631175756454468, "logits/rejected": -1.7722257375717163, "logps/chosen": -396.16748046875, "logps/rejected": -475.76214599609375, "loss": 0.5704, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3676929473876953, "rewards/margins": 0.6798584461212158, "rewards/rejected": -2.047551393508911, "step": 2660 }, { "epoch": 0.35, "learning_rate": 4.111339826476725e-06, "logits/chosen": -1.8956613540649414, "logits/rejected": -1.8707530498504639, "logps/chosen": -393.42095947265625, "logps/rejected": -448.44744873046875, "loss": 0.6406, "rewards/accuracies": 0.625, "rewards/chosen": -1.4871103763580322, "rewards/margins": 0.5370699167251587, "rewards/rejected": -2.0241801738739014, "step": 2670 }, { "epoch": 0.35, "learning_rate": 4.102589835560572e-06, "logits/chosen": -1.9932838678359985, "logits/rejected": -1.8022167682647705, "logps/chosen": -469.27490234375, "logps/rejected": -475.10028076171875, "loss": 0.5753, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4100916385650635, "rewards/margins": 0.57780522108078, "rewards/rejected": -1.9878966808319092, "step": 2680 }, { "epoch": 0.35, "learning_rate": 4.09380639052231e-06, "logits/chosen": -1.9947681427001953, "logits/rejected": -1.9494025707244873, "logps/chosen": -446.71923828125, "logps/rejected": -542.5546264648438, "loss": 0.511, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4790523052215576, "rewards/margins": 0.7871916890144348, "rewards/rejected": -2.2662441730499268, "step": 2690 }, { "epoch": 0.35, "learning_rate": 4.084989674716679e-06, "logits/chosen": -1.9378328323364258, "logits/rejected": -1.7882620096206665, "logps/chosen": -463.34368896484375, "logps/rejected": -528.638427734375, "loss": 0.5041, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.6602880954742432, "rewards/margins": 0.7532935738563538, "rewards/rejected": -2.413581371307373, "step": 2700 }, { "epoch": 0.35, "eval_logits/chosen": 0.5924010872840881, "eval_logits/rejected": 0.5911453366279602, "eval_logps/chosen": -459.9809875488281, "eval_logps/rejected": -503.08282470703125, "eval_loss": 0.5736746788024902, "eval_rewards/accuracies": 0.6865000128746033, "eval_rewards/chosen": -1.746882677078247, "eval_rewards/margins": 0.6452487707138062, "eval_rewards/rejected": -2.3921313285827637, "eval_runtime": 1173.2673, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 2700 }, { "epoch": 0.35, "learning_rate": 4.076139872192949e-06, "logits/chosen": -2.04536771774292, "logits/rejected": -1.9077255725860596, "logps/chosen": -490.4608459472656, "logps/rejected": -500.9752502441406, "loss": 0.5828, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7122223377227783, "rewards/margins": 0.5663967132568359, "rewards/rejected": -2.278618812561035, "step": 2710 }, { "epoch": 0.36, "learning_rate": 4.067257167691074e-06, "logits/chosen": -1.8697471618652344, "logits/rejected": -1.8803532123565674, "logps/chosen": -453.03570556640625, "logps/rejected": -496.3179626464844, "loss": 0.5782, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.366571307182312, "rewards/margins": 0.6388453245162964, "rewards/rejected": -2.0054163932800293, "step": 2720 }, { "epoch": 0.36, "learning_rate": 4.05834174663784e-06, "logits/chosen": -2.091736078262329, "logits/rejected": -2.0846893787384033, "logps/chosen": -400.5804138183594, "logps/rejected": -394.76104736328125, "loss": 0.6245, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1139838695526123, "rewards/margins": 0.3597862124443054, "rewards/rejected": -1.4737701416015625, "step": 2730 }, { "epoch": 0.36, "learning_rate": 4.0493937951429895e-06, "logits/chosen": -2.146397829055786, "logits/rejected": -2.0857930183410645, "logps/chosen": -382.061279296875, "logps/rejected": -389.96307373046875, "loss": 0.5693, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.002920150756836, "rewards/margins": 0.4762960374355316, "rewards/rejected": -1.4792163372039795, "step": 2740 }, { "epoch": 0.36, "learning_rate": 4.040413499995343e-06, "logits/chosen": -2.082854747772217, "logits/rejected": -1.9924688339233398, "logps/chosen": -414.23095703125, "logps/rejected": -456.17071533203125, "loss": 0.5656, "rewards/accuracies": 0.75, "rewards/chosen": -1.032799243927002, "rewards/margins": 0.5492347478866577, "rewards/rejected": -1.5820338726043701, "step": 2750 }, { "epoch": 0.36, "learning_rate": 4.031401048658892e-06, "logits/chosen": -2.005931854248047, "logits/rejected": -1.919759750366211, "logps/chosen": -386.944580078125, "logps/rejected": -435.21051025390625, "loss": 0.5126, "rewards/accuracies": 0.75, "rewards/chosen": -0.9739694595336914, "rewards/margins": 0.7307363748550415, "rewards/rejected": -1.704705834388733, "step": 2760 }, { "epoch": 0.36, "learning_rate": 4.022356629268894e-06, "logits/chosen": -2.0577034950256348, "logits/rejected": -1.8897212743759155, "logps/chosen": -429.05938720703125, "logps/rejected": -432.6758728027344, "loss": 0.6329, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.333314299583435, "rewards/margins": 0.4279370903968811, "rewards/rejected": -1.761251449584961, "step": 2770 }, { "epoch": 0.36, "learning_rate": 4.013280430627936e-06, "logits/chosen": -1.9496095180511475, "logits/rejected": -1.8696807622909546, "logps/chosen": -382.43658447265625, "logps/rejected": -411.63836669921875, "loss": 0.5828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.233246922492981, "rewards/margins": 0.4972565174102783, "rewards/rejected": -1.7305036783218384, "step": 2780 }, { "epoch": 0.37, "learning_rate": 4.004172642202002e-06, "logits/chosen": -1.9676673412322998, "logits/rejected": -1.7953529357910156, "logps/chosen": -404.1937561035156, "logps/rejected": -451.77349853515625, "loss": 0.5252, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4179786443710327, "rewards/margins": 0.7819381952285767, "rewards/rejected": -2.1999170780181885, "step": 2790 }, { "epoch": 0.37, "learning_rate": 3.995033454116512e-06, "logits/chosen": -2.0970406532287598, "logits/rejected": -1.9235862493515015, "logps/chosen": -460.91064453125, "logps/rejected": -476.86199951171875, "loss": 0.5754, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4746737480163574, "rewards/margins": 0.4974168837070465, "rewards/rejected": -1.972090721130371, "step": 2800 }, { "epoch": 0.37, "eval_logits/chosen": 0.6612156629562378, "eval_logits/rejected": 0.6424288153648376, "eval_logps/chosen": -449.1170654296875, "eval_logps/rejected": -486.8488464355469, "eval_loss": 0.5716487169265747, "eval_rewards/accuracies": 0.6884999871253967, "eval_rewards/chosen": -1.638243317604065, "eval_rewards/margins": 0.5915481448173523, "eval_rewards/rejected": -2.2297914028167725, "eval_runtime": 1173.0043, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 2800 }, { "epoch": 0.37, "learning_rate": 3.985863057152355e-06, "logits/chosen": -1.9077107906341553, "logits/rejected": -1.9370073080062866, "logps/chosen": -474.05859375, "logps/rejected": -511.4286193847656, "loss": 0.5503, "rewards/accuracies": 0.75, "rewards/chosen": -1.5730762481689453, "rewards/margins": 0.6731056571006775, "rewards/rejected": -2.2461819648742676, "step": 2810 }, { "epoch": 0.37, "learning_rate": 3.976661642741908e-06, "logits/chosen": -1.743070363998413, "logits/rejected": -1.790679931640625, "logps/chosen": -475.90606689453125, "logps/rejected": -556.7145385742188, "loss": 0.5288, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9476301670074463, "rewards/margins": 0.7543555498123169, "rewards/rejected": -2.7019858360290527, "step": 2820 }, { "epoch": 0.37, "learning_rate": 3.967429402965035e-06, "logits/chosen": -1.6348196268081665, "logits/rejected": -1.5773961544036865, "logps/chosen": -544.2933349609375, "logps/rejected": -595.9285888671875, "loss": 0.5756, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.4446425437927246, "rewards/margins": 0.5722433924674988, "rewards/rejected": -3.016885995864868, "step": 2830 }, { "epoch": 0.37, "learning_rate": 3.958166530545085e-06, "logits/chosen": -1.7844722270965576, "logits/rejected": -1.717272162437439, "logps/chosen": -540.5704956054688, "logps/rejected": -611.501953125, "loss": 0.5377, "rewards/accuracies": 0.6875, "rewards/chosen": -2.6575241088867188, "rewards/margins": 0.7096670866012573, "rewards/rejected": -3.3671913146972656, "step": 2840 }, { "epoch": 0.37, "learning_rate": 3.948873218844863e-06, "logits/chosen": -1.512880563735962, "logits/rejected": -1.4856570959091187, "logps/chosen": -487.31597900390625, "logps/rejected": -574.8258666992188, "loss": 0.6078, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.586638927459717, "rewards/margins": 0.6180590391159058, "rewards/rejected": -3.204697370529175, "step": 2850 }, { "epoch": 0.37, "learning_rate": 3.939549661862592e-06, "logits/chosen": -1.6444950103759766, "logits/rejected": -1.5157592296600342, "logps/chosen": -531.1907958984375, "logps/rejected": -595.8784790039062, "loss": 0.5354, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.4481587409973145, "rewards/margins": 0.8699649572372437, "rewards/rejected": -3.3181240558624268, "step": 2860 }, { "epoch": 0.38, "learning_rate": 3.930196054227871e-06, "logits/chosen": -1.6284847259521484, "logits/rejected": -1.427422285079956, "logps/chosen": -496.8154296875, "logps/rejected": -556.5086059570312, "loss": 0.5569, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.3700180053710938, "rewards/margins": 0.7457307577133179, "rewards/rejected": -3.115748643875122, "step": 2870 }, { "epoch": 0.38, "learning_rate": 3.920812591197604e-06, "logits/chosen": -1.7529376745224, "logits/rejected": -1.6493419408798218, "logps/chosen": -475.435791015625, "logps/rejected": -499.7490234375, "loss": 0.5515, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9660146236419678, "rewards/margins": 0.6012964248657227, "rewards/rejected": -2.5673108100891113, "step": 2880 }, { "epoch": 0.38, "learning_rate": 3.9113994686519305e-06, "logits/chosen": -1.8608381748199463, "logits/rejected": -1.6812989711761475, "logps/chosen": -464.67657470703125, "logps/rejected": -529.2399291992188, "loss": 0.5276, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7906898260116577, "rewards/margins": 0.6778753995895386, "rewards/rejected": -2.4685654640197754, "step": 2890 }, { "epoch": 0.38, "learning_rate": 3.90195688309013e-06, "logits/chosen": -1.8268420696258545, "logits/rejected": -1.641605019569397, "logps/chosen": -436.00921630859375, "logps/rejected": -460.01434326171875, "loss": 0.6073, "rewards/accuracies": 0.625, "rewards/chosen": -1.6543318033218384, "rewards/margins": 0.5443650484085083, "rewards/rejected": -2.1986968517303467, "step": 2900 }, { "epoch": 0.38, "eval_logits/chosen": 0.6979252099990845, "eval_logits/rejected": 0.7017303705215454, "eval_logps/chosen": -440.4114685058594, "eval_logps/rejected": -485.1723937988281, "eval_loss": 0.5730963349342346, "eval_rewards/accuracies": 0.6815000176429749, "eval_rewards/chosen": -1.5511873960494995, "eval_rewards/margins": 0.6618397831916809, "eval_rewards/rejected": -2.213027000427246, "eval_runtime": 1173.0012, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 2900 }, { "epoch": 0.38, "learning_rate": 3.892485031626527e-06, "logits/chosen": -1.8934433460235596, "logits/rejected": -1.7927643060684204, "logps/chosen": -420.6212463378906, "logps/rejected": -468.4596252441406, "loss": 0.5783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.44437575340271, "rewards/margins": 0.6150509119033813, "rewards/rejected": -2.059426784515381, "step": 2910 }, { "epoch": 0.38, "learning_rate": 3.882984111986371e-06, "logits/chosen": -1.9982595443725586, "logits/rejected": -1.9308195114135742, "logps/chosen": -424.13134765625, "logps/rejected": -436.39080810546875, "loss": 0.5856, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.303920030593872, "rewards/margins": 0.5523670315742493, "rewards/rejected": -1.8562867641448975, "step": 2920 }, { "epoch": 0.38, "learning_rate": 3.873454322501711e-06, "logits/chosen": -2.102571964263916, "logits/rejected": -1.9521716833114624, "logps/chosen": -388.68817138671875, "logps/rejected": -427.4130859375, "loss": 0.56, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9647110104560852, "rewards/margins": 0.6733750104904175, "rewards/rejected": -1.6380860805511475, "step": 2930 }, { "epoch": 0.38, "learning_rate": 3.863895862107255e-06, "logits/chosen": -2.160048484802246, "logits/rejected": -1.9776769876480103, "logps/chosen": -365.2091369628906, "logps/rejected": -466.8072814941406, "loss": 0.4624, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8954636454582214, "rewards/margins": 0.8491466641426086, "rewards/rejected": -1.7446101903915405, "step": 2940 }, { "epoch": 0.39, "learning_rate": 3.854308930336216e-06, "logits/chosen": -2.07346510887146, "logits/rejected": -1.9108175039291382, "logps/chosen": -432.8681640625, "logps/rejected": -447.23291015625, "loss": 0.5574, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.072650671005249, "rewards/margins": 0.6242086291313171, "rewards/rejected": -1.6968591213226318, "step": 2950 }, { "epoch": 0.39, "learning_rate": 3.844693727316151e-06, "logits/chosen": -2.0675299167633057, "logits/rejected": -1.8454557657241821, "logps/chosen": -419.49755859375, "logps/rejected": -450.098876953125, "loss": 0.5041, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2237988710403442, "rewards/margins": 0.754100501537323, "rewards/rejected": -1.9778993129730225, "step": 2960 }, { "epoch": 0.39, "learning_rate": 3.835050453764779e-06, "logits/chosen": -1.8740476369857788, "logits/rejected": -1.7978649139404297, "logps/chosen": -379.86651611328125, "logps/rejected": -450.74761962890625, "loss": 0.505, "rewards/accuracies": 0.75, "rewards/chosen": -1.1377508640289307, "rewards/margins": 0.9418436288833618, "rewards/rejected": -2.079594373703003, "step": 2970 }, { "epoch": 0.39, "learning_rate": 3.825379310985792e-06, "logits/chosen": -1.916658639907837, "logits/rejected": -1.807265281677246, "logps/chosen": -402.7890625, "logps/rejected": -454.7411193847656, "loss": 0.5985, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3055946826934814, "rewards/margins": 0.5781377553939819, "rewards/rejected": -1.8837321996688843, "step": 2980 }, { "epoch": 0.39, "learning_rate": 3.815680500864651e-06, "logits/chosen": -2.0287046432495117, "logits/rejected": -1.9784412384033203, "logps/chosen": -419.54022216796875, "logps/rejected": -433.28021240234375, "loss": 0.5507, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0421308279037476, "rewards/margins": 0.5592368245124817, "rewards/rejected": -1.6013675928115845, "step": 2990 }, { "epoch": 0.39, "learning_rate": 3.80595422586438e-06, "logits/chosen": -1.9907076358795166, "logits/rejected": -1.922621488571167, "logps/chosen": -466.78118896484375, "logps/rejected": -450.73126220703125, "loss": 0.6283, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2883647680282593, "rewards/margins": 0.5715576410293579, "rewards/rejected": -1.8599224090576172, "step": 3000 }, { "epoch": 0.39, "eval_logits/chosen": 0.5950943827629089, "eval_logits/rejected": 0.6221369504928589, "eval_logps/chosen": -416.3377685546875, "eval_logps/rejected": -463.23724365234375, "eval_loss": 0.5645180940628052, "eval_rewards/accuracies": 0.6859999895095825, "eval_rewards/chosen": -1.3104503154754639, "eval_rewards/margins": 0.6832253932952881, "eval_rewards/rejected": -1.9936758279800415, "eval_runtime": 1173.5431, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3000 }, { "epoch": 0.39, "learning_rate": 3.7962006890213266e-06, "logits/chosen": -1.7931913137435913, "logits/rejected": -1.710323691368103, "logps/chosen": -388.6459045410156, "logps/rejected": -430.0355529785156, "loss": 0.6373, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4123424291610718, "rewards/margins": 0.43220600485801697, "rewards/rejected": -1.8445484638214111, "step": 3010 }, { "epoch": 0.4, "learning_rate": 3.7864200939409336e-06, "logits/chosen": -2.0017385482788086, "logits/rejected": -1.7524493932724, "logps/chosen": -409.5406799316406, "logps/rejected": -428.1637268066406, "loss": 0.6217, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2341853380203247, "rewards/margins": 0.4431460499763489, "rewards/rejected": -1.6773313283920288, "step": 3020 }, { "epoch": 0.4, "learning_rate": 3.7766126447934857e-06, "logits/chosen": -2.037930965423584, "logits/rejected": -1.996689796447754, "logps/chosen": -369.6080627441406, "logps/rejected": -401.9655456542969, "loss": 0.5999, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0221846103668213, "rewards/margins": 0.5410764813423157, "rewards/rejected": -1.5632610321044922, "step": 3030 }, { "epoch": 0.4, "learning_rate": 3.766778546309847e-06, "logits/chosen": -2.0481367111206055, "logits/rejected": -1.9539272785186768, "logps/chosen": -416.8006896972656, "logps/rejected": -385.1816711425781, "loss": 0.5683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9949377179145813, "rewards/margins": 0.5998483896255493, "rewards/rejected": -1.5947860479354858, "step": 3040 }, { "epoch": 0.4, "learning_rate": 3.7569180037771868e-06, "logits/chosen": -2.083566188812256, "logits/rejected": -2.0596466064453125, "logps/chosen": -388.3544006347656, "logps/rejected": -429.990478515625, "loss": 0.6271, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1148223876953125, "rewards/margins": 0.4753757417201996, "rewards/rejected": -1.5901981592178345, "step": 3050 }, { "epoch": 0.4, "learning_rate": 3.7470312230346955e-06, "logits/chosen": -1.9745086431503296, "logits/rejected": -1.7619606256484985, "logps/chosen": -418.34307861328125, "logps/rejected": -419.50396728515625, "loss": 0.5168, "rewards/accuracies": 0.75, "rewards/chosen": -0.8757292628288269, "rewards/margins": 0.6808592081069946, "rewards/rejected": -1.5565884113311768, "step": 3060 }, { "epoch": 0.4, "learning_rate": 3.7371184104692857e-06, "logits/chosen": -2.1536831855773926, "logits/rejected": -2.0850067138671875, "logps/chosen": -453.22320556640625, "logps/rejected": -438.8262634277344, "loss": 0.5683, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0526719093322754, "rewards/margins": 0.5703314542770386, "rewards/rejected": -1.623003363609314, "step": 3070 }, { "epoch": 0.4, "learning_rate": 3.727179773011289e-06, "logits/chosen": -1.9461936950683594, "logits/rejected": -1.921303391456604, "logps/chosen": -414.42388916015625, "logps/rejected": -433.32769775390625, "loss": 0.6566, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2511370182037354, "rewards/margins": 0.3483005166053772, "rewards/rejected": -1.5994374752044678, "step": 3080 }, { "epoch": 0.4, "learning_rate": 3.717215518130127e-06, "logits/chosen": -1.8317668437957764, "logits/rejected": -1.7200689315795898, "logps/chosen": -411.60223388671875, "logps/rejected": -431.7054138183594, "loss": 0.6871, "rewards/accuracies": 0.625, "rewards/chosen": -1.4211969375610352, "rewards/margins": 0.3373275399208069, "rewards/rejected": -1.7585245370864868, "step": 3090 }, { "epoch": 0.41, "learning_rate": 3.7072258538299923e-06, "logits/chosen": -2.1221868991851807, "logits/rejected": -1.958105444908142, "logps/chosen": -490.62860107421875, "logps/rejected": -451.4012756347656, "loss": 0.5199, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2549514770507812, "rewards/margins": 0.6226548552513123, "rewards/rejected": -1.8776063919067383, "step": 3100 }, { "epoch": 0.41, "eval_logits/chosen": 0.4091779887676239, "eval_logits/rejected": 0.4404120445251465, "eval_logps/chosen": -401.47412109375, "eval_logps/rejected": -437.728271484375, "eval_loss": 0.5584598183631897, "eval_rewards/accuracies": 0.6940000057220459, "eval_rewards/chosen": -1.1618139743804932, "eval_rewards/margins": 0.5767720341682434, "eval_rewards/rejected": -1.7385860681533813, "eval_runtime": 1173.482, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3100 }, { "epoch": 0.41, "learning_rate": 3.6972109886454933e-06, "logits/chosen": -1.927145004272461, "logits/rejected": -1.981048345565796, "logps/chosen": -397.15765380859375, "logps/rejected": -427.95953369140625, "loss": 0.5795, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2384923696517944, "rewards/margins": 0.5860485434532166, "rewards/rejected": -1.8245410919189453, "step": 3110 }, { "epoch": 0.41, "learning_rate": 3.687171131637314e-06, "logits/chosen": -1.850130319595337, "logits/rejected": -1.8024015426635742, "logps/chosen": -407.8834533691406, "logps/rejected": -428.4339294433594, "loss": 0.5739, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1514432430267334, "rewards/margins": 0.4715694785118103, "rewards/rejected": -1.6230125427246094, "step": 3120 }, { "epoch": 0.41, "learning_rate": 3.677106492387839e-06, "logits/chosen": -2.0786144733428955, "logits/rejected": -1.9395787715911865, "logps/chosen": -412.42626953125, "logps/rejected": -394.81634521484375, "loss": 0.6361, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1597317457199097, "rewards/margins": 0.430908739566803, "rewards/rejected": -1.5906405448913574, "step": 3130 }, { "epoch": 0.41, "learning_rate": 3.6670172809967865e-06, "logits/chosen": -1.8933837413787842, "logits/rejected": -1.7607101202011108, "logps/chosen": -350.9492492675781, "logps/rejected": -383.0877380371094, "loss": 0.5527, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2105662822723389, "rewards/margins": 0.5227794051170349, "rewards/rejected": -1.733345627784729, "step": 3140 }, { "epoch": 0.41, "learning_rate": 3.6569037080768153e-06, "logits/chosen": -2.1721205711364746, "logits/rejected": -2.0236735343933105, "logps/chosen": -381.0616760253906, "logps/rejected": -455.51458740234375, "loss": 0.5521, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2160230875015259, "rewards/margins": 0.643632709980011, "rewards/rejected": -1.859655737876892, "step": 3150 }, { "epoch": 0.41, "learning_rate": 3.646765984749137e-06, "logits/chosen": -2.021472692489624, "logits/rejected": -2.0420773029327393, "logps/chosen": -410.3907165527344, "logps/rejected": -466.248779296875, "loss": 0.5928, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.247714638710022, "rewards/margins": 0.5578851699829102, "rewards/rejected": -1.8055998086929321, "step": 3160 }, { "epoch": 0.41, "learning_rate": 3.6366043226391e-06, "logits/chosen": -1.936244010925293, "logits/rejected": -1.8434072732925415, "logps/chosen": -406.29278564453125, "logps/rejected": -426.49432373046875, "loss": 0.5065, "rewards/accuracies": 0.75, "rewards/chosen": -1.1206307411193848, "rewards/margins": 0.6835092306137085, "rewards/rejected": -1.8041400909423828, "step": 3170 }, { "epoch": 0.42, "learning_rate": 3.6264189338717766e-06, "logits/chosen": -2.2497780323028564, "logits/rejected": -2.100496292114258, "logps/chosen": -429.8587341308594, "logps/rejected": -444.8746032714844, "loss": 0.57, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3635942935943604, "rewards/margins": 0.5040538907051086, "rewards/rejected": -1.8676483631134033, "step": 3180 }, { "epoch": 0.42, "learning_rate": 3.6162100310675334e-06, "logits/chosen": -2.049527406692505, "logits/rejected": -2.0114481449127197, "logps/chosen": -408.66357421875, "logps/rejected": -434.10565185546875, "loss": 0.6632, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2964597940444946, "rewards/margins": 0.35519662499427795, "rewards/rejected": -1.6516563892364502, "step": 3190 }, { "epoch": 0.42, "learning_rate": 3.605977827337596e-06, "logits/chosen": -1.9362224340438843, "logits/rejected": -1.8631280660629272, "logps/chosen": -393.43902587890625, "logps/rejected": -437.18450927734375, "loss": 0.5658, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1848984956741333, "rewards/margins": 0.6274099349975586, "rewards/rejected": -1.8123083114624023, "step": 3200 }, { "epoch": 0.42, "eval_logits/chosen": 0.25350603461265564, "eval_logits/rejected": 0.30752307176589966, "eval_logps/chosen": -404.454833984375, "eval_logps/rejected": -440.9099426269531, "eval_loss": 0.5603488087654114, "eval_rewards/accuracies": 0.6959999799728394, "eval_rewards/chosen": -1.19162118434906, "eval_rewards/margins": 0.5787816047668457, "eval_rewards/rejected": -1.7704027891159058, "eval_runtime": 1173.3181, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 3200 }, { "epoch": 0.42, "learning_rate": 3.595722536279595e-06, "logits/chosen": -2.178765296936035, "logits/rejected": -1.8713560104370117, "logps/chosen": -452.17724609375, "logps/rejected": -454.56707763671875, "loss": 0.4681, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.030299425125122, "rewards/margins": 0.7594797611236572, "rewards/rejected": -1.7897790670394897, "step": 3210 }, { "epoch": 0.42, "learning_rate": 3.58544437197311e-06, "logits/chosen": -1.9322439432144165, "logits/rejected": -1.8333985805511475, "logps/chosen": -403.7508544921875, "logps/rejected": -441.99371337890625, "loss": 0.5456, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.078952670097351, "rewards/margins": 0.6580331921577454, "rewards/rejected": -1.7369858026504517, "step": 3220 }, { "epoch": 0.42, "learning_rate": 3.5751435489752025e-06, "logits/chosen": -1.907280683517456, "logits/rejected": -1.8821004629135132, "logps/chosen": -380.5797119140625, "logps/rejected": -421.6756286621094, "loss": 0.5067, "rewards/accuracies": 0.8125, "rewards/chosen": -1.110288381576538, "rewards/margins": 0.7093003988265991, "rewards/rejected": -1.8195888996124268, "step": 3230 }, { "epoch": 0.42, "learning_rate": 3.5648202823159317e-06, "logits/chosen": -1.8815323114395142, "logits/rejected": -1.8551346063613892, "logps/chosen": -372.4736022949219, "logps/rejected": -472.7781677246094, "loss": 0.5164, "rewards/accuracies": 0.75, "rewards/chosen": -1.1829547882080078, "rewards/margins": 0.7592514753341675, "rewards/rejected": -1.9422063827514648, "step": 3240 }, { "epoch": 0.43, "learning_rate": 3.554474787493873e-06, "logits/chosen": -1.8966144323349, "logits/rejected": -1.6929298639297485, "logps/chosen": -440.6084899902344, "logps/rejected": -500.8756408691406, "loss": 0.5411, "rewards/accuracies": 0.75, "rewards/chosen": -1.260031819343567, "rewards/margins": 0.7993737459182739, "rewards/rejected": -2.059405565261841, "step": 3250 }, { "epoch": 0.43, "learning_rate": 3.5441072804716125e-06, "logits/chosen": -1.9628206491470337, "logits/rejected": -1.8756574392318726, "logps/chosen": -446.837158203125, "logps/rejected": -532.57568359375, "loss": 0.5969, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.32576584815979, "rewards/margins": 0.7321029901504517, "rewards/rejected": -2.0578689575195312, "step": 3260 }, { "epoch": 0.43, "learning_rate": 3.5337179776712427e-06, "logits/chosen": -1.8174540996551514, "logits/rejected": -1.7179148197174072, "logps/chosen": -422.1940002441406, "logps/rejected": -487.786865234375, "loss": 0.575, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4892836809158325, "rewards/margins": 0.698641300201416, "rewards/rejected": -2.187924861907959, "step": 3270 }, { "epoch": 0.43, "learning_rate": 3.5233070959698445e-06, "logits/chosen": -1.973497748374939, "logits/rejected": -1.8461055755615234, "logps/chosen": -470.6715393066406, "logps/rejected": -477.07208251953125, "loss": 0.6291, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.5432910919189453, "rewards/margins": 0.46076974272727966, "rewards/rejected": -2.004060745239258, "step": 3280 }, { "epoch": 0.43, "learning_rate": 3.512874852694959e-06, "logits/chosen": -1.8944342136383057, "logits/rejected": -1.7159817218780518, "logps/chosen": -417.69781494140625, "logps/rejected": -458.3694763183594, "loss": 0.5366, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.293985366821289, "rewards/margins": 0.6382049322128296, "rewards/rejected": -1.9321902990341187, "step": 3290 }, { "epoch": 0.43, "learning_rate": 3.5024214656200497e-06, "logits/chosen": -1.996705412864685, "logits/rejected": -1.7104390859603882, "logps/chosen": -444.9064025878906, "logps/rejected": -443.6212463378906, "loss": 0.6214, "rewards/accuracies": 0.625, "rewards/chosen": -1.434211015701294, "rewards/margins": 0.5524640679359436, "rewards/rejected": -1.9866750240325928, "step": 3300 }, { "epoch": 0.43, "eval_logits/chosen": 0.656358540058136, "eval_logits/rejected": 0.6742247343063354, "eval_logps/chosen": -418.947998046875, "eval_logps/rejected": -460.59857177734375, "eval_loss": 0.5605348944664001, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -1.336552619934082, "eval_rewards/margins": 0.6307366490364075, "eval_rewards/rejected": -1.9672893285751343, "eval_runtime": 1173.3928, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3300 }, { "epoch": 0.43, "learning_rate": 3.491947152959958e-06, "logits/chosen": -2.102064847946167, "logits/rejected": -1.9289686679840088, "logps/chosen": -437.4242248535156, "logps/rejected": -465.6644592285156, "loss": 0.5751, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2784984111785889, "rewards/margins": 0.5344318747520447, "rewards/rejected": -1.8129304647445679, "step": 3310 }, { "epoch": 0.43, "learning_rate": 3.4814521333663497e-06, "logits/chosen": -2.1381969451904297, "logits/rejected": -2.014378547668457, "logps/chosen": -461.36907958984375, "logps/rejected": -440.06787109375, "loss": 0.5694, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1574351787567139, "rewards/margins": 0.610199511051178, "rewards/rejected": -1.767634630203247, "step": 3320 }, { "epoch": 0.44, "learning_rate": 3.4709366259231468e-06, "logits/chosen": -1.9088739156723022, "logits/rejected": -1.7347705364227295, "logps/chosen": -422.6807556152344, "logps/rejected": -433.4283142089844, "loss": 0.5888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.172013282775879, "rewards/margins": 0.5078199505805969, "rewards/rejected": -1.6798330545425415, "step": 3330 }, { "epoch": 0.44, "learning_rate": 3.460400850141956e-06, "logits/chosen": -1.9768264293670654, "logits/rejected": -1.7686551809310913, "logps/chosen": -385.18499755859375, "logps/rejected": -421.01708984375, "loss": 0.5462, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3630168437957764, "rewards/margins": 0.5688962936401367, "rewards/rejected": -1.931913137435913, "step": 3340 }, { "epoch": 0.44, "learning_rate": 3.4498450259574858e-06, "logits/chosen": -1.8806072473526, "logits/rejected": -1.845616340637207, "logps/chosen": -414.7618713378906, "logps/rejected": -438.54022216796875, "loss": 0.6045, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3012163639068604, "rewards/margins": 0.42003631591796875, "rewards/rejected": -1.7212527990341187, "step": 3350 }, { "epoch": 0.44, "learning_rate": 3.439269373722957e-06, "logits/chosen": -1.8833872079849243, "logits/rejected": -1.8121143579483032, "logps/chosen": -402.79156494140625, "logps/rejected": -436.91632080078125, "loss": 0.5561, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2642663717269897, "rewards/margins": 0.6149861812591553, "rewards/rejected": -1.8792526721954346, "step": 3360 }, { "epoch": 0.44, "learning_rate": 3.4286741142055014e-06, "logits/chosen": -1.9846508502960205, "logits/rejected": -1.9692661762237549, "logps/chosen": -426.28167724609375, "logps/rejected": -463.14971923828125, "loss": 0.5499, "rewards/accuracies": 0.75, "rewards/chosen": -1.2158000469207764, "rewards/margins": 0.5520261526107788, "rewards/rejected": -1.7678263187408447, "step": 3370 }, { "epoch": 0.44, "learning_rate": 3.4180594685815536e-06, "logits/chosen": -1.9412851333618164, "logits/rejected": -1.8633683919906616, "logps/chosen": -368.2373046875, "logps/rejected": -419.0936584472656, "loss": 0.5812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.277923583984375, "rewards/margins": 0.536681056022644, "rewards/rejected": -1.8146045207977295, "step": 3380 }, { "epoch": 0.44, "learning_rate": 3.4074256584322336e-06, "logits/chosen": -1.9246950149536133, "logits/rejected": -1.8012104034423828, "logps/chosen": -366.427978515625, "logps/rejected": -421.9742736816406, "loss": 0.486, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0420225858688354, "rewards/margins": 0.7852731347084045, "rewards/rejected": -1.8272956609725952, "step": 3390 }, { "epoch": 0.44, "learning_rate": 3.3967729057387213e-06, "logits/chosen": -1.937070608139038, "logits/rejected": -1.8224836587905884, "logps/chosen": -417.2428283691406, "logps/rejected": -430.810791015625, "loss": 0.581, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0852601528167725, "rewards/margins": 0.539286732673645, "rewards/rejected": -1.624547004699707, "step": 3400 }, { "epoch": 0.44, "eval_logits/chosen": 0.5448690056800842, "eval_logits/rejected": 0.583937406539917, "eval_logps/chosen": -398.8811950683594, "eval_logps/rejected": -440.7017517089844, "eval_loss": 0.5563305616378784, "eval_rewards/accuracies": 0.6984999775886536, "eval_rewards/chosen": -1.1358840465545654, "eval_rewards/margins": 0.6324369311332703, "eval_rewards/rejected": -1.7683210372924805, "eval_runtime": 1173.8192, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3400 }, { "epoch": 0.45, "learning_rate": 3.386101432877624e-06, "logits/chosen": -2.051443099975586, "logits/rejected": -1.9577932357788086, "logps/chosen": -393.5393981933594, "logps/rejected": -406.2938537597656, "loss": 0.5609, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0965825319290161, "rewards/margins": 0.5381280779838562, "rewards/rejected": -1.6347105503082275, "step": 3410 }, { "epoch": 0.45, "learning_rate": 3.375411462616332e-06, "logits/chosen": -2.104078769683838, "logits/rejected": -1.994580864906311, "logps/chosen": -422.4803771972656, "logps/rejected": -492.143310546875, "loss": 0.5181, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1625874042510986, "rewards/margins": 0.6946650743484497, "rewards/rejected": -1.8572524785995483, "step": 3420 }, { "epoch": 0.45, "learning_rate": 3.3647032181083696e-06, "logits/chosen": -2.0899133682250977, "logits/rejected": -1.9766260385513306, "logps/chosen": -455.85955810546875, "logps/rejected": -512.9901123046875, "loss": 0.4945, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3426158428192139, "rewards/margins": 0.7741657495498657, "rewards/rejected": -2.116781711578369, "step": 3430 }, { "epoch": 0.45, "learning_rate": 3.3539769228887382e-06, "logits/chosen": -2.0404748916625977, "logits/rejected": -1.9174991846084595, "logps/chosen": -448.553955078125, "logps/rejected": -503.87762451171875, "loss": 0.5073, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2138594388961792, "rewards/margins": 0.6975424885749817, "rewards/rejected": -1.9114019870758057, "step": 3440 }, { "epoch": 0.45, "learning_rate": 3.343232800869247e-06, "logits/chosen": -1.865696907043457, "logits/rejected": -1.7030518054962158, "logps/chosen": -385.63775634765625, "logps/rejected": -397.9042663574219, "loss": 0.5341, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3254315853118896, "rewards/margins": 0.6739403009414673, "rewards/rejected": -1.999372124671936, "step": 3450 }, { "epoch": 0.45, "learning_rate": 3.33247107633384e-06, "logits/chosen": -1.8744150400161743, "logits/rejected": -1.8472354412078857, "logps/chosen": -425.00030517578125, "logps/rejected": -494.12347412109375, "loss": 0.5114, "rewards/accuracies": 0.75, "rewards/chosen": -1.3889572620391846, "rewards/margins": 0.8310378789901733, "rewards/rejected": -2.2199950218200684, "step": 3460 }, { "epoch": 0.45, "learning_rate": 3.3216919739339155e-06, "logits/chosen": -1.9275439977645874, "logits/rejected": -1.855700135231018, "logps/chosen": -459.2312927246094, "logps/rejected": -486.5613708496094, "loss": 0.4922, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5085200071334839, "rewards/margins": 0.8183439373970032, "rewards/rejected": -2.3268637657165527, "step": 3470 }, { "epoch": 0.46, "learning_rate": 3.310895718683635e-06, "logits/chosen": -1.9474788904190063, "logits/rejected": -1.896143913269043, "logps/chosen": -452.40032958984375, "logps/rejected": -480.8934020996094, "loss": 0.616, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4318238496780396, "rewards/margins": 0.533476710319519, "rewards/rejected": -1.9653003215789795, "step": 3480 }, { "epoch": 0.46, "learning_rate": 3.3000825359552256e-06, "logits/chosen": -1.9331462383270264, "logits/rejected": -1.8510589599609375, "logps/chosen": -394.94024658203125, "logps/rejected": -465.19512939453125, "loss": 0.5357, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0899837017059326, "rewards/margins": 0.7150499224662781, "rewards/rejected": -1.8050334453582764, "step": 3490 }, { "epoch": 0.46, "learning_rate": 3.2892526514742778e-06, "logits/chosen": -1.9553263187408447, "logits/rejected": -1.8354969024658203, "logps/chosen": -401.61383056640625, "logps/rejected": -421.52099609375, "loss": 0.5422, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.112273931503296, "rewards/margins": 0.6440818905830383, "rewards/rejected": -1.7563560009002686, "step": 3500 }, { "epoch": 0.46, "eval_logits/chosen": 0.5330484509468079, "eval_logits/rejected": 0.5734737515449524, "eval_logps/chosen": -388.93182373046875, "eval_logps/rejected": -425.3734130859375, "eval_loss": 0.5589743852615356, "eval_rewards/accuracies": 0.6915000081062317, "eval_rewards/chosen": -1.0363909006118774, "eval_rewards/margins": 0.5786464810371399, "eval_rewards/rejected": -1.615037441253662, "eval_runtime": 1173.0533, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 3500 }, { "epoch": 0.46, "learning_rate": 3.27840629131503e-06, "logits/chosen": -2.0102248191833496, "logits/rejected": -1.8568379878997803, "logps/chosen": -396.32257080078125, "logps/rejected": -432.460693359375, "loss": 0.5452, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.035860300064087, "rewards/margins": 0.662264883518219, "rewards/rejected": -1.6981252431869507, "step": 3510 }, { "epoch": 0.46, "learning_rate": 3.2675436818956522e-06, "logits/chosen": -2.016779661178589, "logits/rejected": -1.9230639934539795, "logps/chosen": -359.00775146484375, "logps/rejected": -420.87335205078125, "loss": 0.5526, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9780494570732117, "rewards/margins": 0.529609203338623, "rewards/rejected": -1.5076587200164795, "step": 3520 }, { "epoch": 0.46, "learning_rate": 3.2566650499735185e-06, "logits/chosen": -1.8304370641708374, "logits/rejected": -1.7253713607788086, "logps/chosen": -419.9305725097656, "logps/rejected": -476.528076171875, "loss": 0.4987, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.106669545173645, "rewards/margins": 0.8515464067459106, "rewards/rejected": -1.9582157135009766, "step": 3530 }, { "epoch": 0.46, "learning_rate": 3.2457706226404715e-06, "logits/chosen": -1.9432268142700195, "logits/rejected": -1.873006820678711, "logps/chosen": -408.003173828125, "logps/rejected": -421.4966735839844, "loss": 0.6014, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2254436016082764, "rewards/margins": 0.5503524541854858, "rewards/rejected": -1.7757961750030518, "step": 3540 }, { "epoch": 0.46, "learning_rate": 3.2348606273180847e-06, "logits/chosen": -2.1072030067443848, "logits/rejected": -1.8886438608169556, "logps/chosen": -429.07977294921875, "logps/rejected": -406.6357421875, "loss": 0.5499, "rewards/accuracies": 0.75, "rewards/chosen": -1.0243723392486572, "rewards/margins": 0.6358232498168945, "rewards/rejected": -1.6601955890655518, "step": 3550 }, { "epoch": 0.47, "learning_rate": 3.2239352917529165e-06, "logits/chosen": -2.1119418144226074, "logits/rejected": -1.9380228519439697, "logps/chosen": -445.6163024902344, "logps/rejected": -476.1900939941406, "loss": 0.5612, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2269493341445923, "rewards/margins": 0.5179942846298218, "rewards/rejected": -1.744943380355835, "step": 3560 }, { "epoch": 0.47, "learning_rate": 3.2129948440117487e-06, "logits/chosen": -1.987892508506775, "logits/rejected": -1.941389799118042, "logps/chosen": -425.90399169921875, "logps/rejected": -453.47479248046875, "loss": 0.6117, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4512683153152466, "rewards/margins": 0.43263331055641174, "rewards/rejected": -1.883901596069336, "step": 3570 }, { "epoch": 0.47, "learning_rate": 3.202039512476833e-06, "logits/chosen": -1.837915062904358, "logits/rejected": -1.7621958255767822, "logps/chosen": -383.6486511230469, "logps/rejected": -465.4740295410156, "loss": 0.4819, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3156425952911377, "rewards/margins": 0.8208361864089966, "rewards/rejected": -2.136478900909424, "step": 3580 }, { "epoch": 0.47, "learning_rate": 3.1910695258411216e-06, "logits/chosen": -2.027493715286255, "logits/rejected": -1.7317771911621094, "logps/chosen": -396.72076416015625, "logps/rejected": -412.47100830078125, "loss": 0.5382, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.1293015480041504, "rewards/margins": 0.7101532816886902, "rewards/rejected": -1.8394548892974854, "step": 3590 }, { "epoch": 0.47, "learning_rate": 3.1800851131034904e-06, "logits/chosen": -1.9563058614730835, "logits/rejected": -1.87738835811615, "logps/chosen": -409.775146484375, "logps/rejected": -439.15972900390625, "loss": 0.5626, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2867438793182373, "rewards/margins": 0.6279332041740417, "rewards/rejected": -1.9146772623062134, "step": 3600 }, { "epoch": 0.47, "eval_logits/chosen": 0.7520022988319397, "eval_logits/rejected": 0.786247968673706, "eval_logps/chosen": -396.4902038574219, "eval_logps/rejected": -438.8792419433594, "eval_loss": 0.5602012276649475, "eval_rewards/accuracies": 0.6909999847412109, "eval_rewards/chosen": -1.1119749546051025, "eval_rewards/margins": 0.6381211876869202, "eval_rewards/rejected": -1.750096082687378, "eval_runtime": 1173.4035, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3600 }, { "epoch": 0.47, "learning_rate": 3.169086503563962e-06, "logits/chosen": -2.0438876152038574, "logits/rejected": -1.9742358922958374, "logps/chosen": -373.54742431640625, "logps/rejected": -449.31475830078125, "loss": 0.562, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9906983375549316, "rewards/margins": 0.6011732220649719, "rewards/rejected": -1.5918715000152588, "step": 3610 }, { "epoch": 0.47, "learning_rate": 3.1580739268189165e-06, "logits/chosen": -1.9275391101837158, "logits/rejected": -1.7515623569488525, "logps/chosen": -396.3177490234375, "logps/rejected": -434.313232421875, "loss": 0.518, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0211318731307983, "rewards/margins": 0.7688616514205933, "rewards/rejected": -1.7899936437606812, "step": 3620 }, { "epoch": 0.48, "learning_rate": 3.147047612756302e-06, "logits/chosen": -1.8347206115722656, "logits/rejected": -1.8424797058105469, "logps/chosen": -425.95281982421875, "logps/rejected": -481.7438049316406, "loss": 0.5059, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0812221765518188, "rewards/margins": 0.7794182896614075, "rewards/rejected": -1.860640287399292, "step": 3630 }, { "epoch": 0.48, "learning_rate": 3.136007791550833e-06, "logits/chosen": -1.7254505157470703, "logits/rejected": -1.5392547845840454, "logps/chosen": -370.35992431640625, "logps/rejected": -395.29742431640625, "loss": 0.5626, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.103926658630371, "rewards/margins": 0.6251890063285828, "rewards/rejected": -1.7291157245635986, "step": 3640 }, { "epoch": 0.48, "learning_rate": 3.1249546936591848e-06, "logits/chosen": -1.8757476806640625, "logits/rejected": -1.7597439289093018, "logps/chosen": -354.79046630859375, "logps/rejected": -415.020751953125, "loss": 0.5309, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9962058067321777, "rewards/margins": 0.6632649898529053, "rewards/rejected": -1.659470796585083, "step": 3650 }, { "epoch": 0.48, "learning_rate": 3.1138885498151843e-06, "logits/chosen": -1.6727317571640015, "logits/rejected": -1.6530840396881104, "logps/chosen": -423.81573486328125, "logps/rejected": -481.62615966796875, "loss": 0.443, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3244787454605103, "rewards/margins": 1.0427327156066895, "rewards/rejected": -2.3672115802764893, "step": 3660 }, { "epoch": 0.48, "learning_rate": 3.1028095910249937e-06, "logits/chosen": -1.9963428974151611, "logits/rejected": -1.7119239568710327, "logps/chosen": -432.7828063964844, "logps/rejected": -448.8419494628906, "loss": 0.5304, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3926994800567627, "rewards/margins": 0.7119554281234741, "rewards/rejected": -2.1046550273895264, "step": 3670 }, { "epoch": 0.48, "learning_rate": 3.0917180485622895e-06, "logits/chosen": -1.8475887775421143, "logits/rejected": -1.564581274986267, "logps/chosen": -446.0194396972656, "logps/rejected": -470.3948669433594, "loss": 0.576, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.546870231628418, "rewards/margins": 0.7647116184234619, "rewards/rejected": -2.311582088470459, "step": 3680 }, { "epoch": 0.48, "learning_rate": 3.0806141539634294e-06, "logits/chosen": -1.904123306274414, "logits/rejected": -1.6115459203720093, "logps/chosen": -418.052001953125, "logps/rejected": -437.1920471191406, "loss": 0.557, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.486206293106079, "rewards/margins": 0.6802877187728882, "rewards/rejected": -2.1664938926696777, "step": 3690 }, { "epoch": 0.48, "learning_rate": 3.069498139022624e-06, "logits/chosen": -2.0392978191375732, "logits/rejected": -1.8265810012817383, "logps/chosen": -451.705078125, "logps/rejected": -444.59954833984375, "loss": 0.627, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4540126323699951, "rewards/margins": 0.47257882356643677, "rewards/rejected": -1.9265915155410767, "step": 3700 }, { "epoch": 0.48, "eval_logits/chosen": 0.8575670123100281, "eval_logits/rejected": 0.8809006810188293, "eval_logps/chosen": -413.73907470703125, "eval_logps/rejected": -458.7536926269531, "eval_loss": 0.5579034686088562, "eval_rewards/accuracies": 0.6934999823570251, "eval_rewards/chosen": -1.2844632863998413, "eval_rewards/margins": 0.6643770337104797, "eval_rewards/rejected": -1.9488401412963867, "eval_runtime": 1173.4041, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3700 }, { "epoch": 0.49, "learning_rate": 3.0583702357870964e-06, "logits/chosen": -1.8973909616470337, "logits/rejected": -1.836904525756836, "logps/chosen": -453.96282958984375, "logps/rejected": -506.8397521972656, "loss": 0.5881, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2685407400131226, "rewards/margins": 0.5457057952880859, "rewards/rejected": -1.8142467737197876, "step": 3710 }, { "epoch": 0.49, "learning_rate": 3.0472306765522393e-06, "logits/chosen": -1.9780431985855103, "logits/rejected": -1.767011284828186, "logps/chosen": -373.0862731933594, "logps/rejected": -420.5169982910156, "loss": 0.5546, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0875823497772217, "rewards/margins": 0.7041456699371338, "rewards/rejected": -1.7917280197143555, "step": 3720 }, { "epoch": 0.49, "learning_rate": 3.0360796938567628e-06, "logits/chosen": -2.005666732788086, "logits/rejected": -1.856957197189331, "logps/chosen": -408.0935974121094, "logps/rejected": -428.3479919433594, "loss": 0.5623, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1865909099578857, "rewards/margins": 0.6112765073776245, "rewards/rejected": -1.7978674173355103, "step": 3730 }, { "epoch": 0.49, "learning_rate": 3.0249175204778435e-06, "logits/chosen": -1.895155668258667, "logits/rejected": -1.882965326309204, "logps/chosen": -400.49566650390625, "logps/rejected": -446.05010986328125, "loss": 0.5208, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2255418300628662, "rewards/margins": 0.6657425165176392, "rewards/rejected": -1.8912845849990845, "step": 3740 }, { "epoch": 0.49, "learning_rate": 3.0137443894262634e-06, "logits/chosen": -1.6782268285751343, "logits/rejected": -1.5924017429351807, "logps/chosen": -419.470703125, "logps/rejected": -440.67437744140625, "loss": 0.4925, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1807644367218018, "rewards/margins": 0.8425852060317993, "rewards/rejected": -2.0233497619628906, "step": 3750 }, { "epoch": 0.49, "learning_rate": 3.0025605339415476e-06, "logits/chosen": -1.8740978240966797, "logits/rejected": -1.718133568763733, "logps/chosen": -398.0804748535156, "logps/rejected": -441.2355041503906, "loss": 0.519, "rewards/accuracies": 0.75, "rewards/chosen": -1.1523767709732056, "rewards/margins": 0.7449162602424622, "rewards/rejected": -1.8972930908203125, "step": 3760 }, { "epoch": 0.49, "learning_rate": 2.9913661874870923e-06, "logits/chosen": -1.869728446006775, "logits/rejected": -1.7910888195037842, "logps/chosen": -395.14031982421875, "logps/rejected": -419.30487060546875, "loss": 0.5382, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1323497295379639, "rewards/margins": 0.6088643074035645, "rewards/rejected": -1.7412141561508179, "step": 3770 }, { "epoch": 0.49, "learning_rate": 2.980161583745294e-06, "logits/chosen": -1.9254146814346313, "logits/rejected": -1.8167539834976196, "logps/chosen": -436.7218322753906, "logps/rejected": -462.28961181640625, "loss": 0.4902, "rewards/accuracies": 0.75, "rewards/chosen": -1.1210156679153442, "rewards/margins": 0.7795640230178833, "rewards/rejected": -1.9005796909332275, "step": 3780 }, { "epoch": 0.5, "learning_rate": 2.96894695661267e-06, "logits/chosen": -1.9709405899047852, "logits/rejected": -1.8259330987930298, "logps/chosen": -451.60150146484375, "logps/rejected": -450.0809631347656, "loss": 0.6165, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3595227003097534, "rewards/margins": 0.4553799033164978, "rewards/rejected": -1.814902901649475, "step": 3790 }, { "epoch": 0.5, "learning_rate": 2.9577225401949773e-06, "logits/chosen": -1.6736812591552734, "logits/rejected": -1.669345498085022, "logps/chosen": -382.5348815917969, "logps/rejected": -435.5172424316406, "loss": 0.5522, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.350743293762207, "rewards/margins": 0.6797314882278442, "rewards/rejected": -2.0304746627807617, "step": 3800 }, { "epoch": 0.5, "eval_logits/chosen": 0.8745436072349548, "eval_logits/rejected": 0.911791980266571, "eval_logps/chosen": -423.3916320800781, "eval_logps/rejected": -470.93115234375, "eval_loss": 0.5561516284942627, "eval_rewards/accuracies": 0.6974999904632568, "eval_rewards/chosen": -1.380988597869873, "eval_rewards/margins": 0.6896264553070068, "eval_rewards/rejected": -2.07061505317688, "eval_runtime": 1173.8815, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 3800 }, { "epoch": 0.5, "learning_rate": 2.946488568802324e-06, "logits/chosen": -1.7115625143051147, "logits/rejected": -1.5814566612243652, "logps/chosen": -417.40313720703125, "logps/rejected": -453.150634765625, "loss": 0.5803, "rewards/accuracies": 0.75, "rewards/chosen": -1.3829896450042725, "rewards/margins": 0.5222972631454468, "rewards/rejected": -1.9052867889404297, "step": 3810 }, { "epoch": 0.5, "learning_rate": 2.935245276944278e-06, "logits/chosen": -1.7636444568634033, "logits/rejected": -1.6741498708724976, "logps/chosen": -433.516357421875, "logps/rejected": -457.4190368652344, "loss": 0.5762, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2248344421386719, "rewards/margins": 0.5882797837257385, "rewards/rejected": -1.8131141662597656, "step": 3820 }, { "epoch": 0.5, "learning_rate": 2.9239928993249723e-06, "logits/chosen": -1.8020870685577393, "logits/rejected": -1.7303444147109985, "logps/chosen": -424.1175842285156, "logps/rejected": -479.3556213378906, "loss": 0.5002, "rewards/accuracies": 0.75, "rewards/chosen": -1.2616357803344727, "rewards/margins": 0.9329161643981934, "rewards/rejected": -2.194552183151245, "step": 3830 }, { "epoch": 0.5, "learning_rate": 2.912731670838207e-06, "logits/chosen": -1.7195581197738647, "logits/rejected": -1.5701277256011963, "logps/chosen": -412.962646484375, "logps/rejected": -474.04156494140625, "loss": 0.6148, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4015324115753174, "rewards/margins": 0.593268096446991, "rewards/rejected": -1.994800329208374, "step": 3840 }, { "epoch": 0.5, "learning_rate": 2.901461826562543e-06, "logits/chosen": -1.814399003982544, "logits/rejected": -1.61077082157135, "logps/chosen": -366.83282470703125, "logps/rejected": -415.51483154296875, "loss": 0.5527, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2060191631317139, "rewards/margins": 0.7003362774848938, "rewards/rejected": -1.9063555002212524, "step": 3850 }, { "epoch": 0.51, "learning_rate": 2.8901836017563966e-06, "logits/chosen": -1.8173844814300537, "logits/rejected": -1.6594011783599854, "logps/chosen": -419.95294189453125, "logps/rejected": -427.8134765625, "loss": 0.6101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2266980409622192, "rewards/margins": 0.46940097212791443, "rewards/rejected": -1.696099042892456, "step": 3860 }, { "epoch": 0.51, "learning_rate": 2.8788972318531272e-06, "logits/chosen": -1.8642652034759521, "logits/rejected": -1.7005784511566162, "logps/chosen": -385.55279541015625, "logps/rejected": -431.86590576171875, "loss": 0.574, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1929553747177124, "rewards/margins": 0.5182313919067383, "rewards/rejected": -1.7111867666244507, "step": 3870 }, { "epoch": 0.51, "learning_rate": 2.8676029524561255e-06, "logits/chosen": -1.8119266033172607, "logits/rejected": -1.7860915660858154, "logps/chosen": -419.46258544921875, "logps/rejected": -460.0663146972656, "loss": 0.5882, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.134201169013977, "rewards/margins": 0.5569092631340027, "rewards/rejected": -1.691110372543335, "step": 3880 }, { "epoch": 0.51, "learning_rate": 2.8563009993338906e-06, "logits/chosen": -1.818861722946167, "logits/rejected": -1.6572418212890625, "logps/chosen": -381.38958740234375, "logps/rejected": -442.7064514160156, "loss": 0.5265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1867233514785767, "rewards/margins": 0.7000702619552612, "rewards/rejected": -1.8867934942245483, "step": 3890 }, { "epoch": 0.51, "learning_rate": 2.844991608415113e-06, "logits/chosen": -1.8968263864517212, "logits/rejected": -1.835386872291565, "logps/chosen": -421.7579040527344, "logps/rejected": -479.73699951171875, "loss": 0.5734, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3715274333953857, "rewards/margins": 0.689576268196106, "rewards/rejected": -2.061103582382202, "step": 3900 }, { "epoch": 0.51, "eval_logits/chosen": 0.7416585683822632, "eval_logits/rejected": 0.7968641519546509, "eval_logps/chosen": -424.93609619140625, "eval_logps/rejected": -472.9461669921875, "eval_loss": 0.5556566715240479, "eval_rewards/accuracies": 0.6970000267028809, "eval_rewards/chosen": -1.39643394947052, "eval_rewards/margins": 0.6943311095237732, "eval_rewards/rejected": -2.0907649993896484, "eval_runtime": 1173.1781, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 3900 }, { "epoch": 0.51, "learning_rate": 2.833675015783746e-06, "logits/chosen": -1.728763222694397, "logits/rejected": -1.7468255758285522, "logps/chosen": -406.5009460449219, "logps/rejected": -470.13671875, "loss": 0.5588, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5327627658843994, "rewards/margins": 0.6243911385536194, "rewards/rejected": -2.157153606414795, "step": 3910 }, { "epoch": 0.51, "learning_rate": 2.8223514576740784e-06, "logits/chosen": -1.710656762123108, "logits/rejected": -1.6441787481307983, "logps/chosen": -368.9499816894531, "logps/rejected": -460.90606689453125, "loss": 0.5291, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2226186990737915, "rewards/margins": 0.6628161072731018, "rewards/rejected": -1.8854347467422485, "step": 3920 }, { "epoch": 0.51, "learning_rate": 2.8110211704658073e-06, "logits/chosen": -1.9349536895751953, "logits/rejected": -1.797790288925171, "logps/chosen": -454.807373046875, "logps/rejected": -483.8372497558594, "loss": 0.5261, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3279203176498413, "rewards/margins": 0.6820384860038757, "rewards/rejected": -2.0099589824676514, "step": 3930 }, { "epoch": 0.52, "learning_rate": 2.7996843906790955e-06, "logits/chosen": -1.7957370281219482, "logits/rejected": -1.6038751602172852, "logps/chosen": -396.48980712890625, "logps/rejected": -440.9287109375, "loss": 0.6408, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3963592052459717, "rewards/margins": 0.4027465879917145, "rewards/rejected": -1.7991058826446533, "step": 3940 }, { "epoch": 0.52, "learning_rate": 2.7883413549696396e-06, "logits/chosen": -1.888304352760315, "logits/rejected": -1.7081407308578491, "logps/chosen": -428.5066833496094, "logps/rejected": -495.19647216796875, "loss": 0.445, "rewards/accuracies": 0.8125, "rewards/chosen": -1.211716651916504, "rewards/margins": 0.9643009305000305, "rewards/rejected": -2.1760175228118896, "step": 3950 }, { "epoch": 0.52, "learning_rate": 2.776992300123732e-06, "logits/chosen": -1.6737916469573975, "logits/rejected": -1.5721557140350342, "logps/chosen": -391.4358825683594, "logps/rejected": -468.9794006347656, "loss": 0.6174, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.308905839920044, "rewards/margins": 0.7785030603408813, "rewards/rejected": -2.0874087810516357, "step": 3960 }, { "epoch": 0.52, "learning_rate": 2.7656374630533113e-06, "logits/chosen": -1.8585602045059204, "logits/rejected": -1.8566067218780518, "logps/chosen": -390.7518005371094, "logps/rejected": -458.5517578125, "loss": 0.5299, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4467813968658447, "rewards/margins": 0.6529810428619385, "rewards/rejected": -2.099762439727783, "step": 3970 }, { "epoch": 0.52, "learning_rate": 2.754277080791021e-06, "logits/chosen": -1.8200175762176514, "logits/rejected": -1.7867431640625, "logps/chosen": -438.35693359375, "logps/rejected": -460.3201599121094, "loss": 0.7543, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.56465744972229, "rewards/margins": 0.29193997383117676, "rewards/rejected": -1.856597661972046, "step": 3980 }, { "epoch": 0.52, "learning_rate": 2.742911390485262e-06, "logits/chosen": -1.6067097187042236, "logits/rejected": -1.6185413599014282, "logps/chosen": -369.546630859375, "logps/rejected": -407.5931091308594, "loss": 0.595, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4288946390151978, "rewards/margins": 0.47618919610977173, "rewards/rejected": -1.9050838947296143, "step": 3990 }, { "epoch": 0.52, "learning_rate": 2.731540629395239e-06, "logits/chosen": -1.738416075706482, "logits/rejected": -1.681363821029663, "logps/chosen": -443.32257080078125, "logps/rejected": -460.7607421875, "loss": 0.612, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5387576818466187, "rewards/margins": 0.45962828397750854, "rewards/rejected": -1.998386025428772, "step": 4000 }, { "epoch": 0.52, "eval_logits/chosen": 0.8717538714408875, "eval_logits/rejected": 0.8941403031349182, "eval_logps/chosen": -447.7853698730469, "eval_logps/rejected": -496.18499755859375, "eval_loss": 0.5548127889633179, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -1.6249265670776367, "eval_rewards/margins": 0.6982267498970032, "eval_rewards/rejected": -2.323153257369995, "eval_runtime": 1172.9673, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 4000 }, { "epoch": 0.52, "learning_rate": 2.7201650348860115e-06, "logits/chosen": -1.8293654918670654, "logits/rejected": -1.6924068927764893, "logps/chosen": -425.6841735839844, "logps/rejected": -457.46051025390625, "loss": 0.5073, "rewards/accuracies": 0.75, "rewards/chosen": -1.6868245601654053, "rewards/margins": 0.7679763436317444, "rewards/rejected": -2.454801082611084, "step": 4010 }, { "epoch": 0.53, "learning_rate": 2.7087848444235354e-06, "logits/chosen": -1.9378684759140015, "logits/rejected": -1.771817922592163, "logps/chosen": -456.98504638671875, "logps/rejected": -536.107421875, "loss": 0.4734, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6349451541900635, "rewards/margins": 0.9969595670700073, "rewards/rejected": -2.6319048404693604, "step": 4020 }, { "epoch": 0.53, "learning_rate": 2.697400295569707e-06, "logits/chosen": -1.890873908996582, "logits/rejected": -1.9099485874176025, "logps/chosen": -416.513671875, "logps/rejected": -483.06781005859375, "loss": 0.6058, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5668065547943115, "rewards/margins": 0.696664035320282, "rewards/rejected": -2.263470411300659, "step": 4030 }, { "epoch": 0.53, "learning_rate": 2.6860116259774065e-06, "logits/chosen": -1.7504587173461914, "logits/rejected": -1.6228545904159546, "logps/chosen": -450.67535400390625, "logps/rejected": -522.6287841796875, "loss": 0.4873, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4655582904815674, "rewards/margins": 0.89894038438797, "rewards/rejected": -2.3644988536834717, "step": 4040 }, { "epoch": 0.53, "learning_rate": 2.674619073385531e-06, "logits/chosen": -1.7882649898529053, "logits/rejected": -1.7534267902374268, "logps/chosen": -383.7250061035156, "logps/rejected": -463.1366271972656, "loss": 0.579, "rewards/accuracies": 0.75, "rewards/chosen": -1.245359182357788, "rewards/margins": 0.7908748388290405, "rewards/rejected": -2.036233901977539, "step": 4050 }, { "epoch": 0.53, "learning_rate": 2.663222875614038e-06, "logits/chosen": -1.9012863636016846, "logits/rejected": -1.7262403964996338, "logps/chosen": -394.57708740234375, "logps/rejected": -458.18414306640625, "loss": 0.6095, "rewards/accuracies": 0.75, "rewards/chosen": -1.335689902305603, "rewards/margins": 0.5589646100997925, "rewards/rejected": -1.8946545124053955, "step": 4060 }, { "epoch": 0.53, "learning_rate": 2.6518232705589775e-06, "logits/chosen": -1.9649426937103271, "logits/rejected": -1.8220106363296509, "logps/chosen": -378.5050354003906, "logps/rejected": -460.794677734375, "loss": 0.4866, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0446014404296875, "rewards/margins": 0.8659340739250183, "rewards/rejected": -1.9105355739593506, "step": 4070 }, { "epoch": 0.53, "learning_rate": 2.640420496187528e-06, "logits/chosen": -1.921217679977417, "logits/rejected": -1.7409846782684326, "logps/chosen": -418.1365661621094, "logps/rejected": -441.38494873046875, "loss": 0.5027, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.063091516494751, "rewards/margins": 0.8426526784896851, "rewards/rejected": -1.9057443141937256, "step": 4080 }, { "epoch": 0.54, "learning_rate": 2.629014790533025e-06, "logits/chosen": -1.8884108066558838, "logits/rejected": -1.6967947483062744, "logps/chosen": -426.567626953125, "logps/rejected": -445.81134033203125, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -1.1580777168273926, "rewards/margins": 0.8561995625495911, "rewards/rejected": -2.014277458190918, "step": 4090 }, { "epoch": 0.54, "learning_rate": 2.617606391689996e-06, "logits/chosen": -1.9376146793365479, "logits/rejected": -1.746341347694397, "logps/chosen": -382.73590087890625, "logps/rejected": -441.44134521484375, "loss": 0.5357, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0421863794326782, "rewards/margins": 0.8114742040634155, "rewards/rejected": -1.8536605834960938, "step": 4100 }, { "epoch": 0.54, "eval_logits/chosen": 0.5101784467697144, "eval_logits/rejected": 0.5836014747619629, "eval_logps/chosen": -404.91351318359375, "eval_logps/rejected": -452.53375244140625, "eval_loss": 0.5587130188941956, "eval_rewards/accuracies": 0.6995000243186951, "eval_rewards/chosen": -1.196208119392395, "eval_rewards/margins": 0.6904324889183044, "eval_rewards/rejected": -1.8866406679153442, "eval_runtime": 1172.8993, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 4100 }, { "epoch": 0.54, "learning_rate": 2.6061955378091896e-06, "logits/chosen": -1.8436400890350342, "logits/rejected": -1.7029361724853516, "logps/chosen": -379.65777587890625, "logps/rejected": -479.08734130859375, "loss": 0.5021, "rewards/accuracies": 0.75, "rewards/chosen": -1.1766784191131592, "rewards/margins": 0.9267901182174683, "rewards/rejected": -2.103468656539917, "step": 4110 }, { "epoch": 0.54, "learning_rate": 2.5947824670926025e-06, "logits/chosen": -1.9190120697021484, "logits/rejected": -1.902486801147461, "logps/chosen": -372.6246337890625, "logps/rejected": -463.3282775878906, "loss": 0.4966, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1196424961090088, "rewards/margins": 0.8446052670478821, "rewards/rejected": -1.964247465133667, "step": 4120 }, { "epoch": 0.54, "learning_rate": 2.583367417788508e-06, "logits/chosen": -1.7358171939849854, "logits/rejected": -1.6001354455947876, "logps/chosen": -416.04217529296875, "logps/rejected": -481.5262756347656, "loss": 0.5564, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5063390731811523, "rewards/margins": 0.796363353729248, "rewards/rejected": -2.3027024269104004, "step": 4130 }, { "epoch": 0.54, "learning_rate": 2.5719506281864838e-06, "logits/chosen": -1.901760458946228, "logits/rejected": -1.8143211603164673, "logps/chosen": -435.39495849609375, "logps/rejected": -438.5569763183594, "loss": 0.6003, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3779494762420654, "rewards/margins": 0.6639505624771118, "rewards/rejected": -2.041900157928467, "step": 4140 }, { "epoch": 0.54, "learning_rate": 2.5605323366124335e-06, "logits/chosen": -1.8352622985839844, "logits/rejected": -1.6869032382965088, "logps/chosen": -411.93011474609375, "logps/rejected": -476.12548828125, "loss": 0.5492, "rewards/accuracies": 0.6875, "rewards/chosen": -1.41312575340271, "rewards/margins": 0.7376724481582642, "rewards/rejected": -2.1507978439331055, "step": 4150 }, { "epoch": 0.54, "learning_rate": 2.5491127814236172e-06, "logits/chosen": -1.7956464290618896, "logits/rejected": -1.8745015859603882, "logps/chosen": -333.6759033203125, "logps/rejected": -437.91644287109375, "loss": 0.5616, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0492000579833984, "rewards/margins": 0.6359735727310181, "rewards/rejected": -1.6851736307144165, "step": 4160 }, { "epoch": 0.55, "learning_rate": 2.537692201003671e-06, "logits/chosen": -1.8365240097045898, "logits/rejected": -1.851731538772583, "logps/chosen": -418.044677734375, "logps/rejected": -476.5796813964844, "loss": 0.5671, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3667340278625488, "rewards/margins": 0.718292236328125, "rewards/rejected": -2.085026264190674, "step": 4170 }, { "epoch": 0.55, "learning_rate": 2.526270833757635e-06, "logits/chosen": -1.9323654174804688, "logits/rejected": -1.7108045816421509, "logps/chosen": -397.91290283203125, "logps/rejected": -448.17999267578125, "loss": 0.5709, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.2763726711273193, "rewards/margins": 0.6827574968338013, "rewards/rejected": -1.9591302871704102, "step": 4180 }, { "epoch": 0.55, "learning_rate": 2.514848918106971e-06, "logits/chosen": -1.7983453273773193, "logits/rejected": -1.5887773036956787, "logps/chosen": -421.1139221191406, "logps/rejected": -466.9378356933594, "loss": 0.5187, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.395037293434143, "rewards/margins": 0.8693952560424805, "rewards/rejected": -2.264432430267334, "step": 4190 }, { "epoch": 0.55, "learning_rate": 2.503426692484594e-06, "logits/chosen": -1.888593316078186, "logits/rejected": -1.8359572887420654, "logps/chosen": -388.5292053222656, "logps/rejected": -456.5140075683594, "loss": 0.5648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2017731666564941, "rewards/margins": 0.5971595048904419, "rewards/rejected": -1.798932671546936, "step": 4200 }, { "epoch": 0.55, "eval_logits/chosen": 0.6439515352249146, "eval_logits/rejected": 0.7062841653823853, "eval_logps/chosen": -416.76263427734375, "eval_logps/rejected": -468.4803771972656, "eval_loss": 0.5569632649421692, "eval_rewards/accuracies": 0.6940000057220459, "eval_rewards/chosen": -1.314698576927185, "eval_rewards/margins": 0.7314084768295288, "eval_rewards/rejected": -2.046107053756714, "eval_runtime": 1173.6503, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 4200 }, { "epoch": 0.55, "learning_rate": 2.492004395329883e-06, "logits/chosen": -1.7668720483779907, "logits/rejected": -1.7909843921661377, "logps/chosen": -388.5613708496094, "logps/rejected": -459.2867736816406, "loss": 0.4986, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2471996545791626, "rewards/margins": 0.8756244778633118, "rewards/rejected": -2.122824192047119, "step": 4210 }, { "epoch": 0.55, "learning_rate": 2.4805822650837165e-06, "logits/chosen": -1.641169786453247, "logits/rejected": -1.5679924488067627, "logps/chosen": -380.5749206542969, "logps/rejected": -509.94647216796875, "loss": 0.4449, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2884600162506104, "rewards/margins": 1.1968393325805664, "rewards/rejected": -2.485299587249756, "step": 4220 }, { "epoch": 0.55, "learning_rate": 2.4691605401834843e-06, "logits/chosen": -1.9538590908050537, "logits/rejected": -1.8063548803329468, "logps/chosen": -446.77532958984375, "logps/rejected": -504.0521545410156, "loss": 0.5588, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4774322509765625, "rewards/margins": 0.6580866575241089, "rewards/rejected": -2.135518789291382, "step": 4230 }, { "epoch": 0.55, "learning_rate": 2.457739459058117e-06, "logits/chosen": -1.9371347427368164, "logits/rejected": -1.8718658685684204, "logps/chosen": -493.62213134765625, "logps/rejected": -517.4634399414062, "loss": 0.5261, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.528563141822815, "rewards/margins": 0.7139015793800354, "rewards/rejected": -2.242464780807495, "step": 4240 }, { "epoch": 0.56, "learning_rate": 2.4463192601231054e-06, "logits/chosen": -1.7504394054412842, "logits/rejected": -1.6634776592254639, "logps/chosen": -490.6211853027344, "logps/rejected": -508.7572326660156, "loss": 0.5518, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7602230310440063, "rewards/margins": 0.8414942026138306, "rewards/rejected": -2.601717233657837, "step": 4250 }, { "epoch": 0.56, "learning_rate": 2.434900181775524e-06, "logits/chosen": -1.7957899570465088, "logits/rejected": -1.7329041957855225, "logps/chosen": -449.1871032714844, "logps/rejected": -517.341552734375, "loss": 0.5279, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.644603967666626, "rewards/margins": 0.8088521957397461, "rewards/rejected": -2.453456163406372, "step": 4260 }, { "epoch": 0.56, "learning_rate": 2.4234824623890578e-06, "logits/chosen": -1.8632961511611938, "logits/rejected": -1.733759880065918, "logps/chosen": -439.6903381347656, "logps/rejected": -491.3687438964844, "loss": 0.5422, "rewards/accuracies": 0.75, "rewards/chosen": -1.599808692932129, "rewards/margins": 0.7229506969451904, "rewards/rejected": -2.3227593898773193, "step": 4270 }, { "epoch": 0.56, "learning_rate": 2.4120663403090193e-06, "logits/chosen": -1.8758010864257812, "logits/rejected": -1.781036138534546, "logps/chosen": -454.6363220214844, "logps/rejected": -528.6107177734375, "loss": 0.618, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.645216941833496, "rewards/margins": 0.6545249223709106, "rewards/rejected": -2.299741744995117, "step": 4280 }, { "epoch": 0.56, "learning_rate": 2.40065205384738e-06, "logits/chosen": -1.780618667602539, "logits/rejected": -1.578452467918396, "logps/chosen": -435.1331481933594, "logps/rejected": -444.5065002441406, "loss": 0.641, "rewards/accuracies": 0.625, "rewards/chosen": -1.7645103931427002, "rewards/margins": 0.4341967701911926, "rewards/rejected": -2.198707103729248, "step": 4290 }, { "epoch": 0.56, "learning_rate": 2.389239841277793e-06, "logits/chosen": -1.6420913934707642, "logits/rejected": -1.584995985031128, "logps/chosen": -413.6902770996094, "logps/rejected": -455.03314208984375, "loss": 0.5237, "rewards/accuracies": 0.75, "rewards/chosen": -1.5267785787582397, "rewards/margins": 0.7052222490310669, "rewards/rejected": -2.2320008277893066, "step": 4300 }, { "epoch": 0.56, "eval_logits/chosen": 0.828182578086853, "eval_logits/rejected": 0.8568943738937378, "eval_logps/chosen": -435.56292724609375, "eval_logps/rejected": -484.738525390625, "eval_loss": 0.5515031814575195, "eval_rewards/accuracies": 0.703000009059906, "eval_rewards/chosen": -1.502702236175537, "eval_rewards/margins": 0.7059863805770874, "eval_rewards/rejected": -2.208688735961914, "eval_runtime": 1172.7955, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 4300 }, { "epoch": 0.56, "learning_rate": 2.3778299408306167e-06, "logits/chosen": -1.7654708623886108, "logits/rejected": -1.5771191120147705, "logps/chosen": -421.884033203125, "logps/rejected": -471.3894958496094, "loss": 0.5476, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5702762603759766, "rewards/margins": 0.6879106163978577, "rewards/rejected": -2.2581868171691895, "step": 4310 }, { "epoch": 0.57, "learning_rate": 2.3664225906879452e-06, "logits/chosen": -1.813707709312439, "logits/rejected": -1.6927099227905273, "logps/chosen": -418.1673889160156, "logps/rejected": -447.35015869140625, "loss": 0.6011, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5919063091278076, "rewards/margins": 0.5547482967376709, "rewards/rejected": -2.1466546058654785, "step": 4320 }, { "epoch": 0.57, "learning_rate": 2.3550180289786357e-06, "logits/chosen": -1.7970657348632812, "logits/rejected": -1.6429212093353271, "logps/chosen": -430.11041259765625, "logps/rejected": -457.9832458496094, "loss": 0.5378, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5009050369262695, "rewards/margins": 0.713435173034668, "rewards/rejected": -2.2143399715423584, "step": 4330 }, { "epoch": 0.57, "learning_rate": 2.343616493773335e-06, "logits/chosen": -1.906446099281311, "logits/rejected": -1.7467994689941406, "logps/chosen": -435.53692626953125, "logps/rejected": -507.5245056152344, "loss": 0.5075, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5532820224761963, "rewards/margins": 0.7375933527946472, "rewards/rejected": -2.2908754348754883, "step": 4340 }, { "epoch": 0.57, "learning_rate": 2.3322182230795127e-06, "logits/chosen": -1.7325433492660522, "logits/rejected": -1.7731993198394775, "logps/chosen": -391.47540283203125, "logps/rejected": -505.1651916503906, "loss": 0.5042, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4061033725738525, "rewards/margins": 0.892315685749054, "rewards/rejected": -2.29841947555542, "step": 4350 }, { "epoch": 0.57, "learning_rate": 2.320823454836491e-06, "logits/chosen": -2.009273052215576, "logits/rejected": -1.7471644878387451, "logps/chosen": -407.6497497558594, "logps/rejected": -466.71051025390625, "loss": 0.459, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3678957223892212, "rewards/margins": 0.7732141613960266, "rewards/rejected": -2.1411099433898926, "step": 4360 }, { "epoch": 0.57, "learning_rate": 2.309432426910478e-06, "logits/chosen": -1.7197542190551758, "logits/rejected": -1.5388801097869873, "logps/chosen": -463.50555419921875, "logps/rejected": -478.2345275878906, "loss": 0.5387, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5038155317306519, "rewards/margins": 0.7517666220664978, "rewards/rejected": -2.255582094192505, "step": 4370 }, { "epoch": 0.57, "learning_rate": 2.298045377089604e-06, "logits/chosen": -1.7552915811538696, "logits/rejected": -1.6244428157806396, "logps/chosen": -422.2392578125, "logps/rejected": -498.9564514160156, "loss": 0.4528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5110352039337158, "rewards/margins": 0.991446852684021, "rewards/rejected": -2.5024819374084473, "step": 4380 }, { "epoch": 0.57, "learning_rate": 2.286662543078955e-06, "logits/chosen": -1.5256160497665405, "logits/rejected": -1.3968003988265991, "logps/chosen": -461.56817626953125, "logps/rejected": -494.91461181640625, "loss": 0.5027, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.660752534866333, "rewards/margins": 0.7574098110198975, "rewards/rejected": -2.4181621074676514, "step": 4390 }, { "epoch": 0.58, "learning_rate": 2.2752841624956125e-06, "logits/chosen": -1.7991011142730713, "logits/rejected": -1.7218538522720337, "logps/chosen": -502.18878173828125, "logps/rejected": -554.5040283203125, "loss": 0.5979, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9555965662002563, "rewards/margins": 0.7310833930969238, "rewards/rejected": -2.6866796016693115, "step": 4400 }, { "epoch": 0.58, "eval_logits/chosen": 0.9060326218605042, "eval_logits/rejected": 0.9415406584739685, "eval_logps/chosen": -455.10614013671875, "eval_logps/rejected": -511.879638671875, "eval_loss": 0.5594107508659363, "eval_rewards/accuracies": 0.7039999961853027, "eval_rewards/chosen": -1.698134183883667, "eval_rewards/margins": 0.7819651365280151, "eval_rewards/rejected": -2.4800994396209717, "eval_runtime": 1172.9465, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 4400 }, { "epoch": 0.58, "learning_rate": 2.2639104728636915e-06, "logits/chosen": -1.6456069946289062, "logits/rejected": -1.6536614894866943, "logps/chosen": -434.7286682128906, "logps/rejected": -494.59918212890625, "loss": 0.6201, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.5495412349700928, "rewards/margins": 0.6422635912895203, "rewards/rejected": -2.191804885864258, "step": 4410 }, { "epoch": 0.58, "learning_rate": 2.252541711609384e-06, "logits/chosen": -1.7286525964736938, "logits/rejected": -1.5054218769073486, "logps/chosen": -430.8987731933594, "logps/rejected": -480.05218505859375, "loss": 0.5374, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5618770122528076, "rewards/margins": 0.811420738697052, "rewards/rejected": -2.373297929763794, "step": 4420 }, { "epoch": 0.58, "learning_rate": 2.241178116056002e-06, "logits/chosen": -1.7933905124664307, "logits/rejected": -1.6752099990844727, "logps/chosen": -420.69537353515625, "logps/rejected": -474.57073974609375, "loss": 0.5055, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4889934062957764, "rewards/margins": 0.815077006816864, "rewards/rejected": -2.304070234298706, "step": 4430 }, { "epoch": 0.58, "learning_rate": 2.2298199234190236e-06, "logits/chosen": -1.6978040933609009, "logits/rejected": -1.7049938440322876, "logps/chosen": -447.2154235839844, "logps/rejected": -499.2445373535156, "loss": 0.4904, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4197678565979004, "rewards/margins": 0.8962188959121704, "rewards/rejected": -2.3159868717193604, "step": 4440 }, { "epoch": 0.58, "learning_rate": 2.218467370801138e-06, "logits/chosen": -1.8251584768295288, "logits/rejected": -1.7593870162963867, "logps/chosen": -444.00738525390625, "logps/rejected": -476.70355224609375, "loss": 0.6049, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.6047332286834717, "rewards/margins": 0.632063090801239, "rewards/rejected": -2.2367963790893555, "step": 4450 }, { "epoch": 0.58, "learning_rate": 2.207120695187304e-06, "logits/chosen": -1.642364501953125, "logits/rejected": -1.4046622514724731, "logps/chosen": -451.1121520996094, "logps/rejected": -507.24920654296875, "loss": 0.4812, "rewards/accuracies": 0.8125, "rewards/chosen": -1.538770079612732, "rewards/margins": 0.9383605122566223, "rewards/rejected": -2.47713041305542, "step": 4460 }, { "epoch": 0.58, "learning_rate": 2.195780133439794e-06, "logits/chosen": -1.7654697895050049, "logits/rejected": -1.7827869653701782, "logps/chosen": -442.58209228515625, "logps/rejected": -508.88427734375, "loss": 0.6248, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4976444244384766, "rewards/margins": 0.6089947819709778, "rewards/rejected": -2.1066391468048096, "step": 4470 }, { "epoch": 0.59, "learning_rate": 2.1844459222932535e-06, "logits/chosen": -1.780927300453186, "logits/rejected": -1.665780782699585, "logps/chosen": -436.9989318847656, "logps/rejected": -478.2315979003906, "loss": 0.5156, "rewards/accuracies": 0.75, "rewards/chosen": -1.4333505630493164, "rewards/margins": 0.753585934638977, "rewards/rejected": -2.186936855316162, "step": 4480 }, { "epoch": 0.59, "learning_rate": 2.17311829834976e-06, "logits/chosen": -1.9392468929290771, "logits/rejected": -1.8275117874145508, "logps/chosen": -409.9462890625, "logps/rejected": -481.535888671875, "loss": 0.5013, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3325854539871216, "rewards/margins": 0.7732506394386292, "rewards/rejected": -2.1058361530303955, "step": 4490 }, { "epoch": 0.59, "learning_rate": 2.1617974980738814e-06, "logits/chosen": -1.8007287979125977, "logits/rejected": -1.6861215829849243, "logps/chosen": -410.5309143066406, "logps/rejected": -460.1180114746094, "loss": 0.4859, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4100977182388306, "rewards/margins": 0.8361636400222778, "rewards/rejected": -2.2462613582611084, "step": 4500 }, { "epoch": 0.59, "eval_logits/chosen": 0.9057154655456543, "eval_logits/rejected": 0.9398696422576904, "eval_logps/chosen": -444.394775390625, "eval_logps/rejected": -499.04150390625, "eval_loss": 0.5529686808586121, "eval_rewards/accuracies": 0.7080000042915344, "eval_rewards/chosen": -1.5910205841064453, "eval_rewards/margins": 0.7606974840164185, "eval_rewards/rejected": -2.3517181873321533, "eval_runtime": 1173.0292, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 4500 }, { "epoch": 0.59, "learning_rate": 2.150483757787744e-06, "logits/chosen": -1.8965717554092407, "logits/rejected": -1.667515754699707, "logps/chosen": -432.5536193847656, "logps/rejected": -446.9703063964844, "loss": 0.5839, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.6405025720596313, "rewards/margins": 0.6483727693557739, "rewards/rejected": -2.2888753414154053, "step": 4510 }, { "epoch": 0.59, "learning_rate": 2.139177313666093e-06, "logits/chosen": -1.6948086023330688, "logits/rejected": -1.6933705806732178, "logps/chosen": -480.1251525878906, "logps/rejected": -491.8417053222656, "loss": 0.599, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5966379642486572, "rewards/margins": 0.6552848815917969, "rewards/rejected": -2.251922845840454, "step": 4520 }, { "epoch": 0.59, "learning_rate": 2.1278784017313688e-06, "logits/chosen": -1.7972462177276611, "logits/rejected": -1.895336389541626, "logps/chosen": -454.59442138671875, "logps/rejected": -512.6532592773438, "loss": 0.5886, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5067318677902222, "rewards/margins": 0.5520287752151489, "rewards/rejected": -2.05876088142395, "step": 4530 }, { "epoch": 0.59, "learning_rate": 2.116587257848776e-06, "logits/chosen": -1.7740548849105835, "logits/rejected": -1.7964674234390259, "logps/chosen": -412.32763671875, "logps/rejected": -487.65216064453125, "loss": 0.6279, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5109002590179443, "rewards/margins": 0.5173921585083008, "rewards/rejected": -2.028292179107666, "step": 4540 }, { "epoch": 0.6, "learning_rate": 2.105304117721361e-06, "logits/chosen": -1.6072734594345093, "logits/rejected": -1.5378026962280273, "logps/chosen": -397.68829345703125, "logps/rejected": -420.8843688964844, "loss": 0.6461, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.6038001775741577, "rewards/margins": 0.5361725091934204, "rewards/rejected": -2.1399729251861572, "step": 4550 }, { "epoch": 0.6, "learning_rate": 2.0940292168850913e-06, "logits/chosen": -1.6167068481445312, "logits/rejected": -1.6595230102539062, "logps/chosen": -426.5391540527344, "logps/rejected": -442.6863708496094, "loss": 0.6535, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5523406267166138, "rewards/margins": 0.45763474702835083, "rewards/rejected": -2.0099751949310303, "step": 4560 }, { "epoch": 0.6, "learning_rate": 2.082762790703939e-06, "logits/chosen": -1.6738531589508057, "logits/rejected": -1.5881974697113037, "logps/chosen": -430.85076904296875, "logps/rejected": -484.5582580566406, "loss": 0.5929, "rewards/accuracies": 0.75, "rewards/chosen": -1.4763389825820923, "rewards/margins": 0.6432314515113831, "rewards/rejected": -2.11957049369812, "step": 4570 }, { "epoch": 0.6, "learning_rate": 2.0715050743649674e-06, "logits/chosen": -1.8061870336532593, "logits/rejected": -1.7034177780151367, "logps/chosen": -397.50482177734375, "logps/rejected": -524.7658081054688, "loss": 0.4877, "rewards/accuracies": 0.75, "rewards/chosen": -1.4619042873382568, "rewards/margins": 0.9087886810302734, "rewards/rejected": -2.370692729949951, "step": 4580 }, { "epoch": 0.6, "learning_rate": 2.060256302873421e-06, "logits/chosen": -1.80402410030365, "logits/rejected": -1.782848596572876, "logps/chosen": -414.2543029785156, "logps/rejected": -496.57159423828125, "loss": 0.5442, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4690145254135132, "rewards/margins": 0.7510448694229126, "rewards/rejected": -2.220059394836426, "step": 4590 }, { "epoch": 0.6, "learning_rate": 2.049016711047822e-06, "logits/chosen": -1.8822914361953735, "logits/rejected": -1.7078053951263428, "logps/chosen": -440.61187744140625, "logps/rejected": -489.31005859375, "loss": 0.5484, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6159025430679321, "rewards/margins": 0.7375297546386719, "rewards/rejected": -2.3534321784973145, "step": 4600 }, { "epoch": 0.6, "eval_logits/chosen": 0.8267521858215332, "eval_logits/rejected": 0.8710527420043945, "eval_logps/chosen": -436.8822326660156, "eval_logps/rejected": -488.2594909667969, "eval_loss": 0.5524637699127197, "eval_rewards/accuracies": 0.7055000066757202, "eval_rewards/chosen": -1.515894889831543, "eval_rewards/margins": 0.7280031442642212, "eval_rewards/rejected": -2.2438981533050537, "eval_runtime": 1173.1276, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 4600 }, { "epoch": 0.6, "learning_rate": 2.037786533515064e-06, "logits/chosen": -1.8766225576400757, "logits/rejected": -1.850306749343872, "logps/chosen": -479.96905517578125, "logps/rejected": -508.6878967285156, "loss": 0.5693, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6044374704360962, "rewards/margins": 0.5724669694900513, "rewards/rejected": -2.1769044399261475, "step": 4610 }, { "epoch": 0.6, "learning_rate": 2.02656600470552e-06, "logits/chosen": -1.845931053161621, "logits/rejected": -1.7757551670074463, "logps/chosen": -415.3828125, "logps/rejected": -472.7496643066406, "loss": 0.5342, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.394207239151001, "rewards/margins": 0.7796455025672913, "rewards/rejected": -2.1738526821136475, "step": 4620 }, { "epoch": 0.61, "learning_rate": 2.015355358848144e-06, "logits/chosen": -1.6293227672576904, "logits/rejected": -1.7113008499145508, "logps/chosen": -379.7857971191406, "logps/rejected": -464.6165466308594, "loss": 0.5687, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4572124481201172, "rewards/margins": 0.6002478003501892, "rewards/rejected": -2.057460308074951, "step": 4630 }, { "epoch": 0.61, "learning_rate": 2.004154829965582e-06, "logits/chosen": -1.8674468994140625, "logits/rejected": -1.8090667724609375, "logps/chosen": -425.97772216796875, "logps/rejected": -482.00677490234375, "loss": 0.5024, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3103511333465576, "rewards/margins": 0.7306745648384094, "rewards/rejected": -2.0410256385803223, "step": 4640 }, { "epoch": 0.61, "learning_rate": 1.99296465186929e-06, "logits/chosen": -1.892324686050415, "logits/rejected": -1.7118568420410156, "logps/chosen": -428.433837890625, "logps/rejected": -429.88726806640625, "loss": 0.5307, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3096859455108643, "rewards/margins": 0.6370649337768555, "rewards/rejected": -1.9467506408691406, "step": 4650 }, { "epoch": 0.61, "learning_rate": 1.9817850581546488e-06, "logits/chosen": -1.7857517004013062, "logits/rejected": -1.7456003427505493, "logps/chosen": -450.404296875, "logps/rejected": -521.0674438476562, "loss": 0.612, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5571680068969727, "rewards/margins": 0.6643776893615723, "rewards/rejected": -2.221545696258545, "step": 4660 }, { "epoch": 0.61, "learning_rate": 1.970616282196091e-06, "logits/chosen": -1.8562015295028687, "logits/rejected": -1.7407516241073608, "logps/chosen": -412.98291015625, "logps/rejected": -477.928466796875, "loss": 0.5602, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4430997371673584, "rewards/margins": 0.7090359926223755, "rewards/rejected": -2.1521358489990234, "step": 4670 }, { "epoch": 0.61, "learning_rate": 1.959458557142228e-06, "logits/chosen": -1.817831039428711, "logits/rejected": -1.7628173828125, "logps/chosen": -423.5113220214844, "logps/rejected": -473.5160217285156, "loss": 0.6716, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.5431184768676758, "rewards/margins": 0.3869319558143616, "rewards/rejected": -1.9300504922866821, "step": 4680 }, { "epoch": 0.61, "learning_rate": 1.948312115910982e-06, "logits/chosen": -1.7673568725585938, "logits/rejected": -1.725992202758789, "logps/chosen": -449.0369567871094, "logps/rejected": -479.7022399902344, "loss": 0.5836, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4680416584014893, "rewards/margins": 0.7295928001403809, "rewards/rejected": -2.197634696960449, "step": 4690 }, { "epoch": 0.62, "learning_rate": 1.937177191184729e-06, "logits/chosen": -1.7792892456054688, "logits/rejected": -1.7978578805923462, "logps/chosen": -383.3244934082031, "logps/rejected": -434.3316345214844, "loss": 0.6135, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2648676633834839, "rewards/margins": 0.47942668199539185, "rewards/rejected": -1.74429452419281, "step": 4700 }, { "epoch": 0.62, "eval_logits/chosen": 0.722199022769928, "eval_logits/rejected": 0.773621141910553, "eval_logps/chosen": -417.8462219238281, "eval_logps/rejected": -466.32476806640625, "eval_loss": 0.5504409670829773, "eval_rewards/accuracies": 0.7064999938011169, "eval_rewards/chosen": -1.3255350589752197, "eval_rewards/margins": 0.6990163922309875, "eval_rewards/rejected": -2.0245513916015625, "eval_runtime": 1173.0581, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 4700 }, { "epoch": 0.62, "learning_rate": 1.9260540154054317e-06, "logits/chosen": -1.8403739929199219, "logits/rejected": -1.6461031436920166, "logps/chosen": -374.638427734375, "logps/rejected": -467.19354248046875, "loss": 0.4626, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.187178611755371, "rewards/margins": 0.9907558560371399, "rewards/rejected": -2.177934408187866, "step": 4710 }, { "epoch": 0.62, "learning_rate": 1.9149428207697983e-06, "logits/chosen": -1.8509531021118164, "logits/rejected": -1.8378045558929443, "logps/chosen": -416.5984802246094, "logps/rejected": -460.538818359375, "loss": 0.6526, "rewards/accuracies": 0.625, "rewards/chosen": -1.379197597503662, "rewards/margins": 0.4792349338531494, "rewards/rejected": -1.858432412147522, "step": 4720 }, { "epoch": 0.62, "learning_rate": 1.9038438392244262e-06, "logits/chosen": -1.901710867881775, "logits/rejected": -1.9277139902114868, "logps/chosen": -416.91595458984375, "logps/rejected": -461.445556640625, "loss": 0.4871, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1508758068084717, "rewards/margins": 0.7535255551338196, "rewards/rejected": -1.904401183128357, "step": 4730 }, { "epoch": 0.62, "learning_rate": 1.8927573024609666e-06, "logits/chosen": -1.69242262840271, "logits/rejected": -1.5772535800933838, "logps/chosen": -376.327392578125, "logps/rejected": -435.93878173828125, "loss": 0.5541, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3499438762664795, "rewards/margins": 0.7300174832344055, "rewards/rejected": -2.0799612998962402, "step": 4740 }, { "epoch": 0.62, "learning_rate": 1.8816834419112845e-06, "logits/chosen": -1.782232642173767, "logits/rejected": -1.657865285873413, "logps/chosen": -399.690673828125, "logps/rejected": -431.1106872558594, "loss": 0.5689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.357898473739624, "rewards/margins": 0.7007596492767334, "rewards/rejected": -2.0586581230163574, "step": 4750 }, { "epoch": 0.62, "learning_rate": 1.8706224887426283e-06, "logits/chosen": -1.742174744606018, "logits/rejected": -1.7735731601715088, "logps/chosen": -439.009765625, "logps/rejected": -501.45965576171875, "loss": 0.5835, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5864620208740234, "rewards/margins": 0.5894501805305481, "rewards/rejected": -2.175912380218506, "step": 4760 }, { "epoch": 0.62, "learning_rate": 1.8595746738528045e-06, "logits/chosen": -1.7558538913726807, "logits/rejected": -1.8131908178329468, "logps/chosen": -402.8466491699219, "logps/rejected": -495.69940185546875, "loss": 0.561, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.370308518409729, "rewards/margins": 0.7301324605941772, "rewards/rejected": -2.1004412174224854, "step": 4770 }, { "epoch": 0.63, "learning_rate": 1.8485402278653584e-06, "logits/chosen": -1.844264030456543, "logits/rejected": -1.8011757135391235, "logps/chosen": -424.69268798828125, "logps/rejected": -461.2425231933594, "loss": 0.5332, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.6498619318008423, "rewards/margins": 0.6133615970611572, "rewards/rejected": -2.263223648071289, "step": 4780 }, { "epoch": 0.63, "learning_rate": 1.8375193811247577e-06, "logits/chosen": -1.711334228515625, "logits/rejected": -1.6143522262573242, "logps/chosen": -424.0199279785156, "logps/rejected": -475.4574279785156, "loss": 0.5249, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5820074081420898, "rewards/margins": 0.7127015590667725, "rewards/rejected": -2.2947089672088623, "step": 4790 }, { "epoch": 0.63, "learning_rate": 1.826512363691586e-06, "logits/chosen": -1.871834397315979, "logits/rejected": -1.849853754043579, "logps/chosen": -444.37725830078125, "logps/rejected": -480.35107421875, "loss": 0.5714, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.5137640237808228, "rewards/margins": 0.6592321991920471, "rewards/rejected": -2.1729962825775146, "step": 4800 }, { "epoch": 0.63, "eval_logits/chosen": 0.8370016813278198, "eval_logits/rejected": 0.8648673295974731, "eval_logps/chosen": -432.65576171875, "eval_logps/rejected": -480.5717468261719, "eval_loss": 0.5500975251197815, "eval_rewards/accuracies": 0.7070000171661377, "eval_rewards/chosen": -1.4736299514770508, "eval_rewards/margins": 0.6933912634849548, "eval_rewards/rejected": -2.1670212745666504, "eval_runtime": 1172.8752, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 4800 }, { "epoch": 0.63, "learning_rate": 1.8155194053377391e-06, "logits/chosen": -1.8831536769866943, "logits/rejected": -1.7245756387710571, "logps/chosen": -405.37158203125, "logps/rejected": -458.77398681640625, "loss": 0.5054, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.287423849105835, "rewards/margins": 0.9036749005317688, "rewards/rejected": -2.191098690032959, "step": 4810 }, { "epoch": 0.63, "learning_rate": 1.80454073554163e-06, "logits/chosen": -1.6334726810455322, "logits/rejected": -1.5998659133911133, "logps/chosen": -380.0008239746094, "logps/rejected": -431.1194763183594, "loss": 0.6071, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3910070657730103, "rewards/margins": 0.6737453937530518, "rewards/rejected": -2.0647525787353516, "step": 4820 }, { "epoch": 0.63, "learning_rate": 1.7935765834833966e-06, "logits/chosen": -1.799440622329712, "logits/rejected": -1.7077531814575195, "logps/chosen": -425.4602966308594, "logps/rejected": -520.7520141601562, "loss": 0.4739, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4049465656280518, "rewards/margins": 0.9730203747749329, "rewards/rejected": -2.37796688079834, "step": 4830 }, { "epoch": 0.63, "learning_rate": 1.7826271780401182e-06, "logits/chosen": -1.6157394647598267, "logits/rejected": -1.4483401775360107, "logps/chosen": -411.940185546875, "logps/rejected": -470.05889892578125, "loss": 0.4797, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.516154170036316, "rewards/margins": 0.7820614576339722, "rewards/rejected": -2.298215866088867, "step": 4840 }, { "epoch": 0.63, "learning_rate": 1.7716927477810389e-06, "logits/chosen": -1.850337266921997, "logits/rejected": -1.8355680704116821, "logps/chosen": -418.2672424316406, "logps/rejected": -512.0823974609375, "loss": 0.5196, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4566137790679932, "rewards/margins": 0.8951760530471802, "rewards/rejected": -2.351789712905884, "step": 4850 }, { "epoch": 0.64, "learning_rate": 1.7607735209627953e-06, "logits/chosen": -1.7742077112197876, "logits/rejected": -1.6215053796768188, "logps/chosen": -431.87786865234375, "logps/rejected": -473.6273498535156, "loss": 0.5181, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.518726110458374, "rewards/margins": 0.7906683683395386, "rewards/rejected": -2.309394359588623, "step": 4860 }, { "epoch": 0.64, "learning_rate": 1.749869725524651e-06, "logits/chosen": -1.8782122135162354, "logits/rejected": -1.7312371730804443, "logps/chosen": -437.98809814453125, "logps/rejected": -499.5009765625, "loss": 0.4952, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4420686960220337, "rewards/margins": 0.960234522819519, "rewards/rejected": -2.4023032188415527, "step": 4870 }, { "epoch": 0.64, "learning_rate": 1.7389815890837392e-06, "logits/chosen": -1.685935378074646, "logits/rejected": -1.7608509063720703, "logps/chosen": -444.6941833496094, "logps/rejected": -559.3511352539062, "loss": 0.4362, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.518878698348999, "rewards/margins": 0.9780572056770325, "rewards/rejected": -2.496936082839966, "step": 4880 }, { "epoch": 0.64, "learning_rate": 1.7281093389303105e-06, "logits/chosen": -1.766892433166504, "logits/rejected": -1.663165807723999, "logps/chosen": -409.701416015625, "logps/rejected": -469.8858337402344, "loss": 0.5157, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4582762718200684, "rewards/margins": 0.8043732643127441, "rewards/rejected": -2.2626495361328125, "step": 4890 }, { "epoch": 0.64, "learning_rate": 1.7172532020229899e-06, "logits/chosen": -1.9181607961654663, "logits/rejected": -1.8041608333587646, "logps/chosen": -453.35223388671875, "logps/rejected": -496.159912109375, "loss": 0.517, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5475901365280151, "rewards/margins": 0.7943762540817261, "rewards/rejected": -2.341966390609741, "step": 4900 }, { "epoch": 0.64, "eval_logits/chosen": 0.9524018168449402, "eval_logits/rejected": 0.9735285639762878, "eval_logps/chosen": -450.3797302246094, "eval_logps/rejected": -504.55609130859375, "eval_loss": 0.5530635714530945, "eval_rewards/accuracies": 0.7089999914169312, "eval_rewards/chosen": -1.6508703231811523, "eval_rewards/margins": 0.7559937834739685, "eval_rewards/rejected": -2.4068639278411865, "eval_runtime": 1173.3372, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 4900 }, { "epoch": 0.64, "learning_rate": 1.7064134049840359e-06, "logits/chosen": -1.8048479557037354, "logits/rejected": -1.780055046081543, "logps/chosen": -418.9851989746094, "logps/rejected": -504.3468322753906, "loss": 0.4785, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.4821782112121582, "rewards/margins": 0.8749674558639526, "rewards/rejected": -2.3571457862854004, "step": 4910 }, { "epoch": 0.64, "learning_rate": 1.6955901740946136e-06, "logits/chosen": -1.8494422435760498, "logits/rejected": -1.7496616840362549, "logps/chosen": -496.0858459472656, "logps/rejected": -550.9440307617188, "loss": 0.6453, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.9027026891708374, "rewards/margins": 0.5574442148208618, "rewards/rejected": -2.460146903991699, "step": 4920 }, { "epoch": 0.65, "learning_rate": 1.684783735290067e-06, "logits/chosen": -1.8014285564422607, "logits/rejected": -1.6779054403305054, "logps/chosen": -424.20208740234375, "logps/rejected": -518.1012573242188, "loss": 0.4467, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5048842430114746, "rewards/margins": 1.0522735118865967, "rewards/rejected": -2.5571579933166504, "step": 4930 }, { "epoch": 0.65, "learning_rate": 1.6739943141552079e-06, "logits/chosen": -1.820129632949829, "logits/rejected": -1.690853476524353, "logps/chosen": -472.244873046875, "logps/rejected": -495.262451171875, "loss": 0.5876, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.6202694177627563, "rewards/margins": 0.7392504811286926, "rewards/rejected": -2.3595199584960938, "step": 4940 }, { "epoch": 0.65, "learning_rate": 1.663222135919601e-06, "logits/chosen": -1.8577134609222412, "logits/rejected": -1.719242811203003, "logps/chosen": -462.2020568847656, "logps/rejected": -504.34454345703125, "loss": 0.54, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4801579713821411, "rewards/margins": 0.667270839214325, "rewards/rejected": -2.1474289894104004, "step": 4950 }, { "epoch": 0.65, "learning_rate": 1.652467425452865e-06, "logits/chosen": -1.7774289846420288, "logits/rejected": -1.7675281763076782, "logps/chosen": -415.58038330078125, "logps/rejected": -468.835205078125, "loss": 0.5367, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4868974685668945, "rewards/margins": 0.7201194167137146, "rewards/rejected": -2.207017183303833, "step": 4960 }, { "epoch": 0.65, "learning_rate": 1.6417304072599787e-06, "logits/chosen": -1.9124637842178345, "logits/rejected": -1.720577597618103, "logps/chosen": -459.5377502441406, "logps/rejected": -508.71832275390625, "loss": 0.599, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.7493877410888672, "rewards/margins": 0.5594874024391174, "rewards/rejected": -2.30887508392334, "step": 4970 }, { "epoch": 0.65, "learning_rate": 1.6310113054765947e-06, "logits/chosen": -1.8772382736206055, "logits/rejected": -1.7379204034805298, "logps/chosen": -455.2511291503906, "logps/rejected": -504.4996643066406, "loss": 0.5382, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.625997543334961, "rewards/margins": 0.8635755777359009, "rewards/rejected": -2.4895730018615723, "step": 4980 }, { "epoch": 0.65, "learning_rate": 1.6203103438643591e-06, "logits/chosen": -1.847890853881836, "logits/rejected": -1.774196982383728, "logps/chosen": -434.84954833984375, "logps/rejected": -511.7041015625, "loss": 0.5613, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6426740884780884, "rewards/margins": 0.7089305520057678, "rewards/rejected": -2.351604700088501, "step": 4990 }, { "epoch": 0.65, "learning_rate": 1.6096277458062417e-06, "logits/chosen": -1.681212067604065, "logits/rejected": -1.6562614440917969, "logps/chosen": -375.3092041015625, "logps/rejected": -472.8038635253906, "loss": 0.4862, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5255978107452393, "rewards/margins": 0.8586394190788269, "rewards/rejected": -2.38423752784729, "step": 5000 }, { "epoch": 0.65, "eval_logits/chosen": 0.8849155306816101, "eval_logits/rejected": 0.9138307571411133, "eval_logps/chosen": -439.3872985839844, "eval_logps/rejected": -493.1929626464844, "eval_loss": 0.5523954033851624, "eval_rewards/accuracies": 0.7080000042915344, "eval_rewards/chosen": -1.5409460067749023, "eval_rewards/margins": 0.7522870302200317, "eval_rewards/rejected": -2.2932329177856445, "eval_runtime": 1173.3026, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 5000 }, { "epoch": 0.66, "learning_rate": 1.5989637343018705e-06, "logits/chosen": -1.7913602590560913, "logits/rejected": -1.6334741115570068, "logps/chosen": -427.86187744140625, "logps/rejected": -514.5504150390625, "loss": 0.4979, "rewards/accuracies": 0.75, "rewards/chosen": -1.422251582145691, "rewards/margins": 0.8504781723022461, "rewards/rejected": -2.2727296352386475, "step": 5010 }, { "epoch": 0.66, "learning_rate": 1.5883185319628824e-06, "logits/chosen": -1.7208000421524048, "logits/rejected": -1.4415217638015747, "logps/chosen": -453.88446044921875, "logps/rejected": -490.77569580078125, "loss": 0.5046, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5737110376358032, "rewards/margins": 0.7625322341918945, "rewards/rejected": -2.3362433910369873, "step": 5020 }, { "epoch": 0.66, "learning_rate": 1.5776923610082695e-06, "logits/chosen": -1.8103011846542358, "logits/rejected": -1.690213918685913, "logps/chosen": -441.17919921875, "logps/rejected": -509.001708984375, "loss": 0.5442, "rewards/accuracies": 0.75, "rewards/chosen": -1.6508638858795166, "rewards/margins": 0.9134138822555542, "rewards/rejected": -2.5642776489257812, "step": 5030 }, { "epoch": 0.66, "learning_rate": 1.5670854432597433e-06, "logits/chosen": -1.824211835861206, "logits/rejected": -1.8008737564086914, "logps/chosen": -478.08709716796875, "logps/rejected": -482.66168212890625, "loss": 0.5657, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6046216487884521, "rewards/margins": 0.6122376322746277, "rewards/rejected": -2.2168593406677246, "step": 5040 }, { "epoch": 0.66, "learning_rate": 1.556498000137104e-06, "logits/chosen": -1.6245505809783936, "logits/rejected": -1.5970425605773926, "logps/chosen": -395.28411865234375, "logps/rejected": -462.77069091796875, "loss": 0.4882, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.4695818424224854, "rewards/margins": 0.8584505915641785, "rewards/rejected": -2.3280324935913086, "step": 5050 }, { "epoch": 0.66, "learning_rate": 1.5459302526536188e-06, "logits/chosen": -1.8356765508651733, "logits/rejected": -1.7552534341812134, "logps/chosen": -429.896728515625, "logps/rejected": -458.87860107421875, "loss": 0.6114, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4339876174926758, "rewards/margins": 0.5756856799125671, "rewards/rejected": -2.0096733570098877, "step": 5060 }, { "epoch": 0.66, "learning_rate": 1.5353824214114075e-06, "logits/chosen": -1.9251823425292969, "logits/rejected": -1.8641544580459595, "logps/chosen": -418.57318115234375, "logps/rejected": -481.7355041503906, "loss": 0.5363, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.475541353225708, "rewards/margins": 0.7100075483322144, "rewards/rejected": -2.185549020767212, "step": 5070 }, { "epoch": 0.66, "learning_rate": 1.5248547265968373e-06, "logits/chosen": -1.9587377309799194, "logits/rejected": -1.9029967784881592, "logps/chosen": -388.7999572753906, "logps/rejected": -448.08221435546875, "loss": 0.5406, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3019168376922607, "rewards/margins": 0.6987492442131042, "rewards/rejected": -2.0006661415100098, "step": 5080 }, { "epoch": 0.67, "learning_rate": 1.5143473879759265e-06, "logits/chosen": -1.9306983947753906, "logits/rejected": -1.7760818004608154, "logps/chosen": -393.8701171875, "logps/rejected": -435.2232971191406, "loss": 0.5177, "rewards/accuracies": 0.75, "rewards/chosen": -1.366986632347107, "rewards/margins": 0.8715737462043762, "rewards/rejected": -2.238560199737549, "step": 5090 }, { "epoch": 0.67, "learning_rate": 1.5038606248897586e-06, "logits/chosen": -1.8374249935150146, "logits/rejected": -1.8686059713363647, "logps/chosen": -469.5269470214844, "logps/rejected": -503.837890625, "loss": 0.6176, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7035410404205322, "rewards/margins": 0.5446879863739014, "rewards/rejected": -2.2482292652130127, "step": 5100 }, { "epoch": 0.67, "eval_logits/chosen": 0.8442540764808655, "eval_logits/rejected": 0.8784592747688293, "eval_logps/chosen": -432.8858947753906, "eval_logps/rejected": -486.6266174316406, "eval_loss": 0.5519185662269592, "eval_rewards/accuracies": 0.7020000219345093, "eval_rewards/chosen": -1.4759317636489868, "eval_rewards/margins": 0.7516381144523621, "eval_rewards/rejected": -2.227569818496704, "eval_runtime": 1172.979, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 5100 }, { "epoch": 0.67, "learning_rate": 1.4933946562499008e-06, "logits/chosen": -1.7316030263900757, "logits/rejected": -1.6031084060668945, "logps/chosen": -421.27056884765625, "logps/rejected": -470.91259765625, "loss": 0.5496, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4366884231567383, "rewards/margins": 0.8011615872383118, "rewards/rejected": -2.2378499507904053, "step": 5110 }, { "epoch": 0.67, "learning_rate": 1.482949700533835e-06, "logits/chosen": -1.6878440380096436, "logits/rejected": -1.6608251333236694, "logps/chosen": -375.3850402832031, "logps/rejected": -426.45477294921875, "loss": 0.5485, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4140172004699707, "rewards/margins": 0.6440494656562805, "rewards/rejected": -2.0580666065216064, "step": 5120 }, { "epoch": 0.67, "learning_rate": 1.4725259757803983e-06, "logits/chosen": -1.8848049640655518, "logits/rejected": -1.8635708093643188, "logps/chosen": -474.72637939453125, "logps/rejected": -510.2239685058594, "loss": 0.567, "rewards/accuracies": 0.75, "rewards/chosen": -1.468737006187439, "rewards/margins": 0.7705806493759155, "rewards/rejected": -2.2393178939819336, "step": 5130 }, { "epoch": 0.67, "learning_rate": 1.4621236995852314e-06, "logits/chosen": -2.061633586883545, "logits/rejected": -1.883298635482788, "logps/chosen": -415.67401123046875, "logps/rejected": -478.1891174316406, "loss": 0.5078, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.371880054473877, "rewards/margins": 0.8724004030227661, "rewards/rejected": -2.2442805767059326, "step": 5140 }, { "epoch": 0.67, "learning_rate": 1.4517430890962337e-06, "logits/chosen": -1.9740339517593384, "logits/rejected": -1.664778709411621, "logps/chosen": -439.51983642578125, "logps/rejected": -422.0973205566406, "loss": 0.4926, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3358771800994873, "rewards/margins": 0.821506679058075, "rewards/rejected": -2.157384157180786, "step": 5150 }, { "epoch": 0.68, "learning_rate": 1.4413843610090342e-06, "logits/chosen": -1.973301649093628, "logits/rejected": -1.7762012481689453, "logps/chosen": -462.387939453125, "logps/rejected": -499.8759765625, "loss": 0.5439, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.535919427871704, "rewards/margins": 0.7471837997436523, "rewards/rejected": -2.2831029891967773, "step": 5160 }, { "epoch": 0.68, "learning_rate": 1.4310477315624637e-06, "logits/chosen": -1.90434992313385, "logits/rejected": -1.79486083984375, "logps/chosen": -407.06341552734375, "logps/rejected": -462.5298767089844, "loss": 0.659, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5114107131958008, "rewards/margins": 0.5543745756149292, "rewards/rejected": -2.0657854080200195, "step": 5170 }, { "epoch": 0.68, "learning_rate": 1.420733416534045e-06, "logits/chosen": -1.6625850200653076, "logits/rejected": -1.5175690650939941, "logps/chosen": -415.73321533203125, "logps/rejected": -489.0868225097656, "loss": 0.6199, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5352885723114014, "rewards/margins": 0.6777051687240601, "rewards/rejected": -2.212993860244751, "step": 5180 }, { "epoch": 0.68, "learning_rate": 1.410441631235487e-06, "logits/chosen": -1.9261877536773682, "logits/rejected": -1.7891525030136108, "logps/chosen": -431.77911376953125, "logps/rejected": -491.50311279296875, "loss": 0.5214, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3564847707748413, "rewards/margins": 0.7622567415237427, "rewards/rejected": -2.118741512298584, "step": 5190 }, { "epoch": 0.68, "learning_rate": 1.4001725905081868e-06, "logits/chosen": -1.853712797164917, "logits/rejected": -1.645713210105896, "logps/chosen": -394.0192565917969, "logps/rejected": -418.23992919921875, "loss": 0.5514, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.45644211769104, "rewards/margins": 0.6980880498886108, "rewards/rejected": -2.1545300483703613, "step": 5200 }, { "epoch": 0.68, "eval_logits/chosen": 0.7893780469894409, "eval_logits/rejected": 0.8299470543861389, "eval_logps/chosen": -426.1199645996094, "eval_logps/rejected": -477.4418029785156, "eval_loss": 0.5500012040138245, "eval_rewards/accuracies": 0.7024999856948853, "eval_rewards/chosen": -1.4082725048065186, "eval_rewards/margins": 0.7274492979049683, "eval_rewards/rejected": -2.1357219219207764, "eval_runtime": 1172.9082, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 5200 }, { "epoch": 0.68, "learning_rate": 1.3899265087187507e-06, "logits/chosen": -1.8301036357879639, "logits/rejected": -1.7930876016616821, "logps/chosen": -383.07904052734375, "logps/rejected": -428.0769958496094, "loss": 0.5495, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3355047702789307, "rewards/margins": 0.6709381937980652, "rewards/rejected": -2.0064430236816406, "step": 5210 }, { "epoch": 0.68, "learning_rate": 1.3797035997545144e-06, "logits/chosen": -1.9526773691177368, "logits/rejected": -1.8379061222076416, "logps/chosen": -439.8001403808594, "logps/rejected": -469.07537841796875, "loss": 0.5418, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3511309623718262, "rewards/margins": 0.6671268939971924, "rewards/rejected": -2.0182576179504395, "step": 5220 }, { "epoch": 0.68, "learning_rate": 1.3695040770190816e-06, "logits/chosen": -1.8893228769302368, "logits/rejected": -1.8120670318603516, "logps/chosen": -392.29559326171875, "logps/rejected": -449.6136169433594, "loss": 0.5796, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3194262981414795, "rewards/margins": 0.6628329753875732, "rewards/rejected": -1.9822590351104736, "step": 5230 }, { "epoch": 0.69, "learning_rate": 1.3593281534278651e-06, "logits/chosen": -1.8769222497940063, "logits/rejected": -1.8663833141326904, "logps/chosen": -374.20556640625, "logps/rejected": -466.6195373535156, "loss": 0.4697, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2972444295883179, "rewards/margins": 0.8416715860366821, "rewards/rejected": -2.138916015625, "step": 5240 }, { "epoch": 0.69, "learning_rate": 1.3491760414036478e-06, "logits/chosen": -1.8334850072860718, "logits/rejected": -1.6556533575057983, "logps/chosen": -448.02227783203125, "logps/rejected": -452.9075622558594, "loss": 0.6063, "rewards/accuracies": 0.625, "rewards/chosen": -1.4142099618911743, "rewards/margins": 0.57100909948349, "rewards/rejected": -1.9852192401885986, "step": 5250 }, { "epoch": 0.69, "learning_rate": 1.3390479528721444e-06, "logits/chosen": -1.7845255136489868, "logits/rejected": -1.7285311222076416, "logps/chosen": -428.2275390625, "logps/rejected": -496.64190673828125, "loss": 0.5979, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5454214811325073, "rewards/margins": 0.6397279500961304, "rewards/rejected": -2.1851494312286377, "step": 5260 }, { "epoch": 0.69, "learning_rate": 1.3289440992575756e-06, "logits/chosen": -1.957970380783081, "logits/rejected": -1.8876816034317017, "logps/chosen": -428.3063049316406, "logps/rejected": -476.18719482421875, "loss": 0.4958, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2013753652572632, "rewards/margins": 0.7330563068389893, "rewards/rejected": -1.9344314336776733, "step": 5270 }, { "epoch": 0.69, "learning_rate": 1.3188646914782616e-06, "logits/chosen": -2.035910129547119, "logits/rejected": -1.855177640914917, "logps/chosen": -481.1543884277344, "logps/rejected": -454.63922119140625, "loss": 0.5401, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.420890212059021, "rewards/margins": 0.6797800064086914, "rewards/rejected": -2.100670337677002, "step": 5280 }, { "epoch": 0.69, "learning_rate": 1.3088099399422109e-06, "logits/chosen": -1.9418426752090454, "logits/rejected": -1.8563458919525146, "logps/chosen": -439.60595703125, "logps/rejected": -491.9336853027344, "loss": 0.5342, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.406085729598999, "rewards/margins": 0.7221137285232544, "rewards/rejected": -2.128199338912964, "step": 5290 }, { "epoch": 0.69, "learning_rate": 1.2987800545427353e-06, "logits/chosen": -1.963463544845581, "logits/rejected": -1.832273244857788, "logps/chosen": -422.38409423828125, "logps/rejected": -463.7245178222656, "loss": 0.5166, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.283937692642212, "rewards/margins": 0.7820326089859009, "rewards/rejected": -2.0659701824188232, "step": 5300 }, { "epoch": 0.69, "eval_logits/chosen": 0.8065236806869507, "eval_logits/rejected": 0.844149649143219, "eval_logps/chosen": -426.8324279785156, "eval_logps/rejected": -478.9722595214844, "eval_loss": 0.5508423447608948, "eval_rewards/accuracies": 0.7039999961853027, "eval_rewards/chosen": -1.4153975248336792, "eval_rewards/margins": 0.7356284260749817, "eval_rewards/rejected": -2.1510257720947266, "eval_runtime": 1173.231, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 5300 }, { "epoch": 0.69, "learning_rate": 1.288775244654062e-06, "logits/chosen": -1.929620385169983, "logits/rejected": -1.8457822799682617, "logps/chosen": -474.7225646972656, "logps/rejected": -492.4024963378906, "loss": 0.5901, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4866751432418823, "rewards/margins": 0.6385191679000854, "rewards/rejected": -2.1251943111419678, "step": 5310 }, { "epoch": 0.7, "learning_rate": 1.2787957191269696e-06, "logits/chosen": -1.7664387226104736, "logits/rejected": -1.6823327541351318, "logps/chosen": -432.7451171875, "logps/rejected": -496.489013671875, "loss": 0.6383, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.5256898403167725, "rewards/margins": 0.481570303440094, "rewards/rejected": -2.0072600841522217, "step": 5320 }, { "epoch": 0.7, "learning_rate": 1.2688416862844193e-06, "logits/chosen": -1.6892430782318115, "logits/rejected": -1.6865297555923462, "logps/chosen": -378.4471130371094, "logps/rejected": -495.246826171875, "loss": 0.4466, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.293825387954712, "rewards/margins": 1.037535309791565, "rewards/rejected": -2.3313608169555664, "step": 5330 }, { "epoch": 0.7, "learning_rate": 1.2589133539172193e-06, "logits/chosen": -2.0279300212860107, "logits/rejected": -1.9393310546875, "logps/chosen": -454.46893310546875, "logps/rejected": -485.9325256347656, "loss": 0.4776, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2705265283584595, "rewards/margins": 0.8261939883232117, "rewards/rejected": -2.0967202186584473, "step": 5340 }, { "epoch": 0.7, "learning_rate": 1.249010929279672e-06, "logits/chosen": -2.0560059547424316, "logits/rejected": -1.9522721767425537, "logps/chosen": -452.753662109375, "logps/rejected": -501.004638671875, "loss": 0.5789, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.441618800163269, "rewards/margins": 0.6563819646835327, "rewards/rejected": -2.098001003265381, "step": 5350 }, { "epoch": 0.7, "learning_rate": 1.2391346190852603e-06, "logits/chosen": -2.059138536453247, "logits/rejected": -1.8753607273101807, "logps/chosen": -434.37322998046875, "logps/rejected": -465.62017822265625, "loss": 0.6552, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5591915845870972, "rewards/margins": 0.5033558011054993, "rewards/rejected": -2.062547206878662, "step": 5360 }, { "epoch": 0.7, "learning_rate": 1.2292846295023222e-06, "logits/chosen": -1.9328066110610962, "logits/rejected": -1.88934326171875, "logps/chosen": -456.8828125, "logps/rejected": -491.4498596191406, "loss": 0.6035, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.49178147315979, "rewards/margins": 0.5151655077934265, "rewards/rejected": -2.0069470405578613, "step": 5370 }, { "epoch": 0.7, "learning_rate": 1.2194611661497576e-06, "logits/chosen": -1.8160051107406616, "logits/rejected": -1.7239576578140259, "logps/chosen": -415.52423095703125, "logps/rejected": -475.9527893066406, "loss": 0.5137, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3479664325714111, "rewards/margins": 0.7646690011024475, "rewards/rejected": -2.112635374069214, "step": 5380 }, { "epoch": 0.71, "learning_rate": 1.2096644340927247e-06, "logits/chosen": -1.9325023889541626, "logits/rejected": -1.795840859413147, "logps/chosen": -447.41448974609375, "logps/rejected": -493.99786376953125, "loss": 0.5656, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4784553050994873, "rewards/margins": 0.6328384280204773, "rewards/rejected": -2.1112937927246094, "step": 5390 }, { "epoch": 0.71, "learning_rate": 1.19989463783837e-06, "logits/chosen": -2.0235092639923096, "logits/rejected": -1.8829200267791748, "logps/chosen": -442.604736328125, "logps/rejected": -510.56304931640625, "loss": 0.4918, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.288726568222046, "rewards/margins": 0.842505931854248, "rewards/rejected": -2.131232500076294, "step": 5400 }, { "epoch": 0.71, "eval_logits/chosen": 0.7905195355415344, "eval_logits/rejected": 0.8312855958938599, "eval_logps/chosen": -426.2182922363281, "eval_logps/rejected": -476.76666259765625, "eval_loss": 0.54958176612854, "eval_rewards/accuracies": 0.7089999914169312, "eval_rewards/chosen": -1.4092553853988647, "eval_rewards/margins": 0.719714343547821, "eval_rewards/rejected": -2.128969669342041, "eval_runtime": 1172.6248, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.853, "step": 5400 }, { "epoch": 0.71, "learning_rate": 1.1901519813315495e-06, "logits/chosen": -1.752189040184021, "logits/rejected": -1.7029765844345093, "logps/chosen": -406.2033996582031, "logps/rejected": -462.9178161621094, "loss": 0.493, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.382287859916687, "rewards/margins": 0.7878392338752747, "rewards/rejected": -2.1701271533966064, "step": 5410 }, { "epoch": 0.71, "learning_rate": 1.1804366679505798e-06, "logits/chosen": -1.8241612911224365, "logits/rejected": -1.653684377670288, "logps/chosen": -465.00390625, "logps/rejected": -486.6300354003906, "loss": 0.5537, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5488064289093018, "rewards/margins": 0.7547654509544373, "rewards/rejected": -2.3035717010498047, "step": 5420 }, { "epoch": 0.71, "learning_rate": 1.1707489005029877e-06, "logits/chosen": -1.8216431140899658, "logits/rejected": -1.764282464981079, "logps/chosen": -425.51739501953125, "logps/rejected": -489.0801696777344, "loss": 0.5062, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4804413318634033, "rewards/margins": 0.888811469078064, "rewards/rejected": -2.3692526817321777, "step": 5430 }, { "epoch": 0.71, "learning_rate": 1.1610888812212749e-06, "logits/chosen": -1.7654953002929688, "logits/rejected": -1.655609130859375, "logps/chosen": -429.49456787109375, "logps/rejected": -489.21337890625, "loss": 0.51, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4289910793304443, "rewards/margins": 0.8090342283248901, "rewards/rejected": -2.238025188446045, "step": 5440 }, { "epoch": 0.71, "learning_rate": 1.1514568117587035e-06, "logits/chosen": -1.829450011253357, "logits/rejected": -1.9704999923706055, "logps/chosen": -452.014892578125, "logps/rejected": -474.6261291503906, "loss": 0.6124, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6344467401504517, "rewards/margins": 0.4384728968143463, "rewards/rejected": -2.0729196071624756, "step": 5450 }, { "epoch": 0.71, "learning_rate": 1.1418528931850781e-06, "logits/chosen": -1.9476792812347412, "logits/rejected": -1.781757116317749, "logps/chosen": -425.89154052734375, "logps/rejected": -459.1473693847656, "loss": 0.5068, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3089628219604492, "rewards/margins": 0.851481556892395, "rewards/rejected": -2.1604442596435547, "step": 5460 }, { "epoch": 0.72, "learning_rate": 1.1322773259825563e-06, "logits/chosen": -1.9182113409042358, "logits/rejected": -1.6885206699371338, "logps/chosen": -441.8060607910156, "logps/rejected": -442.9778747558594, "loss": 0.5254, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4591407775878906, "rewards/margins": 0.6979411840438843, "rewards/rejected": -2.1570820808410645, "step": 5470 }, { "epoch": 0.72, "learning_rate": 1.1227303100414552e-06, "logits/chosen": -1.796644926071167, "logits/rejected": -1.7756593227386475, "logps/chosen": -386.8682556152344, "logps/rejected": -492.8446350097656, "loss": 0.4757, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.478819489479065, "rewards/margins": 0.8811575174331665, "rewards/rejected": -2.3599770069122314, "step": 5480 }, { "epoch": 0.72, "learning_rate": 1.113212044656087e-06, "logits/chosen": -1.7989683151245117, "logits/rejected": -1.7970603704452515, "logps/chosen": -397.41998291015625, "logps/rejected": -476.31024169921875, "loss": 0.5683, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4304078817367554, "rewards/margins": 0.6791922450065613, "rewards/rejected": -2.109600067138672, "step": 5490 }, { "epoch": 0.72, "learning_rate": 1.1037227285205951e-06, "logits/chosen": -1.678154706954956, "logits/rejected": -1.7668046951293945, "logps/chosen": -427.35101318359375, "logps/rejected": -486.7571716308594, "loss": 0.596, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.5133135318756104, "rewards/margins": 0.5993794202804565, "rewards/rejected": -2.1126928329467773, "step": 5500 }, { "epoch": 0.72, "eval_logits/chosen": 0.8238762617111206, "eval_logits/rejected": 0.8632076978683472, "eval_logps/chosen": -434.1884765625, "eval_logps/rejected": -486.0820617675781, "eval_loss": 0.548935055732727, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -1.4889572858810425, "eval_rewards/margins": 0.7331663370132446, "eval_rewards/rejected": -2.222123622894287, "eval_runtime": 1173.5009, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 5500 }, { "epoch": 0.72, "learning_rate": 1.0942625597248028e-06, "logits/chosen": -1.771903395652771, "logits/rejected": -1.5377171039581299, "logps/chosen": -414.85784912109375, "logps/rejected": -461.81634521484375, "loss": 0.4904, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4489622116088867, "rewards/margins": 0.8897550702095032, "rewards/rejected": -2.338717222213745, "step": 5510 }, { "epoch": 0.72, "learning_rate": 1.0848317357500854e-06, "logits/chosen": -1.8069961071014404, "logits/rejected": -1.700169324874878, "logps/chosen": -461.0796813964844, "logps/rejected": -446.8509826660156, "loss": 0.5895, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5638563632965088, "rewards/margins": 0.5624241828918457, "rewards/rejected": -2.1262803077697754, "step": 5520 }, { "epoch": 0.72, "learning_rate": 1.0754304534652404e-06, "logits/chosen": -1.7843172550201416, "logits/rejected": -1.8855609893798828, "logps/chosen": -430.64056396484375, "logps/rejected": -515.0929565429688, "loss": 0.5999, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5967886447906494, "rewards/margins": 0.5139278173446655, "rewards/rejected": -2.1107165813446045, "step": 5530 }, { "epoch": 0.72, "learning_rate": 1.0660589091223854e-06, "logits/chosen": -1.6621599197387695, "logits/rejected": -1.7448629140853882, "logps/chosen": -377.0594482421875, "logps/rejected": -441.2286071777344, "loss": 0.5291, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4500750303268433, "rewards/margins": 0.7363721132278442, "rewards/rejected": -2.1864471435546875, "step": 5540 }, { "epoch": 0.73, "learning_rate": 1.0567172983528534e-06, "logits/chosen": -1.7251408100128174, "logits/rejected": -1.5952707529067993, "logps/chosen": -363.9090576171875, "logps/rejected": -439.10638427734375, "loss": 0.474, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3747875690460205, "rewards/margins": 0.7879632711410522, "rewards/rejected": -2.162750720977783, "step": 5550 }, { "epoch": 0.73, "learning_rate": 1.0474058161631168e-06, "logits/chosen": -1.884746789932251, "logits/rejected": -1.881100058555603, "logps/chosen": -481.223876953125, "logps/rejected": -511.8279724121094, "loss": 0.6107, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.501129388809204, "rewards/margins": 0.5625635981559753, "rewards/rejected": -2.063692808151245, "step": 5560 }, { "epoch": 0.73, "learning_rate": 1.0381246569307077e-06, "logits/chosen": -1.9864051342010498, "logits/rejected": -1.8679383993148804, "logps/chosen": -462.429443359375, "logps/rejected": -480.8426818847656, "loss": 0.5692, "rewards/accuracies": 0.75, "rewards/chosen": -1.4590893983840942, "rewards/margins": 0.5718271732330322, "rewards/rejected": -2.030916690826416, "step": 5570 }, { "epoch": 0.73, "learning_rate": 1.0288740144001722e-06, "logits/chosen": -2.014369249343872, "logits/rejected": -1.766344428062439, "logps/chosen": -424.77685546875, "logps/rejected": -442.02764892578125, "loss": 0.5989, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4355696439743042, "rewards/margins": 0.6431950926780701, "rewards/rejected": -2.0787649154663086, "step": 5580 }, { "epoch": 0.73, "learning_rate": 1.0196540816790127e-06, "logits/chosen": -1.748151183128357, "logits/rejected": -1.6582529544830322, "logps/chosen": -391.3197021484375, "logps/rejected": -418.3585510253906, "loss": 0.5349, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3962318897247314, "rewards/margins": 0.6935681700706482, "rewards/rejected": -2.0897998809814453, "step": 5590 }, { "epoch": 0.73, "learning_rate": 1.0104650512336679e-06, "logits/chosen": -2.043593168258667, "logits/rejected": -1.8819385766983032, "logps/chosen": -424.79779052734375, "logps/rejected": -437.5965881347656, "loss": 0.6034, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.3628209829330444, "rewards/margins": 0.634640634059906, "rewards/rejected": -1.9974616765975952, "step": 5600 }, { "epoch": 0.73, "eval_logits/chosen": 0.7560797333717346, "eval_logits/rejected": 0.8040981292724609, "eval_logps/chosen": -425.7730407714844, "eval_logps/rejected": -477.2521667480469, "eval_loss": 0.5488966703414917, "eval_rewards/accuracies": 0.7064999938011169, "eval_rewards/chosen": -1.4048031568527222, "eval_rewards/margins": 0.7290219664573669, "eval_rewards/rejected": -2.1338253021240234, "eval_runtime": 1173.6172, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 5600 }, { "epoch": 0.73, "learning_rate": 1.0013071148854861e-06, "logits/chosen": -1.779545545578003, "logits/rejected": -1.77289617061615, "logps/chosen": -388.35260009765625, "logps/rejected": -488.19500732421875, "loss": 0.4682, "rewards/accuracies": 0.75, "rewards/chosen": -1.3467668294906616, "rewards/margins": 0.9838630557060242, "rewards/rejected": -2.330629825592041, "step": 5610 }, { "epoch": 0.74, "learning_rate": 9.921804638067292e-07, "logits/chosen": -1.8865807056427002, "logits/rejected": -1.6680485010147095, "logps/chosen": -441.1761169433594, "logps/rejected": -476.1258850097656, "loss": 0.5362, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.551322340965271, "rewards/margins": 0.755243182182312, "rewards/rejected": -2.306565761566162, "step": 5620 }, { "epoch": 0.74, "learning_rate": 9.830852885165749e-07, "logits/chosen": -1.747839331626892, "logits/rejected": -1.9316389560699463, "logps/chosen": -381.62451171875, "logps/rejected": -481.34161376953125, "loss": 0.5668, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4812041521072388, "rewards/margins": 0.6032929420471191, "rewards/rejected": -2.0844969749450684, "step": 5630 }, { "epoch": 0.74, "learning_rate": 9.740217788771453e-07, "logits/chosen": -1.8652489185333252, "logits/rejected": -1.7647807598114014, "logps/chosen": -425.3164978027344, "logps/rejected": -458.489501953125, "loss": 0.535, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3008569478988647, "rewards/margins": 0.7232750654220581, "rewards/rejected": -2.024132013320923, "step": 5640 }, { "epoch": 0.74, "learning_rate": 9.649901240895374e-07, "logits/chosen": -1.6762669086456299, "logits/rejected": -1.7178618907928467, "logps/chosen": -401.3484802246094, "logps/rejected": -476.75885009765625, "loss": 0.5098, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4109026193618774, "rewards/margins": 0.8481072187423706, "rewards/rejected": -2.259009599685669, "step": 5650 }, { "epoch": 0.74, "learning_rate": 9.559905126898803e-07, "logits/chosen": -1.8930237293243408, "logits/rejected": -1.6374887228012085, "logps/chosen": -427.38232421875, "logps/rejected": -483.18170166015625, "loss": 0.4217, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3818823099136353, "rewards/margins": 0.9239101409912109, "rewards/rejected": -2.3057923316955566, "step": 5660 }, { "epoch": 0.74, "learning_rate": 9.470231325453958e-07, "logits/chosen": -1.7371124029159546, "logits/rejected": -1.5872479677200317, "logps/chosen": -423.66546630859375, "logps/rejected": -459.2461853027344, "loss": 0.5601, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4602539539337158, "rewards/margins": 0.718927800655365, "rewards/rejected": -2.1791815757751465, "step": 5670 }, { "epoch": 0.74, "learning_rate": 9.380881708504741e-07, "logits/chosen": -1.709235429763794, "logits/rejected": -1.5888019800186157, "logps/chosen": -376.9496154785156, "logps/rejected": -422.35467529296875, "loss": 0.531, "rewards/accuracies": 0.6875, "rewards/chosen": -1.370224952697754, "rewards/margins": 0.7989784479141235, "rewards/rejected": -2.169203281402588, "step": 5680 }, { "epoch": 0.74, "learning_rate": 9.291858141227733e-07, "logits/chosen": -1.8022798299789429, "logits/rejected": -1.8127330541610718, "logps/chosen": -404.7644958496094, "logps/rejected": -518.0156860351562, "loss": 0.4482, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3747928142547607, "rewards/margins": 0.9949323534965515, "rewards/rejected": -2.369725227355957, "step": 5690 }, { "epoch": 0.75, "learning_rate": 9.203162481993175e-07, "logits/chosen": -1.9120906591415405, "logits/rejected": -1.8126256465911865, "logps/chosen": -458.3221130371094, "logps/rejected": -549.8818969726562, "loss": 0.4793, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.4057399034500122, "rewards/margins": 1.0536389350891113, "rewards/rejected": -2.459378719329834, "step": 5700 }, { "epoch": 0.75, "eval_logits/chosen": 0.8544980883598328, "eval_logits/rejected": 0.8918463587760925, "eval_logps/chosen": -435.4676208496094, "eval_logps/rejected": -489.2809143066406, "eval_loss": 0.5494768023490906, "eval_rewards/accuracies": 0.7080000042915344, "eval_rewards/chosen": -1.5017492771148682, "eval_rewards/margins": 0.7523629069328308, "eval_rewards/rejected": -2.2541120052337646, "eval_runtime": 1173.0208, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 5700 }, { "epoch": 0.75, "learning_rate": 9.114796582326255e-07, "logits/chosen": -2.0369105339050293, "logits/rejected": -1.7664175033569336, "logps/chosen": -435.7451171875, "logps/rejected": -467.1946716308594, "loss": 0.5717, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6470935344696045, "rewards/margins": 0.6342312693595886, "rewards/rejected": -2.281324863433838, "step": 5710 }, { "epoch": 0.75, "learning_rate": 9.026762286868373e-07, "logits/chosen": -1.9730746746063232, "logits/rejected": -1.9558706283569336, "logps/chosen": -419.619384765625, "logps/rejected": -524.6027221679688, "loss": 0.5154, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4232149124145508, "rewards/margins": 0.9718655347824097, "rewards/rejected": -2.39508056640625, "step": 5720 }, { "epoch": 0.75, "learning_rate": 8.939061433338722e-07, "logits/chosen": -1.7903960943222046, "logits/rejected": -1.7915462255477905, "logps/chosen": -440.1333923339844, "logps/rejected": -487.67620849609375, "loss": 0.6097, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5487083196640015, "rewards/margins": 0.5043095350265503, "rewards/rejected": -2.0530178546905518, "step": 5730 }, { "epoch": 0.75, "learning_rate": 8.851695852495867e-07, "logits/chosen": -1.7867329120635986, "logits/rejected": -1.8247419595718384, "logps/chosen": -390.4758605957031, "logps/rejected": -478.8408203125, "loss": 0.554, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4541441202163696, "rewards/margins": 0.8417972326278687, "rewards/rejected": -2.2959413528442383, "step": 5740 }, { "epoch": 0.75, "learning_rate": 8.764667368099525e-07, "logits/chosen": -1.7342529296875, "logits/rejected": -1.6524286270141602, "logps/chosen": -415.9396057128906, "logps/rejected": -469.9048767089844, "loss": 0.5183, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5002275705337524, "rewards/margins": 0.7854744791984558, "rewards/rejected": -2.2857022285461426, "step": 5750 }, { "epoch": 0.75, "learning_rate": 8.677977796872541e-07, "logits/chosen": -1.7585633993148804, "logits/rejected": -1.5713212490081787, "logps/chosen": -468.2236328125, "logps/rejected": -477.36895751953125, "loss": 0.5981, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6817764043807983, "rewards/margins": 0.7200533151626587, "rewards/rejected": -2.401829719543457, "step": 5760 }, { "epoch": 0.76, "learning_rate": 8.591628948462913e-07, "logits/chosen": -1.5852441787719727, "logits/rejected": -1.4527372121810913, "logps/chosen": -446.5345764160156, "logps/rejected": -523.7599487304688, "loss": 0.5773, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5635210275650024, "rewards/margins": 0.7766464948654175, "rewards/rejected": -2.34016752243042, "step": 5770 }, { "epoch": 0.76, "learning_rate": 8.505622625406054e-07, "logits/chosen": -1.7319908142089844, "logits/rejected": -1.716835379600525, "logps/chosen": -430.82904052734375, "logps/rejected": -505.0128479003906, "loss": 0.5574, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5698482990264893, "rewards/margins": 0.7734547257423401, "rewards/rejected": -2.3433032035827637, "step": 5780 }, { "epoch": 0.76, "learning_rate": 8.419960623087129e-07, "logits/chosen": -1.5350911617279053, "logits/rejected": -1.5504592657089233, "logps/chosen": -367.9596862792969, "logps/rejected": -462.3741149902344, "loss": 0.5646, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4081454277038574, "rewards/margins": 0.6708158254623413, "rewards/rejected": -2.0789613723754883, "step": 5790 }, { "epoch": 0.76, "learning_rate": 8.334644729703617e-07, "logits/chosen": -1.7765905857086182, "logits/rejected": -1.7603641748428345, "logps/chosen": -389.497314453125, "logps/rejected": -474.2781677246094, "loss": 0.5164, "rewards/accuracies": 0.75, "rewards/chosen": -1.4255878925323486, "rewards/margins": 0.8293389081954956, "rewards/rejected": -2.2549266815185547, "step": 5800 }, { "epoch": 0.76, "eval_logits/chosen": 0.8885383009910583, "eval_logits/rejected": 0.9220978617668152, "eval_logps/chosen": -440.76849365234375, "eval_logps/rejected": -496.01495361328125, "eval_loss": 0.5497148633003235, "eval_rewards/accuracies": 0.7085000276565552, "eval_rewards/chosen": -1.5547575950622559, "eval_rewards/margins": 0.7666952013969421, "eval_rewards/rejected": -2.321453094482422, "eval_runtime": 1172.8418, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 5800 }, { "epoch": 0.76, "learning_rate": 8.249676726227931e-07, "logits/chosen": -1.6978380680084229, "logits/rejected": -1.7133516073226929, "logps/chosen": -496.34356689453125, "logps/rejected": -496.5018615722656, "loss": 0.6626, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8182287216186523, "rewards/margins": 0.40940380096435547, "rewards/rejected": -2.2276322841644287, "step": 5810 }, { "epoch": 0.76, "learning_rate": 8.165058386370314e-07, "logits/chosen": -1.6891844272613525, "logits/rejected": -1.6935393810272217, "logps/chosen": -435.996337890625, "logps/rejected": -526.1800537109375, "loss": 0.5456, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5769296884536743, "rewards/margins": 0.7456473112106323, "rewards/rejected": -2.3225769996643066, "step": 5820 }, { "epoch": 0.76, "learning_rate": 8.080791476541721e-07, "logits/chosen": -1.7240186929702759, "logits/rejected": -1.7112128734588623, "logps/chosen": -397.134521484375, "logps/rejected": -465.83990478515625, "loss": 0.4912, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4226911067962646, "rewards/margins": 0.9041608572006226, "rewards/rejected": -2.3268518447875977, "step": 5830 }, { "epoch": 0.76, "learning_rate": 7.996877755817026e-07, "logits/chosen": -1.8242250680923462, "logits/rejected": -1.7658584117889404, "logps/chosen": -432.068115234375, "logps/rejected": -438.3821716308594, "loss": 0.6201, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.6332203149795532, "rewards/margins": 0.39267483353614807, "rewards/rejected": -2.025895118713379, "step": 5840 }, { "epoch": 0.77, "learning_rate": 7.913318975898238e-07, "logits/chosen": -1.9215571880340576, "logits/rejected": -1.667769193649292, "logps/chosen": -510.7637634277344, "logps/rejected": -530.4691772460938, "loss": 0.5864, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6410611867904663, "rewards/margins": 0.7761032581329346, "rewards/rejected": -2.4171645641326904, "step": 5850 }, { "epoch": 0.77, "learning_rate": 7.830116881077992e-07, "logits/chosen": -1.7714402675628662, "logits/rejected": -1.5833399295806885, "logps/chosen": -444.66864013671875, "logps/rejected": -507.297607421875, "loss": 0.4912, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.451790452003479, "rewards/margins": 0.8779904246330261, "rewards/rejected": -2.3297810554504395, "step": 5860 }, { "epoch": 0.77, "learning_rate": 7.747273208203096e-07, "logits/chosen": -1.7608497142791748, "logits/rejected": -1.6446444988250732, "logps/chosen": -463.907958984375, "logps/rejected": -553.3729248046875, "loss": 0.5506, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7586644887924194, "rewards/margins": 0.8479653596878052, "rewards/rejected": -2.6066298484802246, "step": 5870 }, { "epoch": 0.77, "learning_rate": 7.664789686638272e-07, "logits/chosen": -1.7554763555526733, "logits/rejected": -1.5306179523468018, "logps/chosen": -419.94183349609375, "logps/rejected": -517.2022094726562, "loss": 0.5401, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.49278724193573, "rewards/margins": 0.9025591015815735, "rewards/rejected": -2.395346164703369, "step": 5880 }, { "epoch": 0.77, "learning_rate": 7.582668038230089e-07, "logits/chosen": -2.0004661083221436, "logits/rejected": -1.8577114343643188, "logps/chosen": -443.9126892089844, "logps/rejected": -507.6675720214844, "loss": 0.5518, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4959663152694702, "rewards/margins": 0.8871985673904419, "rewards/rejected": -2.383164882659912, "step": 5890 }, { "epoch": 0.77, "learning_rate": 7.500909977271007e-07, "logits/chosen": -1.8355262279510498, "logits/rejected": -1.774918556213379, "logps/chosen": -470.26348876953125, "logps/rejected": -520.8164672851562, "loss": 0.6164, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6989784240722656, "rewards/margins": 0.7304049730300903, "rewards/rejected": -2.4293832778930664, "step": 5900 }, { "epoch": 0.77, "eval_logits/chosen": 0.8645352721214294, "eval_logits/rejected": 0.8986992835998535, "eval_logps/chosen": -438.6431884765625, "eval_logps/rejected": -492.7100830078125, "eval_loss": 0.549113392829895, "eval_rewards/accuracies": 0.7080000042915344, "eval_rewards/chosen": -1.5335044860839844, "eval_rewards/margins": 0.754899799823761, "eval_rewards/rejected": -2.2884044647216797, "eval_runtime": 1172.6923, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 5900 }, { "epoch": 0.77, "learning_rate": 7.41951721046357e-07, "logits/chosen": -1.773479700088501, "logits/rejected": -1.5404237508773804, "logps/chosen": -413.2481384277344, "logps/rejected": -473.30511474609375, "loss": 0.5612, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4232587814331055, "rewards/margins": 0.6826823353767395, "rewards/rejected": -2.1059410572052, "step": 5910 }, { "epoch": 0.77, "learning_rate": 7.338491436884787e-07, "logits/chosen": -1.734042763710022, "logits/rejected": -1.6976509094238281, "logps/chosen": -414.5589294433594, "logps/rejected": -492.0824279785156, "loss": 0.5474, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6448618173599243, "rewards/margins": 0.7069370150566101, "rewards/rejected": -2.3517985343933105, "step": 5920 }, { "epoch": 0.78, "learning_rate": 7.257834347950693e-07, "logits/chosen": -1.7987234592437744, "logits/rejected": -1.587146520614624, "logps/chosen": -432.47119140625, "logps/rejected": -452.37841796875, "loss": 0.62, "rewards/accuracies": 0.625, "rewards/chosen": -1.552919626235962, "rewards/margins": 0.5302165746688843, "rewards/rejected": -2.0831360816955566, "step": 5930 }, { "epoch": 0.78, "learning_rate": 7.177547627380987e-07, "logits/chosen": -1.802987813949585, "logits/rejected": -1.7997875213623047, "logps/chosen": -448.490234375, "logps/rejected": -509.35107421875, "loss": 0.4917, "rewards/accuracies": 0.75, "rewards/chosen": -1.473778486251831, "rewards/margins": 0.7411264181137085, "rewards/rejected": -2.21490478515625, "step": 5940 }, { "epoch": 0.78, "learning_rate": 7.097632951163949e-07, "logits/chosen": -1.8216888904571533, "logits/rejected": -1.6682456731796265, "logps/chosen": -466.199462890625, "logps/rejected": -504.44537353515625, "loss": 0.5822, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5226764678955078, "rewards/margins": 0.7720099687576294, "rewards/rejected": -2.2946863174438477, "step": 5950 }, { "epoch": 0.78, "learning_rate": 7.018091987521386e-07, "logits/chosen": -1.9580612182617188, "logits/rejected": -1.79523503780365, "logps/chosen": -450.51348876953125, "logps/rejected": -491.3302307128906, "loss": 0.5887, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6182911396026611, "rewards/margins": 0.6931548714637756, "rewards/rejected": -2.311445713043213, "step": 5960 }, { "epoch": 0.78, "learning_rate": 6.93892639687386e-07, "logits/chosen": -1.9790513515472412, "logits/rejected": -1.8413312435150146, "logps/chosen": -460.31689453125, "logps/rejected": -458.7654724121094, "loss": 0.6095, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4465421438217163, "rewards/margins": 0.5680855512619019, "rewards/rejected": -2.014627695083618, "step": 5970 }, { "epoch": 0.78, "learning_rate": 6.860137831806018e-07, "logits/chosen": -1.7386270761489868, "logits/rejected": -1.6823354959487915, "logps/chosen": -449.7940368652344, "logps/rejected": -483.63800048828125, "loss": 0.5787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4916565418243408, "rewards/margins": 0.7418898344039917, "rewards/rejected": -2.233546257019043, "step": 5980 }, { "epoch": 0.78, "learning_rate": 6.781727937032054e-07, "logits/chosen": -1.7121458053588867, "logits/rejected": -1.5884407758712769, "logps/chosen": -407.84271240234375, "logps/rejected": -500.04986572265625, "loss": 0.4405, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.3468246459960938, "rewards/margins": 0.9939699172973633, "rewards/rejected": -2.340794563293457, "step": 5990 }, { "epoch": 0.79, "learning_rate": 6.703698349361437e-07, "logits/chosen": -1.857116460800171, "logits/rejected": -1.6669254302978516, "logps/chosen": -414.12078857421875, "logps/rejected": -444.47564697265625, "loss": 0.5347, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4424694776535034, "rewards/margins": 0.7128639817237854, "rewards/rejected": -2.1553330421447754, "step": 6000 }, { "epoch": 0.79, "eval_logits/chosen": 0.8396689295768738, "eval_logits/rejected": 0.8765884637832642, "eval_logps/chosen": -435.57208251953125, "eval_logps/rejected": -488.74273681640625, "eval_loss": 0.5487044453620911, "eval_rewards/accuracies": 0.7105000019073486, "eval_rewards/chosen": -1.5027936697006226, "eval_rewards/margins": 0.7459368109703064, "eval_rewards/rejected": -2.2487306594848633, "eval_runtime": 1172.9793, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 6000 }, { "epoch": 0.79, "learning_rate": 6.626050697664682e-07, "logits/chosen": -1.7867813110351562, "logits/rejected": -1.6846868991851807, "logps/chosen": -441.70672607421875, "logps/rejected": -468.6539001464844, "loss": 0.526, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5382730960845947, "rewards/margins": 0.7291477918624878, "rewards/rejected": -2.267420530319214, "step": 6010 }, { "epoch": 0.79, "learning_rate": 6.548786602839404e-07, "logits/chosen": -1.7776895761489868, "logits/rejected": -1.759871244430542, "logps/chosen": -385.42010498046875, "logps/rejected": -454.4854431152344, "loss": 0.458, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3485528230667114, "rewards/margins": 0.991489589214325, "rewards/rejected": -2.3400423526763916, "step": 6020 }, { "epoch": 0.79, "learning_rate": 6.471907677776426e-07, "logits/chosen": -1.9682811498641968, "logits/rejected": -1.8500810861587524, "logps/chosen": -446.57818603515625, "logps/rejected": -465.53302001953125, "loss": 0.5939, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4418151378631592, "rewards/margins": 0.6426098942756653, "rewards/rejected": -2.0844249725341797, "step": 6030 }, { "epoch": 0.79, "learning_rate": 6.39541552732617e-07, "logits/chosen": -1.8172651529312134, "logits/rejected": -1.7830369472503662, "logps/chosen": -446.30657958984375, "logps/rejected": -528.9716186523438, "loss": 0.5815, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.6189403533935547, "rewards/margins": 0.6232365965843201, "rewards/rejected": -2.2421767711639404, "step": 6040 }, { "epoch": 0.79, "learning_rate": 6.319311748265086e-07, "logits/chosen": -1.8408966064453125, "logits/rejected": -1.5872822999954224, "logps/chosen": -526.5015869140625, "logps/rejected": -542.4791259765625, "loss": 0.5483, "rewards/accuracies": 0.75, "rewards/chosen": -1.5981743335723877, "rewards/margins": 0.7914873361587524, "rewards/rejected": -2.3896615505218506, "step": 6050 }, { "epoch": 0.79, "learning_rate": 6.243597929262404e-07, "logits/chosen": -1.792720079421997, "logits/rejected": -1.552986979484558, "logps/chosen": -375.17608642578125, "logps/rejected": -522.9835205078125, "loss": 0.5579, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.538088321685791, "rewards/margins": 1.001598596572876, "rewards/rejected": -2.539686918258667, "step": 6060 }, { "epoch": 0.79, "learning_rate": 6.168275650846875e-07, "logits/chosen": -1.867114782333374, "logits/rejected": -1.8291406631469727, "logps/chosen": -454.90289306640625, "logps/rejected": -469.98162841796875, "loss": 0.5895, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4467754364013672, "rewards/margins": 0.6458438634872437, "rewards/rejected": -2.0926194190979004, "step": 6070 }, { "epoch": 0.8, "learning_rate": 6.093346485373863e-07, "logits/chosen": -1.8101370334625244, "logits/rejected": -1.6821973323822021, "logps/chosen": -459.3401794433594, "logps/rejected": -492.3536071777344, "loss": 0.538, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5773446559906006, "rewards/margins": 0.6525165438652039, "rewards/rejected": -2.229861259460449, "step": 6080 }, { "epoch": 0.8, "learning_rate": 6.018811996992455e-07, "logits/chosen": -1.8199021816253662, "logits/rejected": -1.6964166164398193, "logps/chosen": -426.5926208496094, "logps/rejected": -489.61199951171875, "loss": 0.4093, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2519571781158447, "rewards/margins": 1.1711586713790894, "rewards/rejected": -2.4231159687042236, "step": 6090 }, { "epoch": 0.8, "learning_rate": 5.944673741612866e-07, "logits/chosen": -1.8592021465301514, "logits/rejected": -1.814186453819275, "logps/chosen": -465.5372009277344, "logps/rejected": -520.5704345703125, "loss": 0.56, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6353585720062256, "rewards/margins": 0.6319504380226135, "rewards/rejected": -2.2673091888427734, "step": 6100 }, { "epoch": 0.8, "eval_logits/chosen": 0.8248075842857361, "eval_logits/rejected": 0.8642656803131104, "eval_logps/chosen": -433.8428955078125, "eval_logps/rejected": -487.2425842285156, "eval_loss": 0.5491208434104919, "eval_rewards/accuracies": 0.7105000019073486, "eval_rewards/chosen": -1.4855022430419922, "eval_rewards/margins": 0.7482272386550903, "eval_rewards/rejected": -2.233729600906372, "eval_runtime": 1173.0295, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 6100 }, { "epoch": 0.8, "learning_rate": 5.870933266873916e-07, "logits/chosen": -1.8650023937225342, "logits/rejected": -1.7973130941390991, "logps/chosen": -394.88909912109375, "logps/rejected": -456.81024169921875, "loss": 0.625, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4461156129837036, "rewards/margins": 0.5903973579406738, "rewards/rejected": -2.036512851715088, "step": 6110 }, { "epoch": 0.8, "learning_rate": 5.797592112110734e-07, "logits/chosen": -1.6772758960723877, "logits/rejected": -1.6647506952285767, "logps/chosen": -378.04296875, "logps/rejected": -415.96343994140625, "loss": 0.653, "rewards/accuracies": 0.6875, "rewards/chosen": -1.527093529701233, "rewards/margins": 0.5279368162155151, "rewards/rejected": -2.055030345916748, "step": 6120 }, { "epoch": 0.8, "learning_rate": 5.724651808322645e-07, "logits/chosen": -1.7247283458709717, "logits/rejected": -1.7005923986434937, "logps/chosen": -404.6063232421875, "logps/rejected": -508.9358825683594, "loss": 0.4811, "rewards/accuracies": 0.75, "rewards/chosen": -1.3675364255905151, "rewards/margins": 0.8878251910209656, "rewards/rejected": -2.255361557006836, "step": 6130 }, { "epoch": 0.8, "learning_rate": 5.652113878141194e-07, "logits/chosen": -1.7346999645233154, "logits/rejected": -1.5381842851638794, "logps/chosen": -362.3267517089844, "logps/rejected": -425.4991760253906, "loss": 0.5472, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4318227767944336, "rewards/margins": 0.6716090440750122, "rewards/rejected": -2.103431463241577, "step": 6140 }, { "epoch": 0.8, "learning_rate": 5.579979835798361e-07, "logits/chosen": -1.8034861087799072, "logits/rejected": -1.7127468585968018, "logps/chosen": -408.5749816894531, "logps/rejected": -490.4581604003906, "loss": 0.4916, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4602086544036865, "rewards/margins": 0.8907852172851562, "rewards/rejected": -2.3509938716888428, "step": 6150 }, { "epoch": 0.81, "learning_rate": 5.508251187094932e-07, "logits/chosen": -1.9209445714950562, "logits/rejected": -1.8475583791732788, "logps/chosen": -458.3445739746094, "logps/rejected": -470.88262939453125, "loss": 0.6092, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5180823802947998, "rewards/margins": 0.6414216756820679, "rewards/rejected": -2.1595041751861572, "step": 6160 }, { "epoch": 0.81, "learning_rate": 5.436929429369122e-07, "logits/chosen": -1.7689403295516968, "logits/rejected": -1.6730201244354248, "logps/chosen": -408.4197692871094, "logps/rejected": -456.3441467285156, "loss": 0.5689, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4721378087997437, "rewards/margins": 0.6763318777084351, "rewards/rejected": -2.1484696865081787, "step": 6170 }, { "epoch": 0.81, "learning_rate": 5.366016051465245e-07, "logits/chosen": -1.8204491138458252, "logits/rejected": -1.7095301151275635, "logps/chosen": -408.523193359375, "logps/rejected": -497.50860595703125, "loss": 0.4561, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.390578269958496, "rewards/margins": 0.9562222361564636, "rewards/rejected": -2.3468003273010254, "step": 6180 }, { "epoch": 0.81, "learning_rate": 5.295512533702701e-07, "logits/chosen": -1.8240352869033813, "logits/rejected": -1.6943086385726929, "logps/chosen": -389.2616271972656, "logps/rejected": -451.5743103027344, "loss": 0.5642, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.4481045007705688, "rewards/margins": 0.6837536096572876, "rewards/rejected": -2.1318581104278564, "step": 6190 }, { "epoch": 0.81, "learning_rate": 5.225420347845023e-07, "logits/chosen": -1.8606281280517578, "logits/rejected": -1.7882606983184814, "logps/chosen": -444.04937744140625, "logps/rejected": -502.1737365722656, "loss": 0.587, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4267102479934692, "rewards/margins": 0.74085932970047, "rewards/rejected": -2.167569637298584, "step": 6200 }, { "epoch": 0.81, "eval_logits/chosen": 0.8071790933609009, "eval_logits/rejected": 0.8489038944244385, "eval_logps/chosen": -431.671142578125, "eval_logps/rejected": -484.97882080078125, "eval_loss": 0.549113392829895, "eval_rewards/accuracies": 0.7095000147819519, "eval_rewards/chosen": -1.4637844562530518, "eval_rewards/margins": 0.7473068237304688, "eval_rewards/rejected": -2.2110912799835205, "eval_runtime": 1172.7765, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 6200 }, { "epoch": 0.81, "learning_rate": 5.155740957069186e-07, "logits/chosen": -1.9389774799346924, "logits/rejected": -1.954559564590454, "logps/chosen": -434.64599609375, "logps/rejected": -488.9850158691406, "loss": 0.5066, "rewards/accuracies": 0.75, "rewards/chosen": -1.4530550241470337, "rewards/margins": 0.8636603355407715, "rewards/rejected": -2.3167154788970947, "step": 6210 }, { "epoch": 0.81, "learning_rate": 5.08647581593506e-07, "logits/chosen": -1.7193613052368164, "logits/rejected": -1.6669191122055054, "logps/chosen": -404.49835205078125, "logps/rejected": -478.39080810546875, "loss": 0.4748, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2635455131530762, "rewards/margins": 0.8986026048660278, "rewards/rejected": -2.1621482372283936, "step": 6220 }, { "epoch": 0.82, "learning_rate": 5.017626370355014e-07, "logits/chosen": -1.7605371475219727, "logits/rejected": -1.5838454961776733, "logps/chosen": -415.41375732421875, "logps/rejected": -465.14178466796875, "loss": 0.4511, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3500653505325317, "rewards/margins": 0.9630001783370972, "rewards/rejected": -2.31306529045105, "step": 6230 }, { "epoch": 0.82, "learning_rate": 4.949194057563783e-07, "logits/chosen": -1.8982305526733398, "logits/rejected": -1.763641357421875, "logps/chosen": -435.21533203125, "logps/rejected": -451.747314453125, "loss": 0.614, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5065052509307861, "rewards/margins": 0.5958704352378845, "rewards/rejected": -2.1023757457733154, "step": 6240 }, { "epoch": 0.82, "learning_rate": 4.881180306088418e-07, "logits/chosen": -1.8761934041976929, "logits/rejected": -1.6820194721221924, "logps/chosen": -433.70941162109375, "logps/rejected": -476.25091552734375, "loss": 0.4914, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3821966648101807, "rewards/margins": 0.9251546859741211, "rewards/rejected": -2.3073513507843018, "step": 6250 }, { "epoch": 0.82, "learning_rate": 4.813586535718512e-07, "logits/chosen": -1.9150760173797607, "logits/rejected": -1.6301143169403076, "logps/chosen": -467.3033142089844, "logps/rejected": -483.08721923828125, "loss": 0.5139, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.407266616821289, "rewards/margins": 0.9681908488273621, "rewards/rejected": -2.375457286834717, "step": 6260 }, { "epoch": 0.82, "learning_rate": 4.746414157476506e-07, "logits/chosen": -1.9365136623382568, "logits/rejected": -1.7622143030166626, "logps/chosen": -382.59930419921875, "logps/rejected": -452.78363037109375, "loss": 0.4941, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3162574768066406, "rewards/margins": 0.9539827108383179, "rewards/rejected": -2.270240306854248, "step": 6270 }, { "epoch": 0.82, "learning_rate": 4.679664573588294e-07, "logits/chosen": -1.7709249258041382, "logits/rejected": -1.65009343624115, "logps/chosen": -375.88079833984375, "logps/rejected": -438.1414489746094, "loss": 0.508, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3196238279342651, "rewards/margins": 0.7960411906242371, "rewards/rejected": -2.1156649589538574, "step": 6280 }, { "epoch": 0.82, "learning_rate": 4.6133391774538903e-07, "logits/chosen": -1.9863331317901611, "logits/rejected": -1.8483604192733765, "logps/chosen": -450.23858642578125, "logps/rejected": -484.78631591796875, "loss": 0.5767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3949511051177979, "rewards/margins": 0.8456497192382812, "rewards/rejected": -2.2406005859375, "step": 6290 }, { "epoch": 0.82, "learning_rate": 4.5474393536184214e-07, "logits/chosen": -1.9218631982803345, "logits/rejected": -1.7662346363067627, "logps/chosen": -417.332763671875, "logps/rejected": -461.92791748046875, "loss": 0.4927, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3764417171478271, "rewards/margins": 0.7701510190963745, "rewards/rejected": -2.146592617034912, "step": 6300 }, { "epoch": 0.82, "eval_logits/chosen": 0.8118359446525574, "eval_logits/rejected": 0.8531017899513245, "eval_logps/chosen": -431.203857421875, "eval_logps/rejected": -484.6880798339844, "eval_loss": 0.5489959716796875, "eval_rewards/accuracies": 0.7089999914169312, "eval_rewards/chosen": -1.4591114521026611, "eval_rewards/margins": 0.7490728497505188, "eval_rewards/rejected": -2.208184242248535, "eval_runtime": 1172.584, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.853, "step": 6300 }, { "epoch": 0.83, "learning_rate": 4.4819664777431243e-07, "logits/chosen": -1.7982280254364014, "logits/rejected": -1.6588172912597656, "logps/chosen": -379.1011657714844, "logps/rejected": -411.88262939453125, "loss": 0.6031, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.398041009902954, "rewards/margins": 0.5266937017440796, "rewards/rejected": -1.9247347116470337, "step": 6310 }, { "epoch": 0.83, "learning_rate": 4.416921916576722e-07, "logits/chosen": -1.7167733907699585, "logits/rejected": -1.534623622894287, "logps/chosen": -463.98712158203125, "logps/rejected": -516.0758056640625, "loss": 0.5965, "rewards/accuracies": 0.6875, "rewards/chosen": -1.516566514968872, "rewards/margins": 0.6256294250488281, "rewards/rejected": -2.142195701599121, "step": 6320 }, { "epoch": 0.83, "learning_rate": 4.352307027926828e-07, "logits/chosen": -1.8432748317718506, "logits/rejected": -1.7553224563598633, "logps/chosen": -410.40191650390625, "logps/rejected": -486.0369567871094, "loss": 0.4201, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2879002094268799, "rewards/margins": 1.0923058986663818, "rewards/rejected": -2.3802061080932617, "step": 6330 }, { "epoch": 0.83, "learning_rate": 4.288123160631624e-07, "logits/chosen": -1.5096865892410278, "logits/rejected": -1.5543712377548218, "logps/chosen": -422.20526123046875, "logps/rejected": -469.3055725097656, "loss": 0.6692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.6089191436767578, "rewards/margins": 0.574799656867981, "rewards/rejected": -2.1837189197540283, "step": 6340 }, { "epoch": 0.83, "learning_rate": 4.224371654531731e-07, "logits/chosen": -1.836951494216919, "logits/rejected": -1.7463347911834717, "logps/chosen": -422.5487365722656, "logps/rejected": -446.9239196777344, "loss": 0.591, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5224004983901978, "rewards/margins": 0.5998014211654663, "rewards/rejected": -2.122201919555664, "step": 6350 }, { "epoch": 0.83, "learning_rate": 4.1610538404421837e-07, "logits/chosen": -1.851381540298462, "logits/rejected": -1.8891586065292358, "logps/chosen": -388.76495361328125, "logps/rejected": -489.7081604003906, "loss": 0.4899, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.263028860092163, "rewards/margins": 0.7727184891700745, "rewards/rejected": -2.0357470512390137, "step": 6360 }, { "epoch": 0.83, "learning_rate": 4.098171040124699e-07, "logits/chosen": -1.901296615600586, "logits/rejected": -1.7560195922851562, "logps/chosen": -491.11767578125, "logps/rejected": -490.3089904785156, "loss": 0.621, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.6198790073394775, "rewards/margins": 0.612278938293457, "rewards/rejected": -2.2321579456329346, "step": 6370 }, { "epoch": 0.83, "learning_rate": 4.03572456626006e-07, "logits/chosen": -1.7471396923065186, "logits/rejected": -1.8034473657608032, "logps/chosen": -431.95391845703125, "logps/rejected": -472.6758728027344, "loss": 0.5683, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4471083879470825, "rewards/margins": 0.592138409614563, "rewards/rejected": -2.0392465591430664, "step": 6380 }, { "epoch": 0.84, "learning_rate": 3.9737157224207265e-07, "logits/chosen": -1.8360217809677124, "logits/rejected": -1.772001028060913, "logps/chosen": -389.5067443847656, "logps/rejected": -454.16253662109375, "loss": 0.5769, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3939635753631592, "rewards/margins": 0.6599873304367065, "rewards/rejected": -2.0539510250091553, "step": 6390 }, { "epoch": 0.84, "learning_rate": 3.912145803043596e-07, "logits/chosen": -1.8079197406768799, "logits/rejected": -1.750848412513733, "logps/chosen": -432.05364990234375, "logps/rejected": -449.82830810546875, "loss": 0.6102, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4526618719100952, "rewards/margins": 0.4696109890937805, "rewards/rejected": -1.9222729206085205, "step": 6400 }, { "epoch": 0.84, "eval_logits/chosen": 0.8054977655410767, "eval_logits/rejected": 0.8473966717720032, "eval_logps/chosen": -429.9117126464844, "eval_logps/rejected": -483.1517639160156, "eval_loss": 0.5485800504684448, "eval_rewards/accuracies": 0.7105000019073486, "eval_rewards/chosen": -1.4461897611618042, "eval_rewards/margins": 0.7466309666633606, "eval_rewards/rejected": -2.1928207874298096, "eval_runtime": 1172.5265, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.853, "step": 6400 }, { "epoch": 0.84, "learning_rate": 3.851016093403023e-07, "logits/chosen": -1.6788511276245117, "logits/rejected": -1.6036514043807983, "logps/chosen": -402.7706604003906, "logps/rejected": -466.3506774902344, "loss": 0.5659, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5908563137054443, "rewards/margins": 0.6856015920639038, "rewards/rejected": -2.2764577865600586, "step": 6410 }, { "epoch": 0.84, "learning_rate": 3.7903278695839456e-07, "logits/chosen": -1.7197093963623047, "logits/rejected": -1.774407982826233, "logps/chosen": -426.36505126953125, "logps/rejected": -464.59356689453125, "loss": 0.5771, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.430627465248108, "rewards/margins": 0.6341504454612732, "rewards/rejected": -2.0647778511047363, "step": 6420 }, { "epoch": 0.84, "learning_rate": 3.7300823984552983e-07, "logits/chosen": -1.7846879959106445, "logits/rejected": -1.7678248882293701, "logps/chosen": -379.194580078125, "logps/rejected": -457.87493896484375, "loss": 0.5666, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3751968145370483, "rewards/margins": 0.6400051712989807, "rewards/rejected": -2.015202045440674, "step": 6430 }, { "epoch": 0.84, "learning_rate": 3.670280937643503e-07, "logits/chosen": -1.780124306678772, "logits/rejected": -1.6688493490219116, "logps/chosen": -417.65606689453125, "logps/rejected": -460.10565185546875, "loss": 0.5392, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.4370096921920776, "rewards/margins": 0.7721136212348938, "rewards/rejected": -2.209123134613037, "step": 6440 }, { "epoch": 0.84, "learning_rate": 3.610924735506274e-07, "logits/chosen": -1.850022315979004, "logits/rejected": -1.601930856704712, "logps/chosen": -468.7164001464844, "logps/rejected": -456.21746826171875, "loss": 0.6132, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5035723447799683, "rewards/margins": 0.572163462638855, "rewards/rejected": -2.0757358074188232, "step": 6450 }, { "epoch": 0.85, "learning_rate": 3.5520150311065316e-07, "logits/chosen": -1.6352055072784424, "logits/rejected": -1.6226692199707031, "logps/chosen": -429.9473571777344, "logps/rejected": -486.21343994140625, "loss": 0.4952, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3845300674438477, "rewards/margins": 0.7877673506736755, "rewards/rejected": -2.1722970008850098, "step": 6460 }, { "epoch": 0.85, "learning_rate": 3.493553054186527e-07, "logits/chosen": -1.8799070119857788, "logits/rejected": -1.7547706365585327, "logps/chosen": -437.9310607910156, "logps/rejected": -493.5552673339844, "loss": 0.577, "rewards/accuracies": 0.6875, "rewards/chosen": -1.586670160293579, "rewards/margins": 0.640863299369812, "rewards/rejected": -2.2275335788726807, "step": 6470 }, { "epoch": 0.85, "learning_rate": 3.4355400251421977e-07, "logits/chosen": -1.700897216796875, "logits/rejected": -1.6764428615570068, "logps/chosen": -419.20635986328125, "logps/rejected": -443.47802734375, "loss": 0.6838, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5222584009170532, "rewards/margins": 0.4975927472114563, "rewards/rejected": -2.0198514461517334, "step": 6480 }, { "epoch": 0.85, "learning_rate": 3.3779771549976637e-07, "logits/chosen": -1.8302929401397705, "logits/rejected": -1.645246148109436, "logps/chosen": -411.7997131347656, "logps/rejected": -459.4706115722656, "loss": 0.547, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4969736337661743, "rewards/margins": 0.7363954186439514, "rewards/rejected": -2.2333691120147705, "step": 6490 }, { "epoch": 0.85, "learning_rate": 3.3208656453799783e-07, "logits/chosen": -1.8124793767929077, "logits/rejected": -1.742392897605896, "logps/chosen": -402.3931884765625, "logps/rejected": -453.42083740234375, "loss": 0.4988, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3512194156646729, "rewards/margins": 0.8209896087646484, "rewards/rejected": -2.172208786010742, "step": 6500 }, { "epoch": 0.85, "eval_logits/chosen": 0.8046004772186279, "eval_logits/rejected": 0.8463611006736755, "eval_logps/chosen": -430.1142272949219, "eval_logps/rejected": -483.24664306640625, "eval_loss": 0.5485355257987976, "eval_rewards/accuracies": 0.7095000147819519, "eval_rewards/chosen": -1.448215126991272, "eval_rewards/margins": 0.74555504322052, "eval_rewards/rejected": -2.193770170211792, "eval_runtime": 1172.8617, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 6500 }, { "epoch": 0.85, "learning_rate": 3.2642066884940064e-07, "logits/chosen": -1.7718359231948853, "logits/rejected": -1.6276681423187256, "logps/chosen": -421.28515625, "logps/rejected": -518.3148193359375, "loss": 0.534, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3754109144210815, "rewards/margins": 0.9865762591362, "rewards/rejected": -2.361987352371216, "step": 6510 }, { "epoch": 0.85, "learning_rate": 3.2080014670975825e-07, "logits/chosen": -1.913540244102478, "logits/rejected": -1.8367464542388916, "logps/chosen": -403.6368103027344, "logps/rejected": -440.57232666015625, "loss": 0.5577, "rewards/accuracies": 0.75, "rewards/chosen": -1.3662607669830322, "rewards/margins": 0.6077486872673035, "rewards/rejected": -1.9740097522735596, "step": 6520 }, { "epoch": 0.85, "learning_rate": 3.152251154476765e-07, "logits/chosen": -1.7818803787231445, "logits/rejected": -1.7090286016464233, "logps/chosen": -394.8916320800781, "logps/rejected": -472.5263671875, "loss": 0.5199, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4043118953704834, "rewards/margins": 0.8041407465934753, "rewards/rejected": -2.2084529399871826, "step": 6530 }, { "epoch": 0.86, "learning_rate": 3.0969569144214147e-07, "logits/chosen": -1.9432601928710938, "logits/rejected": -1.8312276601791382, "logps/chosen": -423.01953125, "logps/rejected": -476.34075927734375, "loss": 0.4858, "rewards/accuracies": 0.75, "rewards/chosen": -1.3503367900848389, "rewards/margins": 0.8274005055427551, "rewards/rejected": -2.177737236022949, "step": 6540 }, { "epoch": 0.86, "learning_rate": 3.042119901200824e-07, "logits/chosen": -1.7422155141830444, "logits/rejected": -1.7008956670761108, "logps/chosen": -401.38153076171875, "logps/rejected": -487.8135681152344, "loss": 0.6273, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5218549966812134, "rewards/margins": 0.5686341524124146, "rewards/rejected": -2.090488910675049, "step": 6550 }, { "epoch": 0.86, "learning_rate": 2.9877412595396726e-07, "logits/chosen": -1.9248844385147095, "logits/rejected": -1.8276618719100952, "logps/chosen": -468.0619201660156, "logps/rejected": -514.0059204101562, "loss": 0.5368, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4151637554168701, "rewards/margins": 0.8898487091064453, "rewards/rejected": -2.3050124645233154, "step": 6560 }, { "epoch": 0.86, "learning_rate": 2.933822124594124e-07, "logits/chosen": -1.873353362083435, "logits/rejected": -1.7209460735321045, "logps/chosen": -425.2776794433594, "logps/rejected": -449.8960876464844, "loss": 0.5923, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4606962203979492, "rewards/margins": 0.6035781502723694, "rewards/rejected": -2.064274311065674, "step": 6570 }, { "epoch": 0.86, "learning_rate": 2.880363621928106e-07, "logits/chosen": -1.8270835876464844, "logits/rejected": -1.6930897235870361, "logps/chosen": -447.11419677734375, "logps/rejected": -467.6141662597656, "loss": 0.5549, "rewards/accuracies": 0.6875, "rewards/chosen": -1.502572774887085, "rewards/margins": 0.617160975933075, "rewards/rejected": -2.1197338104248047, "step": 6580 }, { "epoch": 0.86, "learning_rate": 2.82736686748985e-07, "logits/chosen": -1.8135850429534912, "logits/rejected": -1.7034486532211304, "logps/chosen": -442.55743408203125, "logps/rejected": -456.33111572265625, "loss": 0.5417, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.453835368156433, "rewards/margins": 0.7465580701828003, "rewards/rejected": -2.2003934383392334, "step": 6590 }, { "epoch": 0.86, "learning_rate": 2.774832967588556e-07, "logits/chosen": -1.9186111688613892, "logits/rejected": -1.779409646987915, "logps/chosen": -448.62322998046875, "logps/rejected": -503.616943359375, "loss": 0.5544, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4722912311553955, "rewards/margins": 0.8435390591621399, "rewards/rejected": -2.3158302307128906, "step": 6600 }, { "epoch": 0.86, "eval_logits/chosen": 0.8067517876625061, "eval_logits/rejected": 0.8487147688865662, "eval_logps/chosen": -430.1987609863281, "eval_logps/rejected": -483.3599853515625, "eval_loss": 0.5486475229263306, "eval_rewards/accuracies": 0.7114999890327454, "eval_rewards/chosen": -1.449060320854187, "eval_rewards/margins": 0.7458434700965881, "eval_rewards/rejected": -2.19490385055542, "eval_runtime": 1172.9906, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 6600 }, { "epoch": 0.86, "learning_rate": 2.7227630188713326e-07, "logits/chosen": -1.9206463098526, "logits/rejected": -1.6951115131378174, "logps/chosen": -467.5791015625, "logps/rejected": -486.3162536621094, "loss": 0.5427, "rewards/accuracies": 0.6875, "rewards/chosen": -1.501364827156067, "rewards/margins": 0.8067043423652649, "rewards/rejected": -2.3080692291259766, "step": 6610 }, { "epoch": 0.87, "learning_rate": 2.671158108300284e-07, "logits/chosen": -1.9582704305648804, "logits/rejected": -1.8491710424423218, "logps/chosen": -426.76953125, "logps/rejected": -487.92584228515625, "loss": 0.554, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4947972297668457, "rewards/margins": 0.6211631298065186, "rewards/rejected": -2.1159605979919434, "step": 6620 }, { "epoch": 0.87, "learning_rate": 2.6200193131298376e-07, "logits/chosen": -1.9450123310089111, "logits/rejected": -1.8916527032852173, "logps/chosen": -439.2418518066406, "logps/rejected": -511.76898193359375, "loss": 0.4518, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3632786273956299, "rewards/margins": 1.0412644147872925, "rewards/rejected": -2.404543161392212, "step": 6630 }, { "epoch": 0.87, "learning_rate": 2.569347700884217e-07, "logits/chosen": -1.978216528892517, "logits/rejected": -1.759415626525879, "logps/chosen": -418.84686279296875, "logps/rejected": -473.9796447753906, "loss": 0.4749, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.3123157024383545, "rewards/margins": 1.0165531635284424, "rewards/rejected": -2.328868865966797, "step": 6640 }, { "epoch": 0.87, "learning_rate": 2.5191443293352186e-07, "logits/chosen": -1.8891208171844482, "logits/rejected": -1.8109643459320068, "logps/chosen": -438.7274475097656, "logps/rejected": -514.1735229492188, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -1.4189406633377075, "rewards/margins": 0.7500912547111511, "rewards/rejected": -2.169032335281372, "step": 6650 }, { "epoch": 0.87, "learning_rate": 2.469410246480067e-07, "logits/chosen": -1.6742660999298096, "logits/rejected": -1.5309489965438843, "logps/chosen": -404.78057861328125, "logps/rejected": -482.4720153808594, "loss": 0.4872, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.563522458076477, "rewards/margins": 0.971258819103241, "rewards/rejected": -2.5347812175750732, "step": 6660 }, { "epoch": 0.87, "learning_rate": 2.4201464905195955e-07, "logits/chosen": -1.8839927911758423, "logits/rejected": -1.8668091297149658, "logps/chosen": -414.96142578125, "logps/rejected": -461.70416259765625, "loss": 0.6223, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4317705631256104, "rewards/margins": 0.45908093452453613, "rewards/rejected": -1.890851616859436, "step": 6670 }, { "epoch": 0.87, "learning_rate": 2.3713540898365196e-07, "logits/chosen": -1.726810097694397, "logits/rejected": -1.7777540683746338, "logps/chosen": -420.916748046875, "logps/rejected": -478.23419189453125, "loss": 0.5019, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.336426854133606, "rewards/margins": 0.8878466486930847, "rewards/rejected": -2.224273443222046, "step": 6680 }, { "epoch": 0.88, "learning_rate": 2.3230340629740166e-07, "logits/chosen": -1.9040178060531616, "logits/rejected": -1.8105716705322266, "logps/chosen": -430.43817138671875, "logps/rejected": -438.405029296875, "loss": 0.6784, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.4686359167099, "rewards/margins": 0.3403673768043518, "rewards/rejected": -1.8090031147003174, "step": 6690 }, { "epoch": 0.88, "learning_rate": 2.2751874186144357e-07, "logits/chosen": -1.8653980493545532, "logits/rejected": -1.7417211532592773, "logps/chosen": -430.21527099609375, "logps/rejected": -449.42987060546875, "loss": 0.5828, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2628891468048096, "rewards/margins": 0.6491962671279907, "rewards/rejected": -1.9120851755142212, "step": 6700 }, { "epoch": 0.88, "eval_logits/chosen": 0.8096733689308167, "eval_logits/rejected": 0.8511734008789062, "eval_logps/chosen": -430.47711181640625, "eval_logps/rejected": -483.68023681640625, "eval_loss": 0.5486401319503784, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -1.4518442153930664, "eval_rewards/margins": 0.7462618947029114, "eval_rewards/rejected": -2.198106050491333, "eval_runtime": 1172.8106, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 6700 }, { "epoch": 0.88, "learning_rate": 2.227815155558241e-07, "logits/chosen": -1.9332526922225952, "logits/rejected": -1.8840205669403076, "logps/chosen": -431.55413818359375, "logps/rejected": -513.9845581054688, "loss": 0.4791, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.412088394165039, "rewards/margins": 0.9422322511672974, "rewards/rejected": -2.354320526123047, "step": 6710 }, { "epoch": 0.88, "learning_rate": 2.1809182627031883e-07, "logits/chosen": -2.016242265701294, "logits/rejected": -1.8350414037704468, "logps/chosen": -445.91046142578125, "logps/rejected": -487.2635803222656, "loss": 0.5423, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4208695888519287, "rewards/margins": 0.719671905040741, "rewards/rejected": -2.1405415534973145, "step": 6720 }, { "epoch": 0.88, "learning_rate": 2.1344977190236372e-07, "logits/chosen": -1.6209218502044678, "logits/rejected": -1.6282440423965454, "logps/chosen": -411.72576904296875, "logps/rejected": -483.763916015625, "loss": 0.579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5281398296356201, "rewards/margins": 0.7512315511703491, "rewards/rejected": -2.279371500015259, "step": 6730 }, { "epoch": 0.88, "learning_rate": 2.0885544935501656e-07, "logits/chosen": -1.7832863330841064, "logits/rejected": -1.773505449295044, "logps/chosen": -409.8037109375, "logps/rejected": -504.25994873046875, "loss": 0.4929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.349562168121338, "rewards/margins": 0.9078701138496399, "rewards/rejected": -2.257432222366333, "step": 6740 }, { "epoch": 0.88, "learning_rate": 2.0430895453492944e-07, "logits/chosen": -1.8785454034805298, "logits/rejected": -1.8223320245742798, "logps/chosen": -455.398193359375, "logps/rejected": -466.92901611328125, "loss": 0.613, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4360158443450928, "rewards/margins": 0.49601641297340393, "rewards/rejected": -1.9320322275161743, "step": 6750 }, { "epoch": 0.88, "learning_rate": 1.9981038235035111e-07, "logits/chosen": -1.7741162776947021, "logits/rejected": -1.7254183292388916, "logps/chosen": -409.7948303222656, "logps/rejected": -478.5233459472656, "loss": 0.4285, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2761456966400146, "rewards/margins": 0.9531618356704712, "rewards/rejected": -2.2293076515197754, "step": 6760 }, { "epoch": 0.89, "learning_rate": 1.9535982670914112e-07, "logits/chosen": -1.7457072734832764, "logits/rejected": -1.5970163345336914, "logps/chosen": -461.16253662109375, "logps/rejected": -513.734130859375, "loss": 0.532, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.446793794631958, "rewards/margins": 0.8001499176025391, "rewards/rejected": -2.246943712234497, "step": 6770 }, { "epoch": 0.89, "learning_rate": 1.9095738051681412e-07, "logits/chosen": -1.7886247634887695, "logits/rejected": -1.7298681735992432, "logps/chosen": -417.9925231933594, "logps/rejected": -471.6416015625, "loss": 0.59, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6166937351226807, "rewards/margins": 0.5833561420440674, "rewards/rejected": -2.200049877166748, "step": 6780 }, { "epoch": 0.89, "learning_rate": 1.8660313567459703e-07, "logits/chosen": -1.7199945449829102, "logits/rejected": -1.8157663345336914, "logps/chosen": -388.76190185546875, "logps/rejected": -467.54638671875, "loss": 0.5576, "rewards/accuracies": 0.75, "rewards/chosen": -1.4115843772888184, "rewards/margins": 0.8241313695907593, "rewards/rejected": -2.235715866088867, "step": 6790 }, { "epoch": 0.89, "learning_rate": 1.8229718307751165e-07, "logits/chosen": -1.9006595611572266, "logits/rejected": -1.7318729162216187, "logps/chosen": -449.3634338378906, "logps/rejected": -472.896728515625, "loss": 0.5711, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5036065578460693, "rewards/margins": 0.819083034992218, "rewards/rejected": -2.3226895332336426, "step": 6800 }, { "epoch": 0.89, "eval_logits/chosen": 0.8124059438705444, "eval_logits/rejected": 0.8538053631782532, "eval_logps/chosen": -430.8609924316406, "eval_logps/rejected": -484.1659851074219, "eval_loss": 0.5484992861747742, "eval_rewards/accuracies": 0.7095000147819519, "eval_rewards/chosen": -1.4556825160980225, "eval_rewards/margins": 0.747280478477478, "eval_rewards/rejected": -2.20296311378479, "eval_runtime": 1173.0318, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 6800 }, { "epoch": 0.89, "learning_rate": 1.7803961261247864e-07, "logits/chosen": -1.7197239398956299, "logits/rejected": -1.6941922903060913, "logps/chosen": -430.0616149902344, "logps/rejected": -510.97027587890625, "loss": 0.4903, "rewards/accuracies": 0.75, "rewards/chosen": -1.3526915311813354, "rewards/margins": 0.8942115902900696, "rewards/rejected": -2.24690318107605, "step": 6810 }, { "epoch": 0.89, "learning_rate": 1.7383051315643772e-07, "logits/chosen": -1.8691461086273193, "logits/rejected": -1.7417608499526978, "logps/chosen": -444.39208984375, "logps/rejected": -472.8169860839844, "loss": 0.5579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4681408405303955, "rewards/margins": 0.6582044363021851, "rewards/rejected": -2.126345157623291, "step": 6820 }, { "epoch": 0.89, "learning_rate": 1.6966997257449685e-07, "logits/chosen": -1.8133577108383179, "logits/rejected": -1.7999480962753296, "logps/chosen": -434.6233825683594, "logps/rejected": -474.06884765625, "loss": 0.562, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4581729173660278, "rewards/margins": 0.6189436912536621, "rewards/rejected": -2.0771164894104004, "step": 6830 }, { "epoch": 0.9, "learning_rate": 1.6555807771809375e-07, "logits/chosen": -1.7792173624038696, "logits/rejected": -1.694387674331665, "logps/chosen": -416.30206298828125, "logps/rejected": -446.8271484375, "loss": 0.5098, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4068571329116821, "rewards/margins": 0.8837915658950806, "rewards/rejected": -2.2906486988067627, "step": 6840 }, { "epoch": 0.9, "learning_rate": 1.6149491442318617e-07, "logits/chosen": -1.906616449356079, "logits/rejected": -1.8216674327850342, "logps/chosen": -415.72918701171875, "logps/rejected": -471.26568603515625, "loss": 0.5208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.342486023902893, "rewards/margins": 0.7155783176422119, "rewards/rejected": -2.0580644607543945, "step": 6850 }, { "epoch": 0.9, "learning_rate": 1.5748056750845786e-07, "logits/chosen": -1.8992087841033936, "logits/rejected": -1.8292995691299438, "logps/chosen": -452.1145935058594, "logps/rejected": -446.95452880859375, "loss": 0.5498, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.5781564712524414, "rewards/margins": 0.6276174783706665, "rewards/rejected": -2.2057740688323975, "step": 6860 }, { "epoch": 0.9, "learning_rate": 1.5351512077355024e-07, "logits/chosen": -1.8367058038711548, "logits/rejected": -1.720963478088379, "logps/chosen": -443.9596252441406, "logps/rejected": -561.5131225585938, "loss": 0.4586, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2995741367340088, "rewards/margins": 0.9421840906143188, "rewards/rejected": -2.241758108139038, "step": 6870 }, { "epoch": 0.9, "learning_rate": 1.4959865699730902e-07, "logits/chosen": -1.7452888488769531, "logits/rejected": -1.623615026473999, "logps/chosen": -401.3358459472656, "logps/rejected": -451.8515625, "loss": 0.5025, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.5068118572235107, "rewards/margins": 0.8342909812927246, "rewards/rejected": -2.3411028385162354, "step": 6880 }, { "epoch": 0.9, "learning_rate": 1.4573125793606202e-07, "logits/chosen": -1.7507559061050415, "logits/rejected": -1.617040991783142, "logps/chosen": -389.5786437988281, "logps/rejected": -453.15240478515625, "loss": 0.5231, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5261350870132446, "rewards/margins": 0.7890563607215881, "rewards/rejected": -2.3151915073394775, "step": 6890 }, { "epoch": 0.9, "learning_rate": 1.4191300432190634e-07, "logits/chosen": -1.7426488399505615, "logits/rejected": -1.4778661727905273, "logps/chosen": -446.41387939453125, "logps/rejected": -484.15740966796875, "loss": 0.5621, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5824512243270874, "rewards/margins": 0.6358043551445007, "rewards/rejected": -2.2182555198669434, "step": 6900 }, { "epoch": 0.9, "eval_logits/chosen": 0.8118740916252136, "eval_logits/rejected": 0.8534757494926453, "eval_logps/chosen": -430.86248779296875, "eval_logps/rejected": -484.2228698730469, "eval_loss": 0.5483530759811401, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -1.4556974172592163, "eval_rewards/margins": 0.7478345632553101, "eval_rewards/rejected": -2.2035317420959473, "eval_runtime": 1173.3393, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 6900 }, { "epoch": 0.9, "learning_rate": 1.381439758610284e-07, "logits/chosen": -1.8737863302230835, "logits/rejected": -1.677040696144104, "logps/chosen": -414.41937255859375, "logps/rejected": -454.90057373046875, "loss": 0.5616, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3846615552902222, "rewards/margins": 0.5714194774627686, "rewards/rejected": -1.9560811519622803, "step": 6910 }, { "epoch": 0.91, "learning_rate": 1.3442425123203596e-07, "logits/chosen": -1.9634335041046143, "logits/rejected": -1.827630639076233, "logps/chosen": -404.2421875, "logps/rejected": -489.4110412597656, "loss": 0.4855, "rewards/accuracies": 0.75, "rewards/chosen": -1.3388338088989258, "rewards/margins": 0.8733822107315063, "rewards/rejected": -2.2122159004211426, "step": 6920 }, { "epoch": 0.91, "learning_rate": 1.3075390808431897e-07, "logits/chosen": -1.6054050922393799, "logits/rejected": -1.5290915966033936, "logps/chosen": -398.23614501953125, "logps/rejected": -439.45953369140625, "loss": 0.5452, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4326770305633545, "rewards/margins": 0.7014239430427551, "rewards/rejected": -2.134101152420044, "step": 6930 }, { "epoch": 0.91, "learning_rate": 1.271330230364262e-07, "logits/chosen": -1.8218040466308594, "logits/rejected": -1.748741865158081, "logps/chosen": -415.97271728515625, "logps/rejected": -539.3084716796875, "loss": 0.5837, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.513819932937622, "rewards/margins": 0.7359345555305481, "rewards/rejected": -2.2497544288635254, "step": 6940 }, { "epoch": 0.91, "learning_rate": 1.2356167167446698e-07, "logits/chosen": -1.772926926612854, "logits/rejected": -1.7543052434921265, "logps/chosen": -412.350341796875, "logps/rejected": -494.44891357421875, "loss": 0.5541, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5925939083099365, "rewards/margins": 0.7502508759498596, "rewards/rejected": -2.3428447246551514, "step": 6950 }, { "epoch": 0.91, "learning_rate": 1.2003992855053326e-07, "logits/chosen": -1.7478806972503662, "logits/rejected": -1.59023916721344, "logps/chosen": -392.5653991699219, "logps/rejected": -542.6173706054688, "loss": 0.4976, "rewards/accuracies": 0.75, "rewards/chosen": -1.3845903873443604, "rewards/margins": 1.542232871055603, "rewards/rejected": -2.926823616027832, "step": 6960 }, { "epoch": 0.91, "learning_rate": 1.1656786718114239e-07, "logits/chosen": -1.7298316955566406, "logits/rejected": -1.7008460760116577, "logps/chosen": -412.833984375, "logps/rejected": -471.994384765625, "loss": 0.5261, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4343531131744385, "rewards/margins": 0.7191973328590393, "rewards/rejected": -2.153550386428833, "step": 6970 }, { "epoch": 0.91, "learning_rate": 1.1314556004570487e-07, "logits/chosen": -1.722328782081604, "logits/rejected": -1.6999261379241943, "logps/chosen": -368.1650085449219, "logps/rejected": -455.1087951660156, "loss": 0.593, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3800055980682373, "rewards/margins": 0.6139020323753357, "rewards/rejected": -1.9939076900482178, "step": 6980 }, { "epoch": 0.91, "learning_rate": 1.0977307858500818e-07, "logits/chosen": -1.714484453201294, "logits/rejected": -1.553884506225586, "logps/chosen": -396.49676513671875, "logps/rejected": -444.13494873046875, "loss": 0.4963, "rewards/accuracies": 0.75, "rewards/chosen": -1.282323956489563, "rewards/margins": 0.7836967706680298, "rewards/rejected": -2.0660204887390137, "step": 6990 }, { "epoch": 0.92, "learning_rate": 1.0645049319972789e-07, "logits/chosen": -1.7314668893814087, "logits/rejected": -1.6142488718032837, "logps/chosen": -424.98333740234375, "logps/rejected": -467.7752990722656, "loss": 0.5093, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4310033321380615, "rewards/margins": 0.9052546620368958, "rewards/rejected": -2.3362579345703125, "step": 7000 }, { "epoch": 0.92, "eval_logits/chosen": 0.812759518623352, "eval_logits/rejected": 0.8539248704910278, "eval_logps/chosen": -430.8410949707031, "eval_logps/rejected": -484.165771484375, "eval_loss": 0.5484933257102966, "eval_rewards/accuracies": 0.7095000147819519, "eval_rewards/chosen": -1.4554840326309204, "eval_rewards/margins": 0.7474771738052368, "eval_rewards/rejected": -2.2029612064361572, "eval_runtime": 1172.8269, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 7000 }, { "epoch": 0.92, "learning_rate": 1.0317787324895634e-07, "logits/chosen": -1.8907572031021118, "logits/rejected": -1.7053619623184204, "logps/chosen": -454.3196716308594, "logps/rejected": -511.54364013671875, "loss": 0.4711, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.471142053604126, "rewards/margins": 0.9644231796264648, "rewards/rejected": -2.4355649948120117, "step": 7010 }, { "epoch": 0.92, "learning_rate": 9.995528704875635e-08, "logits/chosen": -1.7494027614593506, "logits/rejected": -1.843583345413208, "logps/chosen": -399.296142578125, "logps/rejected": -481.3427734375, "loss": 0.5744, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.471015214920044, "rewards/margins": 0.6564980745315552, "rewards/rejected": -2.1275134086608887, "step": 7020 }, { "epoch": 0.92, "learning_rate": 9.678280187073452e-08, "logits/chosen": -1.6823720932006836, "logits/rejected": -1.6337709426879883, "logps/chosen": -429.0879821777344, "logps/rejected": -483.7962951660156, "loss": 0.4381, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2382405996322632, "rewards/margins": 1.0622050762176514, "rewards/rejected": -2.300445795059204, "step": 7030 }, { "epoch": 0.92, "learning_rate": 9.366048394063549e-08, "logits/chosen": -1.9416471719741821, "logits/rejected": -1.8561681509017944, "logps/chosen": -417.84527587890625, "logps/rejected": -503.8096618652344, "loss": 0.5097, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3203117847442627, "rewards/margins": 0.8082769513130188, "rewards/rejected": -2.1285886764526367, "step": 7040 }, { "epoch": 0.92, "learning_rate": 9.058839843696237e-08, "logits/chosen": -1.8911237716674805, "logits/rejected": -1.7695062160491943, "logps/chosen": -437.6932067871094, "logps/rejected": -481.0242614746094, "loss": 0.5439, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4225181341171265, "rewards/margins": 0.7539501190185547, "rewards/rejected": -2.1764683723449707, "step": 7050 }, { "epoch": 0.92, "learning_rate": 8.756660948961299e-08, "logits/chosen": -1.851973295211792, "logits/rejected": -1.8292022943496704, "logps/chosen": -406.18634033203125, "logps/rejected": -482.53515625, "loss": 0.5743, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4888880252838135, "rewards/margins": 0.6048755049705505, "rewards/rejected": -2.093763589859009, "step": 7060 }, { "epoch": 0.93, "learning_rate": 8.459518017854412e-08, "logits/chosen": -1.8769149780273438, "logits/rejected": -1.762813925743103, "logps/chosen": -420.82550048828125, "logps/rejected": -439.24658203125, "loss": 0.6129, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.437957525253296, "rewards/margins": 0.41024312376976013, "rewards/rejected": -1.8482005596160889, "step": 7070 }, { "epoch": 0.93, "learning_rate": 8.167417253245213e-08, "logits/chosen": -1.799330711364746, "logits/rejected": -1.6416656970977783, "logps/chosen": -401.96075439453125, "logps/rejected": -454.5691833496094, "loss": 0.494, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3122749328613281, "rewards/margins": 0.7804873585700989, "rewards/rejected": -2.0927624702453613, "step": 7080 }, { "epoch": 0.93, "learning_rate": 7.880364752747948e-08, "logits/chosen": -1.7926000356674194, "logits/rejected": -1.7163807153701782, "logps/chosen": -416.36773681640625, "logps/rejected": -473.047607421875, "loss": 0.5961, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6541506052017212, "rewards/margins": 0.5921451449394226, "rewards/rejected": -2.246295690536499, "step": 7090 }, { "epoch": 0.93, "learning_rate": 7.598366508594245e-08, "logits/chosen": -1.8010107278823853, "logits/rejected": -1.767491102218628, "logps/chosen": -462.71319580078125, "logps/rejected": -527.4778442382812, "loss": 0.4665, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4491039514541626, "rewards/margins": 0.9631915092468262, "rewards/rejected": -2.4122955799102783, "step": 7100 }, { "epoch": 0.93, "eval_logits/chosen": 0.8128459453582764, "eval_logits/rejected": 0.8539407253265381, "eval_logps/chosen": -430.9034729003906, "eval_logps/rejected": -484.2508850097656, "eval_loss": 0.5485362410545349, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -1.4561071395874023, "eval_rewards/margins": 0.7477050423622131, "eval_rewards/rejected": -2.203812599182129, "eval_runtime": 1172.6821, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 7100 }, { "epoch": 0.93, "learning_rate": 7.32142840750788e-08, "logits/chosen": -1.8629350662231445, "logits/rejected": -1.6973631381988525, "logps/chosen": -448.58514404296875, "logps/rejected": -496.39508056640625, "loss": 0.4596, "rewards/accuracies": 0.75, "rewards/chosen": -1.2828123569488525, "rewards/margins": 0.9491091966629028, "rewards/rejected": -2.231921672821045, "step": 7110 }, { "epoch": 0.93, "learning_rate": 7.049556230581872e-08, "logits/chosen": -1.7679073810577393, "logits/rejected": -1.6131082773208618, "logps/chosen": -414.78521728515625, "logps/rejected": -452.10125732421875, "loss": 0.6113, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.5814316272735596, "rewards/margins": 0.5837645530700684, "rewards/rejected": -2.165196180343628, "step": 7120 }, { "epoch": 0.93, "learning_rate": 6.782755653158085e-08, "logits/chosen": -1.8873008489608765, "logits/rejected": -1.805215835571289, "logps/chosen": -429.1568298339844, "logps/rejected": -474.21484375, "loss": 0.5254, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3898502588272095, "rewards/margins": 0.7227694988250732, "rewards/rejected": -2.1126198768615723, "step": 7130 }, { "epoch": 0.93, "learning_rate": 6.521032244708375e-08, "logits/chosen": -1.6917879581451416, "logits/rejected": -1.6695703268051147, "logps/chosen": -420.2552795410156, "logps/rejected": -478.6356506347656, "loss": 0.6021, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4334557056427002, "rewards/margins": 0.6401674151420593, "rewards/rejected": -2.0736231803894043, "step": 7140 }, { "epoch": 0.94, "learning_rate": 6.264391468718628e-08, "logits/chosen": -1.921420693397522, "logits/rejected": -1.8295361995697021, "logps/chosen": -416.1781311035156, "logps/rejected": -477.92254638671875, "loss": 0.5117, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2795865535736084, "rewards/margins": 0.805153489112854, "rewards/rejected": -2.084740161895752, "step": 7150 }, { "epoch": 0.94, "learning_rate": 6.012838682574462e-08, "logits/chosen": -1.92929208278656, "logits/rejected": -1.8011394739151, "logps/chosen": -431.6207580566406, "logps/rejected": -437.07916259765625, "loss": 0.5315, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4483994245529175, "rewards/margins": 0.6417319178581238, "rewards/rejected": -2.0901312828063965, "step": 7160 }, { "epoch": 0.94, "learning_rate": 5.766379137449624e-08, "logits/chosen": -1.821616768836975, "logits/rejected": -1.80642831325531, "logps/chosen": -387.9848327636719, "logps/rejected": -483.4175720214844, "loss": 0.5058, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.400189757347107, "rewards/margins": 0.7653945684432983, "rewards/rejected": -2.1655843257904053, "step": 7170 }, { "epoch": 0.94, "learning_rate": 5.525017978196295e-08, "logits/chosen": -2.0324506759643555, "logits/rejected": -1.8615728616714478, "logps/chosen": -441.63037109375, "logps/rejected": -474.91741943359375, "loss": 0.5405, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4084820747375488, "rewards/margins": 0.7742080688476562, "rewards/rejected": -2.182690143585205, "step": 7180 }, { "epoch": 0.94, "learning_rate": 5.288760243237545e-08, "logits/chosen": -1.8969142436981201, "logits/rejected": -1.8226633071899414, "logps/chosen": -476.3313903808594, "logps/rejected": -500.29693603515625, "loss": 0.5595, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5552139282226562, "rewards/margins": 0.7276593446731567, "rewards/rejected": -2.2828731536865234, "step": 7190 }, { "epoch": 0.94, "learning_rate": 5.0576108644623536e-08, "logits/chosen": -1.7604423761367798, "logits/rejected": -1.576672911643982, "logps/chosen": -485.7357482910156, "logps/rejected": -487.3819274902344, "loss": 0.6276, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6866333484649658, "rewards/margins": 0.6542980074882507, "rewards/rejected": -2.3409314155578613, "step": 7200 }, { "epoch": 0.94, "eval_logits/chosen": 0.8129886388778687, "eval_logits/rejected": 0.8539232611656189, "eval_logps/chosen": -430.8554382324219, "eval_logps/rejected": -484.1955261230469, "eval_loss": 0.548583447933197, "eval_rewards/accuracies": 0.7110000252723694, "eval_rewards/chosen": -1.4556269645690918, "eval_rewards/margins": 0.7476316690444946, "eval_rewards/rejected": -2.203258752822876, "eval_runtime": 1173.429, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 7200 }, { "epoch": 0.94, "learning_rate": 4.8315746671225296e-08, "logits/chosen": -1.8429571390151978, "logits/rejected": -1.6994577646255493, "logps/chosen": -457.65594482421875, "logps/rejected": -516.1876220703125, "loss": 0.4917, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.343750238418579, "rewards/margins": 0.8745664358139038, "rewards/rejected": -2.2183165550231934, "step": 7210 }, { "epoch": 0.94, "learning_rate": 4.6106563697320695e-08, "logits/chosen": -1.7050548791885376, "logits/rejected": -1.5491039752960205, "logps/chosen": -397.58441162109375, "logps/rejected": -462.5196228027344, "loss": 0.5121, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4449985027313232, "rewards/margins": 0.9326278567314148, "rewards/rejected": -2.377626419067383, "step": 7220 }, { "epoch": 0.95, "learning_rate": 4.394860583968624e-08, "logits/chosen": -1.7307459115982056, "logits/rejected": -1.8069360256195068, "logps/chosen": -362.82421875, "logps/rejected": -458.86895751953125, "loss": 0.5635, "rewards/accuracies": 0.625, "rewards/chosen": -1.4238734245300293, "rewards/margins": 0.7079671621322632, "rewards/rejected": -2.131840467453003, "step": 7230 }, { "epoch": 0.95, "learning_rate": 4.1841918145771874e-08, "logits/chosen": -1.8039257526397705, "logits/rejected": -1.7260109186172485, "logps/chosen": -419.90777587890625, "logps/rejected": -495.89288330078125, "loss": 0.4669, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2997856140136719, "rewards/margins": 0.9247959852218628, "rewards/rejected": -2.2245819568634033, "step": 7240 }, { "epoch": 0.95, "learning_rate": 3.978654459276088e-08, "logits/chosen": -1.9709882736206055, "logits/rejected": -1.841698408126831, "logps/chosen": -469.30804443359375, "logps/rejected": -484.8894958496094, "loss": 0.5384, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3955186605453491, "rewards/margins": 0.803984522819519, "rewards/rejected": -2.1995034217834473, "step": 7250 }, { "epoch": 0.95, "learning_rate": 3.778252808665284e-08, "logits/chosen": -2.043179512023926, "logits/rejected": -1.958134651184082, "logps/chosen": -479.24005126953125, "logps/rejected": -458.7781677246094, "loss": 0.5527, "rewards/accuracies": 0.75, "rewards/chosen": -1.469952940940857, "rewards/margins": 0.5939545631408691, "rewards/rejected": -2.0639073848724365, "step": 7260 }, { "epoch": 0.95, "learning_rate": 3.5829910461366023e-08, "logits/chosen": -1.721975326538086, "logits/rejected": -1.708946943283081, "logps/chosen": -403.50286865234375, "logps/rejected": -470.90777587890625, "loss": 0.6299, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4790034294128418, "rewards/margins": 0.7231262922286987, "rewards/rejected": -2.20212984085083, "step": 7270 }, { "epoch": 0.95, "learning_rate": 3.39287324778656e-08, "logits/chosen": -1.9802652597427368, "logits/rejected": -1.9110679626464844, "logps/chosen": -495.3759765625, "logps/rejected": -515.5206909179688, "loss": 0.6205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5836551189422607, "rewards/margins": 0.6330682039260864, "rewards/rejected": -2.216723680496216, "step": 7280 }, { "epoch": 0.95, "learning_rate": 3.207903382331262e-08, "logits/chosen": -1.7459537982940674, "logits/rejected": -1.6916313171386719, "logps/chosen": -444.4571228027344, "logps/rejected": -485.93829345703125, "loss": 0.5289, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3292511701583862, "rewards/margins": 0.8171280026435852, "rewards/rejected": -2.146378993988037, "step": 7290 }, { "epoch": 0.96, "learning_rate": 3.028085311023443e-08, "logits/chosen": -1.7082124948501587, "logits/rejected": -1.6282784938812256, "logps/chosen": -425.0762634277344, "logps/rejected": -481.3499450683594, "loss": 0.457, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.2966469526290894, "rewards/margins": 0.932367205619812, "rewards/rejected": -2.2290141582489014, "step": 7300 }, { "epoch": 0.96, "eval_logits/chosen": 0.8128509521484375, "eval_logits/rejected": 0.8539592027664185, "eval_logps/chosen": -430.7640075683594, "eval_logps/rejected": -484.09423828125, "eval_loss": 0.5485822558403015, "eval_rewards/accuracies": 0.7110000252723694, "eval_rewards/chosen": -1.4547128677368164, "eval_rewards/margins": 0.7475329637527466, "eval_rewards/rejected": -2.2022459506988525, "eval_runtime": 1172.8041, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 7300 }, { "epoch": 0.96, "learning_rate": 2.8534227875720576e-08, "logits/chosen": -1.932652473449707, "logits/rejected": -1.9709556102752686, "logps/chosen": -431.27130126953125, "logps/rejected": -491.1851501464844, "loss": 0.5513, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4966964721679688, "rewards/margins": 0.7334243655204773, "rewards/rejected": -2.2301206588745117, "step": 7310 }, { "epoch": 0.96, "learning_rate": 2.683919458063705e-08, "logits/chosen": -1.788451910018921, "logits/rejected": -1.520784854888916, "logps/chosen": -363.04632568359375, "logps/rejected": -387.00390625, "loss": 0.559, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3995718955993652, "rewards/margins": 0.6893042325973511, "rewards/rejected": -2.088876247406006, "step": 7320 }, { "epoch": 0.96, "learning_rate": 2.5195788608866345e-08, "logits/chosen": -1.8044917583465576, "logits/rejected": -1.6800788640975952, "logps/chosen": -483.80987548828125, "logps/rejected": -498.57293701171875, "loss": 0.5478, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3945772647857666, "rewards/margins": 0.8723125457763672, "rewards/rejected": -2.2668895721435547, "step": 7330 }, { "epoch": 0.96, "learning_rate": 2.3604044266569426e-08, "logits/chosen": -1.8498961925506592, "logits/rejected": -1.608460783958435, "logps/chosen": -443.71759033203125, "logps/rejected": -481.8394470214844, "loss": 0.5601, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5067075490951538, "rewards/margins": 0.72029709815979, "rewards/rejected": -2.2270047664642334, "step": 7340 }, { "epoch": 0.96, "learning_rate": 2.2063994781468256e-08, "logits/chosen": -1.7632825374603271, "logits/rejected": -1.7024528980255127, "logps/chosen": -417.3409118652344, "logps/rejected": -460.17144775390625, "loss": 0.5216, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.375964641571045, "rewards/margins": 0.7951169610023499, "rewards/rejected": -2.171082019805908, "step": 7350 }, { "epoch": 0.96, "learning_rate": 2.057567230215246e-08, "logits/chosen": -1.9352567195892334, "logits/rejected": -1.86801016330719, "logps/chosen": -435.8123474121094, "logps/rejected": -501.49761962890625, "loss": 0.5404, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5306956768035889, "rewards/margins": 0.6156077980995178, "rewards/rejected": -2.146303415298462, "step": 7360 }, { "epoch": 0.96, "learning_rate": 1.9139107897409303e-08, "logits/chosen": -1.764081597328186, "logits/rejected": -1.732365369796753, "logps/chosen": -438.1732482910156, "logps/rejected": -472.97088623046875, "loss": 0.4651, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.3602381944656372, "rewards/margins": 0.9602410197257996, "rewards/rejected": -2.320479154586792, "step": 7370 }, { "epoch": 0.97, "learning_rate": 1.7754331555573656e-08, "logits/chosen": -1.9736744165420532, "logits/rejected": -1.8503955602645874, "logps/chosen": -452.91314697265625, "logps/rejected": -552.435791015625, "loss": 0.5093, "rewards/accuracies": 0.75, "rewards/chosen": -1.4927455186843872, "rewards/margins": 0.7449467182159424, "rewards/rejected": -2.237692356109619, "step": 7380 }, { "epoch": 0.97, "learning_rate": 1.642137218390294e-08, "logits/chosen": -1.9133977890014648, "logits/rejected": -1.7161099910736084, "logps/chosen": -445.2350158691406, "logps/rejected": -474.0205078125, "loss": 0.5617, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.501450538635254, "rewards/margins": 0.7882502675056458, "rewards/rejected": -2.289700746536255, "step": 7390 }, { "epoch": 0.97, "learning_rate": 1.514025760797344e-08, "logits/chosen": -2.056525707244873, "logits/rejected": -1.8239011764526367, "logps/chosen": -483.4375, "logps/rejected": -487.8236389160156, "loss": 0.5436, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.394179105758667, "rewards/margins": 0.7199534773826599, "rewards/rejected": -2.1141324043273926, "step": 7400 }, { "epoch": 0.97, "eval_logits/chosen": 0.8129631280899048, "eval_logits/rejected": 0.8540742993354797, "eval_logps/chosen": -430.8633728027344, "eval_logps/rejected": -484.2209167480469, "eval_loss": 0.5485607981681824, "eval_rewards/accuracies": 0.7129999995231628, "eval_rewards/chosen": -1.4557064771652222, "eval_rewards/margins": 0.7478062510490417, "eval_rewards/rejected": -2.203512668609619, "eval_runtime": 1173.1991, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 7400 }, { "epoch": 0.97, "learning_rate": 1.3911014571098835e-08, "logits/chosen": -1.8888267278671265, "logits/rejected": -1.8209940195083618, "logps/chosen": -410.78131103515625, "logps/rejected": -490.9281311035156, "loss": 0.5512, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4821470975875854, "rewards/margins": 0.6978796124458313, "rewards/rejected": -2.1800267696380615, "step": 7410 }, { "epoch": 0.97, "learning_rate": 1.2733668733773685e-08, "logits/chosen": -1.8667023181915283, "logits/rejected": -1.763035774230957, "logps/chosen": -422.2196350097656, "logps/rejected": -472.96563720703125, "loss": 0.4785, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3575246334075928, "rewards/margins": 0.8752638697624207, "rewards/rejected": -2.232788562774658, "step": 7420 }, { "epoch": 0.97, "learning_rate": 1.160824467313526e-08, "logits/chosen": -1.9163471460342407, "logits/rejected": -1.8418922424316406, "logps/chosen": -469.48895263671875, "logps/rejected": -527.531005859375, "loss": 0.5388, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4518873691558838, "rewards/margins": 0.7550562024116516, "rewards/rejected": -2.2069435119628906, "step": 7430 }, { "epoch": 0.97, "learning_rate": 1.0534765882453113e-08, "logits/chosen": -1.9798256158828735, "logits/rejected": -1.8704360723495483, "logps/chosen": -416.0478515625, "logps/rejected": -476.11407470703125, "loss": 0.5321, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3923460245132446, "rewards/margins": 0.7569142580032349, "rewards/rejected": -2.1492602825164795, "step": 7440 }, { "epoch": 0.97, "learning_rate": 9.513254770636138e-09, "logits/chosen": -1.9356752634048462, "logits/rejected": -1.822356939315796, "logps/chosen": -485.7112731933594, "logps/rejected": -521.7303466796875, "loss": 0.6574, "rewards/accuracies": 0.625, "rewards/chosen": -1.7006072998046875, "rewards/margins": 0.49051332473754883, "rewards/rejected": -2.1911206245422363, "step": 7450 }, { "epoch": 0.98, "learning_rate": 8.543732661767113e-09, "logits/chosen": -1.7195825576782227, "logits/rejected": -1.857712984085083, "logps/chosen": -450.140380859375, "logps/rejected": -496.9141540527344, "loss": 0.6259, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.4755804538726807, "rewards/margins": 0.47876566648483276, "rewards/rejected": -1.9543460607528687, "step": 7460 }, { "epoch": 0.98, "learning_rate": 7.626219794655553e-09, "logits/chosen": -1.7965953350067139, "logits/rejected": -1.7672102451324463, "logps/chosen": -396.3360290527344, "logps/rejected": -501.77081298828125, "loss": 0.4944, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.3167970180511475, "rewards/margins": 0.9213516116142273, "rewards/rejected": -2.2381484508514404, "step": 7470 }, { "epoch": 0.98, "learning_rate": 6.7607353224163896e-09, "logits/chosen": -1.8362220525741577, "logits/rejected": -1.7684568166732788, "logps/chosen": -438.6917419433594, "logps/rejected": -450.4482421875, "loss": 0.5987, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4603219032287598, "rewards/margins": 0.5805224776268005, "rewards/rejected": -2.040844440460205, "step": 7480 }, { "epoch": 0.98, "learning_rate": 5.947297312070554e-09, "logits/chosen": -1.8304193019866943, "logits/rejected": -1.589911699295044, "logps/chosen": -460.3739318847656, "logps/rejected": -479.26934814453125, "loss": 0.5077, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4384151697158813, "rewards/margins": 0.8745136260986328, "rewards/rejected": -2.3129289150238037, "step": 7490 }, { "epoch": 0.98, "learning_rate": 5.185922744166128e-09, "logits/chosen": -1.8723042011260986, "logits/rejected": -1.8588206768035889, "logps/chosen": -432.6783142089844, "logps/rejected": -502.30999755859375, "loss": 0.4801, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3651726245880127, "rewards/margins": 0.8920621871948242, "rewards/rejected": -2.257235050201416, "step": 7500 }, { "epoch": 0.98, "eval_logits/chosen": 0.8125240206718445, "eval_logits/rejected": 0.8537938594818115, "eval_logps/chosen": -430.8403625488281, "eval_logps/rejected": -484.1994323730469, "eval_loss": 0.5486313700675964, "eval_rewards/accuracies": 0.7124999761581421, "eval_rewards/chosen": -1.4554764032363892, "eval_rewards/margins": 0.747821569442749, "eval_rewards/rejected": -2.2032980918884277, "eval_runtime": 1172.9523, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 7500 }, { "epoch": 0.98, "learning_rate": 4.476627512425558e-09, "logits/chosen": -1.7662780284881592, "logits/rejected": -1.7995166778564453, "logps/chosen": -437.27130126953125, "logps/rejected": -486.76220703125, "loss": 0.6005, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5092612504959106, "rewards/margins": 0.5584593415260315, "rewards/rejected": -2.067720651626587, "step": 7510 }, { "epoch": 0.98, "learning_rate": 3.819426423412875e-09, "logits/chosen": -1.9472389221191406, "logits/rejected": -1.870298147201538, "logps/chosen": -446.26641845703125, "logps/rejected": -475.03253173828125, "loss": 0.6559, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4711787700653076, "rewards/margins": 0.6127547025680542, "rewards/rejected": -2.0839333534240723, "step": 7520 }, { "epoch": 0.99, "learning_rate": 3.2143331962256053e-09, "logits/chosen": -1.8179538249969482, "logits/rejected": -1.8011947870254517, "logps/chosen": -432.440185546875, "logps/rejected": -500.1532287597656, "loss": 0.5916, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3934599161148071, "rewards/margins": 0.6785745024681091, "rewards/rejected": -2.0720343589782715, "step": 7530 }, { "epoch": 0.99, "learning_rate": 2.6613604622066635e-09, "logits/chosen": -1.9298431873321533, "logits/rejected": -1.9188578128814697, "logps/chosen": -392.70599365234375, "logps/rejected": -483.16143798828125, "loss": 0.5142, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1843836307525635, "rewards/margins": 0.7742894887924194, "rewards/rejected": -1.9586732387542725, "step": 7540 }, { "epoch": 0.99, "learning_rate": 2.1605197646826228e-09, "logits/chosen": -1.72048020362854, "logits/rejected": -1.58535635471344, "logps/chosen": -393.91241455078125, "logps/rejected": -453.22698974609375, "loss": 0.5001, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3305981159210205, "rewards/margins": 0.8685491681098938, "rewards/rejected": -2.1991469860076904, "step": 7550 }, { "epoch": 0.99, "learning_rate": 1.711821558721405e-09, "logits/chosen": -1.9033464193344116, "logits/rejected": -1.7436285018920898, "logps/chosen": -467.17437744140625, "logps/rejected": -472.579833984375, "loss": 0.5372, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4799602031707764, "rewards/margins": 0.6517667770385742, "rewards/rejected": -2.1317269802093506, "step": 7560 }, { "epoch": 0.99, "learning_rate": 1.3152752109149569e-09, "logits/chosen": -1.8958499431610107, "logits/rejected": -1.8162498474121094, "logps/chosen": -447.59515380859375, "logps/rejected": -489.57025146484375, "loss": 0.5856, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.484851360321045, "rewards/margins": 0.6364052891731262, "rewards/rejected": -2.1212563514709473, "step": 7570 }, { "epoch": 0.99, "learning_rate": 9.708889991830173e-10, "logits/chosen": -1.8766504526138306, "logits/rejected": -1.6922956705093384, "logps/chosen": -444.44268798828125, "logps/rejected": -451.81640625, "loss": 0.5367, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5096843242645264, "rewards/margins": 0.752662181854248, "rewards/rejected": -2.2623465061187744, "step": 7580 }, { "epoch": 0.99, "learning_rate": 6.786701125999218e-10, "logits/chosen": -1.6351743936538696, "logits/rejected": -1.620650053024292, "logps/chosen": -442.44403076171875, "logps/rejected": -487.6390686035156, "loss": 0.6604, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.7403841018676758, "rewards/margins": 0.5352242588996887, "rewards/rejected": -2.275608539581299, "step": 7590 }, { "epoch": 0.99, "learning_rate": 4.3862465124638873e-10, "logits/chosen": -1.736732840538025, "logits/rejected": -1.741463303565979, "logps/chosen": -422.96258544921875, "logps/rejected": -466.79266357421875, "loss": 0.5922, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4744383096694946, "rewards/margins": 0.6053553819656372, "rewards/rejected": -2.0797934532165527, "step": 7600 }, { "epoch": 0.99, "eval_logits/chosen": 0.8123713135719299, "eval_logits/rejected": 0.853736162185669, "eval_logps/chosen": -430.84136962890625, "eval_logps/rejected": -484.1860046386719, "eval_loss": 0.5485937595367432, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -1.4554866552352905, "eval_rewards/margins": 0.7476763129234314, "eval_rewards/rejected": -2.2031631469726562, "eval_runtime": 1173.62, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 7600 }, { "epoch": 1.0, "learning_rate": 2.507576260799005e-10, "logits/chosen": -2.041153907775879, "logits/rejected": -1.9447238445281982, "logps/chosen": -438.7113342285156, "logps/rejected": -516.343505859375, "loss": 0.4997, "rewards/accuracies": 0.75, "rewards/chosen": -1.3306516408920288, "rewards/margins": 0.8777866363525391, "rewards/rejected": -2.2084383964538574, "step": 7610 }, { "epoch": 1.0, "learning_rate": 1.1507295883145253e-10, "logits/chosen": -1.8992458581924438, "logits/rejected": -1.8089666366577148, "logps/chosen": -424.86865234375, "logps/rejected": -513.1679077148438, "loss": 0.4914, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2947027683258057, "rewards/margins": 0.8707477450370789, "rewards/rejected": -2.16545033454895, "step": 7620 }, { "epoch": 1.0, "learning_rate": 3.1573481923952156e-11, "logits/chosen": -1.8514108657836914, "logits/rejected": -1.7576173543930054, "logps/chosen": -459.4273986816406, "logps/rejected": -517.269287109375, "loss": 0.5021, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.298668622970581, "rewards/margins": 0.9450668096542358, "rewards/rejected": -2.2437355518341064, "step": 7630 }, { "epoch": 1.0, "learning_rate": 2.609384119889313e-13, "logits/chosen": -1.6424258947372437, "logits/rejected": -1.6611589193344116, "logps/chosen": -414.85186767578125, "logps/rejected": -503.81170654296875, "loss": 0.5105, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3835017681121826, "rewards/margins": 0.8242697715759277, "rewards/rejected": -2.2077715396881104, "step": 7640 }, { "epoch": 1.0, "step": 7641, "total_flos": 0.0, "train_loss": 0.5756704107174588, "train_runtime": 163576.2451, "train_samples_per_second": 0.374, "train_steps_per_second": 0.047 } ], "logging_steps": 10, "max_steps": 7641, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }