{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998854993048172, "eval_steps": 100, "global_step": 7641, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.535947712418301e-09, "logits/chosen": -2.7937374114990234, "logits/rejected": -2.696331262588501, "logps/chosen": -219.9345245361328, "logps/rejected": -238.54010009765625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.535947712418302e-08, "logits/chosen": -2.686509370803833, "logits/rejected": -2.617267370223999, "logps/chosen": -272.2289123535156, "logps/rejected": -247.8722381591797, "loss": 0.6929, "rewards/accuracies": 0.2222222238779068, "rewards/chosen": 0.00035665329778566957, "rewards/margins": 0.0005560062127187848, "rewards/rejected": -0.00019935290038120002, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3071895424836603e-07, "logits/chosen": -2.6644845008850098, "logits/rejected": -2.6715245246887207, "logps/chosen": -264.3231201171875, "logps/rejected": -269.95965576171875, "loss": 0.6934, "rewards/accuracies": 0.4375, "rewards/chosen": 2.1979305529384874e-05, "rewards/margins": -0.0005006279679946601, "rewards/rejected": 0.0005226072971709073, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9607843137254904e-07, "logits/chosen": -2.709402084350586, "logits/rejected": -2.5456981658935547, "logps/chosen": -235.95193481445312, "logps/rejected": -211.9298095703125, "loss": 0.6932, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 9.268704889109358e-05, "rewards/margins": -0.00016139161016326398, "rewards/rejected": 0.0002540787390898913, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.6143790849673207e-07, "logits/chosen": -2.7728488445281982, "logits/rejected": -2.588787078857422, "logps/chosen": -265.30828857421875, "logps/rejected": -244.3712615966797, "loss": 0.6932, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.00027978085563518107, "rewards/margins": -3.424427268328145e-05, "rewards/rejected": -0.0002455365320201963, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.267973856209151e-07, "logits/chosen": -2.763617515563965, "logits/rejected": -2.7047462463378906, "logps/chosen": -226.91055297851562, "logps/rejected": -226.02536010742188, "loss": 0.6932, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -5.5276301281992346e-05, "rewards/margins": -6.809332262491807e-05, "rewards/rejected": 1.2817062270187307e-05, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.921568627450981e-07, "logits/chosen": -2.7146997451782227, "logits/rejected": -2.6671783924102783, "logps/chosen": -267.3989562988281, "logps/rejected": -241.76480102539062, "loss": 0.6935, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.0004219438415020704, "rewards/margins": -0.0006447834894061089, "rewards/rejected": 0.00022283961880020797, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.5751633986928105e-07, "logits/chosen": -2.7232730388641357, "logits/rejected": -2.6747944355010986, "logps/chosen": -245.4066619873047, "logps/rejected": -204.679443359375, "loss": 0.693, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.000368702836567536, "rewards/margins": 0.0002510659396648407, "rewards/rejected": 0.00011763688962673768, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.228758169934641e-07, "logits/chosen": -2.68009614944458, "logits/rejected": -2.608882427215576, "logps/chosen": -289.00653076171875, "logps/rejected": -281.58251953125, "loss": 0.6927, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0005034831119701266, "rewards/margins": 0.0008431966416537762, "rewards/rejected": -0.00033971358789131045, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "logits/chosen": -2.799229145050049, "logits/rejected": -2.685251474380493, "logps/chosen": -254.7516632080078, "logps/rejected": -224.9294891357422, "loss": 0.693, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.00010115172335645184, "rewards/margins": 0.0002790264261420816, "rewards/rejected": -0.000380178214982152, "step": 90 }, { "epoch": 0.01, "learning_rate": 6.535947712418302e-07, "logits/chosen": -2.6838011741638184, "logits/rejected": -2.681504726409912, "logps/chosen": -228.4124298095703, "logps/rejected": -235.77685546875, "loss": 0.6934, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.0002754587621893734, "rewards/margins": -0.0004044932429678738, "rewards/rejected": 0.00012903442257083952, "step": 100 }, { "epoch": 0.01, "eval_logits/chosen": -2.526097059249878, "eval_logits/rejected": -2.438264846801758, "eval_logps/chosen": -268.4692077636719, "eval_logps/rejected": -248.5731201171875, "eval_loss": 0.6930866241455078, "eval_rewards/accuracies": 0.5105000138282776, "eval_rewards/chosen": 0.00020805322856176645, "eval_rewards/margins": 0.00012786558363586664, "eval_rewards/rejected": 8.018761582206935e-05, "eval_runtime": 1145.3814, "eval_samples_per_second": 1.746, "eval_steps_per_second": 0.873, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.189542483660131e-07, "logits/chosen": -2.694509506225586, "logits/rejected": -2.6128382682800293, "logps/chosen": -269.1925048828125, "logps/rejected": -250.8614044189453, "loss": 0.693, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.0004071092698723078, "rewards/margins": 0.00034786976175382733, "rewards/rejected": 5.923947173869237e-05, "step": 110 }, { "epoch": 0.02, "learning_rate": 7.843137254901962e-07, "logits/chosen": -2.7306885719299316, "logits/rejected": -2.6021299362182617, "logps/chosen": -287.8159484863281, "logps/rejected": -229.77554321289062, "loss": 0.6927, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0005465588765218854, "rewards/margins": 0.0008666679495945573, "rewards/rejected": -0.0003201091312803328, "step": 120 }, { "epoch": 0.02, "learning_rate": 8.496732026143792e-07, "logits/chosen": -2.693432569503784, "logits/rejected": -2.6983115673065186, "logps/chosen": -251.46865844726562, "logps/rejected": -224.0875244140625, "loss": 0.6926, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0007204865687526762, "rewards/margins": 0.001154972007498145, "rewards/rejected": -0.0004344852641224861, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.150326797385621e-07, "logits/chosen": -2.65818452835083, "logits/rejected": -2.5886805057525635, "logps/chosen": -259.5521545410156, "logps/rejected": -231.7545928955078, "loss": 0.6932, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.00015563381020911038, "rewards/margins": -8.64746980369091e-05, "rewards/rejected": 0.0002421085664536804, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.80392156862745e-07, "logits/chosen": -2.792015314102173, "logits/rejected": -2.6346828937530518, "logps/chosen": -317.06451416015625, "logps/rejected": -283.6150817871094, "loss": 0.6927, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.000999806565232575, "rewards/margins": 0.0009337253868579865, "rewards/rejected": 6.60812875139527e-05, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.0457516339869283e-06, "logits/chosen": -2.8084640502929688, "logits/rejected": -2.663980007171631, "logps/chosen": -247.31594848632812, "logps/rejected": -224.99697875976562, "loss": 0.6936, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0001547201827634126, "rewards/margins": -0.0008144931052811444, "rewards/rejected": 0.0006597728352062404, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.111111111111111e-06, "logits/chosen": -2.684959650039673, "logits/rejected": -2.617687940597534, "logps/chosen": -228.97103881835938, "logps/rejected": -213.688232421875, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0012499226722866297, "rewards/margins": 0.0002835305640473962, "rewards/rejected": 0.0009663921082392335, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -2.8306772708892822, "logits/rejected": -2.568779468536377, "logps/chosen": -329.3360900878906, "logps/rejected": -258.120361328125, "loss": 0.6928, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.0013785558985546231, "rewards/margins": 0.0007068994455039501, "rewards/rejected": 0.000671656453050673, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.2418300653594772e-06, "logits/chosen": -2.6136114597320557, "logits/rejected": -2.5621001720428467, "logps/chosen": -249.232421875, "logps/rejected": -210.09970092773438, "loss": 0.6922, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.002042675856500864, "rewards/margins": 0.0018692022422328591, "rewards/rejected": 0.00017347374523524195, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.3071895424836604e-06, "logits/chosen": -2.7648887634277344, "logits/rejected": -2.6305103302001953, "logps/chosen": -245.13864135742188, "logps/rejected": -249.7960968017578, "loss": 0.6924, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0012468935456126928, "rewards/margins": 0.0015361621044576168, "rewards/rejected": -0.0002892684715334326, "step": 200 }, { "epoch": 0.03, "eval_logits/chosen": -2.5247368812561035, "eval_logits/rejected": -2.436842918395996, "eval_logps/chosen": -268.3450927734375, "eval_logps/rejected": -248.5510711669922, "eval_loss": 0.6925778388977051, "eval_rewards/accuracies": 0.5605000257492065, "eval_rewards/chosen": 0.0014493772760033607, "eval_rewards/margins": 0.001148571027442813, "eval_rewards/rejected": 0.000300806452287361, "eval_runtime": 1160.8989, "eval_samples_per_second": 1.723, "eval_steps_per_second": 0.861, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.3725490196078434e-06, "logits/chosen": -2.7637641429901123, "logits/rejected": -2.705857515335083, "logps/chosen": -267.3808898925781, "logps/rejected": -244.5528564453125, "loss": 0.6921, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.00269283726811409, "rewards/margins": 0.0021984183695167303, "rewards/rejected": 0.0004944190150126815, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.4379084967320261e-06, "logits/chosen": -2.667719602584839, "logits/rejected": -2.578129291534424, "logps/chosen": -267.173095703125, "logps/rejected": -245.6712646484375, "loss": 0.692, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.0021157357841730118, "rewards/margins": 0.0022508525289595127, "rewards/rejected": -0.0001351167302345857, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.5032679738562091e-06, "logits/chosen": -2.6373825073242188, "logits/rejected": -2.600693941116333, "logps/chosen": -218.6166534423828, "logps/rejected": -262.50567626953125, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0025270141195505857, "rewards/margins": 0.0020475280471146107, "rewards/rejected": 0.0004794862470589578, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.5686274509803923e-06, "logits/chosen": -2.6723687648773193, "logits/rejected": -2.6017861366271973, "logps/chosen": -234.77505493164062, "logps/rejected": -253.36123657226562, "loss": 0.692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.002189940307289362, "rewards/margins": 0.0022566465195268393, "rewards/rejected": -6.670653237961233e-05, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.6339869281045753e-06, "logits/chosen": -2.7224369049072266, "logits/rejected": -2.5702614784240723, "logps/chosen": -301.28271484375, "logps/rejected": -258.6485900878906, "loss": 0.6917, "rewards/accuracies": 0.5625, "rewards/chosen": 0.003571895882487297, "rewards/margins": 0.0029362873174250126, "rewards/rejected": 0.0006356079829856753, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.6993464052287585e-06, "logits/chosen": -2.75251841545105, "logits/rejected": -2.6484122276306152, "logps/chosen": -279.2475280761719, "logps/rejected": -247.06002807617188, "loss": 0.6915, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.004341104533523321, "rewards/margins": 0.0033817340154200792, "rewards/rejected": 0.0009593702852725983, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -2.6575112342834473, "logits/rejected": -2.5978779792785645, "logps/chosen": -235.12271118164062, "logps/rejected": -213.41455078125, "loss": 0.6907, "rewards/accuracies": 0.6875, "rewards/chosen": 0.004421571735292673, "rewards/margins": 0.00482561532407999, "rewards/rejected": -0.00040404353057965636, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.8300653594771242e-06, "logits/chosen": -2.789543867111206, "logits/rejected": -2.6499438285827637, "logps/chosen": -304.09271240234375, "logps/rejected": -272.1944274902344, "loss": 0.6908, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.006846042815595865, "rewards/margins": 0.004689323715865612, "rewards/rejected": 0.0021567184012383223, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.8954248366013072e-06, "logits/chosen": -2.6990113258361816, "logits/rejected": -2.6313693523406982, "logps/chosen": -274.0718688964844, "logps/rejected": -256.3681335449219, "loss": 0.6911, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.008100202307105064, "rewards/margins": 0.0040775686502456665, "rewards/rejected": 0.004022633656859398, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.96078431372549e-06, "logits/chosen": -2.8378288745880127, "logits/rejected": -2.6756560802459717, "logps/chosen": -286.62457275390625, "logps/rejected": -235.5787811279297, "loss": 0.691, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.007958942092955112, "rewards/margins": 0.004316599573940039, "rewards/rejected": 0.00364234228618443, "step": 300 }, { "epoch": 0.04, "eval_logits/chosen": -2.5252809524536133, "eval_logits/rejected": -2.4377505779266357, "eval_logps/chosen": -267.5838928222656, "eval_logps/rejected": -248.17530822753906, "eval_loss": 0.6906724572181702, "eval_rewards/accuracies": 0.6439999938011169, "eval_rewards/chosen": 0.009060990996658802, "eval_rewards/margins": 0.005002738442271948, "eval_rewards/rejected": 0.004058253485709429, "eval_runtime": 1152.1824, "eval_samples_per_second": 1.736, "eval_steps_per_second": 0.868, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.0261437908496734e-06, "logits/chosen": -2.657287120819092, "logits/rejected": -2.624457836151123, "logps/chosen": -276.63470458984375, "logps/rejected": -271.01629638671875, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": 0.008446435444056988, "rewards/margins": 0.004816326312720776, "rewards/rejected": 0.003630108432844281, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.0915032679738565e-06, "logits/chosen": -2.745816707611084, "logits/rejected": -2.5970892906188965, "logps/chosen": -250.56851196289062, "logps/rejected": -241.47689819335938, "loss": 0.6909, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.009846082888543606, "rewards/margins": 0.004527992103248835, "rewards/rejected": 0.005318091716617346, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.1568627450980393e-06, "logits/chosen": -2.8040456771850586, "logits/rejected": -2.6397745609283447, "logps/chosen": -284.6964416503906, "logps/rejected": -238.743408203125, "loss": 0.6906, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01216509472578764, "rewards/margins": 0.005259203724563122, "rewards/rejected": 0.006905891001224518, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.222222222222222e-06, "logits/chosen": -2.731226682662964, "logits/rejected": -2.5713067054748535, "logps/chosen": -256.9880676269531, "logps/rejected": -214.75692749023438, "loss": 0.6895, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.013420146889984608, "rewards/margins": 0.007420173846185207, "rewards/rejected": 0.005999973509460688, "step": 340 }, { "epoch": 0.05, "learning_rate": 2.2875816993464053e-06, "logits/chosen": -2.7683897018432617, "logits/rejected": -2.619065761566162, "logps/chosen": -305.935546875, "logps/rejected": -253.32894897460938, "loss": 0.688, "rewards/accuracies": 0.6875, "rewards/chosen": 0.018984589725732803, "rewards/margins": 0.010493551380932331, "rewards/rejected": 0.008491038344800472, "step": 350 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -2.7598016262054443, "logits/rejected": -2.6604552268981934, "logps/chosen": -247.54931640625, "logps/rejected": -245.3474578857422, "loss": 0.6894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0182194784283638, "rewards/margins": 0.0077258930541574955, "rewards/rejected": 0.010493585839867592, "step": 360 }, { "epoch": 0.05, "learning_rate": 2.4183006535947716e-06, "logits/chosen": -2.6800496578216553, "logits/rejected": -2.684868812561035, "logps/chosen": -250.73703002929688, "logps/rejected": -229.0603485107422, "loss": 0.6881, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.021059587597846985, "rewards/margins": 0.01036703772842884, "rewards/rejected": 0.010692549869418144, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.4836601307189544e-06, "logits/chosen": -2.7361929416656494, "logits/rejected": -2.643367290496826, "logps/chosen": -272.9651794433594, "logps/rejected": -239.2753143310547, "loss": 0.6851, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.025732994079589844, "rewards/margins": 0.01641467772424221, "rewards/rejected": 0.009318319149315357, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.549019607843137e-06, "logits/chosen": -2.7081408500671387, "logits/rejected": -2.5116286277770996, "logps/chosen": -286.8047790527344, "logps/rejected": -231.5174560546875, "loss": 0.6823, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.03527464345097542, "rewards/margins": 0.02219114825129509, "rewards/rejected": 0.013083499856293201, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.6143790849673208e-06, "logits/chosen": -2.7479846477508545, "logits/rejected": -2.771252155303955, "logps/chosen": -272.4174499511719, "logps/rejected": -283.92108154296875, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03528792783617973, "rewards/margins": 0.011549949645996094, "rewards/rejected": 0.02373797819018364, "step": 400 }, { "epoch": 0.05, "eval_logits/chosen": -2.522955894470215, "eval_logits/rejected": -2.4351353645324707, "eval_logps/chosen": -264.435302734375, "eval_logps/rejected": -246.30886840820312, "eval_loss": 0.6845206022262573, "eval_rewards/accuracies": 0.6579999923706055, "eval_rewards/chosen": 0.04054699465632439, "eval_rewards/margins": 0.017824340611696243, "eval_rewards/rejected": 0.022722657769918442, "eval_runtime": 1160.5462, "eval_samples_per_second": 1.723, "eval_steps_per_second": 0.862, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.6797385620915036e-06, "logits/chosen": -2.6627039909362793, "logits/rejected": -2.5389530658721924, "logps/chosen": -244.06640625, "logps/rejected": -203.2464599609375, "loss": 0.6848, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.040567509829998016, "rewards/margins": 0.01748758926987648, "rewards/rejected": 0.023079920560121536, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.7450980392156867e-06, "logits/chosen": -2.6784210205078125, "logits/rejected": -2.6046371459960938, "logps/chosen": -261.45135498046875, "logps/rejected": -262.4620666503906, "loss": 0.6811, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.049992240965366364, "rewards/margins": 0.024862922728061676, "rewards/rejected": 0.025129318237304688, "step": 420 }, { "epoch": 0.06, "learning_rate": 2.8104575163398695e-06, "logits/chosen": -2.731562852859497, "logits/rejected": -2.607314109802246, "logps/chosen": -264.93365478515625, "logps/rejected": -252.49789428710938, "loss": 0.6815, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04952297732234001, "rewards/margins": 0.02442570962011814, "rewards/rejected": 0.02509726583957672, "step": 430 }, { "epoch": 0.06, "learning_rate": 2.8758169934640523e-06, "logits/chosen": -2.7449309825897217, "logits/rejected": -2.7042744159698486, "logps/chosen": -255.37319946289062, "logps/rejected": -230.885009765625, "loss": 0.6847, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.05002352595329285, "rewards/margins": 0.017672471702098846, "rewards/rejected": 0.032351054251194, "step": 440 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.7228033542633057, "logits/rejected": -2.7313358783721924, "logps/chosen": -272.48297119140625, "logps/rejected": -288.769287109375, "loss": 0.684, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.05112846940755844, "rewards/margins": 0.019452670589089394, "rewards/rejected": 0.031675804406404495, "step": 450 }, { "epoch": 0.06, "learning_rate": 3.0065359477124182e-06, "logits/chosen": -2.6720528602600098, "logits/rejected": -2.554985761642456, "logps/chosen": -238.60134887695312, "logps/rejected": -231.50741577148438, "loss": 0.6795, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.06515996903181076, "rewards/margins": 0.028682807460427284, "rewards/rejected": 0.036477167159318924, "step": 460 }, { "epoch": 0.06, "learning_rate": 3.071895424836602e-06, "logits/chosen": -2.6872806549072266, "logits/rejected": -2.650778293609619, "logps/chosen": -258.9559020996094, "logps/rejected": -241.73696899414062, "loss": 0.6749, "rewards/accuracies": 0.625, "rewards/chosen": 0.0744255781173706, "rewards/margins": 0.038194023072719574, "rewards/rejected": 0.03623156249523163, "step": 470 }, { "epoch": 0.06, "learning_rate": 3.1372549019607846e-06, "logits/chosen": -2.6812539100646973, "logits/rejected": -2.6287622451782227, "logps/chosen": -267.0392761230469, "logps/rejected": -225.1342010498047, "loss": 0.6688, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.07361556589603424, "rewards/margins": 0.051149915903806686, "rewards/rejected": 0.022465649992227554, "step": 480 }, { "epoch": 0.06, "learning_rate": 3.2026143790849674e-06, "logits/chosen": -2.7315773963928223, "logits/rejected": -2.561474561691284, "logps/chosen": -257.4845886230469, "logps/rejected": -213.83639526367188, "loss": 0.6674, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.05385025218129158, "rewards/margins": 0.05505413934588432, "rewards/rejected": -0.0012038892600685358, "step": 490 }, { "epoch": 0.07, "learning_rate": 3.2679738562091506e-06, "logits/chosen": -2.6467950344085693, "logits/rejected": -2.5683364868164062, "logps/chosen": -237.36563110351562, "logps/rejected": -221.3732452392578, "loss": 0.6799, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.02083980292081833, "rewards/margins": 0.029936185106635094, "rewards/rejected": -0.009096382185816765, "step": 500 }, { "epoch": 0.07, "eval_logits/chosen": -2.4660205841064453, "eval_logits/rejected": -2.375483989715576, "eval_logps/chosen": -264.949462890625, "eval_logps/rejected": -249.92762756347656, "eval_loss": 0.6707118153572083, "eval_rewards/accuracies": 0.6815000176429749, "eval_rewards/chosen": 0.035405587404966354, "eval_rewards/margins": 0.04887029901146889, "eval_rewards/rejected": -0.013464723713696003, "eval_runtime": 1161.9283, "eval_samples_per_second": 1.721, "eval_steps_per_second": 0.861, "step": 500 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.557415008544922, "logits/rejected": -2.587461471557617, "logps/chosen": -248.0208740234375, "logps/rejected": -248.16250610351562, "loss": 0.676, "rewards/accuracies": 0.6875, "rewards/chosen": 0.02980933152139187, "rewards/margins": 0.03878789395093918, "rewards/rejected": -0.008978564292192459, "step": 510 }, { "epoch": 0.07, "learning_rate": 3.398692810457517e-06, "logits/chosen": -2.5861740112304688, "logits/rejected": -2.4508297443389893, "logps/chosen": -242.4407196044922, "logps/rejected": -246.52182006835938, "loss": 0.6693, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.01556556485593319, "rewards/margins": 0.05277082324028015, "rewards/rejected": -0.03720525652170181, "step": 520 }, { "epoch": 0.07, "learning_rate": 3.4640522875816997e-06, "logits/chosen": -2.627671957015991, "logits/rejected": -2.5770435333251953, "logps/chosen": -247.16738891601562, "logps/rejected": -231.6279754638672, "loss": 0.6649, "rewards/accuracies": 0.6875, "rewards/chosen": 0.014550316147506237, "rewards/margins": 0.06174767017364502, "rewards/rejected": -0.047197360545396805, "step": 530 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "logits/chosen": -2.699380874633789, "logits/rejected": -2.5035440921783447, "logps/chosen": -272.3833923339844, "logps/rejected": -255.84744262695312, "loss": 0.6618, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.0020259805023670197, "rewards/margins": 0.06897115707397461, "rewards/rejected": -0.06694517284631729, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.5947712418300657e-06, "logits/chosen": -2.6517231464385986, "logits/rejected": -2.578498125076294, "logps/chosen": -242.3655548095703, "logps/rejected": -233.637939453125, "loss": 0.6476, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.020458657294511795, "rewards/margins": 0.10107719898223877, "rewards/rejected": -0.08061854541301727, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.6601307189542484e-06, "logits/chosen": -2.6894688606262207, "logits/rejected": -2.506308078765869, "logps/chosen": -277.38946533203125, "logps/rejected": -297.1171569824219, "loss": 0.6589, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.031552791595458984, "rewards/margins": 0.07832719385623932, "rewards/rejected": -0.10987997055053711, "step": 560 }, { "epoch": 0.07, "learning_rate": 3.7254901960784316e-06, "logits/chosen": -2.7173991203308105, "logits/rejected": -2.6116385459899902, "logps/chosen": -262.6662902832031, "logps/rejected": -255.9048309326172, "loss": 0.6584, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.06762684136629105, "rewards/margins": 0.077293761074543, "rewards/rejected": -0.14492060244083405, "step": 570 }, { "epoch": 0.08, "learning_rate": 3.7908496732026144e-06, "logits/chosen": -2.681260347366333, "logits/rejected": -2.4548838138580322, "logps/chosen": -302.3983154296875, "logps/rejected": -283.51959228515625, "loss": 0.6385, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.025161724537611008, "rewards/margins": 0.125971257686615, "rewards/rejected": -0.1511329710483551, "step": 580 }, { "epoch": 0.08, "learning_rate": 3.856209150326798e-06, "logits/chosen": -2.656480312347412, "logits/rejected": -2.4974989891052246, "logps/chosen": -286.38043212890625, "logps/rejected": -257.6932678222656, "loss": 0.6432, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05199650675058365, "rewards/margins": 0.11849125474691391, "rewards/rejected": -0.17048776149749756, "step": 590 }, { "epoch": 0.08, "learning_rate": 3.92156862745098e-06, "logits/chosen": -2.5832934379577637, "logits/rejected": -2.4160356521606445, "logps/chosen": -245.601806640625, "logps/rejected": -255.2142333984375, "loss": 0.6577, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.1271064281463623, "rewards/margins": 0.09216101467609406, "rewards/rejected": -0.21926744282245636, "step": 600 }, { "epoch": 0.08, "eval_logits/chosen": -2.360114097595215, "eval_logits/rejected": -2.2541239261627197, "eval_logps/chosen": -280.78851318359375, "eval_logps/rejected": -272.36041259765625, "eval_loss": 0.6461644172668457, "eval_rewards/accuracies": 0.675000011920929, "eval_rewards/chosen": -0.12298478186130524, "eval_rewards/margins": 0.11480776965618134, "eval_rewards/rejected": -0.23779255151748657, "eval_runtime": 1158.9282, "eval_samples_per_second": 1.726, "eval_steps_per_second": 0.863, "step": 600 }, { "epoch": 0.08, "learning_rate": 3.986928104575164e-06, "logits/chosen": -2.62365460395813, "logits/rejected": -2.392054557800293, "logps/chosen": -231.8965606689453, "logps/rejected": -213.38131713867188, "loss": 0.6325, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05727694183588028, "rewards/margins": 0.13814638555049896, "rewards/rejected": -0.19542333483695984, "step": 610 }, { "epoch": 0.08, "learning_rate": 4.052287581699347e-06, "logits/chosen": -2.64388108253479, "logits/rejected": -2.4387760162353516, "logps/chosen": -285.2777404785156, "logps/rejected": -278.3298645019531, "loss": 0.6246, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07192676514387131, "rewards/margins": 0.16072218120098114, "rewards/rejected": -0.23264892399311066, "step": 620 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "logits/chosen": -2.4769465923309326, "logits/rejected": -2.419826030731201, "logps/chosen": -278.97564697265625, "logps/rejected": -272.57269287109375, "loss": 0.6501, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.16124705970287323, "rewards/margins": 0.11454138904809952, "rewards/rejected": -0.27578845620155334, "step": 630 }, { "epoch": 0.08, "learning_rate": 4.183006535947713e-06, "logits/chosen": -2.6546480655670166, "logits/rejected": -2.4742162227630615, "logps/chosen": -272.10870361328125, "logps/rejected": -273.37518310546875, "loss": 0.6526, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.15491154789924622, "rewards/margins": 0.1191616877913475, "rewards/rejected": -0.27407321333885193, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.2483660130718954e-06, "logits/chosen": -2.5434234142303467, "logits/rejected": -2.544323682785034, "logps/chosen": -271.45587158203125, "logps/rejected": -271.0767517089844, "loss": 0.6503, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1807040274143219, "rewards/margins": 0.1250898838043213, "rewards/rejected": -0.3057938814163208, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.313725490196079e-06, "logits/chosen": -2.596226215362549, "logits/rejected": -2.439690113067627, "logps/chosen": -318.5782775878906, "logps/rejected": -278.7407531738281, "loss": 0.6247, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22762902081012726, "rewards/margins": 0.16828063130378723, "rewards/rejected": -0.3959096074104309, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.379084967320262e-06, "logits/chosen": -2.6381888389587402, "logits/rejected": -2.522258758544922, "logps/chosen": -300.07244873046875, "logps/rejected": -334.3465270996094, "loss": 0.6205, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.18682761490345, "rewards/margins": 0.18621641397476196, "rewards/rejected": -0.3730439841747284, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.444444444444444e-06, "logits/chosen": -2.663062572479248, "logits/rejected": -2.5052947998046875, "logps/chosen": -281.8200988769531, "logps/rejected": -277.8845520019531, "loss": 0.6352, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13653087615966797, "rewards/margins": 0.1511278599500656, "rewards/rejected": -0.2876587510108948, "step": 680 }, { "epoch": 0.09, "learning_rate": 4.509803921568628e-06, "logits/chosen": -2.664280652999878, "logits/rejected": -2.524028778076172, "logps/chosen": -309.06658935546875, "logps/rejected": -282.0691223144531, "loss": 0.6352, "rewards/accuracies": 0.6875, "rewards/chosen": -0.15293274819850922, "rewards/margins": 0.15575169026851654, "rewards/rejected": -0.30868446826934814, "step": 690 }, { "epoch": 0.09, "learning_rate": 4.5751633986928105e-06, "logits/chosen": -2.6197104454040527, "logits/rejected": -2.5259227752685547, "logps/chosen": -279.31195068359375, "logps/rejected": -282.20111083984375, "loss": 0.6365, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.11969868838787079, "rewards/margins": 0.15021947026252747, "rewards/rejected": -0.26991817355155945, "step": 700 }, { "epoch": 0.09, "eval_logits/chosen": -2.31355619430542, "eval_logits/rejected": -2.2012522220611572, "eval_logps/chosen": -277.0453186035156, "eval_logps/rejected": -272.20367431640625, "eval_loss": 0.6344681978225708, "eval_rewards/accuracies": 0.6859999895095825, "eval_rewards/chosen": -0.08555291593074799, "eval_rewards/margins": 0.15067268908023834, "eval_rewards/rejected": -0.23622561991214752, "eval_runtime": 1159.7861, "eval_samples_per_second": 1.724, "eval_steps_per_second": 0.862, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.640522875816994e-06, "logits/chosen": -2.5975005626678467, "logits/rejected": -2.508746862411499, "logps/chosen": -281.31512451171875, "logps/rejected": -281.1041259765625, "loss": 0.6322, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.062341589480638504, "rewards/margins": 0.1610972285270691, "rewards/rejected": -0.2234388291835785, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.705882352941177e-06, "logits/chosen": -2.6123766899108887, "logits/rejected": -2.5293030738830566, "logps/chosen": -323.938720703125, "logps/rejected": -315.38092041015625, "loss": 0.624, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.09480178356170654, "rewards/margins": 0.18506909906864166, "rewards/rejected": -0.2798708975315094, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.77124183006536e-06, "logits/chosen": -2.605785369873047, "logits/rejected": -2.518705129623413, "logps/chosen": -289.82965087890625, "logps/rejected": -295.834228515625, "loss": 0.6195, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12233264744281769, "rewards/margins": 0.18506425619125366, "rewards/rejected": -0.30739688873291016, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.836601307189543e-06, "logits/chosen": -2.632423162460327, "logits/rejected": -2.4382903575897217, "logps/chosen": -304.190185546875, "logps/rejected": -279.628662109375, "loss": 0.6155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10397078841924667, "rewards/margins": 0.20101885497570038, "rewards/rejected": -0.30498963594436646, "step": 740 }, { "epoch": 0.1, "learning_rate": 4.901960784313726e-06, "logits/chosen": -2.450904369354248, "logits/rejected": -2.3369948863983154, "logps/chosen": -257.42974853515625, "logps/rejected": -265.8070373535156, "loss": 0.6048, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1934954673051834, "rewards/margins": 0.22951745986938477, "rewards/rejected": -0.423012912273407, "step": 750 }, { "epoch": 0.1, "learning_rate": 4.967320261437909e-06, "logits/chosen": -2.5839266777038574, "logits/rejected": -2.357056140899658, "logps/chosen": -303.64654541015625, "logps/rejected": -279.87774658203125, "loss": 0.6236, "rewards/accuracies": 0.6875, "rewards/chosen": -0.32853788137435913, "rewards/margins": 0.19128181040287018, "rewards/rejected": -0.5198196172714233, "step": 760 }, { "epoch": 0.1, "learning_rate": 4.999993476542427e-06, "logits/chosen": -2.515817165374756, "logits/rejected": -2.486161708831787, "logps/chosen": -327.0534973144531, "logps/rejected": -329.8384704589844, "loss": 0.6198, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4592045247554779, "rewards/margins": 0.23688983917236328, "rewards/rejected": -0.6960943937301636, "step": 770 }, { "epoch": 0.1, "learning_rate": 4.999941289086112e-06, "logits/chosen": -2.5871434211730957, "logits/rejected": -2.2986748218536377, "logps/chosen": -352.54925537109375, "logps/rejected": -345.39727783203125, "loss": 0.602, "rewards/accuracies": 0.6875, "rewards/chosen": -0.615364134311676, "rewards/margins": 0.27003955841064453, "rewards/rejected": -0.8854037523269653, "step": 780 }, { "epoch": 0.1, "learning_rate": 4.999836915262896e-06, "logits/chosen": -2.4022507667541504, "logits/rejected": -2.423859119415283, "logps/chosen": -341.8403625488281, "logps/rejected": -363.97869873046875, "loss": 0.6102, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6583345532417297, "rewards/margins": 0.2388831377029419, "rewards/rejected": -0.8972176313400269, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.999680357251587e-06, "logits/chosen": -2.2201342582702637, "logits/rejected": -2.259474277496338, "logps/chosen": -311.77374267578125, "logps/rejected": -336.84295654296875, "loss": 0.6519, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.613008975982666, "rewards/margins": 0.15706071257591248, "rewards/rejected": -0.7700697183609009, "step": 800 }, { "epoch": 0.1, "eval_logits/chosen": -2.183530330657959, "eval_logits/rejected": -2.04819917678833, "eval_logps/chosen": -317.9223327636719, "eval_logps/rejected": -320.8871765136719, "eval_loss": 0.6239581108093262, "eval_rewards/accuracies": 0.6629999876022339, "eval_rewards/chosen": -0.49432334303855896, "eval_rewards/margins": 0.22873705625534058, "eval_rewards/rejected": -0.7230603098869324, "eval_runtime": 1149.5976, "eval_samples_per_second": 1.74, "eval_steps_per_second": 0.87, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.999471618320339e-06, "logits/chosen": -2.5523288249969482, "logits/rejected": -2.320265054702759, "logps/chosen": -328.2205810546875, "logps/rejected": -322.53076171875, "loss": 0.6069, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4844594895839691, "rewards/margins": 0.2521725296974182, "rewards/rejected": -0.7366319894790649, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.999210702826586e-06, "logits/chosen": -2.6358237266540527, "logits/rejected": -2.4474587440490723, "logps/chosen": -346.6995849609375, "logps/rejected": -324.92242431640625, "loss": 0.6221, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.4281977117061615, "rewards/margins": 0.23045018315315247, "rewards/rejected": -0.658647894859314, "step": 820 }, { "epoch": 0.11, "learning_rate": 4.998897616216947e-06, "logits/chosen": -2.4511475563049316, "logits/rejected": -2.5670619010925293, "logps/chosen": -258.81573486328125, "logps/rejected": -311.01043701171875, "loss": 0.6131, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3054274618625641, "rewards/margins": 0.24654905498027802, "rewards/rejected": -0.5519765019416809, "step": 830 }, { "epoch": 0.11, "learning_rate": 4.998532365027117e-06, "logits/chosen": -2.4918437004089355, "logits/rejected": -2.2926580905914307, "logps/chosen": -311.8231201171875, "logps/rejected": -280.79620361328125, "loss": 0.5902, "rewards/accuracies": 0.75, "rewards/chosen": -0.2812344431877136, "rewards/margins": 0.2974868416786194, "rewards/rejected": -0.5787213444709778, "step": 840 }, { "epoch": 0.11, "learning_rate": 4.9981149568817275e-06, "logits/chosen": -2.498668909072876, "logits/rejected": -2.4396538734436035, "logps/chosen": -313.8962707519531, "logps/rejected": -356.46392822265625, "loss": 0.6126, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4118213653564453, "rewards/margins": 0.26523926854133606, "rewards/rejected": -0.677060604095459, "step": 850 }, { "epoch": 0.11, "learning_rate": 4.997645400494192e-06, "logits/chosen": -2.544689178466797, "logits/rejected": -2.427544116973877, "logps/chosen": -297.4951477050781, "logps/rejected": -325.6196594238281, "loss": 0.6211, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5930116176605225, "rewards/margins": 0.29512810707092285, "rewards/rejected": -0.8881398439407349, "step": 860 }, { "epoch": 0.11, "learning_rate": 4.997123705666514e-06, "logits/chosen": -2.5834832191467285, "logits/rejected": -2.3967490196228027, "logps/chosen": -340.9512939453125, "logps/rejected": -351.071044921875, "loss": 0.6576, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.6529733538627625, "rewards/margins": 0.1915055215358734, "rewards/rejected": -0.8444789052009583, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.996549883289093e-06, "logits/chosen": -2.5025086402893066, "logits/rejected": -2.4052650928497314, "logps/chosen": -308.8294677734375, "logps/rejected": -350.6068115234375, "loss": 0.6265, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.593645453453064, "rewards/margins": 0.27240580320358276, "rewards/rejected": -0.866051197052002, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.995923945340495e-06, "logits/chosen": -2.5428309440612793, "logits/rejected": -2.4630656242370605, "logps/chosen": -305.7501525878906, "logps/rejected": -334.24639892578125, "loss": 0.6473, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.537492036819458, "rewards/margins": 0.21050593256950378, "rewards/rejected": -0.7479979395866394, "step": 890 }, { "epoch": 0.12, "learning_rate": 4.995245904887195e-06, "logits/chosen": -2.57853364944458, "logits/rejected": -2.3580322265625, "logps/chosen": -310.814208984375, "logps/rejected": -298.79962158203125, "loss": 0.6547, "rewards/accuracies": 0.625, "rewards/chosen": -0.7046155333518982, "rewards/margins": 0.197780042886734, "rewards/rejected": -0.9023955464363098, "step": 900 }, { "epoch": 0.12, "eval_logits/chosen": -2.2183713912963867, "eval_logits/rejected": -2.0783114433288574, "eval_logps/chosen": -325.8177490234375, "eval_logps/rejected": -331.4542236328125, "eval_loss": 0.6203334927558899, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.5732770562171936, "eval_rewards/margins": 0.25545385479927063, "eval_rewards/rejected": -0.8287308216094971, "eval_runtime": 1154.0508, "eval_samples_per_second": 1.733, "eval_steps_per_second": 0.867, "step": 900 }, { "epoch": 0.12, "learning_rate": 4.994515776083313e-06, "logits/chosen": -2.4050710201263428, "logits/rejected": -2.4928653240203857, "logps/chosen": -315.0536193847656, "logps/rejected": -373.6515197753906, "loss": 0.5972, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.47445574402809143, "rewards/margins": 0.32963109016418457, "rewards/rejected": -0.8040868639945984, "step": 910 }, { "epoch": 0.12, "learning_rate": 4.993733574170316e-06, "logits/chosen": -2.5724964141845703, "logits/rejected": -2.402193546295166, "logps/chosen": -261.9773864746094, "logps/rejected": -292.7881774902344, "loss": 0.5911, "rewards/accuracies": 0.75, "rewards/chosen": -0.3413178622722626, "rewards/margins": 0.32790833711624146, "rewards/rejected": -0.6692262291908264, "step": 920 }, { "epoch": 0.12, "learning_rate": 4.992899315476696e-06, "logits/chosen": -2.6308352947235107, "logits/rejected": -2.504096508026123, "logps/chosen": -331.2294616699219, "logps/rejected": -336.93280029296875, "loss": 0.5964, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.303416907787323, "rewards/margins": 0.3124091327190399, "rewards/rejected": -0.6158260703086853, "step": 930 }, { "epoch": 0.12, "learning_rate": 4.9920130174176354e-06, "logits/chosen": -2.5981762409210205, "logits/rejected": -2.4487905502319336, "logps/chosen": -319.9151916503906, "logps/rejected": -333.0742492675781, "loss": 0.5814, "rewards/accuracies": 0.75, "rewards/chosen": -0.42610105872154236, "rewards/margins": 0.30205437541007996, "rewards/rejected": -0.7281554341316223, "step": 940 }, { "epoch": 0.12, "learning_rate": 4.991074698494638e-06, "logits/chosen": -2.6599678993225098, "logits/rejected": -2.3881497383117676, "logps/chosen": -321.3490905761719, "logps/rejected": -312.3178405761719, "loss": 0.6153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4955690801143646, "rewards/margins": 0.26488804817199707, "rewards/rejected": -0.7604571580886841, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.990084378295148e-06, "logits/chosen": -2.6287171840667725, "logits/rejected": -2.499150514602661, "logps/chosen": -286.4674987792969, "logps/rejected": -286.6602478027344, "loss": 0.6153, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46585598587989807, "rewards/margins": 0.2714241147041321, "rewards/rejected": -0.737280011177063, "step": 960 }, { "epoch": 0.13, "learning_rate": 4.989042077492135e-06, "logits/chosen": -2.6049046516418457, "logits/rejected": -2.512279987335205, "logps/chosen": -312.2615051269531, "logps/rejected": -334.3189392089844, "loss": 0.548, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.38576096296310425, "rewards/margins": 0.3811526596546173, "rewards/rejected": -0.7669135928153992, "step": 970 }, { "epoch": 0.13, "learning_rate": 4.987947817843665e-06, "logits/chosen": -2.4653379917144775, "logits/rejected": -2.4543960094451904, "logps/chosen": -293.2818908691406, "logps/rejected": -294.23089599609375, "loss": 0.6096, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.46745139360427856, "rewards/margins": 0.312784880399704, "rewards/rejected": -0.7802362442016602, "step": 980 }, { "epoch": 0.13, "learning_rate": 4.986801622192453e-06, "logits/chosen": -2.585569381713867, "logits/rejected": -2.398247241973877, "logps/chosen": -264.38580322265625, "logps/rejected": -286.49078369140625, "loss": 0.5645, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.41495591402053833, "rewards/margins": 0.42753225564956665, "rewards/rejected": -0.842488169670105, "step": 990 }, { "epoch": 0.13, "learning_rate": 4.985603514465372e-06, "logits/chosen": -2.496156692504883, "logits/rejected": -2.562666416168213, "logps/chosen": -313.02593994140625, "logps/rejected": -339.96746826171875, "loss": 0.5841, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.47666803002357483, "rewards/margins": 0.3657141327857971, "rewards/rejected": -0.8423821330070496, "step": 1000 }, { "epoch": 0.13, "eval_logits/chosen": -2.2086312770843506, "eval_logits/rejected": -2.068852186203003, "eval_logps/chosen": -322.0998229980469, "eval_logps/rejected": -334.5815734863281, "eval_loss": 0.6070671677589417, "eval_rewards/accuracies": 0.6819999814033508, "eval_rewards/chosen": -0.5360978245735168, "eval_rewards/margins": 0.32390668988227844, "eval_rewards/rejected": -0.8600045442581177, "eval_runtime": 1162.8161, "eval_samples_per_second": 1.72, "eval_steps_per_second": 0.86, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.984353519672966e-06, "logits/chosen": -2.5102970600128174, "logits/rejected": -2.296638011932373, "logps/chosen": -322.3448486328125, "logps/rejected": -314.18927001953125, "loss": 0.652, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.6231977343559265, "rewards/margins": 0.2020292580127716, "rewards/rejected": -0.8252270817756653, "step": 1010 }, { "epoch": 0.13, "learning_rate": 4.9830516639089226e-06, "logits/chosen": -2.5087199211120605, "logits/rejected": -2.2799477577209473, "logps/chosen": -359.6461486816406, "logps/rejected": -333.55712890625, "loss": 0.5308, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.509143590927124, "rewards/margins": 0.46995463967323303, "rewards/rejected": -0.9790982007980347, "step": 1020 }, { "epoch": 0.13, "learning_rate": 4.9816979743495296e-06, "logits/chosen": -2.437312602996826, "logits/rejected": -2.312748670578003, "logps/chosen": -381.3270263671875, "logps/rejected": -398.35748291015625, "loss": 0.5985, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8338835835456848, "rewards/margins": 0.41778063774108887, "rewards/rejected": -1.251664400100708, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.980292479253105e-06, "logits/chosen": -2.523524761199951, "logits/rejected": -2.1683216094970703, "logps/chosen": -413.48956298828125, "logps/rejected": -424.48797607421875, "loss": 0.5374, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.087397575378418, "rewards/margins": 0.5586522817611694, "rewards/rejected": -1.6460498571395874, "step": 1040 }, { "epoch": 0.14, "learning_rate": 4.978835207959414e-06, "logits/chosen": -2.291563034057617, "logits/rejected": -2.2155869007110596, "logps/chosen": -365.6241760253906, "logps/rejected": -386.0607604980469, "loss": 0.5994, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0963432788848877, "rewards/margins": 0.34998494386672974, "rewards/rejected": -1.4463282823562622, "step": 1050 }, { "epoch": 0.14, "learning_rate": 4.977326190889046e-06, "logits/chosen": -2.4078869819641113, "logits/rejected": -1.7181726694107056, "logps/chosen": -359.93756103515625, "logps/rejected": -351.11859130859375, "loss": 0.5669, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9058128595352173, "rewards/margins": 0.4577213227748871, "rewards/rejected": -1.3635342121124268, "step": 1060 }, { "epoch": 0.14, "learning_rate": 4.975765459542788e-06, "logits/chosen": -2.2240052223205566, "logits/rejected": -2.059451103210449, "logps/chosen": -315.9155578613281, "logps/rejected": -355.5037841796875, "loss": 0.528, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5928197503089905, "rewards/margins": 0.5360963344573975, "rewards/rejected": -1.1289160251617432, "step": 1070 }, { "epoch": 0.14, "learning_rate": 4.9741530465009665e-06, "logits/chosen": -2.186079502105713, "logits/rejected": -1.9260094165802002, "logps/chosen": -312.0130920410156, "logps/rejected": -334.43389892578125, "loss": 0.5806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6744036078453064, "rewards/margins": 0.40451207756996155, "rewards/rejected": -1.0789155960083008, "step": 1080 }, { "epoch": 0.14, "learning_rate": 4.972488985422763e-06, "logits/chosen": -2.0618720054626465, "logits/rejected": -1.9399276971817017, "logps/chosen": -329.8536376953125, "logps/rejected": -354.6712341308594, "loss": 0.5418, "rewards/accuracies": 0.75, "rewards/chosen": -0.8148940205574036, "rewards/margins": 0.6003459692001343, "rewards/rejected": -1.4152400493621826, "step": 1090 }, { "epoch": 0.14, "learning_rate": 4.970773311045514e-06, "logits/chosen": -2.120664358139038, "logits/rejected": -1.6714054346084595, "logps/chosen": -371.4166564941406, "logps/rejected": -393.62786865234375, "loss": 0.5877, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0566858053207397, "rewards/margins": 0.43407002091407776, "rewards/rejected": -1.4907559156417847, "step": 1100 }, { "epoch": 0.14, "eval_logits/chosen": -1.3836040496826172, "eval_logits/rejected": -1.1052757501602173, "eval_logps/chosen": -383.43798828125, "eval_logps/rejected": -410.8677673339844, "eval_loss": 0.5946979522705078, "eval_rewards/accuracies": 0.6855000257492065, "eval_rewards/chosen": -1.149479866027832, "eval_rewards/margins": 0.4733865261077881, "eval_rewards/rejected": -1.6228665113449097, "eval_runtime": 1178.8626, "eval_samples_per_second": 1.697, "eval_steps_per_second": 0.848, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.969006059183984e-06, "logits/chosen": -2.1172611713409424, "logits/rejected": -1.6654752492904663, "logps/chosen": -387.62664794921875, "logps/rejected": -408.0655212402344, "loss": 0.6262, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2082122564315796, "rewards/margins": 0.43365636467933655, "rewards/rejected": -1.6418688297271729, "step": 1110 }, { "epoch": 0.15, "learning_rate": 4.967187266729623e-06, "logits/chosen": -2.193946123123169, "logits/rejected": -1.850895643234253, "logps/chosen": -398.3614501953125, "logps/rejected": -430.51373291015625, "loss": 0.5971, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2287832498550415, "rewards/margins": 0.43036943674087524, "rewards/rejected": -1.6591527462005615, "step": 1120 }, { "epoch": 0.15, "learning_rate": 4.965316971649791e-06, "logits/chosen": -2.100188732147217, "logits/rejected": -1.686700463294983, "logps/chosen": -396.20623779296875, "logps/rejected": -422.5747985839844, "loss": 0.4954, "rewards/accuracies": 0.75, "rewards/chosen": -1.049180269241333, "rewards/margins": 0.7083006501197815, "rewards/rejected": -1.7574809789657593, "step": 1130 }, { "epoch": 0.15, "learning_rate": 4.963395212986964e-06, "logits/chosen": -2.071812152862549, "logits/rejected": -1.535829782485962, "logps/chosen": -342.7199401855469, "logps/rejected": -373.0933837890625, "loss": 0.526, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0833193063735962, "rewards/margins": 0.5862609148025513, "rewards/rejected": -1.6695802211761475, "step": 1140 }, { "epoch": 0.15, "learning_rate": 4.9614220308579285e-06, "logits/chosen": -1.8345826864242554, "logits/rejected": -2.1796462535858154, "logps/chosen": -351.6676330566406, "logps/rejected": -381.9559326171875, "loss": 0.5963, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7989309430122375, "rewards/margins": 0.41886311769485474, "rewards/rejected": -1.2177939414978027, "step": 1150 }, { "epoch": 0.15, "learning_rate": 4.9593974664529325e-06, "logits/chosen": -2.1840920448303223, "logits/rejected": -1.7762985229492188, "logps/chosen": -331.3350830078125, "logps/rejected": -385.296630859375, "loss": 0.5442, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6507974863052368, "rewards/margins": 0.5434342622756958, "rewards/rejected": -1.1942317485809326, "step": 1160 }, { "epoch": 0.15, "learning_rate": 4.957321562034833e-06, "logits/chosen": -2.0527548789978027, "logits/rejected": -1.8421388864517212, "logps/chosen": -366.1250305175781, "logps/rejected": -394.0170593261719, "loss": 0.559, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8194394111633301, "rewards/margins": 0.5880136489868164, "rewards/rejected": -1.4074528217315674, "step": 1170 }, { "epoch": 0.15, "learning_rate": 4.955194360938214e-06, "logits/chosen": -1.9533300399780273, "logits/rejected": -1.7631326913833618, "logps/chosen": -371.4923400878906, "logps/rejected": -387.41253662109375, "loss": 0.5852, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1551687717437744, "rewards/margins": 0.4826745092868805, "rewards/rejected": -1.637843370437622, "step": 1180 }, { "epoch": 0.16, "learning_rate": 4.9530159075684735e-06, "logits/chosen": -1.4516584873199463, "logits/rejected": -1.5901073217391968, "logps/chosen": -388.7632141113281, "logps/rejected": -484.33258056640625, "loss": 0.6893, "rewards/accuracies": 0.625, "rewards/chosen": -1.5982259511947632, "rewards/margins": 0.268177330493927, "rewards/rejected": -1.8664032220840454, "step": 1190 }, { "epoch": 0.16, "learning_rate": 4.950786247400908e-06, "logits/chosen": -1.6140592098236084, "logits/rejected": -1.6555970907211304, "logps/chosen": -390.38330078125, "logps/rejected": -435.25604248046875, "loss": 0.5552, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4886068105697632, "rewards/margins": 0.4878392219543457, "rewards/rejected": -1.9764461517333984, "step": 1200 }, { "epoch": 0.16, "eval_logits/chosen": -0.7372294068336487, "eval_logits/rejected": -0.36139780282974243, "eval_logps/chosen": -411.0458679199219, "eval_logps/rejected": -437.91998291015625, "eval_loss": 0.5908603072166443, "eval_rewards/accuracies": 0.6880000233650208, "eval_rewards/chosen": -1.4255588054656982, "eval_rewards/margins": 0.4678295850753784, "eval_rewards/rejected": -1.8933883905410767, "eval_runtime": 1176.6814, "eval_samples_per_second": 1.7, "eval_steps_per_second": 0.85, "step": 1200 }, { "epoch": 0.16, "learning_rate": 4.948505426979756e-06, "logits/chosen": -1.8322169780731201, "logits/rejected": -1.5653588771820068, "logps/chosen": -406.3680114746094, "logps/rejected": -444.8448791503906, "loss": 0.5533, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4452760219573975, "rewards/margins": 0.5475454926490784, "rewards/rejected": -1.992821455001831, "step": 1210 }, { "epoch": 0.16, "learning_rate": 4.946173493917228e-06, "logits/chosen": -1.7810437679290771, "logits/rejected": -1.2556498050689697, "logps/chosen": -417.5589294433594, "logps/rejected": -397.54437255859375, "loss": 0.733, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.5235958099365234, "rewards/margins": 0.1696973741054535, "rewards/rejected": -1.6932932138442993, "step": 1220 }, { "epoch": 0.16, "learning_rate": 4.943790496892513e-06, "logits/chosen": -1.9522157907485962, "logits/rejected": -1.3978416919708252, "logps/chosen": -345.17913818359375, "logps/rejected": -371.7323303222656, "loss": 0.5339, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8955880999565125, "rewards/margins": 0.6181024312973022, "rewards/rejected": -1.5136905908584595, "step": 1230 }, { "epoch": 0.16, "learning_rate": 4.941356485650762e-06, "logits/chosen": -2.062680721282959, "logits/rejected": -1.6688048839569092, "logps/chosen": -390.939208984375, "logps/rejected": -419.36334228515625, "loss": 0.5703, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8187043070793152, "rewards/margins": 0.4967420995235443, "rewards/rejected": -1.3154462575912476, "step": 1240 }, { "epoch": 0.16, "learning_rate": 4.93887151100205e-06, "logits/chosen": -2.130277156829834, "logits/rejected": -1.685664176940918, "logps/chosen": -385.4061279296875, "logps/rejected": -389.6874084472656, "loss": 0.6048, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6924458146095276, "rewards/margins": 0.38014930486679077, "rewards/rejected": -1.0725951194763184, "step": 1250 }, { "epoch": 0.16, "learning_rate": 4.936335624820313e-06, "logits/chosen": -2.035191774368286, "logits/rejected": -1.712632417678833, "logps/chosen": -348.1141052246094, "logps/rejected": -357.98541259765625, "loss": 0.5586, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8647063970565796, "rewards/margins": 0.4742640554904938, "rewards/rejected": -1.3389705419540405, "step": 1260 }, { "epoch": 0.17, "learning_rate": 4.933748880042271e-06, "logits/chosen": -2.0088629722595215, "logits/rejected": -1.5719774961471558, "logps/chosen": -360.0817565917969, "logps/rejected": -390.73211669921875, "loss": 0.5454, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.9902151226997375, "rewards/margins": 0.534164309501648, "rewards/rejected": -1.5243794918060303, "step": 1270 }, { "epoch": 0.17, "learning_rate": 4.931111330666317e-06, "logits/chosen": -2.0022754669189453, "logits/rejected": -1.2308666706085205, "logps/chosen": -358.9193420410156, "logps/rejected": -354.67352294921875, "loss": 0.6109, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0700308084487915, "rewards/margins": 0.3465597927570343, "rewards/rejected": -1.4165904521942139, "step": 1280 }, { "epoch": 0.17, "learning_rate": 4.9284230317513906e-06, "logits/chosen": -1.9100980758666992, "logits/rejected": -1.4506380558013916, "logps/chosen": -428.6927795410156, "logps/rejected": -433.57489013671875, "loss": 0.5906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2611325979232788, "rewards/margins": 0.5384291410446167, "rewards/rejected": -1.7995617389678955, "step": 1290 }, { "epoch": 0.17, "learning_rate": 4.9256840394158325e-06, "logits/chosen": -1.5153193473815918, "logits/rejected": -1.5525444746017456, "logps/chosen": -408.9667053222656, "logps/rejected": -496.4200134277344, "loss": 0.5492, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3559167385101318, "rewards/margins": 0.5765284895896912, "rewards/rejected": -1.9324451684951782, "step": 1300 }, { "epoch": 0.17, "eval_logits/chosen": -0.5949398279190063, "eval_logits/rejected": -0.19331219792366028, "eval_logps/chosen": -414.6322937011719, "eval_logps/rejected": -446.29095458984375, "eval_loss": 0.5791042447090149, "eval_rewards/accuracies": 0.6934999823570251, "eval_rewards/chosen": -1.461422085762024, "eval_rewards/margins": 0.5156759023666382, "eval_rewards/rejected": -1.977097988128662, "eval_runtime": 1198.5552, "eval_samples_per_second": 1.669, "eval_steps_per_second": 0.834, "step": 1300 }, { "epoch": 0.17, "learning_rate": 4.922894410836207e-06, "logits/chosen": -1.9784332513809204, "logits/rejected": -1.0070569515228271, "logps/chosen": -438.13262939453125, "logps/rejected": -431.6548767089844, "loss": 0.5808, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5133016109466553, "rewards/margins": 0.5359851121902466, "rewards/rejected": -2.0492866039276123, "step": 1310 }, { "epoch": 0.17, "learning_rate": 4.920054204246116e-06, "logits/chosen": -1.8840601444244385, "logits/rejected": -1.0693460702896118, "logps/chosen": -434.94189453125, "logps/rejected": -427.7396545410156, "loss": 0.6386, "rewards/accuracies": 0.625, "rewards/chosen": -1.5475298166275024, "rewards/margins": 0.38071927428245544, "rewards/rejected": -1.9282491207122803, "step": 1320 }, { "epoch": 0.17, "learning_rate": 4.9171634789349744e-06, "logits/chosen": -1.7856547832489014, "logits/rejected": -1.456695795059204, "logps/chosen": -393.8877868652344, "logps/rejected": -460.5874938964844, "loss": 0.5115, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.295278787612915, "rewards/margins": 0.6688205003738403, "rewards/rejected": -1.9640991687774658, "step": 1330 }, { "epoch": 0.18, "learning_rate": 4.914222295246782e-06, "logits/chosen": -1.7937772274017334, "logits/rejected": -1.691471815109253, "logps/chosen": -362.4700927734375, "logps/rejected": -393.37750244140625, "loss": 0.6389, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.0026386976242065, "rewards/margins": 0.29950928688049316, "rewards/rejected": -1.3021478652954102, "step": 1340 }, { "epoch": 0.18, "learning_rate": 4.911230714578858e-06, "logits/chosen": -1.6912847757339478, "logits/rejected": -1.8542919158935547, "logps/chosen": -274.45208740234375, "logps/rejected": -340.21807861328125, "loss": 0.5671, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5728007555007935, "rewards/margins": 0.4466250538825989, "rewards/rejected": -1.0194257497787476, "step": 1350 }, { "epoch": 0.18, "learning_rate": 4.908188799380558e-06, "logits/chosen": -2.217525005340576, "logits/rejected": -1.9035638570785522, "logps/chosen": -292.1073303222656, "logps/rejected": -301.85491943359375, "loss": 0.5692, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.46799778938293457, "rewards/margins": 0.4237847924232483, "rewards/rejected": -0.8917825818061829, "step": 1360 }, { "epoch": 0.18, "learning_rate": 4.905096613151975e-06, "logits/chosen": -2.0345702171325684, "logits/rejected": -1.6463664770126343, "logps/chosen": -375.4377746582031, "logps/rejected": -376.78997802734375, "loss": 0.629, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7877501845359802, "rewards/margins": 0.28059062361717224, "rewards/rejected": -1.06834077835083, "step": 1370 }, { "epoch": 0.18, "learning_rate": 4.90195422044261e-06, "logits/chosen": -1.9617021083831787, "logits/rejected": -1.6719785928726196, "logps/chosen": -360.3094787597656, "logps/rejected": -401.42926025390625, "loss": 0.4957, "rewards/accuracies": 0.75, "rewards/chosen": -0.7087548971176147, "rewards/margins": 0.6915256381034851, "rewards/rejected": -1.4002805948257446, "step": 1380 }, { "epoch": 0.18, "learning_rate": 4.898761686850028e-06, "logits/chosen": -1.6123435497283936, "logits/rejected": -1.2256313562393188, "logps/chosen": -361.95721435546875, "logps/rejected": -406.7974548339844, "loss": 0.5884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.009804129600525, "rewards/margins": 0.534344494342804, "rewards/rejected": -1.544148564338684, "step": 1390 }, { "epoch": 0.18, "learning_rate": 4.895519079018485e-06, "logits/chosen": -1.6615076065063477, "logits/rejected": -0.7398694753646851, "logps/chosen": -338.4859619140625, "logps/rejected": -375.15704345703125, "loss": 0.5789, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8177839517593384, "rewards/margins": 0.6118384599685669, "rewards/rejected": -1.4296224117279053, "step": 1400 }, { "epoch": 0.18, "eval_logits/chosen": -0.5845944285392761, "eval_logits/rejected": -0.1907668113708496, "eval_logps/chosen": -356.483154296875, "eval_logps/rejected": -384.9108581542969, "eval_loss": 0.5771133303642273, "eval_rewards/accuracies": 0.703499972820282, "eval_rewards/chosen": -0.8799312114715576, "eval_rewards/margins": 0.4833654463291168, "eval_rewards/rejected": -1.3632965087890625, "eval_runtime": 1177.7993, "eval_samples_per_second": 1.698, "eval_steps_per_second": 0.849, "step": 1400 }, { "epoch": 0.18, "learning_rate": 4.89222646463754e-06, "logits/chosen": -1.5245163440704346, "logits/rejected": -1.3327850103378296, "logps/chosen": -351.33154296875, "logps/rejected": -393.2879333496094, "loss": 0.5975, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.9374910593032837, "rewards/margins": 0.4914394021034241, "rewards/rejected": -1.4289302825927734, "step": 1410 }, { "epoch": 0.19, "learning_rate": 4.888883912440642e-06, "logits/chosen": -1.7364609241485596, "logits/rejected": -1.2762303352355957, "logps/chosen": -401.25, "logps/rejected": -442.7745666503906, "loss": 0.5555, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9093993306159973, "rewards/margins": 0.5804132223129272, "rewards/rejected": -1.4898124933242798, "step": 1420 }, { "epoch": 0.19, "learning_rate": 4.885491492203688e-06, "logits/chosen": -1.50997793674469, "logits/rejected": -1.1342096328735352, "logps/chosen": -360.4520568847656, "logps/rejected": -378.17291259765625, "loss": 0.5518, "rewards/accuracies": 0.75, "rewards/chosen": -0.8876248598098755, "rewards/margins": 0.491793155670166, "rewards/rejected": -1.3794180154800415, "step": 1430 }, { "epoch": 0.19, "learning_rate": 4.882049274743578e-06, "logits/chosen": -1.7279183864593506, "logits/rejected": -1.3486309051513672, "logps/chosen": -405.9081115722656, "logps/rejected": -435.51708984375, "loss": 0.5283, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9554494619369507, "rewards/margins": 0.64899742603302, "rewards/rejected": -1.6044470071792603, "step": 1440 }, { "epoch": 0.19, "learning_rate": 4.878557331916729e-06, "logits/chosen": -1.4362828731536865, "logits/rejected": -1.2300403118133545, "logps/chosen": -356.7294006347656, "logps/rejected": -382.6611633300781, "loss": 0.5344, "rewards/accuracies": 0.75, "rewards/chosen": -0.9905643463134766, "rewards/margins": 0.5484082698822021, "rewards/rejected": -1.5389726161956787, "step": 1450 }, { "epoch": 0.19, "learning_rate": 4.875015736617576e-06, "logits/chosen": -1.6814028024673462, "logits/rejected": -1.2631704807281494, "logps/chosen": -434.5621643066406, "logps/rejected": -433.96038818359375, "loss": 0.5972, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0096790790557861, "rewards/margins": 0.5042110681533813, "rewards/rejected": -1.5138901472091675, "step": 1460 }, { "epoch": 0.19, "learning_rate": 4.8714245627770515e-06, "logits/chosen": -1.7851835489273071, "logits/rejected": -1.0083516836166382, "logps/chosen": -348.3280334472656, "logps/rejected": -359.25750732421875, "loss": 0.6018, "rewards/accuracies": 0.625, "rewards/chosen": -1.0166282653808594, "rewards/margins": 0.45403042435646057, "rewards/rejected": -1.470658540725708, "step": 1470 }, { "epoch": 0.19, "learning_rate": 4.8677838853610445e-06, "logits/chosen": -1.6132957935333252, "logits/rejected": -0.8410366177558899, "logps/chosen": -358.55548095703125, "logps/rejected": -369.1767883300781, "loss": 0.5801, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.9697539210319519, "rewards/margins": 0.5656896829605103, "rewards/rejected": -1.535443663597107, "step": 1480 }, { "epoch": 0.19, "learning_rate": 4.864093780368828e-06, "logits/chosen": -1.8534510135650635, "logits/rejected": -1.0167924165725708, "logps/chosen": -379.2197265625, "logps/rejected": -396.7680358886719, "loss": 0.4763, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8626648783683777, "rewards/margins": 0.758463978767395, "rewards/rejected": -1.621128797531128, "step": 1490 }, { "epoch": 0.2, "learning_rate": 4.860354324831482e-06, "logits/chosen": -1.2768698930740356, "logits/rejected": -1.3293505907058716, "logps/chosen": -382.0423583984375, "logps/rejected": -450.2859802246094, "loss": 0.5456, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1258668899536133, "rewards/margins": 0.6569615602493286, "rewards/rejected": -1.7828283309936523, "step": 1500 }, { "epoch": 0.2, "eval_logits/chosen": -0.15739573538303375, "eval_logits/rejected": 0.3097783625125885, "eval_logps/chosen": -386.943603515625, "eval_logps/rejected": -427.7158203125, "eval_loss": 0.5645538568496704, "eval_rewards/accuracies": 0.703499972820282, "eval_rewards/chosen": -1.1845359802246094, "eval_rewards/margins": 0.606810986995697, "eval_rewards/rejected": -1.7913470268249512, "eval_runtime": 1155.3202, "eval_samples_per_second": 1.731, "eval_steps_per_second": 0.866, "step": 1500 }, { "epoch": 0.2, "learning_rate": 4.856565596810279e-06, "logits/chosen": -1.3375440835952759, "logits/rejected": -0.9983604550361633, "logps/chosen": -339.1542053222656, "logps/rejected": -410.56072998046875, "loss": 0.5896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2330477237701416, "rewards/margins": 0.5682455897331238, "rewards/rejected": -1.8012933731079102, "step": 1510 }, { "epoch": 0.2, "learning_rate": 4.852727675395056e-06, "logits/chosen": -1.191486120223999, "logits/rejected": -0.736042857170105, "logps/chosen": -383.180419921875, "logps/rejected": -435.84185791015625, "loss": 0.4452, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2295153141021729, "rewards/margins": 0.8655103445053101, "rewards/rejected": -2.0950255393981934, "step": 1520 }, { "epoch": 0.2, "learning_rate": 4.848840640702565e-06, "logits/chosen": -1.4446022510528564, "logits/rejected": -1.0689058303833008, "logps/chosen": -384.16009521484375, "logps/rejected": -390.2742614746094, "loss": 0.6654, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.3684113025665283, "rewards/margins": 0.37485355138778687, "rewards/rejected": -1.74326491355896, "step": 1530 }, { "epoch": 0.2, "learning_rate": 4.844904573874798e-06, "logits/chosen": -1.4517594575881958, "logits/rejected": -0.9329123497009277, "logps/chosen": -404.48828125, "logps/rejected": -426.7264709472656, "loss": 0.5109, "rewards/accuracies": 0.75, "rewards/chosen": -1.2724699974060059, "rewards/margins": 0.7288385629653931, "rewards/rejected": -2.0013086795806885, "step": 1540 }, { "epoch": 0.2, "learning_rate": 4.840919557077297e-06, "logits/chosen": -1.6456127166748047, "logits/rejected": -0.6936973333358765, "logps/chosen": -370.9361267089844, "logps/rejected": -393.5981140136719, "loss": 0.5758, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0427272319793701, "rewards/margins": 0.5629565119743347, "rewards/rejected": -1.60568368434906, "step": 1550 }, { "epoch": 0.2, "learning_rate": 4.836885673497435e-06, "logits/chosen": -1.382566213607788, "logits/rejected": -0.757800817489624, "logps/chosen": -366.60137939453125, "logps/rejected": -412.43865966796875, "loss": 0.501, "rewards/accuracies": 0.75, "rewards/chosen": -0.9314566850662231, "rewards/margins": 0.710411548614502, "rewards/rejected": -1.6418683528900146, "step": 1560 }, { "epoch": 0.21, "learning_rate": 4.832803007342679e-06, "logits/chosen": -0.9425197839736938, "logits/rejected": -0.9502668380737305, "logps/chosen": -353.66204833984375, "logps/rejected": -422.31640625, "loss": 0.5845, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1626675128936768, "rewards/margins": 0.5805758237838745, "rewards/rejected": -1.7432434558868408, "step": 1570 }, { "epoch": 0.21, "learning_rate": 4.828671643838839e-06, "logits/chosen": -1.275815725326538, "logits/rejected": -0.9463005065917969, "logps/chosen": -385.7954406738281, "logps/rejected": -382.41021728515625, "loss": 0.6096, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1785976886749268, "rewards/margins": 0.4413912892341614, "rewards/rejected": -1.619989037513733, "step": 1580 }, { "epoch": 0.21, "learning_rate": 4.824491669228279e-06, "logits/chosen": -1.289780616760254, "logits/rejected": -0.7804897427558899, "logps/chosen": -371.3612060546875, "logps/rejected": -400.07611083984375, "loss": 0.5764, "rewards/accuracies": 0.6875, "rewards/chosen": -1.173409104347229, "rewards/margins": 0.5362740159034729, "rewards/rejected": -1.7096830606460571, "step": 1590 }, { "epoch": 0.21, "learning_rate": 4.8202631707681245e-06, "logits/chosen": -1.3760565519332886, "logits/rejected": -0.5771588683128357, "logps/chosen": -360.80352783203125, "logps/rejected": -426.24688720703125, "loss": 0.4722, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.24233078956604, "rewards/margins": 0.8159643411636353, "rewards/rejected": -2.0582950115203857, "step": 1600 }, { "epoch": 0.21, "eval_logits/chosen": 0.03460278362035751, "eval_logits/rejected": 0.539508044719696, "eval_logps/chosen": -400.91131591796875, "eval_logps/rejected": -442.8173828125, "eval_loss": 0.5598156452178955, "eval_rewards/accuracies": 0.7074999809265137, "eval_rewards/chosen": -1.3242131471633911, "eval_rewards/margins": 0.6181491613388062, "eval_rewards/rejected": -1.9423623085021973, "eval_runtime": 1154.908, "eval_samples_per_second": 1.732, "eval_steps_per_second": 0.866, "step": 1600 }, { "epoch": 0.21, "learning_rate": 4.815986236728437e-06, "logits/chosen": -1.1099474430084229, "logits/rejected": -0.6507538557052612, "logps/chosen": -378.3443298339844, "logps/rejected": -439.36651611328125, "loss": 0.5462, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2413318157196045, "rewards/margins": 0.6197353601455688, "rewards/rejected": -1.8610671758651733, "step": 1610 }, { "epoch": 0.21, "learning_rate": 4.811660956390372e-06, "logits/chosen": -1.168228030204773, "logits/rejected": -0.9048255085945129, "logps/chosen": -420.23974609375, "logps/rejected": -447.28790283203125, "loss": 0.5796, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.1689625978469849, "rewards/margins": 0.5838147401809692, "rewards/rejected": -1.7527774572372437, "step": 1620 }, { "epoch": 0.21, "learning_rate": 4.807287420044319e-06, "logits/chosen": -1.4433248043060303, "logits/rejected": -0.7626504302024841, "logps/chosen": -344.18035888671875, "logps/rejected": -398.12451171875, "loss": 0.5509, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.1426336765289307, "rewards/margins": 0.6661030650138855, "rewards/rejected": -1.8087365627288818, "step": 1630 }, { "epoch": 0.21, "learning_rate": 4.802865718988008e-06, "logits/chosen": -1.0445847511291504, "logits/rejected": -1.0025596618652344, "logps/chosen": -345.99066162109375, "logps/rejected": -438.6319885253906, "loss": 0.5689, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2196751832962036, "rewards/margins": 0.5887807607650757, "rewards/rejected": -1.8084558248519897, "step": 1640 }, { "epoch": 0.22, "learning_rate": 4.798395945524615e-06, "logits/chosen": -1.2105060815811157, "logits/rejected": -0.5044958591461182, "logps/chosen": -390.4164123535156, "logps/rejected": -438.38916015625, "loss": 0.5252, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3014953136444092, "rewards/margins": 0.7168537974357605, "rewards/rejected": -2.0183491706848145, "step": 1650 }, { "epoch": 0.22, "learning_rate": 4.793878192960823e-06, "logits/chosen": -1.5575644969940186, "logits/rejected": -0.6008479595184326, "logps/chosen": -447.37384033203125, "logps/rejected": -512.7503051757812, "loss": 0.5289, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.3483020067214966, "rewards/margins": 0.7725515961647034, "rewards/rejected": -2.1208536624908447, "step": 1660 }, { "epoch": 0.22, "learning_rate": 4.789312555604887e-06, "logits/chosen": -1.3201217651367188, "logits/rejected": -0.7254796028137207, "logps/chosen": -357.54486083984375, "logps/rejected": -404.8153381347656, "loss": 0.5135, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0712493658065796, "rewards/margins": 0.709019660949707, "rewards/rejected": -1.7802692651748657, "step": 1670 }, { "epoch": 0.22, "learning_rate": 4.784699128764654e-06, "logits/chosen": -1.4174318313598633, "logits/rejected": -0.4926614761352539, "logps/chosen": -362.864013671875, "logps/rejected": -409.38238525390625, "loss": 0.5552, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1419918537139893, "rewards/margins": 0.6905233263969421, "rewards/rejected": -1.8325151205062866, "step": 1680 }, { "epoch": 0.22, "learning_rate": 4.780038008745581e-06, "logits/chosen": -1.2989250421524048, "logits/rejected": -0.561491072177887, "logps/chosen": -437.3382263183594, "logps/rejected": -468.6178283691406, "loss": 0.5451, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.4737846851348877, "rewards/margins": 0.7077856063842773, "rewards/rejected": -2.181570291519165, "step": 1690 }, { "epoch": 0.22, "learning_rate": 4.775329292848721e-06, "logits/chosen": -0.8505582809448242, "logits/rejected": -0.4519156515598297, "logps/chosen": -432.04736328125, "logps/rejected": -503.6212463378906, "loss": 0.5072, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4707634449005127, "rewards/margins": 0.8545511960983276, "rewards/rejected": -2.32531476020813, "step": 1700 }, { "epoch": 0.22, "eval_logits/chosen": 0.46571579575538635, "eval_logits/rejected": 1.0410724878311157, "eval_logps/chosen": -418.8860168457031, "eval_logps/rejected": -465.253662109375, "eval_loss": 0.5574353933334351, "eval_rewards/accuracies": 0.7059999704360962, "eval_rewards/chosen": -1.503960132598877, "eval_rewards/margins": 0.6627644896507263, "eval_rewards/rejected": -2.166724681854248, "eval_runtime": 1172.1157, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.853, "step": 1700 }, { "epoch": 0.22, "learning_rate": 4.770573079368691e-06, "logits/chosen": -0.927274227142334, "logits/rejected": -0.7411154508590698, "logps/chosen": -412.53338623046875, "logps/rejected": -437.78875732421875, "loss": 0.6122, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4526969194412231, "rewards/margins": 0.5428808927536011, "rewards/rejected": -1.9955780506134033, "step": 1710 }, { "epoch": 0.23, "learning_rate": 4.765769467591626e-06, "logits/chosen": -1.1742867231369019, "logits/rejected": -0.6260591745376587, "logps/chosen": -435.9891052246094, "logps/rejected": -470.205810546875, "loss": 0.5436, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4501960277557373, "rewards/margins": 0.603704035282135, "rewards/rejected": -2.0539000034332275, "step": 1720 }, { "epoch": 0.23, "learning_rate": 4.760918557793096e-06, "logits/chosen": -1.0430524349212646, "logits/rejected": -0.6410683989524841, "logps/chosen": -408.3353271484375, "logps/rejected": -473.285888671875, "loss": 0.5752, "rewards/accuracies": 0.6875, "rewards/chosen": -1.56908118724823, "rewards/margins": 0.5391335487365723, "rewards/rejected": -2.108214855194092, "step": 1730 }, { "epoch": 0.23, "learning_rate": 4.756020451236025e-06, "logits/chosen": -1.3290060758590698, "logits/rejected": -0.5599908232688904, "logps/chosen": -448.45428466796875, "logps/rejected": -473.66387939453125, "loss": 0.5918, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4412941932678223, "rewards/margins": 0.5439326167106628, "rewards/rejected": -1.9852268695831299, "step": 1740 }, { "epoch": 0.23, "learning_rate": 4.751075250168569e-06, "logits/chosen": -1.4968469142913818, "logits/rejected": 0.1215222105383873, "logps/chosen": -433.72222900390625, "logps/rejected": -473.5547790527344, "loss": 0.539, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.6900333166122437, "rewards/margins": 0.7276581525802612, "rewards/rejected": -2.417691707611084, "step": 1750 }, { "epoch": 0.23, "learning_rate": 4.746083057821981e-06, "logits/chosen": -1.1097086668014526, "logits/rejected": -0.21016299724578857, "logps/chosen": -395.05413818359375, "logps/rejected": -453.3968811035156, "loss": 0.5255, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.454843282699585, "rewards/margins": 0.9116643667221069, "rewards/rejected": -2.3665075302124023, "step": 1760 }, { "epoch": 0.23, "learning_rate": 4.741043978408463e-06, "logits/chosen": -0.9275191426277161, "logits/rejected": -0.33824652433395386, "logps/chosen": -380.26177978515625, "logps/rejected": -466.066650390625, "loss": 0.509, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2040696144104004, "rewards/margins": 0.8978725671768188, "rewards/rejected": -2.1019420623779297, "step": 1770 }, { "epoch": 0.23, "learning_rate": 4.735958117118983e-06, "logits/chosen": -1.4339239597320557, "logits/rejected": -0.27769067883491516, "logps/chosen": -403.5133056640625, "logps/rejected": -451.6259765625, "loss": 0.5387, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0971940755844116, "rewards/margins": 0.7503548860549927, "rewards/rejected": -1.8475488424301147, "step": 1780 }, { "epoch": 0.23, "learning_rate": 4.730825580121084e-06, "logits/chosen": -1.1844618320465088, "logits/rejected": -0.35769081115722656, "logps/chosen": -366.8836669921875, "logps/rejected": -446.3826599121094, "loss": 0.5439, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2104787826538086, "rewards/margins": 0.7347462177276611, "rewards/rejected": -1.9452251195907593, "step": 1790 }, { "epoch": 0.24, "learning_rate": 4.725646474556666e-06, "logits/chosen": -0.8105975985527039, "logits/rejected": -0.6895971894264221, "logps/chosen": -352.57110595703125, "logps/rejected": -445.0667419433594, "loss": 0.5284, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.290921926498413, "rewards/margins": 0.7879074811935425, "rewards/rejected": -2.078829526901245, "step": 1800 }, { "epoch": 0.24, "eval_logits/chosen": 0.6528418064117432, "eval_logits/rejected": 1.2403807640075684, "eval_logps/chosen": -423.3541564941406, "eval_logps/rejected": -469.1292724609375, "eval_loss": 0.5534334182739258, "eval_rewards/accuracies": 0.7070000171661377, "eval_rewards/chosen": -1.5486416816711426, "eval_rewards/margins": 0.6568393111228943, "eval_rewards/rejected": -2.2054810523986816, "eval_runtime": 1170.7626, "eval_samples_per_second": 1.708, "eval_steps_per_second": 0.854, "step": 1800 }, { "epoch": 0.24, "learning_rate": 4.720420908539748e-06, "logits/chosen": -1.189819574356079, "logits/rejected": -0.6282674074172974, "logps/chosen": -418.718505859375, "logps/rejected": -480.90216064453125, "loss": 0.6174, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7022778987884521, "rewards/margins": 0.5764120817184448, "rewards/rejected": -2.2786898612976074, "step": 1810 }, { "epoch": 0.24, "learning_rate": 4.715148991154216e-06, "logits/chosen": -1.222535252571106, "logits/rejected": -0.8831266164779663, "logps/chosen": -516.7198486328125, "logps/rejected": -564.3641967773438, "loss": 0.586, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7563714981079102, "rewards/margins": 0.5409680604934692, "rewards/rejected": -2.297339677810669, "step": 1820 }, { "epoch": 0.24, "learning_rate": 4.709830832451538e-06, "logits/chosen": -0.8930926322937012, "logits/rejected": -0.4406977593898773, "logps/chosen": -486.24578857421875, "logps/rejected": -537.7103881835938, "loss": 0.5226, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.8711732625961304, "rewards/margins": 0.6819201111793518, "rewards/rejected": -2.553093433380127, "step": 1830 }, { "epoch": 0.24, "learning_rate": 4.704466543448477e-06, "logits/chosen": -1.0724461078643799, "logits/rejected": -0.03855453059077263, "logps/chosen": -519.872802734375, "logps/rejected": -553.6289672851562, "loss": 0.4735, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7257235050201416, "rewards/margins": 0.9186381101608276, "rewards/rejected": -2.644361972808838, "step": 1840 }, { "epoch": 0.24, "learning_rate": 4.699056236124762e-06, "logits/chosen": -1.1924560070037842, "logits/rejected": -0.6589155197143555, "logps/chosen": -442.31707763671875, "logps/rejected": -505.4425354003906, "loss": 0.6075, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8461265563964844, "rewards/margins": 0.616051197052002, "rewards/rejected": -2.4621777534484863, "step": 1850 }, { "epoch": 0.24, "learning_rate": 4.693600023420758e-06, "logits/chosen": -1.2352359294891357, "logits/rejected": -0.2970736026763916, "logps/chosen": -436.0951232910156, "logps/rejected": -452.52923583984375, "loss": 0.4406, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.391472339630127, "rewards/margins": 0.9484335780143738, "rewards/rejected": -2.3399059772491455, "step": 1860 }, { "epoch": 0.24, "learning_rate": 4.688098019235108e-06, "logits/chosen": -1.2450083494186401, "logits/rejected": -0.5829068422317505, "logps/chosen": -445.16143798828125, "logps/rejected": -517.4144287109375, "loss": 0.5586, "rewards/accuracies": 0.75, "rewards/chosen": -1.5391108989715576, "rewards/margins": 0.8366822004318237, "rewards/rejected": -2.375793218612671, "step": 1870 }, { "epoch": 0.25, "learning_rate": 4.682550338422353e-06, "logits/chosen": -1.3568122386932373, "logits/rejected": -0.5020118355751038, "logps/chosen": -410.96826171875, "logps/rejected": -465.6785583496094, "loss": 0.4733, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4551056623458862, "rewards/margins": 0.9018338322639465, "rewards/rejected": -2.3569395542144775, "step": 1880 }, { "epoch": 0.25, "learning_rate": 4.676957096790536e-06, "logits/chosen": -1.0935922861099243, "logits/rejected": -0.5090438723564148, "logps/chosen": -427.3106994628906, "logps/rejected": -443.31231689453125, "loss": 0.6429, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6104503870010376, "rewards/margins": 0.5206896066665649, "rewards/rejected": -2.1311399936676025, "step": 1890 }, { "epoch": 0.25, "learning_rate": 4.671318411098782e-06, "logits/chosen": -0.7024677991867065, "logits/rejected": -0.8284416198730469, "logps/chosen": -458.29608154296875, "logps/rejected": -553.300537109375, "loss": 0.5623, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7181098461151123, "rewards/margins": 0.9186338186264038, "rewards/rejected": -2.6367437839508057, "step": 1900 }, { "epoch": 0.25, "eval_logits/chosen": 0.30577704310417175, "eval_logits/rejected": 0.7807530760765076, "eval_logps/chosen": -439.5539245605469, "eval_logps/rejected": -491.0526123046875, "eval_loss": 0.5624514222145081, "eval_rewards/accuracies": 0.7055000066757202, "eval_rewards/chosen": -1.7106391191482544, "eval_rewards/margins": 0.7140753269195557, "eval_rewards/rejected": -2.4247145652770996, "eval_runtime": 1175.0312, "eval_samples_per_second": 1.702, "eval_steps_per_second": 0.851, "step": 1900 }, { "epoch": 0.25, "learning_rate": 4.665634399054864e-06, "logits/chosen": -0.725782036781311, "logits/rejected": -0.6188144683837891, "logps/chosen": -409.5749206542969, "logps/rejected": -466.3291931152344, "loss": 0.663, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.785436987876892, "rewards/margins": 0.5989683270454407, "rewards/rejected": -2.3844053745269775, "step": 1910 }, { "epoch": 0.25, "learning_rate": 4.659905179312743e-06, "logits/chosen": -1.4557887315750122, "logits/rejected": -0.6816984415054321, "logps/chosen": -442.3401794433594, "logps/rejected": -428.6465759277344, "loss": 0.6225, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4092276096343994, "rewards/margins": 0.5179110765457153, "rewards/rejected": -1.9271386861801147, "step": 1920 }, { "epoch": 0.25, "learning_rate": 4.654130871470093e-06, "logits/chosen": -1.4024882316589355, "logits/rejected": -0.5982595682144165, "logps/chosen": -372.2467346191406, "logps/rejected": -375.06610107421875, "loss": 0.6068, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1297688484191895, "rewards/margins": 0.4118806719779968, "rewards/rejected": -1.541649580001831, "step": 1930 }, { "epoch": 0.25, "learning_rate": 4.6483115960658045e-06, "logits/chosen": -1.4664485454559326, "logits/rejected": -0.3487986624240875, "logps/chosen": -396.49334716796875, "logps/rejected": -393.38616943359375, "loss": 0.5078, "rewards/accuracies": 0.75, "rewards/chosen": -1.2406772375106812, "rewards/margins": 0.653410792350769, "rewards/rejected": -1.8940880298614502, "step": 1940 }, { "epoch": 0.26, "learning_rate": 4.642447474577466e-06, "logits/chosen": -1.0883629322052002, "logits/rejected": -1.072458028793335, "logps/chosen": -371.15093994140625, "logps/rejected": -431.68426513671875, "loss": 0.5129, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.395630121231079, "rewards/margins": 0.68388831615448, "rewards/rejected": -2.0795183181762695, "step": 1950 }, { "epoch": 0.26, "learning_rate": 4.636538629418832e-06, "logits/chosen": -1.2723660469055176, "logits/rejected": -0.904948353767395, "logps/chosen": -401.46112060546875, "logps/rejected": -466.59393310546875, "loss": 0.4758, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1891543865203857, "rewards/margins": 0.8602797389030457, "rewards/rejected": -2.049434185028076, "step": 1960 }, { "epoch": 0.26, "learning_rate": 4.630585183937263e-06, "logits/chosen": -1.2972795963287354, "logits/rejected": -0.6552487015724182, "logps/chosen": -409.4198913574219, "logps/rejected": -429.36248779296875, "loss": 0.5831, "rewards/accuracies": 0.6875, "rewards/chosen": -1.228527307510376, "rewards/margins": 0.5128410458564758, "rewards/rejected": -1.741368055343628, "step": 1970 }, { "epoch": 0.26, "learning_rate": 4.6245872624111535e-06, "logits/chosen": -1.1344765424728394, "logits/rejected": -0.9775272607803345, "logps/chosen": -344.75238037109375, "logps/rejected": -382.8861389160156, "loss": 0.615, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2352514266967773, "rewards/margins": 0.49147820472717285, "rewards/rejected": -1.7267297506332397, "step": 1980 }, { "epoch": 0.26, "learning_rate": 4.618544990047336e-06, "logits/chosen": -1.1246018409729004, "logits/rejected": -0.5179897546768188, "logps/chosen": -425.73406982421875, "logps/rejected": -470.3760681152344, "loss": 0.5957, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2693254947662354, "rewards/margins": 0.6230798959732056, "rewards/rejected": -1.8924052715301514, "step": 1990 }, { "epoch": 0.26, "learning_rate": 4.612458492978473e-06, "logits/chosen": -1.3398029804229736, "logits/rejected": -0.8593130111694336, "logps/chosen": -380.1425476074219, "logps/rejected": -439.82891845703125, "loss": 0.6092, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3958600759506226, "rewards/margins": 0.515204131603241, "rewards/rejected": -1.9110643863677979, "step": 2000 }, { "epoch": 0.26, "eval_logits/chosen": 0.007945289835333824, "eval_logits/rejected": 0.519872784614563, "eval_logps/chosen": -370.0728454589844, "eval_logps/rejected": -413.70892333984375, "eval_loss": 0.550082266330719, "eval_rewards/accuracies": 0.7085000276565552, "eval_rewards/chosen": -1.0158281326293945, "eval_rewards/margins": 0.6354495882987976, "eval_rewards/rejected": -1.6512778997421265, "eval_runtime": 1182.5268, "eval_samples_per_second": 1.691, "eval_steps_per_second": 0.846, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.606327898260413e-06, "logits/chosen": -1.1447194814682007, "logits/rejected": -0.6674858331680298, "logps/chosen": -386.634765625, "logps/rejected": -428.674072265625, "loss": 0.5888, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0479754209518433, "rewards/margins": 0.6669696569442749, "rewards/rejected": -1.7149450778961182, "step": 2010 }, { "epoch": 0.26, "learning_rate": 4.600153333869549e-06, "logits/chosen": -1.5867061614990234, "logits/rejected": -0.8068370819091797, "logps/chosen": -372.733154296875, "logps/rejected": -399.0820007324219, "loss": 0.5192, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9379202127456665, "rewards/margins": 0.6650333404541016, "rewards/rejected": -1.602953553199768, "step": 2020 }, { "epoch": 0.27, "learning_rate": 4.593934928700141e-06, "logits/chosen": -1.5054073333740234, "logits/rejected": -0.331088125705719, "logps/chosen": -396.2590026855469, "logps/rejected": -440.35308837890625, "loss": 0.516, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2215917110443115, "rewards/margins": 0.7985371947288513, "rewards/rejected": -2.0201287269592285, "step": 2030 }, { "epoch": 0.27, "learning_rate": 4.587672812561626e-06, "logits/chosen": -1.0228203535079956, "logits/rejected": -0.6623596549034119, "logps/chosen": -359.9720458984375, "logps/rejected": -467.29461669921875, "loss": 0.5241, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2281525135040283, "rewards/margins": 0.7495523691177368, "rewards/rejected": -1.9777047634124756, "step": 2040 }, { "epoch": 0.27, "learning_rate": 4.581367116175911e-06, "logits/chosen": -0.9328869581222534, "logits/rejected": -0.2950093150138855, "logps/chosen": -431.26873779296875, "logps/rejected": -445.1465759277344, "loss": 0.5993, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3680379390716553, "rewards/margins": 0.5561319589614868, "rewards/rejected": -1.9241701364517212, "step": 2050 }, { "epoch": 0.27, "learning_rate": 4.5750179711746416e-06, "logits/chosen": -0.9565197229385376, "logits/rejected": -0.35139861702919006, "logps/chosen": -391.14947509765625, "logps/rejected": -434.99713134765625, "loss": 0.5767, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3655421733856201, "rewards/margins": 0.5043060183525085, "rewards/rejected": -1.8698484897613525, "step": 2060 }, { "epoch": 0.27, "learning_rate": 4.5686255100964535e-06, "logits/chosen": -1.3030929565429688, "logits/rejected": -0.6504136323928833, "logps/chosen": -408.542236328125, "logps/rejected": -433.14068603515625, "loss": 0.5608, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.455055832862854, "rewards/margins": 0.6146378517150879, "rewards/rejected": -2.0696938037872314, "step": 2070 }, { "epoch": 0.27, "learning_rate": 4.562189866384209e-06, "logits/chosen": -0.8565002679824829, "logits/rejected": -0.5893241763114929, "logps/chosen": -393.67462158203125, "logps/rejected": -500.3961486816406, "loss": 0.5081, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5734975337982178, "rewards/margins": 0.8559523820877075, "rewards/rejected": -2.429450035095215, "step": 2080 }, { "epoch": 0.27, "learning_rate": 4.555711174382209e-06, "logits/chosen": -1.1275274753570557, "logits/rejected": -0.4757865369319916, "logps/chosen": -406.5447692871094, "logps/rejected": -458.0465393066406, "loss": 0.5529, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7623049020767212, "rewards/margins": 0.6915754079818726, "rewards/rejected": -2.4538803100585938, "step": 2090 }, { "epoch": 0.27, "learning_rate": 4.549189569333387e-06, "logits/chosen": -1.1001176834106445, "logits/rejected": -0.4274144172668457, "logps/chosen": -383.9236755371094, "logps/rejected": -410.41162109375, "loss": 0.5726, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4652644395828247, "rewards/margins": 0.5952554941177368, "rewards/rejected": -2.0605199337005615, "step": 2100 }, { "epoch": 0.27, "eval_logits/chosen": 0.4405115842819214, "eval_logits/rejected": 0.9980767965316772, "eval_logps/chosen": -415.4569396972656, "eval_logps/rejected": -464.3841857910156, "eval_loss": 0.5433006882667542, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -1.4696691036224365, "eval_rewards/margins": 0.6883615255355835, "eval_rewards/rejected": -2.1580307483673096, "eval_runtime": 1178.0191, "eval_samples_per_second": 1.698, "eval_steps_per_second": 0.849, "step": 2100 }, { "epoch": 0.28, "learning_rate": 4.542625187376491e-06, "logits/chosen": -1.145003318786621, "logits/rejected": 0.003861379576846957, "logps/chosen": -429.87255859375, "logps/rejected": -454.26239013671875, "loss": 0.5476, "rewards/accuracies": 0.75, "rewards/chosen": -1.3411312103271484, "rewards/margins": 0.6698344349861145, "rewards/rejected": -2.0109658241271973, "step": 2110 }, { "epoch": 0.28, "learning_rate": 4.536018165543239e-06, "logits/chosen": -0.990827739238739, "logits/rejected": -0.4457016885280609, "logps/chosen": -453.3412170410156, "logps/rejected": -506.03741455078125, "loss": 0.5634, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5011839866638184, "rewards/margins": 0.6426320672035217, "rewards/rejected": -2.1438162326812744, "step": 2120 }, { "epoch": 0.28, "learning_rate": 4.529368641755453e-06, "logits/chosen": -0.9683168530464172, "logits/rejected": -0.37535277009010315, "logps/chosen": -391.17059326171875, "logps/rejected": -449.50714111328125, "loss": 0.6002, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.6766704320907593, "rewards/margins": 0.6462152600288391, "rewards/rejected": -2.322885513305664, "step": 2130 }, { "epoch": 0.28, "learning_rate": 4.522676754822189e-06, "logits/chosen": -1.1304986476898193, "logits/rejected": -0.19757482409477234, "logps/chosen": -427.12188720703125, "logps/rejected": -439.59716796875, "loss": 0.4706, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5254873037338257, "rewards/margins": 0.839495837688446, "rewards/rejected": -2.364983081817627, "step": 2140 }, { "epoch": 0.28, "learning_rate": 4.515942644436836e-06, "logits/chosen": -1.0416542291641235, "logits/rejected": -0.08233954012393951, "logps/chosen": -426.8787536621094, "logps/rejected": -478.9959411621094, "loss": 0.5463, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.4865224361419678, "rewards/margins": 0.7738333940505981, "rewards/rejected": -2.2603557109832764, "step": 2150 }, { "epoch": 0.28, "learning_rate": 4.509166451174194e-06, "logits/chosen": -1.0556268692016602, "logits/rejected": -0.6550186276435852, "logps/chosen": -412.85565185546875, "logps/rejected": -469.2373962402344, "loss": 0.5133, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0766072273254395, "rewards/margins": 0.8188613057136536, "rewards/rejected": -1.8954684734344482, "step": 2160 }, { "epoch": 0.28, "learning_rate": 4.502348316487552e-06, "logits/chosen": -1.5349935293197632, "logits/rejected": -0.5842410922050476, "logps/chosen": -392.6039123535156, "logps/rejected": -432.892578125, "loss": 0.5352, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1731538772583008, "rewards/margins": 0.7138124704360962, "rewards/rejected": -1.886966347694397, "step": 2170 }, { "epoch": 0.29, "learning_rate": 4.495488382705722e-06, "logits/chosen": -1.4572819471359253, "logits/rejected": -0.1267738789319992, "logps/chosen": -435.73748779296875, "logps/rejected": -435.91864013671875, "loss": 0.4479, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9534079432487488, "rewards/margins": 0.9101373553276062, "rewards/rejected": -1.8635451793670654, "step": 2180 }, { "epoch": 0.29, "learning_rate": 4.488586793030075e-06, "logits/chosen": -1.0830357074737549, "logits/rejected": -0.5002374053001404, "logps/chosen": -330.9154052734375, "logps/rejected": -439.44158935546875, "loss": 0.4391, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.0812445878982544, "rewards/margins": 0.9342634081840515, "rewards/rejected": -2.015507936477661, "step": 2190 }, { "epoch": 0.29, "learning_rate": 4.481643691531551e-06, "logits/chosen": -1.0210946798324585, "logits/rejected": -0.11824611574411392, "logps/chosen": -396.3450622558594, "logps/rejected": -435.77734375, "loss": 0.5323, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2495872974395752, "rewards/margins": 0.7661415934562683, "rewards/rejected": -2.015728712081909, "step": 2200 }, { "epoch": 0.29, "eval_logits/chosen": 0.7444968819618225, "eval_logits/rejected": 1.3532707691192627, "eval_logps/chosen": -400.2243957519531, "eval_logps/rejected": -457.445068359375, "eval_loss": 0.5483363270759583, "eval_rewards/accuracies": 0.7149999737739563, "eval_rewards/chosen": -1.3173435926437378, "eval_rewards/margins": 0.7712955474853516, "eval_rewards/rejected": -2.088639259338379, "eval_runtime": 1176.3943, "eval_samples_per_second": 1.7, "eval_steps_per_second": 0.85, "step": 2200 }, { "epoch": 0.29, "learning_rate": 4.474659223147652e-06, "logits/chosen": -0.6220839619636536, "logits/rejected": -0.34505695104599, "logps/chosen": -400.8376770019531, "logps/rejected": -455.2310485839844, "loss": 0.5745, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3072404861450195, "rewards/margins": 0.7971282005310059, "rewards/rejected": -2.1043686866760254, "step": 2210 }, { "epoch": 0.29, "learning_rate": 4.4676335336794125e-06, "logits/chosen": -0.9507938623428345, "logits/rejected": -0.2239619791507721, "logps/chosen": -421.60479736328125, "logps/rejected": -466.75341796875, "loss": 0.5746, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1340086460113525, "rewards/margins": 0.7016123533248901, "rewards/rejected": -1.8356211185455322, "step": 2220 }, { "epoch": 0.29, "learning_rate": 4.46056676978836e-06, "logits/chosen": -0.8311563730239868, "logits/rejected": -0.7106046676635742, "logps/chosen": -368.59271240234375, "logps/rejected": -457.5370178222656, "loss": 0.5991, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.149174451828003, "rewards/margins": 0.5227919816970825, "rewards/rejected": -1.671966314315796, "step": 2230 }, { "epoch": 0.29, "learning_rate": 4.453459078993453e-06, "logits/chosen": -0.7870109677314758, "logits/rejected": -0.6436534523963928, "logps/chosen": -353.3741760253906, "logps/rejected": -449.02093505859375, "loss": 0.4118, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9842308163642883, "rewards/margins": 1.0269984006881714, "rewards/rejected": -2.0112290382385254, "step": 2240 }, { "epoch": 0.29, "learning_rate": 4.446310609668001e-06, "logits/chosen": -0.685552716255188, "logits/rejected": -0.2860308289527893, "logps/chosen": -392.5391540527344, "logps/rejected": -509.0357360839844, "loss": 0.5367, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.616072654724121, "rewards/margins": 0.8003999590873718, "rewards/rejected": -2.4164726734161377, "step": 2250 }, { "epoch": 0.3, "learning_rate": 4.439121511036562e-06, "logits/chosen": -0.8480124473571777, "logits/rejected": 0.001265025115571916, "logps/chosen": -474.6297912597656, "logps/rejected": -509.7176818847656, "loss": 0.5317, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8826634883880615, "rewards/margins": 0.7242499589920044, "rewards/rejected": -2.6069135665893555, "step": 2260 }, { "epoch": 0.3, "learning_rate": 4.431891933171839e-06, "logits/chosen": -1.1753742694854736, "logits/rejected": -0.3657965064048767, "logps/chosen": -403.2115783691406, "logps/rejected": -454.543212890625, "loss": 0.6065, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.4112242460250854, "rewards/margins": 0.6532294154167175, "rewards/rejected": -2.064453601837158, "step": 2270 }, { "epoch": 0.3, "learning_rate": 4.424622026991536e-06, "logits/chosen": -1.0060756206512451, "logits/rejected": -0.37251150608062744, "logps/chosen": -391.8915100097656, "logps/rejected": -437.4137268066406, "loss": 0.5717, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2653111219406128, "rewards/margins": 0.6680868864059448, "rewards/rejected": -1.9333980083465576, "step": 2280 }, { "epoch": 0.3, "learning_rate": 4.417311944255215e-06, "logits/chosen": -0.7842764258384705, "logits/rejected": -0.8166142702102661, "logps/chosen": -354.4258728027344, "logps/rejected": -414.52362060546875, "loss": 0.6835, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1270352602005005, "rewards/margins": 0.41290101408958435, "rewards/rejected": -1.5399363040924072, "step": 2290 }, { "epoch": 0.3, "learning_rate": 4.409961837561122e-06, "logits/chosen": -0.6530027985572815, "logits/rejected": -0.5591267943382263, "logps/chosen": -439.7125549316406, "logps/rejected": -520.6340942382812, "loss": 0.5148, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2820568084716797, "rewards/margins": 0.7924367785453796, "rewards/rejected": -2.074493408203125, "step": 2300 }, { "epoch": 0.3, "eval_logits/chosen": 0.5106939077377319, "eval_logits/rejected": 1.1454416513442993, "eval_logps/chosen": -400.4307861328125, "eval_logps/rejected": -450.4645690917969, "eval_loss": 0.5387266278266907, "eval_rewards/accuracies": 0.7275000214576721, "eval_rewards/chosen": -1.3194077014923096, "eval_rewards/margins": 0.6994263529777527, "eval_rewards/rejected": -2.018834114074707, "eval_runtime": 1973.7027, "eval_samples_per_second": 1.013, "eval_steps_per_second": 0.507, "step": 2300 }, { "epoch": 0.3, "learning_rate": 4.402571860343006e-06, "logits/chosen": -1.4138994216918945, "logits/rejected": 0.06842954456806183, "logps/chosen": -422.7046813964844, "logps/rejected": -416.2068786621094, "loss": 0.5745, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4042686223983765, "rewards/margins": 0.6060650944709778, "rewards/rejected": -2.01033353805542, "step": 2310 }, { "epoch": 0.3, "learning_rate": 4.3951421668669165e-06, "logits/chosen": -0.9971601366996765, "logits/rejected": -0.3194156587123871, "logps/chosen": -423.9295349121094, "logps/rejected": -490.6680603027344, "loss": 0.5077, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.4917466640472412, "rewards/margins": 0.8139832615852356, "rewards/rejected": -2.305730104446411, "step": 2320 }, { "epoch": 0.3, "learning_rate": 4.3876729122279784e-06, "logits/chosen": -0.9761942028999329, "logits/rejected": -0.7307599782943726, "logps/chosen": -324.6410827636719, "logps/rejected": -422.3362731933594, "loss": 0.4951, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2676632404327393, "rewards/margins": 0.9440364837646484, "rewards/rejected": -2.2116997241973877, "step": 2330 }, { "epoch": 0.31, "learning_rate": 4.3801642523471585e-06, "logits/chosen": -1.422680139541626, "logits/rejected": 0.22055339813232422, "logps/chosen": -427.3160705566406, "logps/rejected": -468.10980224609375, "loss": 0.4991, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5216461420059204, "rewards/margins": 0.8185620307922363, "rewards/rejected": -2.3402082920074463, "step": 2340 }, { "epoch": 0.31, "learning_rate": 4.37261634396801e-06, "logits/chosen": -0.7539848685264587, "logits/rejected": -0.37924566864967346, "logps/chosen": -433.6114807128906, "logps/rejected": -493.5924377441406, "loss": 0.5182, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7175815105438232, "rewards/margins": 0.7918481230735779, "rewards/rejected": -2.509429693222046, "step": 2350 }, { "epoch": 0.31, "learning_rate": 4.365029344653401e-06, "logits/chosen": -1.1154688596725464, "logits/rejected": -0.17494897544384003, "logps/chosen": -479.53936767578125, "logps/rejected": -493.6295471191406, "loss": 0.5254, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4410346746444702, "rewards/margins": 0.9099518656730652, "rewards/rejected": -2.3509867191314697, "step": 2360 }, { "epoch": 0.31, "learning_rate": 4.35740341278222e-06, "logits/chosen": -1.2674121856689453, "logits/rejected": -0.6544126272201538, "logps/chosen": -448.0379943847656, "logps/rejected": -505.9681701660156, "loss": 0.5445, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2304657697677612, "rewards/margins": 0.7070282697677612, "rewards/rejected": -1.9374940395355225, "step": 2370 }, { "epoch": 0.31, "learning_rate": 4.349738707546079e-06, "logits/chosen": -0.8608369827270508, "logits/rejected": -0.4290972650051117, "logps/chosen": -400.2996826171875, "logps/rejected": -436.1752014160156, "loss": 0.5362, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3461062908172607, "rewards/margins": 0.770219624042511, "rewards/rejected": -2.116325855255127, "step": 2380 }, { "epoch": 0.31, "learning_rate": 4.3420353889459835e-06, "logits/chosen": -1.3208414316177368, "logits/rejected": -0.2934853434562683, "logps/chosen": -462.20465087890625, "logps/rejected": -489.04156494140625, "loss": 0.4917, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4787876605987549, "rewards/margins": 0.8698205947875977, "rewards/rejected": -2.3486080169677734, "step": 2390 }, { "epoch": 0.31, "learning_rate": 4.334293617788992e-06, "logits/chosen": -1.3569272756576538, "logits/rejected": 0.16524724662303925, "logps/chosen": -401.202880859375, "logps/rejected": -444.6561584472656, "loss": 0.4112, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.5012043714523315, "rewards/margins": 1.1782524585723877, "rewards/rejected": -2.679456949234009, "step": 2400 }, { "epoch": 0.31, "eval_logits/chosen": 0.6648316383361816, "eval_logits/rejected": 1.2865513563156128, "eval_logps/chosen": -430.5039978027344, "eval_logps/rejected": -490.7723388671875, "eval_loss": 0.5400993824005127, "eval_rewards/accuracies": 0.7200000286102295, "eval_rewards/chosen": -1.6201398372650146, "eval_rewards/margins": 0.8017721772193909, "eval_rewards/rejected": -2.4219119548797607, "eval_runtime": 2327.0644, "eval_samples_per_second": 0.859, "eval_steps_per_second": 0.43, "step": 2400 }, { "epoch": 0.32, "learning_rate": 4.326513555684867e-06, "logits/chosen": -1.254010558128357, "logits/rejected": 0.03536475822329521, "logps/chosen": -450.07354736328125, "logps/rejected": -438.840576171875, "loss": 0.6208, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5773955583572388, "rewards/margins": 0.5903611779212952, "rewards/rejected": -2.1677565574645996, "step": 2410 }, { "epoch": 0.32, "learning_rate": 4.31869536504269e-06, "logits/chosen": -0.6679142117500305, "logits/rejected": -0.44401970505714417, "logps/chosen": -402.5331115722656, "logps/rejected": -454.895263671875, "loss": 0.5753, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4515701532363892, "rewards/margins": 0.6856138110160828, "rewards/rejected": -2.1371841430664062, "step": 2420 }, { "epoch": 0.32, "learning_rate": 4.310839209067482e-06, "logits/chosen": -1.3650258779525757, "logits/rejected": 0.0447537787258625, "logps/chosen": -433.0634765625, "logps/rejected": -456.85467529296875, "loss": 0.5657, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5849604606628418, "rewards/margins": 0.5240734815597534, "rewards/rejected": -2.1090340614318848, "step": 2430 }, { "epoch": 0.32, "learning_rate": 4.302945251756788e-06, "logits/chosen": -0.8697819709777832, "logits/rejected": -0.31318679451942444, "logps/chosen": -399.88421630859375, "logps/rejected": -466.7974548339844, "loss": 0.4323, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4232676029205322, "rewards/margins": 0.9615268707275391, "rewards/rejected": -2.3847947120666504, "step": 2440 }, { "epoch": 0.32, "learning_rate": 4.29501365789726e-06, "logits/chosen": -0.6540710926055908, "logits/rejected": -0.13195696473121643, "logps/chosen": -375.75726318359375, "logps/rejected": -441.58135986328125, "loss": 0.5364, "rewards/accuracies": 0.75, "rewards/chosen": -1.5193512439727783, "rewards/margins": 0.8794568181037903, "rewards/rejected": -2.398808002471924, "step": 2450 }, { "epoch": 0.32, "learning_rate": 4.2870445930612135e-06, "logits/chosen": -0.6666491627693176, "logits/rejected": -0.20681039988994598, "logps/chosen": -443.6128845214844, "logps/rejected": -499.73370361328125, "loss": 0.4717, "rewards/accuracies": 0.75, "rewards/chosen": -1.2950096130371094, "rewards/margins": 1.0014543533325195, "rewards/rejected": -2.296464204788208, "step": 2460 }, { "epoch": 0.32, "learning_rate": 4.279038223603171e-06, "logits/chosen": -0.9633728265762329, "logits/rejected": 0.12687508761882782, "logps/chosen": -392.9165344238281, "logps/rejected": -445.36907958984375, "loss": 0.5094, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3179303407669067, "rewards/margins": 0.8730724453926086, "rewards/rejected": -2.19100284576416, "step": 2470 }, { "epoch": 0.32, "learning_rate": 4.2709947166563906e-06, "logits/chosen": -0.3830980658531189, "logits/rejected": -0.04551839455962181, "logps/chosen": -437.1377868652344, "logps/rejected": -540.1201171875, "loss": 0.5012, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7204186916351318, "rewards/margins": 0.9347385168075562, "rewards/rejected": -2.6551575660705566, "step": 2480 }, { "epoch": 0.33, "learning_rate": 4.262914240129379e-06, "logits/chosen": -0.7016464471817017, "logits/rejected": 0.48445090651512146, "logps/chosen": -457.5303649902344, "logps/rejected": -510.51959228515625, "loss": 0.5521, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.668168067932129, "rewards/margins": 0.9587725400924683, "rewards/rejected": -2.6269407272338867, "step": 2490 }, { "epoch": 0.33, "learning_rate": 4.254796962702382e-06, "logits/chosen": -0.9013687968254089, "logits/rejected": -0.2077399045228958, "logps/chosen": -482.5912170410156, "logps/rejected": -530.4383544921875, "loss": 0.5246, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9096496105194092, "rewards/margins": 0.7448057532310486, "rewards/rejected": -2.6544556617736816, "step": 2500 }, { "epoch": 0.33, "eval_logits/chosen": 1.0913811922073364, "eval_logits/rejected": 1.7388339042663574, "eval_logps/chosen": -481.2728576660156, "eval_logps/rejected": -538.2222290039062, "eval_loss": 0.5413315296173096, "eval_rewards/accuracies": 0.722000002861023, "eval_rewards/chosen": -2.1278281211853027, "eval_rewards/margins": 0.7685829997062683, "eval_rewards/rejected": -2.896411180496216, "eval_runtime": 2343.5019, "eval_samples_per_second": 0.853, "eval_steps_per_second": 0.427, "step": 2500 }, { "epoch": 0.33, "learning_rate": 4.246643053823864e-06, "logits/chosen": -0.9218589663505554, "logits/rejected": -0.20635871589183807, "logps/chosen": -394.0392761230469, "logps/rejected": -518.8317260742188, "loss": 0.4924, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8538624048233032, "rewards/margins": 0.9797351956367493, "rewards/rejected": -2.8335976600646973, "step": 2510 }, { "epoch": 0.33, "learning_rate": 4.238452683706979e-06, "logits/chosen": -0.845422089099884, "logits/rejected": -0.2671593129634857, "logps/chosen": -385.7846984863281, "logps/rejected": -433.94219970703125, "loss": 0.52, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.65887451171875, "rewards/margins": 0.8339220285415649, "rewards/rejected": -2.4927964210510254, "step": 2520 }, { "epoch": 0.33, "learning_rate": 4.2302260233260025e-06, "logits/chosen": -0.5951265096664429, "logits/rejected": -0.30148234963417053, "logps/chosen": -463.2898864746094, "logps/rejected": -552.0485229492188, "loss": 0.5021, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.882061243057251, "rewards/margins": 0.9961883425712585, "rewards/rejected": -2.8782496452331543, "step": 2530 }, { "epoch": 0.33, "learning_rate": 4.2219632444127766e-06, "logits/chosen": -0.3708893358707428, "logits/rejected": 0.35125669836997986, "logps/chosen": -490.02197265625, "logps/rejected": -541.1585083007812, "loss": 0.577, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2178242206573486, "rewards/margins": 0.617692232131958, "rewards/rejected": -2.8355164527893066, "step": 2540 }, { "epoch": 0.33, "learning_rate": 4.213664519453115e-06, "logits/chosen": -0.8187354803085327, "logits/rejected": -0.09626396000385284, "logps/chosen": -438.7705993652344, "logps/rejected": -519.1937866210938, "loss": 0.5085, "rewards/accuracies": 0.75, "rewards/chosen": -2.062185287475586, "rewards/margins": 0.7500772476196289, "rewards/rejected": -2.8122622966766357, "step": 2550 }, { "epoch": 0.33, "learning_rate": 4.205330021683208e-06, "logits/chosen": -0.38596871495246887, "logits/rejected": -0.07203111797571182, "logps/chosen": -407.65740966796875, "logps/rejected": -462.069580078125, "loss": 0.5697, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9310047626495361, "rewards/margins": 0.5898799300193787, "rewards/rejected": -2.5208847522735596, "step": 2560 }, { "epoch": 0.34, "learning_rate": 4.196959925086008e-06, "logits/chosen": -0.22275564074516296, "logits/rejected": -0.3874654173851013, "logps/chosen": -478.38409423828125, "logps/rejected": -548.2817993164062, "loss": 0.6185, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1326379776000977, "rewards/margins": 0.47789937257766724, "rewards/rejected": -2.6105377674102783, "step": 2570 }, { "epoch": 0.34, "learning_rate": 4.188554404387588e-06, "logits/chosen": -1.0716884136199951, "logits/rejected": -0.1740306168794632, "logps/chosen": -451.58734130859375, "logps/rejected": -492.9994201660156, "loss": 0.5536, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7293052673339844, "rewards/margins": 0.6498473286628723, "rewards/rejected": -2.379152536392212, "step": 2580 }, { "epoch": 0.34, "learning_rate": 4.180113635053504e-06, "logits/chosen": -0.3191063404083252, "logits/rejected": -0.5359519720077515, "logps/chosen": -417.9990234375, "logps/rejected": -500.7484436035156, "loss": 0.5768, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6265789270401, "rewards/margins": 0.6983104944229126, "rewards/rejected": -2.3248894214630127, "step": 2590 }, { "epoch": 0.34, "learning_rate": 4.17163779328513e-06, "logits/chosen": -0.8110774755477905, "logits/rejected": -0.09047582000494003, "logps/chosen": -430.71197509765625, "logps/rejected": -488.92913818359375, "loss": 0.5657, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.597516417503357, "rewards/margins": 0.8392965197563171, "rewards/rejected": -2.4368128776550293, "step": 2600 }, { "epoch": 0.34, "eval_logits/chosen": 0.9886474609375, "eval_logits/rejected": 1.657106876373291, "eval_logps/chosen": -437.1171875, "eval_logps/rejected": -495.00030517578125, "eval_loss": 0.5373482704162598, "eval_rewards/accuracies": 0.7200000286102295, "eval_rewards/chosen": -1.6862711906433105, "eval_rewards/margins": 0.7779201865196228, "eval_rewards/rejected": -2.4641919136047363, "eval_runtime": 2326.8602, "eval_samples_per_second": 0.86, "eval_steps_per_second": 0.43, "step": 2600 }, { "epoch": 0.34, "learning_rate": 4.163127056015975e-06, "logits/chosen": -0.8432804346084595, "logits/rejected": 0.2679641544818878, "logps/chosen": -450.518798828125, "logps/rejected": -520.7162475585938, "loss": 0.5617, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.684958815574646, "rewards/margins": 0.8352130651473999, "rewards/rejected": -2.520171642303467, "step": 2610 }, { "epoch": 0.34, "learning_rate": 4.154581600907994e-06, "logits/chosen": -0.8970314264297485, "logits/rejected": -0.025129878893494606, "logps/chosen": -394.2820739746094, "logps/rejected": -448.968505859375, "loss": 0.4855, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.448757529258728, "rewards/margins": 0.8607563972473145, "rewards/rejected": -2.309514284133911, "step": 2620 }, { "epoch": 0.34, "learning_rate": 4.14600160634788e-06, "logits/chosen": -0.5631152391433716, "logits/rejected": 0.03483830764889717, "logps/chosen": -381.072998046875, "logps/rejected": -490.3233947753906, "loss": 0.4763, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4302902221679688, "rewards/margins": 1.0292136669158936, "rewards/rejected": -2.459503650665283, "step": 2630 }, { "epoch": 0.35, "learning_rate": 4.137387251443335e-06, "logits/chosen": -1.161237120628357, "logits/rejected": 0.12660464644432068, "logps/chosen": -383.56878662109375, "logps/rejected": -413.07550048828125, "loss": 0.5769, "rewards/accuracies": 0.75, "rewards/chosen": -1.3406836986541748, "rewards/margins": 0.6809174418449402, "rewards/rejected": -2.0216009616851807, "step": 2640 }, { "epoch": 0.35, "learning_rate": 4.128738716019338e-06, "logits/chosen": -0.9206587672233582, "logits/rejected": 0.036871038377285004, "logps/chosen": -433.82080078125, "logps/rejected": -477.78619384765625, "loss": 0.538, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.3911445140838623, "rewards/margins": 0.7216758728027344, "rewards/rejected": -2.1128203868865967, "step": 2650 }, { "epoch": 0.35, "learning_rate": 4.120056180614386e-06, "logits/chosen": -0.46158695220947266, "logits/rejected": 0.09717199206352234, "logps/chosen": -426.7666015625, "logps/rejected": -509.9091796875, "loss": 0.5761, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.841655969619751, "rewards/margins": 0.7033026814460754, "rewards/rejected": -2.5449583530426025, "step": 2660 }, { "epoch": 0.35, "learning_rate": 4.111339826476725e-06, "logits/chosen": -0.36191526055336, "logits/rejected": 0.032123737037181854, "logps/chosen": -385.82342529296875, "logps/rejected": -472.90203857421875, "loss": 0.5479, "rewards/accuracies": 0.75, "rewards/chosen": -1.5523761510849, "rewards/margins": 0.8306114077568054, "rewards/rejected": -2.3829877376556396, "step": 2670 }, { "epoch": 0.35, "learning_rate": 4.102589835560572e-06, "logits/chosen": -0.8637989163398743, "logits/rejected": 0.42278608679771423, "logps/chosen": -448.33477783203125, "logps/rejected": -472.2781677246094, "loss": 0.5531, "rewards/accuracies": 0.625, "rewards/chosen": -1.3624870777130127, "rewards/margins": 0.7671451568603516, "rewards/rejected": -2.1296322345733643, "step": 2680 }, { "epoch": 0.35, "learning_rate": 4.09380639052231e-06, "logits/chosen": -0.7388796210289001, "logits/rejected": -0.16724643111228943, "logps/chosen": -423.61370849609375, "logps/rejected": -544.0777587890625, "loss": 0.4631, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.392254114151001, "rewards/margins": 1.0100640058517456, "rewards/rejected": -2.402318239212036, "step": 2690 }, { "epoch": 0.35, "learning_rate": 4.084989674716679e-06, "logits/chosen": -0.7967337369918823, "logits/rejected": -0.30105361342430115, "logps/chosen": -473.30743408203125, "logps/rejected": -546.0299072265625, "loss": 0.5216, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9343668222427368, "rewards/margins": 0.7889801859855652, "rewards/rejected": -2.7233471870422363, "step": 2700 }, { "epoch": 0.35, "eval_logits/chosen": 1.1290334463119507, "eval_logits/rejected": 1.7935971021652222, "eval_logps/chosen": -467.43646240234375, "eval_logps/rejected": -522.52783203125, "eval_loss": 0.5356955528259277, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -1.9894644021987915, "eval_rewards/margins": 0.7500025629997253, "eval_rewards/rejected": -2.7394673824310303, "eval_runtime": 2319.9728, "eval_samples_per_second": 0.862, "eval_steps_per_second": 0.431, "step": 2700 }, { "epoch": 0.35, "learning_rate": 4.076139872192949e-06, "logits/chosen": -0.7494713068008423, "logits/rejected": 0.4128738045692444, "logps/chosen": -523.4064331054688, "logps/rejected": -553.7559204101562, "loss": 0.5189, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.168044090270996, "rewards/margins": 0.7787975668907166, "rewards/rejected": -2.9468414783477783, "step": 2710 }, { "epoch": 0.36, "learning_rate": 4.067257167691074e-06, "logits/chosen": -0.3000775873661041, "logits/rejected": -0.04052892327308655, "logps/chosen": -495.1270446777344, "logps/rejected": -570.2074584960938, "loss": 0.5173, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9619982242584229, "rewards/margins": 0.9272588491439819, "rewards/rejected": -2.8892569541931152, "step": 2720 }, { "epoch": 0.36, "learning_rate": 4.05834174663784e-06, "logits/chosen": -0.5102322697639465, "logits/rejected": -0.13270506262779236, "logps/chosen": -456.03424072265625, "logps/rejected": -465.28240966796875, "loss": 0.6489, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8180183172225952, "rewards/margins": 0.48946094512939453, "rewards/rejected": -2.307478904724121, "step": 2730 }, { "epoch": 0.36, "learning_rate": 4.0493937951429895e-06, "logits/chosen": -1.0210916996002197, "logits/rejected": -0.37836208939552307, "logps/chosen": -429.5328674316406, "logps/rejected": -458.05877685546875, "loss": 0.5128, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.6570367813110352, "rewards/margins": 0.6776058673858643, "rewards/rejected": -2.3346428871154785, "step": 2740 }, { "epoch": 0.36, "learning_rate": 4.040413499995343e-06, "logits/chosen": -0.7698782086372375, "logits/rejected": -0.08163869380950928, "logps/chosen": -478.63848876953125, "logps/rejected": -546.338623046875, "loss": 0.513, "rewards/accuracies": 0.75, "rewards/chosen": -1.83940851688385, "rewards/margins": 0.8054366111755371, "rewards/rejected": -2.6448452472686768, "step": 2750 }, { "epoch": 0.36, "learning_rate": 4.031401048658892e-06, "logits/chosen": -0.7496566772460938, "logits/rejected": 0.024263203144073486, "logps/chosen": -451.26336669921875, "logps/rejected": -517.8736572265625, "loss": 0.5583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7768245935440063, "rewards/margins": 0.8688470721244812, "rewards/rejected": -2.6456716060638428, "step": 2760 }, { "epoch": 0.36, "learning_rate": 4.022356629268894e-06, "logits/chosen": -0.7827082276344299, "logits/rejected": 0.21052603423595428, "logps/chosen": -487.554443359375, "logps/rejected": -506.66192626953125, "loss": 0.6025, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0982871055603027, "rewards/margins": 0.5698203444480896, "rewards/rejected": -2.668107509613037, "step": 2770 }, { "epoch": 0.36, "learning_rate": 4.013280430627936e-06, "logits/chosen": -0.36198630928993225, "logits/rejected": 0.06339599192142487, "logps/chosen": -443.41961669921875, "logps/rejected": -474.7898864746094, "loss": 0.6083, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9905750751495361, "rewards/margins": 0.5316168665885925, "rewards/rejected": -2.5221920013427734, "step": 2780 }, { "epoch": 0.37, "learning_rate": 4.004172642202002e-06, "logits/chosen": -0.9610457420349121, "logits/rejected": 0.30055952072143555, "logps/chosen": -452.96343994140625, "logps/rejected": -512.3440551757812, "loss": 0.4909, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0993430614471436, "rewards/margins": 0.8472961187362671, "rewards/rejected": -2.9466395378112793, "step": 2790 }, { "epoch": 0.37, "learning_rate": 3.995033454116512e-06, "logits/chosen": -1.0286301374435425, "logits/rejected": 0.016544628888368607, "logps/chosen": -508.1040954589844, "logps/rejected": -527.2821044921875, "loss": 0.5865, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1054210662841797, "rewards/margins": 0.5004376173019409, "rewards/rejected": -2.605858564376831, "step": 2800 }, { "epoch": 0.37, "eval_logits/chosen": 1.101927399635315, "eval_logits/rejected": 1.7565044164657593, "eval_logps/chosen": -478.56048583984375, "eval_logps/rejected": -529.6148681640625, "eval_loss": 0.5350669622421265, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -2.1007049083709717, "eval_rewards/margins": 0.709632158279419, "eval_rewards/rejected": -2.8103370666503906, "eval_runtime": 2335.1815, "eval_samples_per_second": 0.856, "eval_steps_per_second": 0.428, "step": 2800 }, { "epoch": 0.37, "learning_rate": 3.985863057152355e-06, "logits/chosen": -0.4937034547328949, "logits/rejected": -0.2508159577846527, "logps/chosen": -504.6494140625, "logps/rejected": -560.7249755859375, "loss": 0.5349, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.059462785720825, "rewards/margins": 0.8392888307571411, "rewards/rejected": -2.898751735687256, "step": 2810 }, { "epoch": 0.37, "learning_rate": 3.976661642741908e-06, "logits/chosen": -0.27275025844573975, "logits/rejected": -0.03731584548950195, "logps/chosen": -484.62506103515625, "logps/rejected": -580.8031005859375, "loss": 0.466, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.229395627975464, "rewards/margins": 0.9002262353897095, "rewards/rejected": -3.129621982574463, "step": 2820 }, { "epoch": 0.37, "learning_rate": 3.967429402965035e-06, "logits/chosen": -0.3462119400501251, "logits/rejected": 0.061771608889102936, "logps/chosen": -553.1046142578125, "logps/rejected": -620.2770385742188, "loss": 0.5486, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.695598602294922, "rewards/margins": 0.7296158671379089, "rewards/rejected": -3.4252142906188965, "step": 2830 }, { "epoch": 0.37, "learning_rate": 3.958166530545085e-06, "logits/chosen": -0.5360496044158936, "logits/rejected": -0.30475491285324097, "logps/chosen": -516.7517700195312, "logps/rejected": -609.4689331054688, "loss": 0.4753, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.5592031478881836, "rewards/margins": 0.9313445091247559, "rewards/rejected": -3.4905476570129395, "step": 2840 }, { "epoch": 0.37, "learning_rate": 3.948873218844863e-06, "logits/chosen": 0.14160022139549255, "logits/rejected": -0.38983091711997986, "logps/chosen": -468.97314453125, "logps/rejected": -537.5845947265625, "loss": 0.6692, "rewards/accuracies": 0.5625, "rewards/chosen": -2.569286584854126, "rewards/margins": 0.41470590233802795, "rewards/rejected": -2.983992338180542, "step": 2850 }, { "epoch": 0.37, "learning_rate": 3.939549661862592e-06, "logits/chosen": -0.5928257703781128, "logits/rejected": 0.027815770357847214, "logps/chosen": -490.8857421875, "logps/rejected": -555.0617065429688, "loss": 0.5306, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.194727659225464, "rewards/margins": 0.8603199124336243, "rewards/rejected": -3.0550477504730225, "step": 2860 }, { "epoch": 0.38, "learning_rate": 3.930196054227871e-06, "logits/chosen": -0.7506051063537598, "logits/rejected": 0.29753103852272034, "logps/chosen": -445.02667236328125, "logps/rejected": -515.992431640625, "loss": 0.5096, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.015972375869751, "rewards/margins": 0.8588827848434448, "rewards/rejected": -2.8748552799224854, "step": 2870 }, { "epoch": 0.38, "learning_rate": 3.920812591197604e-06, "logits/chosen": -1.051764726638794, "logits/rejected": 0.04196419566869736, "logps/chosen": -435.1283264160156, "logps/rejected": -485.16290283203125, "loss": 0.4647, "rewards/accuracies": 0.8125, "rewards/chosen": -1.730743646621704, "rewards/margins": 0.8607555627822876, "rewards/rejected": -2.591498851776123, "step": 2880 }, { "epoch": 0.38, "learning_rate": 3.9113994686519305e-06, "logits/chosen": -1.111640214920044, "logits/rejected": 0.03947020322084427, "logps/chosen": -432.510009765625, "logps/rejected": -520.13134765625, "loss": 0.4645, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6205533742904663, "rewards/margins": 0.9038299322128296, "rewards/rejected": -2.524383306503296, "step": 2890 }, { "epoch": 0.38, "learning_rate": 3.90195688309013e-06, "logits/chosen": -0.9940659403800964, "logits/rejected": -0.16780364513397217, "logps/chosen": -408.54425048828125, "logps/rejected": -470.9107360839844, "loss": 0.5252, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5781054496765137, "rewards/margins": 0.9151102304458618, "rewards/rejected": -2.493215560913086, "step": 2900 }, { "epoch": 0.38, "eval_logits/chosen": 0.9107775688171387, "eval_logits/rejected": 1.5685968399047852, "eval_logps/chosen": -426.64959716796875, "eval_logps/rejected": -492.73974609375, "eval_loss": 0.537619948387146, "eval_rewards/accuracies": 0.7204999923706055, "eval_rewards/chosen": -1.581595540046692, "eval_rewards/margins": 0.8599900603294373, "eval_rewards/rejected": -2.4415855407714844, "eval_runtime": 2334.4828, "eval_samples_per_second": 0.857, "eval_steps_per_second": 0.428, "step": 2900 }, { "epoch": 0.38, "learning_rate": 3.892485031626527e-06, "logits/chosen": -1.0271751880645752, "logits/rejected": -0.31663069128990173, "logps/chosen": -413.0658264160156, "logps/rejected": -492.5518493652344, "loss": 0.5214, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5365521907806396, "rewards/margins": 0.9228584170341492, "rewards/rejected": -2.4594106674194336, "step": 2910 }, { "epoch": 0.38, "learning_rate": 3.882984111986371e-06, "logits/chosen": -0.7470510005950928, "logits/rejected": 0.05860505253076553, "logps/chosen": -435.0110778808594, "logps/rejected": -458.22137451171875, "loss": 0.5542, "rewards/accuracies": 0.75, "rewards/chosen": -1.5745357275009155, "rewards/margins": 0.6277254223823547, "rewards/rejected": -2.202260971069336, "step": 2920 }, { "epoch": 0.38, "learning_rate": 3.873454322501711e-06, "logits/chosen": -1.084609866142273, "logits/rejected": -0.0922069326043129, "logps/chosen": -392.8829650878906, "logps/rejected": -459.1026306152344, "loss": 0.5197, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.1698970794677734, "rewards/margins": 0.9355286359786987, "rewards/rejected": -2.1054255962371826, "step": 2930 }, { "epoch": 0.38, "learning_rate": 3.863895862107255e-06, "logits/chosen": -1.0651941299438477, "logits/rejected": -0.298669695854187, "logps/chosen": -362.4504089355469, "logps/rejected": -488.83001708984375, "loss": 0.4332, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0489132404327393, "rewards/margins": 1.0638964176177979, "rewards/rejected": -2.112809658050537, "step": 2940 }, { "epoch": 0.39, "learning_rate": 3.854308930336216e-06, "logits/chosen": -1.077282190322876, "logits/rejected": 0.2055933028459549, "logps/chosen": -443.42486572265625, "logps/rejected": -475.886474609375, "loss": 0.555, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3301843404769897, "rewards/margins": 0.7858977317810059, "rewards/rejected": -2.116081953048706, "step": 2950 }, { "epoch": 0.39, "learning_rate": 3.844693727316151e-06, "logits/chosen": -1.1035977602005005, "logits/rejected": -0.3125559389591217, "logps/chosen": -408.77783203125, "logps/rejected": -454.4917907714844, "loss": 0.4932, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2791414260864258, "rewards/margins": 0.9063733220100403, "rewards/rejected": -2.185514450073242, "step": 2960 }, { "epoch": 0.39, "learning_rate": 3.835050453764779e-06, "logits/chosen": -0.4838363230228424, "logits/rejected": -0.24740548431873322, "logps/chosen": -358.820556640625, "logps/rejected": -441.27001953125, "loss": 0.4782, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1172313690185547, "rewards/margins": 1.0152322053909302, "rewards/rejected": -2.1324634552001953, "step": 2970 }, { "epoch": 0.39, "learning_rate": 3.825379310985792e-06, "logits/chosen": -0.7724876999855042, "logits/rejected": -0.538825273513794, "logps/chosen": -370.72052001953125, "logps/rejected": -447.32745361328125, "loss": 0.5206, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1402572393417358, "rewards/margins": 0.8123918771743774, "rewards/rejected": -1.9526491165161133, "step": 2980 }, { "epoch": 0.39, "learning_rate": 3.815680500864651e-06, "logits/chosen": -0.9769965410232544, "logits/rejected": -0.2623330056667328, "logps/chosen": -419.5028381347656, "logps/rejected": -455.3551330566406, "loss": 0.5107, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.1941817998886108, "rewards/margins": 0.7795951962471008, "rewards/rejected": -1.9737770557403564, "step": 2990 }, { "epoch": 0.39, "learning_rate": 3.80595422586438e-06, "logits/chosen": -1.0406408309936523, "logits/rejected": -0.20213142037391663, "logps/chosen": -462.68487548828125, "logps/rejected": -471.08123779296875, "loss": 0.5381, "rewards/accuracies": 0.75, "rewards/chosen": -1.3869965076446533, "rewards/margins": 0.8180079460144043, "rewards/rejected": -2.2050046920776367, "step": 3000 }, { "epoch": 0.39, "eval_logits/chosen": 1.0232552289962769, "eval_logits/rejected": 1.7206393480300903, "eval_logps/chosen": -422.64849853515625, "eval_logps/rejected": -485.7741394042969, "eval_loss": 0.5305867791175842, "eval_rewards/accuracies": 0.7229999899864197, "eval_rewards/chosen": -1.5415852069854736, "eval_rewards/margins": 0.8303446769714355, "eval_rewards/rejected": -2.37192964553833, "eval_runtime": 2335.0533, "eval_samples_per_second": 0.857, "eval_steps_per_second": 0.428, "step": 3000 }, { "epoch": 0.39, "learning_rate": 3.7962006890213266e-06, "logits/chosen": -0.1788121610879898, "logits/rejected": 0.016005922108888626, "logps/chosen": -406.82928466796875, "logps/rejected": -460.6402282714844, "loss": 0.606, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7624022960662842, "rewards/margins": 0.5616916418075562, "rewards/rejected": -2.324093818664551, "step": 3010 }, { "epoch": 0.4, "learning_rate": 3.7864200939409336e-06, "logits/chosen": -1.0381033420562744, "logits/rejected": 0.2535991072654724, "logps/chosen": -414.68536376953125, "logps/rejected": -450.0343322753906, "loss": 0.5872, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4555238485336304, "rewards/margins": 0.6130931973457336, "rewards/rejected": -2.068617105484009, "step": 3020 }, { "epoch": 0.4, "learning_rate": 3.7766126447934857e-06, "logits/chosen": -1.1287479400634766, "logits/rejected": -0.4950384199619293, "logps/chosen": -377.2622985839844, "logps/rejected": -427.8341369628906, "loss": 0.5285, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.304473638534546, "rewards/margins": 0.7007145881652832, "rewards/rejected": -2.005188465118408, "step": 3030 }, { "epoch": 0.4, "learning_rate": 3.766778546309847e-06, "logits/chosen": -0.7997163534164429, "logits/rejected": 0.35035890340805054, "logps/chosen": -444.89190673828125, "logps/rejected": -414.5162048339844, "loss": 0.6038, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4366438388824463, "rewards/margins": 0.5985267758369446, "rewards/rejected": -2.035170793533325, "step": 3040 }, { "epoch": 0.4, "learning_rate": 3.7569180037771868e-06, "logits/chosen": -0.36123308539390564, "logits/rejected": -0.1611182987689972, "logps/chosen": -421.418701171875, "logps/rejected": -478.39300537109375, "loss": 0.596, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.594327688217163, "rewards/margins": 0.6372068524360657, "rewards/rejected": -2.231534481048584, "step": 3050 }, { "epoch": 0.4, "learning_rate": 3.7470312230346955e-06, "logits/chosen": -0.7029015421867371, "logits/rejected": 0.3699846863746643, "logps/chosen": -454.07025146484375, "logps/rejected": -483.5018615722656, "loss": 0.4709, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4222877025604248, "rewards/margins": 0.9277554750442505, "rewards/rejected": -2.3500430583953857, "step": 3060 }, { "epoch": 0.4, "learning_rate": 3.7371184104692857e-06, "logits/chosen": -1.1917140483856201, "logits/rejected": -0.15570615231990814, "logps/chosen": -475.3992614746094, "logps/rejected": -481.27301025390625, "loss": 0.5378, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.428065299987793, "rewards/margins": 0.7733792066574097, "rewards/rejected": -2.201444387435913, "step": 3070 }, { "epoch": 0.4, "learning_rate": 3.727179773011289e-06, "logits/chosen": -0.5196498036384583, "logits/rejected": -0.2388072907924652, "logps/chosen": -431.4600524902344, "logps/rejected": -482.206787109375, "loss": 0.5579, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5728769302368164, "rewards/margins": 0.6688525080680847, "rewards/rejected": -2.241729259490967, "step": 3080 }, { "epoch": 0.4, "learning_rate": 3.717215518130127e-06, "logits/chosen": -0.6362151503562927, "logits/rejected": 0.041982658207416534, "logps/chosen": -410.38189697265625, "logps/rejected": -457.52093505859375, "loss": 0.5947, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5848448276519775, "rewards/margins": 0.5786422491073608, "rewards/rejected": -2.163486957550049, "step": 3090 }, { "epoch": 0.41, "learning_rate": 3.7072258538299923e-06, "logits/chosen": -1.2260688543319702, "logits/rejected": 0.28254860639572144, "logps/chosen": -483.26568603515625, "logps/rejected": -465.8003845214844, "loss": 0.4587, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3665094375610352, "rewards/margins": 0.8110405206680298, "rewards/rejected": -2.1775500774383545, "step": 3100 }, { "epoch": 0.41, "eval_logits/chosen": 1.1220719814300537, "eval_logits/rejected": 1.844536542892456, "eval_logps/chosen": -413.6004943847656, "eval_logps/rejected": -467.0777587890625, "eval_loss": 0.522217869758606, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -1.4511048793792725, "eval_rewards/margins": 0.7338610291481018, "eval_rewards/rejected": -2.1849660873413086, "eval_runtime": 2334.682, "eval_samples_per_second": 0.857, "eval_steps_per_second": 0.428, "step": 3100 }, { "epoch": 0.41, "learning_rate": 3.6972109886454933e-06, "logits/chosen": -0.25699383020401, "logits/rejected": -0.20727062225341797, "logps/chosen": -435.0664978027344, "logps/rejected": -490.909912109375, "loss": 0.4972, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.762265920639038, "rewards/margins": 0.8372318148612976, "rewards/rejected": -2.5994980335235596, "step": 3110 }, { "epoch": 0.41, "learning_rate": 3.687171131637314e-06, "logits/chosen": -0.7965458035469055, "logits/rejected": 0.2634788155555725, "logps/chosen": -453.2422790527344, "logps/rejected": -499.4004821777344, "loss": 0.5227, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7745777368545532, "rewards/margins": 0.7375408411026001, "rewards/rejected": -2.5121185779571533, "step": 3120 }, { "epoch": 0.41, "learning_rate": 3.677106492387839e-06, "logits/chosen": -0.7059360146522522, "logits/rejected": 0.452791303396225, "logps/chosen": -459.8872985839844, "logps/rejected": -473.69781494140625, "loss": 0.5509, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7924385070800781, "rewards/margins": 0.7473533153533936, "rewards/rejected": -2.5397915840148926, "step": 3130 }, { "epoch": 0.41, "learning_rate": 3.6670172809967865e-06, "logits/chosen": -0.48607778549194336, "logits/rejected": 0.2857428193092346, "logps/chosen": -408.91790771484375, "logps/rejected": -458.8837890625, "loss": 0.5115, "rewards/accuracies": 0.75, "rewards/chosen": -1.9582751989364624, "rewards/margins": 0.6871889233589172, "rewards/rejected": -2.6454639434814453, "step": 3140 }, { "epoch": 0.41, "learning_rate": 3.6569037080768153e-06, "logits/chosen": -0.8563801646232605, "logits/rejected": -0.1331445276737213, "logps/chosen": -419.33544921875, "logps/rejected": -523.2894287109375, "loss": 0.5028, "rewards/accuracies": 0.75, "rewards/chosen": -1.7469348907470703, "rewards/margins": 0.928310215473175, "rewards/rejected": -2.6752450466156006, "step": 3150 }, { "epoch": 0.41, "learning_rate": 3.646765984749137e-06, "logits/chosen": -0.42074188590049744, "logits/rejected": -0.15325963497161865, "logps/chosen": -437.05352783203125, "logps/rejected": -532.8778076171875, "loss": 0.4926, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6952593326568604, "rewards/margins": 0.9555587768554688, "rewards/rejected": -2.650818109512329, "step": 3160 }, { "epoch": 0.41, "learning_rate": 3.6366043226391e-06, "logits/chosen": -0.8013358116149902, "logits/rejected": 0.14026977121829987, "logps/chosen": -455.299560546875, "logps/rejected": -499.14892578125, "loss": 0.505, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.768510103225708, "rewards/margins": 0.9221822023391724, "rewards/rejected": -2.690692186355591, "step": 3170 }, { "epoch": 0.42, "learning_rate": 3.6264189338717766e-06, "logits/chosen": -1.1270676851272583, "logits/rejected": 0.0055741192772984505, "logps/chosen": -444.71502685546875, "logps/rejected": -478.0035705566406, "loss": 0.5603, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6736608743667603, "rewards/margins": 0.6656028032302856, "rewards/rejected": -2.339263439178467, "step": 3180 }, { "epoch": 0.42, "learning_rate": 3.6162100310675334e-06, "logits/chosen": -0.5859667062759399, "logits/rejected": -0.5138736963272095, "logps/chosen": -400.99151611328125, "logps/rejected": -445.34747314453125, "loss": 0.65, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.3842413425445557, "rewards/margins": 0.5574811697006226, "rewards/rejected": -1.9417225122451782, "step": 3190 }, { "epoch": 0.42, "learning_rate": 3.605977827337596e-06, "logits/chosen": -0.5511821508407593, "logits/rejected": -0.18979701399803162, "logps/chosen": -392.2962341308594, "logps/rejected": -460.0108947753906, "loss": 0.5173, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.33115553855896, "rewards/margins": 0.8485208749771118, "rewards/rejected": -2.1796765327453613, "step": 3200 }, { "epoch": 0.42, "eval_logits/chosen": 0.8981449604034424, "eval_logits/rejected": 1.6186491250991821, "eval_logps/chosen": -403.9989318847656, "eval_logps/rejected": -462.40948486328125, "eval_loss": 0.5276510715484619, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -1.3550893068313599, "eval_rewards/margins": 0.7831941843032837, "eval_rewards/rejected": -2.1382837295532227, "eval_runtime": 2082.617, "eval_samples_per_second": 0.96, "eval_steps_per_second": 0.48, "step": 3200 }, { "epoch": 0.42, "learning_rate": 3.595722536279595e-06, "logits/chosen": -1.2897363901138306, "logits/rejected": 0.7435730695724487, "logps/chosen": -463.46563720703125, "logps/rejected": -484.604736328125, "loss": 0.4551, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.345106840133667, "rewards/margins": 0.9554675221443176, "rewards/rejected": -2.300574541091919, "step": 3210 }, { "epoch": 0.42, "learning_rate": 3.58544437197311e-06, "logits/chosen": -0.5996850728988647, "logits/rejected": 0.24509985744953156, "logps/chosen": -428.47125244140625, "logps/rejected": -500.66168212890625, "loss": 0.5034, "rewards/accuracies": 0.6875, "rewards/chosen": -1.491923213005066, "rewards/margins": 0.9786638021469116, "rewards/rejected": -2.4705870151519775, "step": 3220 }, { "epoch": 0.42, "learning_rate": 3.5751435489752025e-06, "logits/chosen": -0.4525715410709381, "logits/rejected": 0.17086896300315857, "logps/chosen": -396.47723388671875, "logps/rejected": -455.05706787109375, "loss": 0.4999, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4165663719177246, "rewards/margins": 0.9099749326705933, "rewards/rejected": -2.3265414237976074, "step": 3230 }, { "epoch": 0.42, "learning_rate": 3.5648202823159317e-06, "logits/chosen": -0.16280296444892883, "logits/rejected": 0.15849009156227112, "logps/chosen": -411.54742431640625, "logps/rejected": -548.3161010742188, "loss": 0.4644, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7535407543182373, "rewards/margins": 1.087467908859253, "rewards/rejected": -2.8410089015960693, "step": 3240 }, { "epoch": 0.43, "learning_rate": 3.554474787493873e-06, "logits/chosen": 0.05241694301366806, "logits/rejected": 0.6363898515701294, "logps/chosen": -502.8150329589844, "logps/rejected": -597.0306396484375, "loss": 0.4855, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0513508319854736, "rewards/margins": 1.1114223003387451, "rewards/rejected": -3.1627731323242188, "step": 3250 }, { "epoch": 0.43, "learning_rate": 3.5441072804716125e-06, "logits/chosen": 0.01650853082537651, "logits/rejected": 0.25886648893356323, "logps/chosen": -555.9203491210938, "logps/rejected": -650.7965087890625, "loss": 0.5809, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.538379192352295, "rewards/margins": 0.8736263513565063, "rewards/rejected": -3.412005662918091, "step": 3260 }, { "epoch": 0.43, "learning_rate": 3.5337179776712427e-06, "logits/chosen": -0.008978593163192272, "logits/rejected": 0.6548370718955994, "logps/chosen": -530.6326904296875, "logps/rejected": -644.48876953125, "loss": 0.5841, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.7416481971740723, "rewards/margins": 1.1851587295532227, "rewards/rejected": -3.926807403564453, "step": 3270 }, { "epoch": 0.43, "learning_rate": 3.5233070959698445e-06, "logits/chosen": -0.5847116708755493, "logits/rejected": 0.3420669138431549, "logps/chosen": -555.023681640625, "logps/rejected": -576.3981323242188, "loss": 0.6021, "rewards/accuracies": 0.6875, "rewards/chosen": -2.538581371307373, "rewards/margins": 0.596244752407074, "rewards/rejected": -3.1348259449005127, "step": 3280 }, { "epoch": 0.43, "learning_rate": 3.512874852694959e-06, "logits/chosen": -0.6708934903144836, "logits/rejected": 0.5892789959907532, "logps/chosen": -480.2303161621094, "logps/rejected": -554.13671875, "loss": 0.4727, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.084829807281494, "rewards/margins": 0.9657213091850281, "rewards/rejected": -3.050551414489746, "step": 3290 }, { "epoch": 0.43, "learning_rate": 3.5024214656200497e-06, "logits/chosen": -0.9602855443954468, "logits/rejected": 0.5978536009788513, "logps/chosen": -469.49737548828125, "logps/rejected": -488.523681640625, "loss": 0.5851, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8592431545257568, "rewards/margins": 0.7517732381820679, "rewards/rejected": -2.6110165119171143, "step": 3300 }, { "epoch": 0.43, "eval_logits/chosen": 1.2859539985656738, "eval_logits/rejected": 2.034395933151245, "eval_logps/chosen": -437.1257629394531, "eval_logps/rejected": -498.6931457519531, "eval_loss": 0.5180677771568298, "eval_rewards/accuracies": 0.7325000166893005, "eval_rewards/chosen": -1.6863574981689453, "eval_rewards/margins": 0.8147625923156738, "eval_rewards/rejected": -2.50111985206604, "eval_runtime": 1144.5777, "eval_samples_per_second": 1.747, "eval_steps_per_second": 0.874, "step": 3300 }, { "epoch": 0.43, "learning_rate": 3.491947152959958e-06, "logits/chosen": -0.5393772125244141, "logits/rejected": 0.08978531509637833, "logps/chosen": -471.2378845214844, "logps/rejected": -527.0679321289062, "loss": 0.5259, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7623119354248047, "rewards/margins": 0.7789031863212585, "rewards/rejected": -2.541214942932129, "step": 3310 }, { "epoch": 0.43, "learning_rate": 3.4814521333663497e-06, "logits/chosen": -0.8797744512557983, "logits/rejected": 0.28487175703048706, "logps/chosen": -507.973388671875, "logps/rejected": -497.87677001953125, "loss": 0.5358, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7715896368026733, "rewards/margins": 0.7174810767173767, "rewards/rejected": -2.489070415496826, "step": 3320 }, { "epoch": 0.44, "learning_rate": 3.4709366259231468e-06, "logits/chosen": -0.5718088150024414, "logits/rejected": 0.6245108842849731, "logps/chosen": -451.671142578125, "logps/rejected": -492.45111083984375, "loss": 0.5618, "rewards/accuracies": 0.6875, "rewards/chosen": -1.632046103477478, "rewards/margins": 0.802625834941864, "rewards/rejected": -2.4346723556518555, "step": 3330 }, { "epoch": 0.44, "learning_rate": 3.460400850141956e-06, "logits/chosen": -0.8436506390571594, "logits/rejected": 0.6500002145767212, "logps/chosen": -418.16571044921875, "logps/rejected": -476.41375732421875, "loss": 0.5184, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.8505115509033203, "rewards/margins": 0.7895227670669556, "rewards/rejected": -2.6400341987609863, "step": 3340 }, { "epoch": 0.44, "learning_rate": 3.4498450259574858e-06, "logits/chosen": -0.3697187304496765, "logits/rejected": 0.12063203006982803, "logps/chosen": -465.37066650390625, "logps/rejected": -506.58319091796875, "loss": 0.5763, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.957269310951233, "rewards/margins": 0.574730634689331, "rewards/rejected": -2.5320000648498535, "step": 3350 }, { "epoch": 0.44, "learning_rate": 3.439269373722957e-06, "logits/chosen": -0.4114798605442047, "logits/rejected": 0.28758490085601807, "logps/chosen": -450.16741943359375, "logps/rejected": -514.9403076171875, "loss": 0.542, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.925722360610962, "rewards/margins": 0.8905431628227234, "rewards/rejected": -2.81626558303833, "step": 3360 }, { "epoch": 0.44, "learning_rate": 3.4286741142055014e-06, "logits/chosen": -0.9048460721969604, "logits/rejected": -0.23542626202106476, "logps/chosen": -464.1852111816406, "logps/rejected": -529.8837890625, "loss": 0.4867, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.771493673324585, "rewards/margins": 0.8255535364151001, "rewards/rejected": -2.5970473289489746, "step": 3370 }, { "epoch": 0.44, "learning_rate": 3.4180594685815536e-06, "logits/chosen": -0.6555899381637573, "logits/rejected": 0.23162047564983368, "logps/chosen": -387.56414794921875, "logps/rejected": -469.15020751953125, "loss": 0.5231, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6515061855316162, "rewards/margins": 0.8171674609184265, "rewards/rejected": -2.4686737060546875, "step": 3380 }, { "epoch": 0.44, "learning_rate": 3.4074256584322336e-06, "logits/chosen": -0.6201430559158325, "logits/rejected": 0.13968434929847717, "logps/chosen": -395.79229736328125, "logps/rejected": -470.4158630371094, "loss": 0.521, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4998222589492798, "rewards/margins": 0.9599917531013489, "rewards/rejected": -2.4598140716552734, "step": 3390 }, { "epoch": 0.44, "learning_rate": 3.3967729057387213e-06, "logits/chosen": -0.7700475454330444, "logits/rejected": 0.4305594563484192, "logps/chosen": -439.98504638671875, "logps/rejected": -465.68408203125, "loss": 0.5811, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.470069169998169, "rewards/margins": 0.6720464825630188, "rewards/rejected": -2.142115831375122, "step": 3400 }, { "epoch": 0.44, "eval_logits/chosen": 1.0161676406860352, "eval_logits/rejected": 1.7237727642059326, "eval_logps/chosen": -428.5589904785156, "eval_logps/rejected": -492.4408264160156, "eval_loss": 0.5165792107582092, "eval_rewards/accuracies": 0.7335000038146973, "eval_rewards/chosen": -1.6006896495819092, "eval_rewards/margins": 0.8379069566726685, "eval_rewards/rejected": -2.4385969638824463, "eval_runtime": 1168.883, "eval_samples_per_second": 1.711, "eval_steps_per_second": 0.856, "step": 3400 }, { "epoch": 0.45, "learning_rate": 3.386101432877624e-06, "logits/chosen": -0.7892847657203674, "logits/rejected": -0.024471605196595192, "logps/chosen": -428.40667724609375, "logps/rejected": -461.5455627441406, "loss": 0.5302, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6095516681671143, "rewards/margins": 0.7331417798995972, "rewards/rejected": -2.342693328857422, "step": 3410 }, { "epoch": 0.45, "learning_rate": 3.375411462616332e-06, "logits/chosen": -1.0004537105560303, "logits/rejected": 0.13648569583892822, "logps/chosen": -466.4956970214844, "logps/rejected": -552.7140502929688, "loss": 0.5037, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7703803777694702, "rewards/margins": 0.8303622007369995, "rewards/rejected": -2.600742816925049, "step": 3420 }, { "epoch": 0.45, "learning_rate": 3.3647032181083696e-06, "logits/chosen": -0.7642697095870972, "logits/rejected": -0.002010262105613947, "logps/chosen": -486.2002868652344, "logps/rejected": -559.04736328125, "loss": 0.4941, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.827528715133667, "rewards/margins": 0.9090808629989624, "rewards/rejected": -2.73660945892334, "step": 3430 }, { "epoch": 0.45, "learning_rate": 3.3539769228887382e-06, "logits/chosen": -0.9899004697799683, "logits/rejected": 0.285744845867157, "logps/chosen": -455.26788330078125, "logps/rejected": -538.8175048828125, "loss": 0.4733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.473595142364502, "rewards/margins": 0.9281530380249023, "rewards/rejected": -2.4017484188079834, "step": 3440 }, { "epoch": 0.45, "learning_rate": 3.343232800869247e-06, "logits/chosen": -0.8719793558120728, "logits/rejected": 0.3464857041835785, "logps/chosen": -392.4674072265625, "logps/rejected": -419.7901306152344, "loss": 0.518, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.578559160232544, "rewards/margins": 0.7874252796173096, "rewards/rejected": -2.3659844398498535, "step": 3450 }, { "epoch": 0.45, "learning_rate": 3.33247107633384e-06, "logits/chosen": -0.6414632201194763, "logits/rejected": -0.19829490780830383, "logps/chosen": -397.5970764160156, "logps/rejected": -498.804931640625, "loss": 0.4297, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.311751365661621, "rewards/margins": 1.1057935953140259, "rewards/rejected": -2.4175448417663574, "step": 3460 }, { "epoch": 0.45, "learning_rate": 3.3216919739339155e-06, "logits/chosen": -0.8113977313041687, "logits/rejected": 0.24353833496570587, "logps/chosen": -451.53900146484375, "logps/rejected": -521.3798217773438, "loss": 0.4028, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5756160020828247, "rewards/margins": 1.230313777923584, "rewards/rejected": -2.805929660797119, "step": 3470 }, { "epoch": 0.46, "learning_rate": 3.310895718683635e-06, "logits/chosen": -0.5552123188972473, "logits/rejected": 0.25212961435317993, "logps/chosen": -468.75030517578125, "logps/rejected": -510.51214599609375, "loss": 0.6551, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7728900909423828, "rewards/margins": 0.6345969438552856, "rewards/rejected": -2.4074866771698, "step": 3480 }, { "epoch": 0.46, "learning_rate": 3.3000825359552256e-06, "logits/chosen": -0.29439646005630493, "logits/rejected": 0.14724037051200867, "logps/chosen": -405.78594970703125, "logps/rejected": -501.95465087890625, "loss": 0.497, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3883360624313354, "rewards/margins": 0.9358784556388855, "rewards/rejected": -2.324214458465576, "step": 3490 }, { "epoch": 0.46, "learning_rate": 3.2892526514742778e-06, "logits/chosen": -0.5424224734306335, "logits/rejected": 0.3168255388736725, "logps/chosen": -418.60272216796875, "logps/rejected": -471.0479431152344, "loss": 0.4892, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4777576923370361, "rewards/margins": 0.914044201374054, "rewards/rejected": -2.391801595687866, "step": 3500 }, { "epoch": 0.46, "eval_logits/chosen": 1.3014029264450073, "eval_logits/rejected": 2.0708816051483154, "eval_logps/chosen": -415.6103820800781, "eval_logps/rejected": -480.9519348144531, "eval_loss": 0.5256860256195068, "eval_rewards/accuracies": 0.7279999852180481, "eval_rewards/chosen": -1.471203327178955, "eval_rewards/margins": 0.8525046110153198, "eval_rewards/rejected": -2.3237080574035645, "eval_runtime": 1172.9577, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.853, "step": 3500 }, { "epoch": 0.46, "learning_rate": 3.27840629131503e-06, "logits/chosen": -0.525278627872467, "logits/rejected": 0.5383912324905396, "logps/chosen": -441.439208984375, "logps/rejected": -500.04010009765625, "loss": 0.5312, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.6438672542572021, "rewards/margins": 0.845868706703186, "rewards/rejected": -2.4897360801696777, "step": 3510 }, { "epoch": 0.46, "learning_rate": 3.2675436818956522e-06, "logits/chosen": -0.6424092054367065, "logits/rejected": 0.2780497372150421, "logps/chosen": -387.6494445800781, "logps/rejected": -470.696533203125, "loss": 0.5343, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4292685985565186, "rewards/margins": 0.742842972278595, "rewards/rejected": -2.1721115112304688, "step": 3520 }, { "epoch": 0.46, "learning_rate": 3.2566650499735185e-06, "logits/chosen": -0.35371822118759155, "logits/rejected": 0.6347888112068176, "logps/chosen": -437.46600341796875, "logps/rejected": -535.9605712890625, "loss": 0.4477, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.4554798603057861, "rewards/margins": 1.2568625211715698, "rewards/rejected": -2.7123425006866455, "step": 3530 }, { "epoch": 0.46, "learning_rate": 3.2457706226404715e-06, "logits/chosen": -0.41827550530433655, "logits/rejected": -0.005315917544066906, "logps/chosen": -431.5044860839844, "logps/rejected": -449.8809509277344, "loss": 0.6296, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.6185357570648193, "rewards/margins": 0.5960680246353149, "rewards/rejected": -2.214603900909424, "step": 3540 }, { "epoch": 0.46, "learning_rate": 3.2348606273180847e-06, "logits/chosen": -1.0620290040969849, "logits/rejected": 0.8680380582809448, "logps/chosen": -446.9424743652344, "logps/rejected": -452.28485107421875, "loss": 0.4631, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3773703575134277, "rewards/margins": 0.9029923677444458, "rewards/rejected": -2.280362844467163, "step": 3550 }, { "epoch": 0.47, "learning_rate": 3.2239352917529165e-06, "logits/chosen": -0.7291229367256165, "logits/rejected": 0.35278087854385376, "logps/chosen": -489.33551025390625, "logps/rejected": -550.3321533203125, "loss": 0.519, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8307793140411377, "rewards/margins": 0.7998561859130859, "rewards/rejected": -2.6306357383728027, "step": 3560 }, { "epoch": 0.47, "learning_rate": 3.2129948440117487e-06, "logits/chosen": -0.4160127639770508, "logits/rejected": 0.06812303513288498, "logps/chosen": -480.4317932128906, "logps/rejected": -528.1541748046875, "loss": 0.5639, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1663801670074463, "rewards/margins": 0.6297258734703064, "rewards/rejected": -2.7961058616638184, "step": 3570 }, { "epoch": 0.47, "learning_rate": 3.202039512476833e-06, "logits/chosen": -0.47289180755615234, "logits/rejected": 0.3967745304107666, "logps/chosen": -413.58563232421875, "logps/rejected": -521.4444580078125, "loss": 0.4684, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8022072315216064, "rewards/margins": 1.0440325736999512, "rewards/rejected": -2.8462398052215576, "step": 3580 }, { "epoch": 0.47, "learning_rate": 3.1910695258411216e-06, "logits/chosen": -0.7860490679740906, "logits/rejected": 0.8082360029220581, "logps/chosen": -440.5712890625, "logps/rejected": -458.45526123046875, "loss": 0.5925, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7098718881607056, "rewards/margins": 0.7629823684692383, "rewards/rejected": -2.4728541374206543, "step": 3590 }, { "epoch": 0.47, "learning_rate": 3.1800851131034904e-06, "logits/chosen": -0.5506579279899597, "logits/rejected": 0.5083298087120056, "logps/chosen": -437.36016845703125, "logps/rejected": -497.79742431640625, "loss": 0.5438, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7282190322875977, "rewards/margins": 0.9188164472579956, "rewards/rejected": -2.647035598754883, "step": 3600 }, { "epoch": 0.47, "eval_logits/chosen": 1.4150185585021973, "eval_logits/rejected": 2.2019591331481934, "eval_logps/chosen": -428.1592102050781, "eval_logps/rejected": -493.0663757324219, "eval_loss": 0.5251966714859009, "eval_rewards/accuracies": 0.7275000214576721, "eval_rewards/chosen": -1.5966919660568237, "eval_rewards/margins": 0.8481603860855103, "eval_rewards/rejected": -2.444852113723755, "eval_runtime": 1146.1005, "eval_samples_per_second": 1.745, "eval_steps_per_second": 0.873, "step": 3600 }, { "epoch": 0.47, "learning_rate": 3.169086503563962e-06, "logits/chosen": -0.6776447296142578, "logits/rejected": -0.23693008720874786, "logps/chosen": -416.4723205566406, "logps/rejected": -498.85479736328125, "loss": 0.6094, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5892466306686401, "rewards/margins": 0.6511684656143188, "rewards/rejected": -2.240415096282959, "step": 3610 }, { "epoch": 0.47, "learning_rate": 3.1580739268189165e-06, "logits/chosen": -0.5585545897483826, "logits/rejected": 0.5914020538330078, "logps/chosen": -437.05572509765625, "logps/rejected": -491.81671142578125, "loss": 0.505, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.594426155090332, "rewards/margins": 0.934490978717804, "rewards/rejected": -2.528916835784912, "step": 3620 }, { "epoch": 0.48, "learning_rate": 3.147047612756302e-06, "logits/chosen": -0.24671559035778046, "logits/rejected": 0.12607893347740173, "logps/chosen": -449.9618225097656, "logps/rejected": -528.7197265625, "loss": 0.4698, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4847663640975952, "rewards/margins": 0.9548713564872742, "rewards/rejected": -2.4396376609802246, "step": 3630 }, { "epoch": 0.48, "learning_rate": 3.136007791550833e-06, "logits/chosen": -0.7207472324371338, "logits/rejected": 0.6024399995803833, "logps/chosen": -402.71319580078125, "logps/rejected": -450.76373291015625, "loss": 0.5079, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6094977855682373, "rewards/margins": 0.8133002519607544, "rewards/rejected": -2.4227981567382812, "step": 3640 }, { "epoch": 0.48, "learning_rate": 3.1249546936591848e-06, "logits/chosen": -0.587475597858429, "logits/rejected": 0.040339358150959015, "logps/chosen": -386.5211181640625, "logps/rejected": -455.6336364746094, "loss": 0.5345, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4895503520965576, "rewards/margins": 0.7380887866020203, "rewards/rejected": -2.2276394367218018, "step": 3650 }, { "epoch": 0.48, "learning_rate": 3.1138885498151843e-06, "logits/chosen": -0.3841520845890045, "logits/rejected": 0.43626269698143005, "logps/chosen": -448.64080810546875, "logps/rejected": -513.5535888671875, "loss": 0.4928, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7264331579208374, "rewards/margins": 1.076446771621704, "rewards/rejected": -2.802879810333252, "step": 3660 }, { "epoch": 0.48, "learning_rate": 3.1028095910249937e-06, "logits/chosen": -1.0335357189178467, "logits/rejected": 0.6055603623390198, "logps/chosen": -452.31829833984375, "logps/rejected": -487.44921875, "loss": 0.4924, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7537025213241577, "rewards/margins": 0.8785789608955383, "rewards/rejected": -2.632281541824341, "step": 3670 }, { "epoch": 0.48, "learning_rate": 3.0917180485622895e-06, "logits/chosen": -0.4549393653869629, "logits/rejected": 1.1361083984375, "logps/chosen": -451.30145263671875, "logps/rejected": -507.1758728027344, "loss": 0.4922, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7780840396881104, "rewards/margins": 1.0487792491912842, "rewards/rejected": -2.8268632888793945, "step": 3680 }, { "epoch": 0.48, "learning_rate": 3.0806141539634294e-06, "logits/chosen": -0.6923087239265442, "logits/rejected": 0.8231356739997864, "logps/chosen": -423.26531982421875, "logps/rejected": -463.19921875, "loss": 0.5082, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7002700567245483, "rewards/margins": 0.9019443392753601, "rewards/rejected": -2.6022145748138428, "step": 3690 }, { "epoch": 0.48, "learning_rate": 3.069498139022624e-06, "logits/chosen": -0.8322190046310425, "logits/rejected": 0.6718935966491699, "logps/chosen": -491.6170349121094, "logps/rejected": -499.7591247558594, "loss": 0.5677, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0136921405792236, "rewards/margins": 0.6361916661262512, "rewards/rejected": -2.64988374710083, "step": 3700 }, { "epoch": 0.48, "eval_logits/chosen": 1.6843464374542236, "eval_logits/rejected": 2.467794895172119, "eval_logps/chosen": -465.7503662109375, "eval_logps/rejected": -529.8629760742188, "eval_loss": 0.5152121782302856, "eval_rewards/accuracies": 0.7275000214576721, "eval_rewards/chosen": -1.972603678703308, "eval_rewards/margins": 0.8402146697044373, "eval_rewards/rejected": -2.8128180503845215, "eval_runtime": 1146.5714, "eval_samples_per_second": 1.744, "eval_steps_per_second": 0.872, "step": 3700 }, { "epoch": 0.49, "learning_rate": 3.0583702357870964e-06, "logits/chosen": -0.48846787214279175, "logits/rejected": -0.0028875707648694515, "logps/chosen": -522.038818359375, "logps/rejected": -585.753662109375, "loss": 0.6047, "rewards/accuracies": 0.6875, "rewards/chosen": -2.117889881134033, "rewards/margins": 0.6571815609931946, "rewards/rejected": -2.775071144104004, "step": 3710 }, { "epoch": 0.49, "learning_rate": 3.0472306765522393e-06, "logits/chosen": -0.9246646761894226, "logits/rejected": 1.0386006832122803, "logps/chosen": -434.03704833984375, "logps/rejected": -522.7357177734375, "loss": 0.4683, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8714182376861572, "rewards/margins": 1.1093872785568237, "rewards/rejected": -2.9808051586151123, "step": 3720 }, { "epoch": 0.49, "learning_rate": 3.0360796938567628e-06, "logits/chosen": -0.7488586902618408, "logits/rejected": 0.5929322838783264, "logps/chosen": -465.7920837402344, "logps/rejected": -497.0887145996094, "loss": 0.5484, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9402759075164795, "rewards/margins": 0.7168923616409302, "rewards/rejected": -2.65716814994812, "step": 3730 }, { "epoch": 0.49, "learning_rate": 3.0249175204778435e-06, "logits/chosen": -0.12813475728034973, "logits/rejected": 0.1722133457660675, "logps/chosen": -443.7284240722656, "logps/rejected": -510.58660888671875, "loss": 0.513, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8336127996444702, "rewards/margins": 0.8592530488967896, "rewards/rejected": -2.6928658485412598, "step": 3740 }, { "epoch": 0.49, "learning_rate": 3.0137443894262634e-06, "logits/chosen": -0.1264442503452301, "logits/rejected": 1.0170029401779175, "logps/chosen": -455.2422790527344, "logps/rejected": -530.8844604492188, "loss": 0.3586, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7252286672592163, "rewards/margins": 1.3205913305282593, "rewards/rejected": -3.0458197593688965, "step": 3750 }, { "epoch": 0.49, "learning_rate": 3.0025605339415476e-06, "logits/chosen": -0.2637806832790375, "logits/rejected": 0.7203923463821411, "logps/chosen": -443.43988037109375, "logps/rejected": -518.1168823242188, "loss": 0.4633, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7762037515640259, "rewards/margins": 1.030763030052185, "rewards/rejected": -2.8069663047790527, "step": 3760 }, { "epoch": 0.49, "learning_rate": 2.9913661874870923e-06, "logits/chosen": -0.07495728135108948, "logits/rejected": 0.4468969702720642, "logps/chosen": -466.39678955078125, "logps/rejected": -520.4652709960938, "loss": 0.5005, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0380637645721436, "rewards/margins": 0.8805074691772461, "rewards/rejected": -2.9185712337493896, "step": 3770 }, { "epoch": 0.49, "learning_rate": 2.980161583745294e-06, "logits/chosen": -0.31729915738105774, "logits/rejected": 0.5375093221664429, "logps/chosen": -503.6490783691406, "logps/rejected": -559.5162963867188, "loss": 0.4487, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9461323022842407, "rewards/margins": 1.0691391229629517, "rewards/rejected": -3.0152714252471924, "step": 3780 }, { "epoch": 0.5, "learning_rate": 2.96894695661267e-06, "logits/chosen": -0.8055630922317505, "logits/rejected": 0.4190472662448883, "logps/chosen": -503.9349670410156, "logps/rejected": -536.4412841796875, "loss": 0.5495, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.072908878326416, "rewards/margins": 0.787026584148407, "rewards/rejected": -2.8599355220794678, "step": 3790 }, { "epoch": 0.5, "learning_rate": 2.9577225401949773e-06, "logits/chosen": -0.2834416925907135, "logits/rejected": -0.048535846173763275, "logps/chosen": -445.82147216796875, "logps/rejected": -526.7498779296875, "loss": 0.5471, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1524107456207275, "rewards/margins": 0.9361416101455688, "rewards/rejected": -3.0885519981384277, "step": 3800 }, { "epoch": 0.5, "eval_logits/chosen": 1.4351999759674072, "eval_logits/rejected": 2.202155113220215, "eval_logps/chosen": -475.79779052734375, "eval_logps/rejected": -551.583251953125, "eval_loss": 0.5240182876586914, "eval_rewards/accuracies": 0.7254999876022339, "eval_rewards/chosen": -2.07307767868042, "eval_rewards/margins": 0.956943154335022, "eval_rewards/rejected": -3.0300209522247314, "eval_runtime": 1140.6996, "eval_samples_per_second": 1.753, "eval_steps_per_second": 0.877, "step": 3800 }, { "epoch": 0.5, "learning_rate": 2.946488568802324e-06, "logits/chosen": -0.331181138753891, "logits/rejected": 0.5133270025253296, "logps/chosen": -477.48321533203125, "logps/rejected": -547.2113647460938, "loss": 0.507, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1447014808654785, "rewards/margins": 0.8755722045898438, "rewards/rejected": -3.0202736854553223, "step": 3810 }, { "epoch": 0.5, "learning_rate": 2.935245276944278e-06, "logits/chosen": -0.2571033537387848, "logits/rejected": 0.4771188199520111, "logps/chosen": -497.06866455078125, "logps/rejected": -538.19091796875, "loss": 0.5672, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0118210315704346, "rewards/margins": 0.7694553136825562, "rewards/rejected": -2.781276226043701, "step": 3820 }, { "epoch": 0.5, "learning_rate": 2.9239928993249723e-06, "logits/chosen": -0.2940518260002136, "logits/rejected": 0.16310206055641174, "logps/chosen": -479.2884216308594, "logps/rejected": -555.8187255859375, "loss": 0.4874, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.98129141330719, "rewards/margins": 1.1137723922729492, "rewards/rejected": -3.0950639247894287, "step": 3830 }, { "epoch": 0.5, "learning_rate": 2.912731670838207e-06, "logits/chosen": -0.561539888381958, "logits/rejected": 0.4365290701389313, "logps/chosen": -458.83734130859375, "logps/rejected": -545.0903930664062, "loss": 0.5649, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.022507667541504, "rewards/margins": 0.8180673718452454, "rewards/rejected": -2.8405749797821045, "step": 3840 }, { "epoch": 0.5, "learning_rate": 2.901461826562543e-06, "logits/chosen": -0.57462078332901, "logits/rejected": 0.3850022852420807, "logps/chosen": -398.58343505859375, "logps/rejected": -475.1954040527344, "loss": 0.4639, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.729619026184082, "rewards/margins": 0.9505112767219543, "rewards/rejected": -2.6801302433013916, "step": 3850 }, { "epoch": 0.51, "learning_rate": 2.8901836017563966e-06, "logits/chosen": -0.42263826727867126, "logits/rejected": 0.20525923371315002, "logps/chosen": -463.990478515625, "logps/rejected": -499.493408203125, "loss": 0.562, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.830883264541626, "rewards/margins": 0.7106839418411255, "rewards/rejected": -2.541567325592041, "step": 3860 }, { "epoch": 0.51, "learning_rate": 2.8788972318531272e-06, "logits/chosen": -0.6156303882598877, "logits/rejected": 0.3582186996936798, "logps/chosen": -427.96002197265625, "logps/rejected": -484.9053649902344, "loss": 0.5611, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7909084558486938, "rewards/margins": 0.6238812208175659, "rewards/rejected": -2.4147896766662598, "step": 3870 }, { "epoch": 0.51, "learning_rate": 2.8676029524561255e-06, "logits/chosen": -0.09342961758375168, "logits/rejected": 0.25026214122772217, "logps/chosen": -468.81072998046875, "logps/rejected": -550.8553466796875, "loss": 0.5211, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7681678533554077, "rewards/margins": 0.9555566906929016, "rewards/rejected": -2.723724603652954, "step": 3880 }, { "epoch": 0.51, "learning_rate": 2.8563009993338906e-06, "logits/chosen": -0.24521970748901367, "logits/rejected": 0.4679892659187317, "logps/chosen": -440.3994140625, "logps/rejected": -543.8873291015625, "loss": 0.4792, "rewards/accuracies": 0.75, "rewards/chosen": -1.9353783130645752, "rewards/margins": 1.108598232269287, "rewards/rejected": -3.043976306915283, "step": 3890 }, { "epoch": 0.51, "learning_rate": 2.844991608415113e-06, "logits/chosen": -0.3554648458957672, "logits/rejected": -0.03292546421289444, "logps/chosen": -480.20037841796875, "logps/rejected": -566.8743896484375, "loss": 0.5193, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.146230936050415, "rewards/margins": 0.9551491737365723, "rewards/rejected": -3.1013801097869873, "step": 3900 }, { "epoch": 0.51, "eval_logits/chosen": 1.3990401029586792, "eval_logits/rejected": 2.1469078063964844, "eval_logps/chosen": -485.6194152832031, "eval_logps/rejected": -559.7595825195312, "eval_loss": 0.5184990167617798, "eval_rewards/accuracies": 0.734000027179718, "eval_rewards/chosen": -2.1712939739227295, "eval_rewards/margins": 0.940490186214447, "eval_rewards/rejected": -3.111783981323242, "eval_runtime": 1152.1911, "eval_samples_per_second": 1.736, "eval_steps_per_second": 0.868, "step": 3900 }, { "epoch": 0.51, "learning_rate": 2.833675015783746e-06, "logits/chosen": -0.07204243540763855, "logits/rejected": 0.11534376442432404, "logps/chosen": -458.18798828125, "logps/rejected": -554.1547241210938, "loss": 0.5293, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2303760051727295, "rewards/margins": 0.9195043444633484, "rewards/rejected": -3.1498801708221436, "step": 3910 }, { "epoch": 0.51, "learning_rate": 2.8223514576740784e-06, "logits/chosen": 0.09607383608818054, "logits/rejected": 0.03933734819293022, "logps/chosen": -431.48175048828125, "logps/rejected": -560.3643798828125, "loss": 0.4971, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0321695804595947, "rewards/margins": 0.9992935061454773, "rewards/rejected": -3.0314629077911377, "step": 3920 }, { "epoch": 0.51, "learning_rate": 2.8110211704658073e-06, "logits/chosen": -0.698123037815094, "logits/rejected": 0.5209210515022278, "logps/chosen": -530.5778198242188, "logps/rejected": -592.2348022460938, "loss": 0.4693, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.271674156188965, "rewards/margins": 0.9807974696159363, "rewards/rejected": -3.252471446990967, "step": 3930 }, { "epoch": 0.52, "learning_rate": 2.7996843906790955e-06, "logits/chosen": -0.11168187856674194, "logits/rejected": 0.6565873026847839, "logps/chosen": -470.85064697265625, "logps/rejected": -538.1116943359375, "loss": 0.6032, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2667603492736816, "rewards/margins": 0.637987494468689, "rewards/rejected": -2.9047482013702393, "step": 3940 }, { "epoch": 0.52, "learning_rate": 2.7883413549696396e-06, "logits/chosen": -0.47097617387771606, "logits/rejected": 0.9360452890396118, "logps/chosen": -505.99658203125, "logps/rejected": -607.1832885742188, "loss": 0.3929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.171560764312744, "rewards/margins": 1.2839739322662354, "rewards/rejected": -3.4555351734161377, "step": 3950 }, { "epoch": 0.52, "learning_rate": 2.776992300123732e-06, "logits/chosen": -0.3683644235134125, "logits/rejected": 0.48894062638282776, "logps/chosen": -468.18316650390625, "logps/rejected": -581.08154296875, "loss": 0.5471, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.261422872543335, "rewards/margins": 1.1004810333251953, "rewards/rejected": -3.3619041442871094, "step": 3960 }, { "epoch": 0.52, "learning_rate": 2.7656374630533113e-06, "logits/chosen": -0.399179607629776, "logits/rejected": -0.14117364585399628, "logps/chosen": -440.6139221191406, "logps/rejected": -555.7425537109375, "loss": 0.4605, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0904624462127686, "rewards/margins": 1.1303551197052002, "rewards/rejected": -3.2208175659179688, "step": 3970 }, { "epoch": 0.52, "learning_rate": 2.754277080791021e-06, "logits/chosen": -0.6631547212600708, "logits/rejected": -0.17448690533638, "logps/chosen": -481.0576171875, "logps/rejected": -556.90380859375, "loss": 0.6197, "rewards/accuracies": 0.625, "rewards/chosen": -2.1500160694122314, "rewards/margins": 0.8200462460517883, "rewards/rejected": -2.970062255859375, "step": 3980 }, { "epoch": 0.52, "learning_rate": 2.742911390485262e-06, "logits/chosen": -0.10709667205810547, "logits/rejected": 0.19126668572425842, "logps/chosen": -419.6375427246094, "logps/rejected": -470.33880615234375, "loss": 0.6197, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0977683067321777, "rewards/margins": 0.590727686882019, "rewards/rejected": -2.6884961128234863, "step": 3990 }, { "epoch": 0.52, "learning_rate": 2.731540629395239e-06, "logits/chosen": -0.5709615349769592, "logits/rejected": 0.3959980607032776, "logps/chosen": -460.33544921875, "logps/rejected": -499.603759765625, "loss": 0.5764, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.8743162155151367, "rewards/margins": 0.6547318696975708, "rewards/rejected": -2.529047966003418, "step": 4000 }, { "epoch": 0.52, "eval_logits/chosen": 1.1191996335983276, "eval_logits/rejected": 1.8653334379196167, "eval_logps/chosen": -469.05755615234375, "eval_logps/rejected": -545.9298095703125, "eval_loss": 0.5176907777786255, "eval_rewards/accuracies": 0.7310000061988831, "eval_rewards/chosen": -2.0056753158569336, "eval_rewards/margins": 0.9678111672401428, "eval_rewards/rejected": -2.9734864234924316, "eval_runtime": 1155.1218, "eval_samples_per_second": 1.731, "eval_steps_per_second": 0.866, "step": 4000 }, { "epoch": 0.52, "learning_rate": 2.7201650348860115e-06, "logits/chosen": -0.7722757458686829, "logits/rejected": 0.36960023641586304, "logps/chosen": -442.9876403808594, "logps/rejected": -502.1902770996094, "loss": 0.4848, "rewards/accuracies": 0.75, "rewards/chosen": -2.029517412185669, "rewards/margins": 1.0260822772979736, "rewards/rejected": -3.0555994510650635, "step": 4010 }, { "epoch": 0.53, "learning_rate": 2.7087848444235354e-06, "logits/chosen": -0.8600364923477173, "logits/rejected": 0.4562684893608093, "logps/chosen": -454.643310546875, "logps/rejected": -560.4906005859375, "loss": 0.4478, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.75933837890625, "rewards/margins": 1.259271264076233, "rewards/rejected": -3.0186095237731934, "step": 4020 }, { "epoch": 0.53, "learning_rate": 2.697400295569707e-06, "logits/chosen": -0.6417149901390076, "logits/rejected": -0.5029559135437012, "logps/chosen": -410.6507873535156, "logps/rejected": -496.6881408691406, "loss": 0.5825, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6875232458114624, "rewards/margins": 0.8735893964767456, "rewards/rejected": -2.561112880706787, "step": 4030 }, { "epoch": 0.53, "learning_rate": 2.6860116259774065e-06, "logits/chosen": -0.604103684425354, "logits/rejected": 0.4802830219268799, "logps/chosen": -457.4012145996094, "logps/rejected": -566.4182739257812, "loss": 0.4229, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7043393850326538, "rewards/margins": 1.2353878021240234, "rewards/rejected": -2.939727306365967, "step": 4040 }, { "epoch": 0.53, "learning_rate": 2.674619073385531e-06, "logits/chosen": -0.3527432382106781, "logits/rejected": 0.4966762959957123, "logps/chosen": -414.24102783203125, "logps/rejected": -521.317626953125, "loss": 0.5281, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7188953161239624, "rewards/margins": 1.0378594398498535, "rewards/rejected": -2.7567548751831055, "step": 4050 }, { "epoch": 0.53, "learning_rate": 2.663222875614038e-06, "logits/chosen": -0.4794388711452484, "logits/rejected": 0.13224102556705475, "logps/chosen": -445.65087890625, "logps/rejected": -518.9547729492188, "loss": 0.6135, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.054771661758423, "rewards/margins": 0.5872553586959839, "rewards/rejected": -2.6420271396636963, "step": 4060 }, { "epoch": 0.53, "learning_rate": 2.6518232705589775e-06, "logits/chosen": -0.4580906331539154, "logits/rejected": 0.23300707340240479, "logps/chosen": -428.31707763671875, "logps/rejected": -548.2869262695312, "loss": 0.4599, "rewards/accuracies": 0.75, "rewards/chosen": -1.6607511043548584, "rewards/margins": 1.2533749341964722, "rewards/rejected": -2.91412615776062, "step": 4070 }, { "epoch": 0.53, "learning_rate": 2.640420496187528e-06, "logits/chosen": -0.6754065752029419, "logits/rejected": 0.6897384524345398, "logps/chosen": -462.10235595703125, "logps/rejected": -518.758544921875, "loss": 0.4402, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6582934856414795, "rewards/margins": 1.126479983329773, "rewards/rejected": -2.784773588180542, "step": 4080 }, { "epoch": 0.54, "learning_rate": 2.629014790533025e-06, "logits/chosen": -0.8516901135444641, "logits/rejected": 0.13358630239963531, "logps/chosen": -460.47509765625, "logps/rejected": -500.4847106933594, "loss": 0.4939, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6765422821044922, "rewards/margins": 1.0363404750823975, "rewards/rejected": -2.7128829956054688, "step": 4090 }, { "epoch": 0.54, "learning_rate": 2.617606391689996e-06, "logits/chosen": -0.5585889220237732, "logits/rejected": 0.3247618079185486, "logps/chosen": -427.287841796875, "logps/rejected": -505.0663146972656, "loss": 0.504, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.6491613388061523, "rewards/margins": 0.9884650111198425, "rewards/rejected": -2.6376264095306396, "step": 4100 }, { "epoch": 0.54, "eval_logits/chosen": 1.0344183444976807, "eval_logits/rejected": 1.7948068380355835, "eval_logps/chosen": -450.85650634765625, "eval_logps/rejected": -523.1134643554688, "eval_loss": 0.5179835557937622, "eval_rewards/accuracies": 0.7269999980926514, "eval_rewards/chosen": -1.8236651420593262, "eval_rewards/margins": 0.9216578006744385, "eval_rewards/rejected": -2.7453227043151855, "eval_runtime": 1151.937, "eval_samples_per_second": 1.736, "eval_steps_per_second": 0.868, "step": 4100 }, { "epoch": 0.54, "learning_rate": 2.6061955378091896e-06, "logits/chosen": -0.4794127345085144, "logits/rejected": 0.3910555839538574, "logps/chosen": -423.6997985839844, "logps/rejected": -548.6406860351562, "loss": 0.4475, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8037827014923096, "rewards/margins": 1.150130271911621, "rewards/rejected": -2.9539127349853516, "step": 4110 }, { "epoch": 0.54, "learning_rate": 2.5947824670926025e-06, "logits/chosen": -0.4441138803958893, "logits/rejected": -0.20847466588020325, "logps/chosen": -416.5233459472656, "logps/rejected": -513.5562744140625, "loss": 0.5321, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7394109964370728, "rewards/margins": 0.8989079594612122, "rewards/rejected": -2.6383190155029297, "step": 4120 }, { "epoch": 0.54, "learning_rate": 2.583367417788508e-06, "logits/chosen": -0.3594195246696472, "logits/rejected": 0.763751745223999, "logps/chosen": -463.782470703125, "logps/rejected": -538.66748046875, "loss": 0.5754, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1264193058013916, "rewards/margins": 0.9109905958175659, "rewards/rejected": -3.037409543991089, "step": 4130 }, { "epoch": 0.54, "learning_rate": 2.5719506281864838e-06, "logits/chosen": -0.767003059387207, "logits/rejected": -0.14061614871025085, "logps/chosen": -479.3724670410156, "logps/rejected": -501.5732421875, "loss": 0.5855, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9783029556274414, "rewards/margins": 0.8176881670951843, "rewards/rejected": -2.7959909439086914, "step": 4140 }, { "epoch": 0.54, "learning_rate": 2.5605323366124335e-06, "logits/chosen": -0.4945451617240906, "logits/rejected": 0.5269497036933899, "logps/chosen": -454.4396057128906, "logps/rejected": -526.3812866210938, "loss": 0.513, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9973022937774658, "rewards/margins": 0.7900811433792114, "rewards/rejected": -2.787383556365967, "step": 4150 }, { "epoch": 0.54, "learning_rate": 2.5491127814236172e-06, "logits/chosen": -0.19666481018066406, "logits/rejected": -0.4852234423160553, "logps/chosen": -372.50628662109375, "logps/rejected": -494.17144775390625, "loss": 0.51, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6012966632843018, "rewards/margins": 0.775696337223053, "rewards/rejected": -2.37699294090271, "step": 4160 }, { "epoch": 0.55, "learning_rate": 2.537692201003671e-06, "logits/chosen": -0.26586753129959106, "logits/rejected": 0.2930835485458374, "logps/chosen": -467.44317626953125, "logps/rejected": -552.4528198242188, "loss": 0.5688, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0270495414733887, "rewards/margins": 0.9594928622245789, "rewards/rejected": -2.9865424633026123, "step": 4170 }, { "epoch": 0.55, "learning_rate": 2.526270833757635e-06, "logits/chosen": -0.6537747383117676, "logits/rejected": 0.6223368048667908, "logps/chosen": -447.3667907714844, "logps/rejected": -499.9156188964844, "loss": 0.5537, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9054014682769775, "rewards/margins": 0.7102811932563782, "rewards/rejected": -2.615682601928711, "step": 4180 }, { "epoch": 0.55, "learning_rate": 2.514848918106971e-06, "logits/chosen": -0.553719162940979, "logits/rejected": 0.4931580424308777, "logps/chosen": -447.16204833984375, "logps/rejected": -520.3040771484375, "loss": 0.4678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8360874652862549, "rewards/margins": 1.1568195819854736, "rewards/rejected": -2.9929070472717285, "step": 4190 }, { "epoch": 0.55, "learning_rate": 2.503426692484594e-06, "logits/chosen": -0.3490946888923645, "logits/rejected": 0.03165990114212036, "logps/chosen": -444.204345703125, "logps/rejected": -547.2092895507812, "loss": 0.4846, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9349002838134766, "rewards/margins": 0.9464532732963562, "rewards/rejected": -2.8813538551330566, "step": 4200 }, { "epoch": 0.55, "eval_logits/chosen": 1.3328680992126465, "eval_logits/rejected": 2.1064412593841553, "eval_logps/chosen": -480.6316833496094, "eval_logps/rejected": -553.0634765625, "eval_loss": 0.5167676210403442, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -2.1214165687561035, "eval_rewards/margins": 0.9234069585800171, "eval_rewards/rejected": -3.044823169708252, "eval_runtime": 1156.9425, "eval_samples_per_second": 1.729, "eval_steps_per_second": 0.864, "step": 4200 }, { "epoch": 0.55, "learning_rate": 2.492004395329883e-06, "logits/chosen": -0.47201377153396606, "logits/rejected": 0.1662154495716095, "logps/chosen": -442.28265380859375, "logps/rejected": -555.5616455078125, "loss": 0.424, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9666084051132202, "rewards/margins": 1.2794740200042725, "rewards/rejected": -3.246082305908203, "step": 4210 }, { "epoch": 0.55, "learning_rate": 2.4805822650837165e-06, "logits/chosen": -0.05408313125371933, "logits/rejected": 0.7589906454086304, "logps/chosen": -454.1454162597656, "logps/rejected": -595.169189453125, "loss": 0.4313, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.1684341430664062, "rewards/margins": 1.3142242431640625, "rewards/rejected": -3.4826583862304688, "step": 4220 }, { "epoch": 0.55, "learning_rate": 2.4691605401834843e-06, "logits/chosen": -0.5830433964729309, "logits/rejected": 0.2723081707954407, "logps/chosen": -507.00323486328125, "logps/rejected": -582.15283203125, "loss": 0.5321, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2436351776123047, "rewards/margins": 0.8307396173477173, "rewards/rejected": -3.0743746757507324, "step": 4230 }, { "epoch": 0.55, "learning_rate": 2.457739459058117e-06, "logits/chosen": -0.5049376487731934, "logits/rejected": 0.14484481513500214, "logps/chosen": -547.284423828125, "logps/rejected": -602.6152954101562, "loss": 0.4511, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.2237820625305176, "rewards/margins": 1.0236866474151611, "rewards/rejected": -3.2474684715270996, "step": 4240 }, { "epoch": 0.56, "learning_rate": 2.4463192601231054e-06, "logits/chosen": -0.28555774688720703, "logits/rejected": 0.8731438517570496, "logps/chosen": -530.2666015625, "logps/rejected": -578.294677734375, "loss": 0.4807, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.338956117630005, "rewards/margins": 1.1115846633911133, "rewards/rejected": -3.450540542602539, "step": 4250 }, { "epoch": 0.56, "learning_rate": 2.434900181775524e-06, "logits/chosen": -0.7155624628067017, "logits/rejected": 0.29977577924728394, "logps/chosen": -479.2982482910156, "logps/rejected": -573.4529418945312, "loss": 0.4766, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0659286975860596, "rewards/margins": 1.073961615562439, "rewards/rejected": -3.139890193939209, "step": 4260 }, { "epoch": 0.56, "learning_rate": 2.4234824623890578e-06, "logits/chosen": -0.7157832980155945, "logits/rejected": 0.18825335800647736, "logps/chosen": -466.107177734375, "logps/rejected": -545.6228637695312, "loss": 0.4646, "rewards/accuracies": 0.75, "rewards/chosen": -2.0345892906188965, "rewards/margins": 0.9853957295417786, "rewards/rejected": -3.0199851989746094, "step": 4270 }, { "epoch": 0.56, "learning_rate": 2.4120663403090193e-06, "logits/chosen": -0.6923838257789612, "logits/rejected": 0.20935389399528503, "logps/chosen": -482.73724365234375, "logps/rejected": -583.6288452148438, "loss": 0.5585, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0791468620300293, "rewards/margins": 0.9277111291885376, "rewards/rejected": -3.0068578720092773, "step": 4280 }, { "epoch": 0.56, "learning_rate": 2.40065205384738e-06, "logits/chosen": -0.7260463833808899, "logits/rejected": 0.5240879058837891, "logps/chosen": -472.66845703125, "logps/rejected": -497.08575439453125, "loss": 0.6256, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3160202503204346, "rewards/margins": 0.5535823106765747, "rewards/rejected": -2.8696022033691406, "step": 4290 }, { "epoch": 0.56, "learning_rate": 2.389239841277793e-06, "logits/chosen": -0.25377124547958374, "logits/rejected": 0.46026620268821716, "logps/chosen": -445.01019287109375, "logps/rejected": -527.544677734375, "loss": 0.426, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9602826833724976, "rewards/margins": 1.1500470638275146, "rewards/rejected": -3.1103296279907227, "step": 4300 }, { "epoch": 0.56, "eval_logits/chosen": 1.2899590730667114, "eval_logits/rejected": 2.0376882553100586, "eval_logps/chosen": -469.9074401855469, "eval_logps/rejected": -543.4855346679688, "eval_loss": 0.5095502138137817, "eval_rewards/accuracies": 0.7325000166893005, "eval_rewards/chosen": -2.014173746109009, "eval_rewards/margins": 0.9348700642585754, "eval_rewards/rejected": -2.9490435123443604, "eval_runtime": 1156.5138, "eval_samples_per_second": 1.729, "eval_steps_per_second": 0.865, "step": 4300 }, { "epoch": 0.56, "learning_rate": 2.3778299408306167e-06, "logits/chosen": -0.4929943084716797, "logits/rejected": 0.48326101899147034, "logps/chosen": -455.02838134765625, "logps/rejected": -525.9819946289062, "loss": 0.5103, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0632247924804688, "rewards/margins": 0.8879944682121277, "rewards/rejected": -2.951219081878662, "step": 4310 }, { "epoch": 0.57, "learning_rate": 2.3664225906879452e-06, "logits/chosen": -0.42962485551834106, "logits/rejected": 0.17619100213050842, "logps/chosen": -452.4551696777344, "logps/rejected": -521.5052490234375, "loss": 0.4912, "rewards/accuracies": 0.75, "rewards/chosen": -2.080214023590088, "rewards/margins": 0.9484179615974426, "rewards/rejected": -3.0286319255828857, "step": 4320 }, { "epoch": 0.57, "learning_rate": 2.3550180289786357e-06, "logits/chosen": -0.8907386064529419, "logits/rejected": 0.4001527726650238, "logps/chosen": -469.4991760253906, "logps/rejected": -503.5176696777344, "loss": 0.5744, "rewards/accuracies": 0.625, "rewards/chosen": -2.0900187492370605, "rewards/margins": 0.7551154494285583, "rewards/rejected": -2.8451342582702637, "step": 4330 }, { "epoch": 0.57, "learning_rate": 2.343616493773335e-06, "logits/chosen": -0.3753073513507843, "logits/rejected": -0.04642016813158989, "logps/chosen": -479.0115661621094, "logps/rejected": -580.8361206054688, "loss": 0.5041, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.154541492462158, "rewards/margins": 1.0236194133758545, "rewards/rejected": -3.1781609058380127, "step": 4340 }, { "epoch": 0.57, "learning_rate": 2.3322182230795127e-06, "logits/chosen": -0.14095106720924377, "logits/rejected": -0.2498963177204132, "logps/chosen": -418.10491943359375, "logps/rejected": -577.77783203125, "loss": 0.4198, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8447338342666626, "rewards/margins": 1.3301194906234741, "rewards/rejected": -3.1748533248901367, "step": 4350 }, { "epoch": 0.57, "learning_rate": 2.320823454836491e-06, "logits/chosen": -0.9373834729194641, "logits/rejected": 0.3088436722755432, "logps/chosen": -433.88226318359375, "logps/rejected": -530.290283203125, "loss": 0.401, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7864519357681274, "rewards/margins": 1.156484603881836, "rewards/rejected": -2.942936658859253, "step": 4360 }, { "epoch": 0.57, "learning_rate": 2.309432426910478e-06, "logits/chosen": -0.6841565370559692, "logits/rejected": 0.7033779621124268, "logps/chosen": -494.7813415527344, "logps/rejected": -543.1317138671875, "loss": 0.4644, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9815502166748047, "rewards/margins": 1.0772145986557007, "rewards/rejected": -3.058764934539795, "step": 4370 }, { "epoch": 0.57, "learning_rate": 2.298045377089604e-06, "logits/chosen": -0.8424911499023438, "logits/rejected": 0.32612770795822144, "logps/chosen": -466.86895751953125, "logps/rejected": -569.2764892578125, "loss": 0.4831, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1209797859191895, "rewards/margins": 1.2162678241729736, "rewards/rejected": -3.337247371673584, "step": 4380 }, { "epoch": 0.57, "learning_rate": 2.286662543078955e-06, "logits/chosen": -0.4193035066127777, "logits/rejected": 0.4950118958950043, "logps/chosen": -491.828857421875, "logps/rejected": -533.1112060546875, "loss": 0.5354, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.155092239379883, "rewards/margins": 0.7993155121803284, "rewards/rejected": -2.9544079303741455, "step": 4390 }, { "epoch": 0.58, "learning_rate": 2.2752841624956125e-06, "logits/chosen": -0.604637622833252, "logits/rejected": 0.18484275043010712, "logps/chosen": -507.84112548828125, "logps/rejected": -585.2672119140625, "loss": 0.5289, "rewards/accuracies": 0.75, "rewards/chosen": -2.1505980491638184, "rewards/margins": 0.9751760363578796, "rewards/rejected": -3.1257741451263428, "step": 4400 }, { "epoch": 0.58, "eval_logits/chosen": 1.0285941362380981, "eval_logits/rejected": 1.7669211626052856, "eval_logps/chosen": -464.733154296875, "eval_logps/rejected": -542.265869140625, "eval_loss": 0.5142533779144287, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -1.9624314308166504, "eval_rewards/margins": 0.9744157195091248, "eval_rewards/rejected": -2.93684720993042, "eval_runtime": 1341.7741, "eval_samples_per_second": 1.491, "eval_steps_per_second": 0.745, "step": 4400 }, { "epoch": 0.58, "learning_rate": 2.2639104728636915e-06, "logits/chosen": -0.3302585482597351, "logits/rejected": -0.0848245620727539, "logps/chosen": -438.95501708984375, "logps/rejected": -518.0885009765625, "loss": 0.5619, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7836341857910156, "rewards/margins": 0.8088982701301575, "rewards/rejected": -2.5925326347351074, "step": 4410 }, { "epoch": 0.58, "learning_rate": 2.252541711609384e-06, "logits/chosen": -0.4086834788322449, "logits/rejected": 0.6533911824226379, "logps/chosen": -442.6697692871094, "logps/rejected": -500.9053649902344, "loss": 0.5224, "rewards/accuracies": 0.75, "rewards/chosen": -1.846428632736206, "rewards/margins": 0.9241348505020142, "rewards/rejected": -2.7705636024475098, "step": 4420 }, { "epoch": 0.58, "learning_rate": 2.241178116056002e-06, "logits/chosen": -0.7984479665756226, "logits/rejected": 0.013322305865585804, "logps/chosen": -431.56884765625, "logps/rejected": -503.01080322265625, "loss": 0.4753, "rewards/accuracies": 0.75, "rewards/chosen": -1.7908424139022827, "rewards/margins": 0.9538822174072266, "rewards/rejected": -2.744724750518799, "step": 4430 }, { "epoch": 0.58, "learning_rate": 2.2298199234190236e-06, "logits/chosen": -0.3780064582824707, "logits/rejected": 0.18104317784309387, "logps/chosen": -475.06024169921875, "logps/rejected": -548.1165771484375, "loss": 0.4519, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.868931770324707, "rewards/margins": 1.0790822505950928, "rewards/rejected": -2.9480140209198, "step": 4440 }, { "epoch": 0.58, "learning_rate": 2.218467370801138e-06, "logits/chosen": -0.6153514981269836, "logits/rejected": 0.3399887979030609, "logps/chosen": -482.9944763183594, "logps/rejected": -523.6447143554688, "loss": 0.6105, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1587672233581543, "rewards/margins": 0.7031978368759155, "rewards/rejected": -2.8619649410247803, "step": 4450 }, { "epoch": 0.58, "learning_rate": 2.207120695187304e-06, "logits/chosen": -0.7532152533531189, "logits/rejected": 0.8358518481254578, "logps/chosen": -490.2608337402344, "logps/rejected": -566.9043579101562, "loss": 0.4384, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.088639736175537, "rewards/margins": 1.1486093997955322, "rewards/rejected": -3.2372488975524902, "step": 4460 }, { "epoch": 0.58, "learning_rate": 2.195780133439794e-06, "logits/chosen": -0.20792141556739807, "logits/rejected": 0.016714613884687424, "logps/chosen": -480.798583984375, "logps/rejected": -579.108642578125, "loss": 0.5193, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.015219211578369, "rewards/margins": 0.9281864166259766, "rewards/rejected": -2.9434056282043457, "step": 4470 }, { "epoch": 0.59, "learning_rate": 2.1844459222932535e-06, "logits/chosen": -0.6897789835929871, "logits/rejected": 0.44204026460647583, "logps/chosen": -500.740478515625, "logps/rejected": -560.5718383789062, "loss": 0.4913, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2337794303894043, "rewards/margins": 0.9274255633354187, "rewards/rejected": -3.161205768585205, "step": 4480 }, { "epoch": 0.59, "learning_rate": 2.17311829834976e-06, "logits/chosen": -1.0006176233291626, "logits/rejected": -0.1672603338956833, "logps/chosen": -442.28802490234375, "logps/rejected": -546.0424194335938, "loss": 0.4748, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.864127516746521, "rewards/margins": 1.0716313123703003, "rewards/rejected": -2.9357590675354004, "step": 4490 }, { "epoch": 0.59, "learning_rate": 2.1617974980738814e-06, "logits/chosen": -0.7605545520782471, "logits/rejected": 0.6477273106575012, "logps/chosen": -446.9376525878906, "logps/rejected": -515.064453125, "loss": 0.4542, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9530082941055298, "rewards/margins": 0.973568320274353, "rewards/rejected": -2.926576852798462, "step": 4500 }, { "epoch": 0.59, "eval_logits/chosen": 1.1394939422607422, "eval_logits/rejected": 1.8774791955947876, "eval_logps/chosen": -464.9223327636719, "eval_logps/rejected": -541.3861083984375, "eval_loss": 0.5101990699768066, "eval_rewards/accuracies": 0.7335000038146973, "eval_rewards/chosen": -1.9643235206604004, "eval_rewards/margins": 0.963726282119751, "eval_rewards/rejected": -2.9280498027801514, "eval_runtime": 1305.9412, "eval_samples_per_second": 1.531, "eval_steps_per_second": 0.766, "step": 4500 }, { "epoch": 0.59, "learning_rate": 2.150483757787744e-06, "logits/chosen": -0.9358876943588257, "logits/rejected": 0.6876541972160339, "logps/chosen": -459.2244567871094, "logps/rejected": -489.6673278808594, "loss": 0.5597, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.086225986480713, "rewards/margins": 0.8154786229133606, "rewards/rejected": -2.9017043113708496, "step": 4510 }, { "epoch": 0.59, "learning_rate": 2.139177313666093e-06, "logits/chosen": -0.5859389901161194, "logits/rejected": -0.10606422275304794, "logps/chosen": -503.25933837890625, "logps/rejected": -543.6451416015625, "loss": 0.533, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0373759269714355, "rewards/margins": 0.933161735534668, "rewards/rejected": -2.9705376625061035, "step": 4520 }, { "epoch": 0.59, "learning_rate": 2.1278784017313688e-06, "logits/chosen": -0.2547047734260559, "logits/rejected": -0.3063717484474182, "logps/chosen": -468.4463806152344, "logps/rejected": -556.9096069335938, "loss": 0.5121, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8143895864486694, "rewards/margins": 0.8284521102905273, "rewards/rejected": -2.6428415775299072, "step": 4530 }, { "epoch": 0.59, "learning_rate": 2.116587257848776e-06, "logits/chosen": -0.6765443086624146, "logits/rejected": -0.6112786531448364, "logps/chosen": -419.4952697753906, "logps/rejected": -512.7720947265625, "loss": 0.587, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.773984670639038, "rewards/margins": 0.6838240623474121, "rewards/rejected": -2.4578089714050293, "step": 4540 }, { "epoch": 0.6, "learning_rate": 2.105304117721361e-06, "logits/chosen": -0.5224363803863525, "logits/rejected": 0.21402570605278015, "logps/chosen": -394.37713623046875, "logps/rejected": -444.3313903808594, "loss": 0.595, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.7446057796478271, "rewards/margins": 0.7791751027107239, "rewards/rejected": -2.5237812995910645, "step": 4550 }, { "epoch": 0.6, "learning_rate": 2.0940292168850913e-06, "logits/chosen": -0.5463732481002808, "logits/rejected": 0.1663360893726349, "logps/chosen": -414.0874938964844, "logps/rejected": -459.6375427246094, "loss": 0.5231, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.5902382135391235, "rewards/margins": 0.7520595788955688, "rewards/rejected": -2.3422977924346924, "step": 4560 }, { "epoch": 0.6, "learning_rate": 2.082762790703939e-06, "logits/chosen": -0.802535891532898, "logits/rejected": 0.28110548853874207, "logps/chosen": -429.9853515625, "logps/rejected": -507.537841796875, "loss": 0.5194, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.6454626321792603, "rewards/margins": 0.8513976335525513, "rewards/rejected": -2.4968602657318115, "step": 4570 }, { "epoch": 0.6, "learning_rate": 2.0715050743649674e-06, "logits/chosen": -0.736199676990509, "logits/rejected": 0.028460631147027016, "logps/chosen": -406.27423095703125, "logps/rejected": -543.2493286132812, "loss": 0.4913, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7144979238510132, "rewards/margins": 0.9728431701660156, "rewards/rejected": -2.6873412132263184, "step": 4580 }, { "epoch": 0.6, "learning_rate": 2.060256302873421e-06, "logits/chosen": -0.4676334857940674, "logits/rejected": -0.17427358031272888, "logps/chosen": -442.82806396484375, "logps/rejected": -547.2783203125, "loss": 0.4903, "rewards/accuracies": 0.75, "rewards/chosen": -1.9163872003555298, "rewards/margins": 0.9291712641716003, "rewards/rejected": -2.8455584049224854, "step": 4590 }, { "epoch": 0.6, "learning_rate": 2.049016711047822e-06, "logits/chosen": -0.7500999569892883, "logits/rejected": 0.41036081314086914, "logps/chosen": -462.0934143066406, "logps/rejected": -542.0261840820312, "loss": 0.4839, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.009445905685425, "rewards/margins": 1.0364575386047363, "rewards/rejected": -3.045903444290161, "step": 4600 }, { "epoch": 0.6, "eval_logits/chosen": 1.1471757888793945, "eval_logits/rejected": 1.885801911354065, "eval_logps/chosen": -468.8564147949219, "eval_logps/rejected": -546.4149780273438, "eval_loss": 0.5094394087791443, "eval_rewards/accuracies": 0.7304999828338623, "eval_rewards/chosen": -2.003664016723633, "eval_rewards/margins": 0.9746747016906738, "eval_rewards/rejected": -2.9783387184143066, "eval_runtime": 1172.0125, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.853, "step": 4600 }, { "epoch": 0.6, "learning_rate": 2.037786533515064e-06, "logits/chosen": -0.33482661843299866, "logits/rejected": 0.0835098996758461, "logps/chosen": -504.9192810058594, "logps/rejected": -563.8491821289062, "loss": 0.5111, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.06898832321167, "rewards/margins": 0.8450971841812134, "rewards/rejected": -2.914085626602173, "step": 4610 }, { "epoch": 0.6, "learning_rate": 2.02656600470552e-06, "logits/chosen": -0.6622999906539917, "logits/rejected": 0.010640504769980907, "logps/chosen": -441.935546875, "logps/rejected": -526.4178466796875, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": -1.8346233367919922, "rewards/margins": 1.0247472524642944, "rewards/rejected": -2.859370708465576, "step": 4620 }, { "epoch": 0.61, "learning_rate": 2.015355358848144e-06, "logits/chosen": -0.013636887073516846, "logits/rejected": -0.028725851327180862, "logps/chosen": -397.56976318359375, "logps/rejected": -514.6229248046875, "loss": 0.5147, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8054370880126953, "rewards/margins": 0.9198155403137207, "rewards/rejected": -2.725252866744995, "step": 4630 }, { "epoch": 0.61, "learning_rate": 2.004154829965582e-06, "logits/chosen": -0.6800112128257751, "logits/rejected": 0.04380284622311592, "logps/chosen": -467.8673400878906, "logps/rejected": -557.4893798828125, "loss": 0.5006, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9010484218597412, "rewards/margins": 1.0638830661773682, "rewards/rejected": -2.9649314880371094, "step": 4640 }, { "epoch": 0.61, "learning_rate": 1.99296465186929e-06, "logits/chosen": -0.8370498418807983, "logits/rejected": 0.4450223445892334, "logps/chosen": -462.6966247558594, "logps/rejected": -493.6236877441406, "loss": 0.514, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8274908065795898, "rewards/margins": 0.9097514152526855, "rewards/rejected": -2.7372422218322754, "step": 4650 }, { "epoch": 0.61, "learning_rate": 1.9817850581546488e-06, "logits/chosen": -0.24383525550365448, "logits/rejected": 0.3440066874027252, "logps/chosen": -495.785400390625, "logps/rejected": -580.841796875, "loss": 0.5974, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.167267084121704, "rewards/margins": 0.7816963791847229, "rewards/rejected": -2.9489636421203613, "step": 4660 }, { "epoch": 0.61, "learning_rate": 1.970616282196091e-06, "logits/chosen": -0.5986989140510559, "logits/rejected": 0.10017760097980499, "logps/chosen": -437.3359375, "logps/rejected": -534.2577514648438, "loss": 0.4824, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8744062185287476, "rewards/margins": 1.0383882522583008, "rewards/rejected": -2.912794589996338, "step": 4670 }, { "epoch": 0.61, "learning_rate": 1.959458557142228e-06, "logits/chosen": -0.3310568928718567, "logits/rejected": -0.00010063648369396105, "logps/chosen": -468.37921142578125, "logps/rejected": -538.8629150390625, "loss": 0.7126, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1639952659606934, "rewards/margins": 0.5907930135726929, "rewards/rejected": -2.754788398742676, "step": 4680 }, { "epoch": 0.61, "learning_rate": 1.948312115910982e-06, "logits/chosen": -0.44464436173439026, "logits/rejected": 0.15500059723854065, "logps/chosen": -484.95843505859375, "logps/rejected": -547.2438354492188, "loss": 0.4925, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0217325687408447, "rewards/margins": 1.021672010421753, "rewards/rejected": -3.0434045791625977, "step": 4690 }, { "epoch": 0.62, "learning_rate": 1.937177191184729e-06, "logits/chosen": -0.2106432020664215, "logits/rejected": -0.05869419500231743, "logps/chosen": -432.0470275878906, "logps/rejected": -516.4940185546875, "loss": 0.5562, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9173109531402588, "rewards/margins": 0.8195775747299194, "rewards/rejected": -2.7368886470794678, "step": 4700 }, { "epoch": 0.62, "eval_logits/chosen": 1.1998752355575562, "eval_logits/rejected": 1.9384359121322632, "eval_logps/chosen": -471.0873107910156, "eval_logps/rejected": -546.7677001953125, "eval_loss": 0.5075832009315491, "eval_rewards/accuracies": 0.734000027179718, "eval_rewards/chosen": -2.025972843170166, "eval_rewards/margins": 0.9558923244476318, "eval_rewards/rejected": -2.9818649291992188, "eval_runtime": 1220.8498, "eval_samples_per_second": 1.638, "eval_steps_per_second": 0.819, "step": 4700 }, { "epoch": 0.62, "learning_rate": 1.9260540154054317e-06, "logits/chosen": -0.3351259231567383, "logits/rejected": 0.35000115633010864, "logps/chosen": -423.49017333984375, "logps/rejected": -554.1179809570312, "loss": 0.3844, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -1.8695085048675537, "rewards/margins": 1.3363901376724243, "rewards/rejected": -3.2058990001678467, "step": 4710 }, { "epoch": 0.62, "learning_rate": 1.9149428207697983e-06, "logits/chosen": -0.33344680070877075, "logits/rejected": -0.03340202569961548, "logps/chosen": -488.39239501953125, "logps/rejected": -548.125244140625, "loss": 0.6408, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3214313983917236, "rewards/margins": 0.5961470603942871, "rewards/rejected": -2.91757869720459, "step": 4720 }, { "epoch": 0.62, "learning_rate": 1.9038438392244262e-06, "logits/chosen": -0.3149067759513855, "logits/rejected": -0.3603346645832062, "logps/chosen": -476.91766357421875, "logps/rejected": -549.4235229492188, "loss": 0.4499, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9402230978012085, "rewards/margins": 1.0026373863220215, "rewards/rejected": -2.9428603649139404, "step": 4730 }, { "epoch": 0.62, "learning_rate": 1.8927573024609666e-06, "logits/chosen": -0.267407089471817, "logits/rejected": 0.3591880202293396, "logps/chosen": -424.043212890625, "logps/rejected": -519.4016723632812, "loss": 0.4639, "rewards/accuracies": 0.8125, "rewards/chosen": -2.020537853240967, "rewards/margins": 1.063114047050476, "rewards/rejected": -3.0836517810821533, "step": 4740 }, { "epoch": 0.62, "learning_rate": 1.8816834419112845e-06, "logits/chosen": -0.276968777179718, "logits/rejected": 0.6711007356643677, "logps/chosen": -467.92401123046875, "logps/rejected": -534.5933837890625, "loss": 0.5329, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1998984813690186, "rewards/margins": 1.0478665828704834, "rewards/rejected": -3.247765302658081, "step": 4750 }, { "epoch": 0.62, "learning_rate": 1.8706224887426283e-06, "logits/chosen": -0.017868299037218094, "logits/rejected": 0.1659470647573471, "logps/chosen": -508.35198974609375, "logps/rejected": -592.7130126953125, "loss": 0.5827, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4431605339050293, "rewards/margins": 0.7835296392440796, "rewards/rejected": -3.2266902923583984, "step": 4760 }, { "epoch": 0.62, "learning_rate": 1.8595746738528045e-06, "logits/chosen": -0.05624357610940933, "logits/rejected": -0.07322041690349579, "logps/chosen": -468.21783447265625, "logps/rejected": -583.8419799804688, "loss": 0.5432, "rewards/accuracies": 0.75, "rewards/chosen": -2.211426258087158, "rewards/margins": 0.9063574075698853, "rewards/rejected": -3.117783546447754, "step": 4770 }, { "epoch": 0.63, "learning_rate": 1.8485402278653584e-06, "logits/chosen": -0.331932932138443, "logits/rejected": 0.19877803325653076, "logps/chosen": -465.69122314453125, "logps/rejected": -539.7578125, "loss": 0.4492, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2374541759490967, "rewards/margins": 0.9785755276679993, "rewards/rejected": -3.216029644012451, "step": 4780 }, { "epoch": 0.63, "learning_rate": 1.8375193811247577e-06, "logits/chosen": -0.3239549398422241, "logits/rejected": 0.33996933698654175, "logps/chosen": -472.1409606933594, "logps/rejected": -542.0296020507812, "loss": 0.5019, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.24760365486145, "rewards/margins": 0.8752537965774536, "rewards/rejected": -3.1228575706481934, "step": 4790 }, { "epoch": 0.63, "learning_rate": 1.826512363691586e-06, "logits/chosen": -0.8161247372627258, "logits/rejected": -0.2031267136335373, "logps/chosen": -500.17401123046875, "logps/rejected": -567.0885009765625, "loss": 0.4964, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2298948764801025, "rewards/margins": 0.9767163991928101, "rewards/rejected": -3.206610918045044, "step": 4800 }, { "epoch": 0.63, "eval_logits/chosen": 1.3968160152435303, "eval_logits/rejected": 2.1537721157073975, "eval_logps/chosen": -485.73052978515625, "eval_logps/rejected": -561.4290161132812, "eval_loss": 0.5078465938568115, "eval_rewards/accuracies": 0.7335000038146973, "eval_rewards/chosen": -2.172405481338501, "eval_rewards/margins": 0.956072986125946, "eval_rewards/rejected": -3.1284782886505127, "eval_runtime": 1205.6317, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.829, "step": 4800 }, { "epoch": 0.63, "learning_rate": 1.8155194053377391e-06, "logits/chosen": -0.4702053964138031, "logits/rejected": 0.691516101360321, "logps/chosen": -462.8230895996094, "logps/rejected": -549.562744140625, "loss": 0.478, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0018343925476074, "rewards/margins": 1.2428886890411377, "rewards/rejected": -3.244723081588745, "step": 4810 }, { "epoch": 0.63, "learning_rate": 1.80454073554163e-06, "logits/chosen": -0.25434356927871704, "logits/rejected": 0.4281914234161377, "logps/chosen": -443.32781982421875, "logps/rejected": -518.9435424804688, "loss": 0.5968, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2195210456848145, "rewards/margins": 0.8934110403060913, "rewards/rejected": -3.1129322052001953, "step": 4820 }, { "epoch": 0.63, "learning_rate": 1.7935765834833966e-06, "logits/chosen": -0.17489385604858398, "logits/rejected": 0.3458006978034973, "logps/chosen": -487.3931579589844, "logps/rejected": -616.27587890625, "loss": 0.4374, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.1858935356140137, "rewards/margins": 1.282659888267517, "rewards/rejected": -3.468553066253662, "step": 4830 }, { "epoch": 0.63, "learning_rate": 1.7826271780401182e-06, "logits/chosen": -0.34163016080856323, "logits/rejected": 0.6715383529663086, "logps/chosen": -477.7323303222656, "logps/rejected": -576.5382690429688, "loss": 0.4413, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3642547130584717, "rewards/margins": 1.1457160711288452, "rewards/rejected": -3.5099709033966064, "step": 4840 }, { "epoch": 0.63, "learning_rate": 1.7716927477810389e-06, "logits/chosen": -0.3121749758720398, "logits/rejected": 0.46080583333969116, "logps/chosen": -460.3019104003906, "logps/rejected": -576.9380493164062, "loss": 0.4645, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.031574249267578, "rewards/margins": 1.1349817514419556, "rewards/rejected": -3.166555881500244, "step": 4850 }, { "epoch": 0.64, "learning_rate": 1.7607735209627953e-06, "logits/chosen": -0.23650212585926056, "logits/rejected": 0.6278365850448608, "logps/chosen": -487.94305419921875, "logps/rejected": -562.5647583007812, "loss": 0.4918, "rewards/accuracies": 0.75, "rewards/chosen": -2.249572515487671, "rewards/margins": 1.1094893217086792, "rewards/rejected": -3.3590621948242188, "step": 4860 }, { "epoch": 0.64, "learning_rate": 1.749869725524651e-06, "logits/chosen": -0.2613787055015564, "logits/rejected": 0.479464054107666, "logps/chosen": -465.05743408203125, "logps/rejected": -545.4020385742188, "loss": 0.4567, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.889392614364624, "rewards/margins": 1.1381362676620483, "rewards/rejected": -3.027529001235962, "step": 4870 }, { "epoch": 0.64, "learning_rate": 1.7389815890837392e-06, "logits/chosen": 0.03331397473812103, "logits/rejected": -0.37613219022750854, "logps/chosen": -484.1585998535156, "logps/rejected": -606.52978515625, "loss": 0.4609, "rewards/accuracies": 0.75, "rewards/chosen": -2.071956157684326, "rewards/margins": 1.0469639301300049, "rewards/rejected": -3.118920087814331, "step": 4880 }, { "epoch": 0.64, "learning_rate": 1.7281093389303105e-06, "logits/chosen": -0.5912939310073853, "logits/rejected": -0.04338790848851204, "logps/chosen": -431.63238525390625, "logps/rejected": -512.7877197265625, "loss": 0.4884, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8607794046401978, "rewards/margins": 0.9917821884155273, "rewards/rejected": -2.8525617122650146, "step": 4890 }, { "epoch": 0.64, "learning_rate": 1.7172532020229899e-06, "logits/chosen": -0.8086379766464233, "logits/rejected": 0.37625136971473694, "logps/chosen": -488.94122314453125, "logps/rejected": -558.6923828125, "loss": 0.4879, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.099210739135742, "rewards/margins": 1.0362012386322021, "rewards/rejected": -3.1354117393493652, "step": 4900 }, { "epoch": 0.64, "eval_logits/chosen": 1.3801579475402832, "eval_logits/rejected": 2.132445812225342, "eval_logps/chosen": -489.562255859375, "eval_logps/rejected": -571.5598754882812, "eval_loss": 0.5124561190605164, "eval_rewards/accuracies": 0.7310000061988831, "eval_rewards/chosen": -2.210721731185913, "eval_rewards/margins": 1.0190653800964355, "eval_rewards/rejected": -3.2297873497009277, "eval_runtime": 1238.0078, "eval_samples_per_second": 1.615, "eval_steps_per_second": 0.808, "step": 4900 }, { "epoch": 0.64, "learning_rate": 1.7064134049840359e-06, "logits/chosen": -0.2153107225894928, "logits/rejected": 0.029362941160798073, "logps/chosen": -449.80029296875, "logps/rejected": -585.2144775390625, "loss": 0.4087, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9706871509552002, "rewards/margins": 1.3460237979888916, "rewards/rejected": -3.31671142578125, "step": 4910 }, { "epoch": 0.64, "learning_rate": 1.6955901740946136e-06, "logits/chosen": -0.38802531361579895, "logits/rejected": 0.2870200276374817, "logps/chosen": -543.64306640625, "logps/rejected": -657.2401123046875, "loss": 0.483, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.5144500732421875, "rewards/margins": 1.142197847366333, "rewards/rejected": -3.6566474437713623, "step": 4920 }, { "epoch": 0.65, "learning_rate": 1.684783735290067e-06, "logits/chosen": -0.6487330198287964, "logits/rejected": 0.3835352659225464, "logps/chosen": -452.5752868652344, "logps/rejected": -591.7291870117188, "loss": 0.3777, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9951683282852173, "rewards/margins": 1.4564054012298584, "rewards/rejected": -3.451573610305786, "step": 4930 }, { "epoch": 0.65, "learning_rate": 1.6739943141552079e-06, "logits/chosen": -0.6588994860649109, "logits/rejected": 0.3580857515335083, "logps/chosen": -522.0641479492188, "logps/rejected": -571.3804931640625, "loss": 0.5866, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2700130939483643, "rewards/margins": 1.0017244815826416, "rewards/rejected": -3.271738052368164, "step": 4940 }, { "epoch": 0.65, "learning_rate": 1.663222135919601e-06, "logits/chosen": -0.5175567865371704, "logits/rejected": 0.5185331106185913, "logps/chosen": -509.4527282714844, "logps/rejected": -577.5142822265625, "loss": 0.4903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.127331256866455, "rewards/margins": 0.9136832356452942, "rewards/rejected": -3.0410146713256836, "step": 4950 }, { "epoch": 0.65, "learning_rate": 1.652467425452865e-06, "logits/chosen": -0.43478575348854065, "logits/rejected": -0.07904218137264252, "logps/chosen": -461.2347717285156, "logps/rejected": -556.1910400390625, "loss": 0.4535, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1336779594421387, "rewards/margins": 1.1299854516983032, "rewards/rejected": -3.2636635303497314, "step": 4960 }, { "epoch": 0.65, "learning_rate": 1.6417304072599787e-06, "logits/chosen": -0.6369231939315796, "logits/rejected": 0.5573645234107971, "logps/chosen": -503.250732421875, "logps/rejected": -592.5829467773438, "loss": 0.531, "rewards/accuracies": 0.75, "rewards/chosen": -2.3848960399627686, "rewards/margins": 0.9315534830093384, "rewards/rejected": -3.3164494037628174, "step": 4970 }, { "epoch": 0.65, "learning_rate": 1.6310113054765947e-06, "logits/chosen": -0.6183720231056213, "logits/rejected": 0.6410430073738098, "logps/chosen": -529.9524536132812, "logps/rejected": -604.1205444335938, "loss": 0.5296, "rewards/accuracies": 0.75, "rewards/chosen": -2.5189731121063232, "rewards/margins": 1.104549765586853, "rewards/rejected": -3.623522996902466, "step": 4980 }, { "epoch": 0.65, "learning_rate": 1.6203103438643591e-06, "logits/chosen": -0.5253661870956421, "logits/rejected": 0.08035139739513397, "logps/chosen": -489.2454528808594, "logps/rejected": -587.1968994140625, "loss": 0.4981, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.3722403049468994, "rewards/margins": 0.9075069427490234, "rewards/rejected": -3.279747486114502, "step": 4990 }, { "epoch": 0.65, "learning_rate": 1.6096277458062417e-06, "logits/chosen": 0.17417654395103455, "logits/rejected": 0.0011325478553771973, "logps/chosen": -417.90740966796875, "logps/rejected": -536.4146118164062, "loss": 0.4916, "rewards/accuracies": 0.8125, "rewards/chosen": -2.131730556488037, "rewards/margins": 1.022687554359436, "rewards/rejected": -3.1544182300567627, "step": 5000 }, { "epoch": 0.65, "eval_logits/chosen": 1.3780100345611572, "eval_logits/rejected": 2.116060256958008, "eval_logps/chosen": -478.1451110839844, "eval_logps/rejected": -558.6430053710938, "eval_loss": 0.5087121725082397, "eval_rewards/accuracies": 0.7300000190734863, "eval_rewards/chosen": -2.096550941467285, "eval_rewards/margins": 1.0040674209594727, "eval_rewards/rejected": -3.100618362426758, "eval_runtime": 1192.3166, "eval_samples_per_second": 1.677, "eval_steps_per_second": 0.839, "step": 5000 }, { "epoch": 0.66, "learning_rate": 1.5989637343018705e-06, "logits/chosen": -0.31024664640426636, "logits/rejected": 0.315248966217041, "logps/chosen": -463.248046875, "logps/rejected": -594.2037963867188, "loss": 0.4, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.942427635192871, "rewards/margins": 1.2658753395080566, "rewards/rejected": -3.2083029747009277, "step": 5010 }, { "epoch": 0.66, "learning_rate": 1.5883185319628824e-06, "logits/chosen": -0.7604548931121826, "logits/rejected": 1.1575146913528442, "logps/chosen": -514.0056762695312, "logps/rejected": -576.3931884765625, "loss": 0.4749, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3210787773132324, "rewards/margins": 1.0514533519744873, "rewards/rejected": -3.372532367706299, "step": 5020 }, { "epoch": 0.66, "learning_rate": 1.5776923610082695e-06, "logits/chosen": -0.40637367963790894, "logits/rejected": 0.5249187350273132, "logps/chosen": -471.66650390625, "logps/rejected": -582.8086547851562, "loss": 0.4789, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1091582775115967, "rewards/margins": 1.3221811056137085, "rewards/rejected": -3.431339979171753, "step": 5030 }, { "epoch": 0.66, "learning_rate": 1.5670854432597433e-06, "logits/chosen": -0.29735487699508667, "logits/rejected": 0.3211146891117096, "logps/chosen": -496.14306640625, "logps/rejected": -521.6513061523438, "loss": 0.5256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9637601375579834, "rewards/margins": 0.7890554666519165, "rewards/rejected": -2.7528157234191895, "step": 5040 }, { "epoch": 0.66, "learning_rate": 1.556498000137104e-06, "logits/chosen": -0.15755796432495117, "logits/rejected": 0.6389847993850708, "logps/chosen": -417.54052734375, "logps/rejected": -540.6213989257812, "loss": 0.3919, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8478233814239502, "rewards/margins": 1.3877325057983398, "rewards/rejected": -3.235556125640869, "step": 5050 }, { "epoch": 0.66, "learning_rate": 1.5459302526536188e-06, "logits/chosen": -0.493274986743927, "logits/rejected": -0.03875049203634262, "logps/chosen": -449.4249572753906, "logps/rejected": -511.86492919921875, "loss": 0.5453, "rewards/accuracies": 0.75, "rewards/chosen": -1.8050098419189453, "rewards/margins": 0.8924520611763, "rewards/rejected": -2.697462320327759, "step": 5060 }, { "epoch": 0.66, "learning_rate": 1.5353824214114075e-06, "logits/chosen": -0.6162487268447876, "logits/rejected": 0.14038828015327454, "logps/chosen": -444.60736083984375, "logps/rejected": -520.192138671875, "loss": 0.5187, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8896453380584717, "rewards/margins": 0.8513407707214355, "rewards/rejected": -2.7409861087799072, "step": 5070 }, { "epoch": 0.66, "learning_rate": 1.5248547265968373e-06, "logits/chosen": -0.5640738606452942, "logits/rejected": -0.3062174618244171, "logps/chosen": -428.0648498535156, "logps/rejected": -505.9144592285156, "loss": 0.5312, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8717830181121826, "rewards/margins": 0.8837350606918335, "rewards/rejected": -2.7555181980133057, "step": 5080 }, { "epoch": 0.67, "learning_rate": 1.5143473879759265e-06, "logits/chosen": -0.9101265668869019, "logits/rejected": 0.7031489610671997, "logps/chosen": -447.75201416015625, "logps/rejected": -512.3680419921875, "loss": 0.5284, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0770153999328613, "rewards/margins": 1.0809861421585083, "rewards/rejected": -3.158001184463501, "step": 5090 }, { "epoch": 0.67, "learning_rate": 1.5038606248897586e-06, "logits/chosen": -0.36409762501716614, "logits/rejected": 0.16153445839881897, "logps/chosen": -540.3499755859375, "logps/rejected": -589.29150390625, "loss": 0.5806, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.5957887172698975, "rewards/margins": 0.6685230135917664, "rewards/rejected": -3.2643120288848877, "step": 5100 }, { "epoch": 0.67, "eval_logits/chosen": 1.3594954013824463, "eval_logits/rejected": 2.0896875858306885, "eval_logps/chosen": -491.28375244140625, "eval_logps/rejected": -572.3603515625, "eval_loss": 0.5089035034179688, "eval_rewards/accuracies": 0.7304999828338623, "eval_rewards/chosen": -2.2279369831085205, "eval_rewards/margins": 1.0098555088043213, "eval_rewards/rejected": -3.237792491912842, "eval_runtime": 1197.34, "eval_samples_per_second": 1.67, "eval_steps_per_second": 0.835, "step": 5100 }, { "epoch": 0.67, "learning_rate": 1.4933946562499008e-06, "logits/chosen": -0.3227179944515228, "logits/rejected": 0.6649230718612671, "logps/chosen": -481.3219299316406, "logps/rejected": -565.8980712890625, "loss": 0.5032, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.204648733139038, "rewards/margins": 1.1594634056091309, "rewards/rejected": -3.364112377166748, "step": 5110 }, { "epoch": 0.67, "learning_rate": 1.482949700533835e-06, "logits/chosen": -0.20165733993053436, "logits/rejected": 0.22539961338043213, "logps/chosen": -431.33514404296875, "logps/rejected": -507.3214416503906, "loss": 0.5052, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.136174440383911, "rewards/margins": 0.8829553723335266, "rewards/rejected": -3.019129753112793, "step": 5120 }, { "epoch": 0.67, "learning_rate": 1.4725259757803983e-06, "logits/chosen": -0.3899513781070709, "logits/rejected": 0.26980915665626526, "logps/chosen": -543.8787841796875, "logps/rejected": -611.9580688476562, "loss": 0.5236, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.3227076530456543, "rewards/margins": 1.0733088254928589, "rewards/rejected": -3.3960158824920654, "step": 5130 }, { "epoch": 0.67, "learning_rate": 1.4621236995852314e-06, "logits/chosen": -0.9624841809272766, "logits/rejected": 0.41740506887435913, "logps/chosen": -478.22625732421875, "logps/rejected": -570.9786987304688, "loss": 0.4712, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1282503604888916, "rewards/margins": 1.1468855142593384, "rewards/rejected": -3.2751357555389404, "step": 5140 }, { "epoch": 0.67, "learning_rate": 1.4517430890962337e-06, "logits/chosen": -1.003804087638855, "logits/rejected": 0.8981930613517761, "logps/chosen": -504.6312561035156, "logps/rejected": -505.14471435546875, "loss": 0.5248, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.165656328201294, "rewards/margins": 0.9904571771621704, "rewards/rejected": -3.1561131477355957, "step": 5150 }, { "epoch": 0.68, "learning_rate": 1.4413843610090342e-06, "logits/chosen": -1.1295411586761475, "logits/rejected": 0.7629106044769287, "logps/chosen": -517.0686645507812, "logps/rejected": -583.6041870117188, "loss": 0.5029, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2716588973999023, "rewards/margins": 0.9855988621711731, "rewards/rejected": -3.2572574615478516, "step": 5160 }, { "epoch": 0.68, "learning_rate": 1.4310477315624637e-06, "logits/chosen": -0.5515908598899841, "logits/rejected": 0.39060476422309875, "logps/chosen": -463.7108459472656, "logps/rejected": -558.4481201171875, "loss": 0.603, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.238138198852539, "rewards/margins": 0.9213913679122925, "rewards/rejected": -3.1595299243927, "step": 5170 }, { "epoch": 0.68, "learning_rate": 1.420733416534045e-06, "logits/chosen": 0.11524833738803864, "logits/rejected": 0.43751248717308044, "logps/chosen": -463.92352294921875, "logps/rejected": -566.92578125, "loss": 0.5296, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1641011238098145, "rewards/margins": 0.9625070691108704, "rewards/rejected": -3.126608371734619, "step": 5180 }, { "epoch": 0.68, "learning_rate": 1.410441631235487e-06, "logits/chosen": -0.6287524700164795, "logits/rejected": -0.010029402561485767, "logps/chosen": -475.24285888671875, "logps/rejected": -560.3567504882812, "loss": 0.4824, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9610084295272827, "rewards/margins": 0.988863468170166, "rewards/rejected": -2.949871778488159, "step": 5190 }, { "epoch": 0.68, "learning_rate": 1.4001725905081868e-06, "logits/chosen": -0.594923198223114, "logits/rejected": 0.47926950454711914, "logps/chosen": -443.66815185546875, "logps/rejected": -482.48907470703125, "loss": 0.5027, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1409497261047363, "rewards/margins": 0.8297210931777954, "rewards/rejected": -2.9706709384918213, "step": 5200 }, { "epoch": 0.68, "eval_logits/chosen": 1.071418285369873, "eval_logits/rejected": 1.8013691902160645, "eval_logps/chosen": -458.1094970703125, "eval_logps/rejected": -531.8433837890625, "eval_loss": 0.5037881135940552, "eval_rewards/accuracies": 0.737500011920929, "eval_rewards/chosen": -1.8961946964263916, "eval_rewards/margins": 0.936427116394043, "eval_rewards/rejected": -2.8326218128204346, "eval_runtime": 1205.2329, "eval_samples_per_second": 1.659, "eval_steps_per_second": 0.83, "step": 5200 }, { "epoch": 0.68, "learning_rate": 1.3899265087187507e-06, "logits/chosen": -0.2585764527320862, "logits/rejected": 0.0724058523774147, "logps/chosen": -413.6856994628906, "logps/rejected": -479.63323974609375, "loss": 0.494, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8029801845550537, "rewards/margins": 0.9064146876335144, "rewards/rejected": -2.709394931793213, "step": 5210 }, { "epoch": 0.68, "learning_rate": 1.3797035997545144e-06, "logits/chosen": -0.7779779434204102, "logits/rejected": -0.05586876720190048, "logps/chosen": -463.6990661621094, "logps/rejected": -529.9464721679688, "loss": 0.4447, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.761859655380249, "rewards/margins": 1.0219517946243286, "rewards/rejected": -2.7838118076324463, "step": 5220 }, { "epoch": 0.68, "learning_rate": 1.3695040770190816e-06, "logits/chosen": -0.9211093187332153, "logits/rejected": 0.09622125327587128, "logps/chosen": -420.81951904296875, "logps/rejected": -507.4986267089844, "loss": 0.5245, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7863895893096924, "rewards/margins": 0.9270517230033875, "rewards/rejected": -2.7134411334991455, "step": 5230 }, { "epoch": 0.69, "learning_rate": 1.3593281534278651e-06, "logits/chosen": -0.5812297463417053, "logits/rejected": -0.16451668739318848, "logps/chosen": -413.26971435546875, "logps/rejected": -532.844482421875, "loss": 0.4485, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.8284438848495483, "rewards/margins": 1.1127674579620361, "rewards/rejected": -2.941211223602295, "step": 5240 }, { "epoch": 0.69, "learning_rate": 1.3491760414036478e-06, "logits/chosen": -0.8652269244194031, "logits/rejected": 0.4699738919734955, "logps/chosen": -487.8330993652344, "logps/rejected": -502.11395263671875, "loss": 0.6038, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9750216007232666, "rewards/margins": 0.6716020703315735, "rewards/rejected": -2.6466238498687744, "step": 5250 }, { "epoch": 0.69, "learning_rate": 1.3390479528721444e-06, "logits/chosen": -0.4403937757015228, "logits/rejected": 0.20110268890857697, "logps/chosen": -475.3594665527344, "logps/rejected": -581.604248046875, "loss": 0.4959, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.161839723587036, "rewards/margins": 1.0040169954299927, "rewards/rejected": -3.1658565998077393, "step": 5260 }, { "epoch": 0.69, "learning_rate": 1.3289440992575756e-06, "logits/chosen": -0.5245973467826843, "logits/rejected": 0.14496035873889923, "logps/chosen": -487.95831298828125, "logps/rejected": -544.9544677734375, "loss": 0.4847, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9425252676010132, "rewards/margins": 0.839037299156189, "rewards/rejected": -2.781562566757202, "step": 5270 }, { "epoch": 0.69, "learning_rate": 1.3188646914782616e-06, "logits/chosen": -1.1343739032745361, "logits/rejected": 0.7802155613899231, "logps/chosen": -519.5667724609375, "logps/rejected": -522.45166015625, "loss": 0.4612, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.960554838180542, "rewards/margins": 0.9846093058586121, "rewards/rejected": -2.945164203643799, "step": 5280 }, { "epoch": 0.69, "learning_rate": 1.3088099399422109e-06, "logits/chosen": -0.3516455292701721, "logits/rejected": -0.15082845091819763, "logps/chosen": -515.0338745117188, "logps/rejected": -570.6123657226562, "loss": 0.5449, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2784736156463623, "rewards/margins": 0.777103066444397, "rewards/rejected": -3.0555763244628906, "step": 5290 }, { "epoch": 0.69, "learning_rate": 1.2987800545427353e-06, "logits/chosen": -0.7941345572471619, "logits/rejected": 0.2526698708534241, "logps/chosen": -451.58319091796875, "logps/rejected": -526.3946533203125, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -1.7788721323013306, "rewards/margins": 1.0764901638031006, "rewards/rejected": -2.8553624153137207, "step": 5300 }, { "epoch": 0.69, "eval_logits/chosen": 1.155483365058899, "eval_logits/rejected": 1.890524983406067, "eval_logps/chosen": -463.9870300292969, "eval_logps/rejected": -540.6599731445312, "eval_loss": 0.505189836025238, "eval_rewards/accuracies": 0.7329999804496765, "eval_rewards/chosen": -1.9549702405929565, "eval_rewards/margins": 0.9658184051513672, "eval_rewards/rejected": -2.920788526535034, "eval_runtime": 1180.1738, "eval_samples_per_second": 1.695, "eval_steps_per_second": 0.847, "step": 5300 }, { "epoch": 0.69, "learning_rate": 1.288775244654062e-06, "logits/chosen": -0.3898960053920746, "logits/rejected": -0.007041037082672119, "logps/chosen": -516.4864501953125, "logps/rejected": -554.4014282226562, "loss": 0.5911, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0645229816436768, "rewards/margins": 0.8199642896652222, "rewards/rejected": -2.8844873905181885, "step": 5310 }, { "epoch": 0.7, "learning_rate": 1.2787957191269696e-06, "logits/chosen": -0.3055559992790222, "logits/rejected": 0.39844006299972534, "logps/chosen": -468.44097900390625, "logps/rejected": -557.8650512695312, "loss": 0.5665, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0604255199432373, "rewards/margins": 0.7497578859329224, "rewards/rejected": -2.81018328666687, "step": 5320 }, { "epoch": 0.7, "learning_rate": 1.2688416862844193e-06, "logits/chosen": -0.3095241189002991, "logits/rejected": 0.23874731361865997, "logps/chosen": -409.28021240234375, "logps/rejected": -558.3905029296875, "loss": 0.4496, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7695989608764648, "rewards/margins": 1.3319097757339478, "rewards/rejected": -3.101508378982544, "step": 5330 }, { "epoch": 0.7, "learning_rate": 1.2589133539172193e-06, "logits/chosen": -0.9595244526863098, "logits/rejected": 0.1507810652256012, "logps/chosen": -486.26629638671875, "logps/rejected": -545.3350830078125, "loss": 0.432, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7377183437347412, "rewards/margins": 1.0882227420806885, "rewards/rejected": -2.8259410858154297, "step": 5340 }, { "epoch": 0.7, "learning_rate": 1.249010929279672e-06, "logits/chosen": -0.8987747430801392, "logits/rejected": -0.06229814141988754, "logps/chosen": -492.7890625, "logps/rejected": -579.2696533203125, "loss": 0.5204, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0684757232666016, "rewards/margins": 0.983476996421814, "rewards/rejected": -3.051952600479126, "step": 5350 }, { "epoch": 0.7, "learning_rate": 1.2391346190852603e-06, "logits/chosen": -0.8224785923957825, "logits/rejected": 0.3233809769153595, "logps/chosen": -468.3160095214844, "logps/rejected": -521.6168823242188, "loss": 0.6492, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1017160415649414, "rewards/margins": 0.6927504539489746, "rewards/rejected": -2.794466495513916, "step": 5360 }, { "epoch": 0.7, "learning_rate": 1.2292846295023222e-06, "logits/chosen": -0.5089510679244995, "logits/rejected": 0.2342415750026703, "logps/chosen": -515.0882568359375, "logps/rejected": -560.9594116210938, "loss": 0.5775, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1873621940612793, "rewards/margins": 0.6423591375350952, "rewards/rejected": -2.829720973968506, "step": 5370 }, { "epoch": 0.7, "learning_rate": 1.2194611661497576e-06, "logits/chosen": -0.40925416350364685, "logits/rejected": 0.3876747190952301, "logps/chosen": -480.6631774902344, "logps/rejected": -569.6943969726562, "loss": 0.4608, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.132833242416382, "rewards/margins": 1.046400547027588, "rewards/rejected": -3.1792335510253906, "step": 5380 }, { "epoch": 0.71, "learning_rate": 1.2096644340927247e-06, "logits/chosen": -0.4503072202205658, "logits/rejected": -0.0540265329182148, "logps/chosen": -478.57281494140625, "logps/rejected": -541.1819458007812, "loss": 0.5218, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.94533371925354, "rewards/margins": 0.8256410360336304, "rewards/rejected": -2.770974636077881, "step": 5390 }, { "epoch": 0.71, "learning_rate": 1.19989463783837e-06, "logits/chosen": -1.0149070024490356, "logits/rejected": 0.19897064566612244, "logps/chosen": -478.80670166015625, "logps/rejected": -576.4320068359375, "loss": 0.4521, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8361622095108032, "rewards/margins": 1.1377449035644531, "rewards/rejected": -2.973906993865967, "step": 5400 }, { "epoch": 0.71, "eval_logits/chosen": 1.1075793504714966, "eval_logits/rejected": 1.8436723947525024, "eval_logps/chosen": -467.6123962402344, "eval_logps/rejected": -543.2982177734375, "eval_loss": 0.5038532018661499, "eval_rewards/accuracies": 0.7369999885559082, "eval_rewards/chosen": -1.9912235736846924, "eval_rewards/margins": 0.9559467434883118, "eval_rewards/rejected": -2.9471704959869385, "eval_runtime": 1197.7441, "eval_samples_per_second": 1.67, "eval_steps_per_second": 0.835, "step": 5400 }, { "epoch": 0.71, "learning_rate": 1.1901519813315495e-06, "logits/chosen": -0.39034947752952576, "logits/rejected": 0.27100151777267456, "logps/chosen": -449.6952209472656, "logps/rejected": -528.5842895507812, "loss": 0.4638, "rewards/accuracies": 0.75, "rewards/chosen": -1.9907958507537842, "rewards/margins": 1.0050820112228394, "rewards/rejected": -2.995877742767334, "step": 5410 }, { "epoch": 0.71, "learning_rate": 1.1804366679505798e-06, "logits/chosen": -0.8501359820365906, "logits/rejected": 0.513168215751648, "logps/chosen": -513.7134399414062, "logps/rejected": -545.6766967773438, "loss": 0.5751, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.186096668243408, "rewards/margins": 0.8673110008239746, "rewards/rejected": -3.053407907485962, "step": 5420 }, { "epoch": 0.71, "learning_rate": 1.1707489005029877e-06, "logits/chosen": -0.03637596219778061, "logits/rejected": 0.2914156913757324, "logps/chosen": -467.412353515625, "logps/rejected": -572.0277099609375, "loss": 0.4408, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0735740661621094, "rewards/margins": 1.2480766773223877, "rewards/rejected": -3.321650743484497, "step": 5430 }, { "epoch": 0.71, "learning_rate": 1.1610888812212749e-06, "logits/chosen": -0.569449245929718, "logits/rejected": 0.4190862774848938, "logps/chosen": -465.05218505859375, "logps/rejected": -561.7803955078125, "loss": 0.4018, "rewards/accuracies": 0.8125, "rewards/chosen": -1.994209885597229, "rewards/margins": 1.1598801612854004, "rewards/rejected": -3.1540896892547607, "step": 5440 }, { "epoch": 0.71, "learning_rate": 1.1514568117587035e-06, "logits/chosen": -0.14006878435611725, "logits/rejected": -0.16314168274402618, "logps/chosen": -490.553955078125, "logps/rejected": -563.6068115234375, "loss": 0.5097, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2165277004241943, "rewards/margins": 0.8935906291007996, "rewards/rejected": -3.1101181507110596, "step": 5450 }, { "epoch": 0.71, "learning_rate": 1.1418528931850781e-06, "logits/chosen": -0.2969356179237366, "logits/rejected": 0.7354004979133606, "logps/chosen": -481.17120361328125, "logps/rejected": -559.3109130859375, "loss": 0.4667, "rewards/accuracies": 0.75, "rewards/chosen": -2.0389533042907715, "rewards/margins": 1.261207103729248, "rewards/rejected": -3.3001601696014404, "step": 5460 }, { "epoch": 0.72, "learning_rate": 1.1322773259825563e-06, "logits/chosen": -0.7932473421096802, "logits/rejected": 0.7015919089317322, "logps/chosen": -505.8838806152344, "logps/rejected": -538.8965454101562, "loss": 0.4926, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2662813663482666, "rewards/margins": 1.0218729972839355, "rewards/rejected": -3.288154125213623, "step": 5470 }, { "epoch": 0.72, "learning_rate": 1.1227303100414552e-06, "logits/chosen": -0.3481768071651459, "logits/rejected": 0.19559481739997864, "logps/chosen": -453.82781982421875, "logps/rejected": -582.0299682617188, "loss": 0.4953, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.3037056922912598, "rewards/margins": 1.0858337879180908, "rewards/rejected": -3.3895392417907715, "step": 5480 }, { "epoch": 0.72, "learning_rate": 1.113212044656087e-06, "logits/chosen": -0.41049233078956604, "logits/rejected": 0.1941242218017578, "logps/chosen": -451.33038330078125, "logps/rejected": -546.5862426757812, "loss": 0.542, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.117616653442383, "rewards/margins": 0.8589200973510742, "rewards/rejected": -2.976536750793457, "step": 5490 }, { "epoch": 0.72, "learning_rate": 1.1037227285205951e-06, "logits/chosen": 0.08584056794643402, "logits/rejected": -0.11161471903324127, "logps/chosen": -493.79400634765625, "logps/rejected": -588.3775634765625, "loss": 0.5869, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.3220810890197754, "rewards/margins": 0.9301006197929382, "rewards/rejected": -3.2521815299987793, "step": 5500 }, { "epoch": 0.72, "eval_logits/chosen": 1.1574229001998901, "eval_logits/rejected": 1.886451244354248, "eval_logps/chosen": -485.5281066894531, "eval_logps/rejected": -564.9520874023438, "eval_loss": 0.5054470896720886, "eval_rewards/accuracies": 0.7360000014305115, "eval_rewards/chosen": -2.1703803539276123, "eval_rewards/margins": 0.9933285117149353, "eval_rewards/rejected": -3.1637091636657715, "eval_runtime": 1194.4996, "eval_samples_per_second": 1.674, "eval_steps_per_second": 0.837, "step": 5500 }, { "epoch": 0.72, "learning_rate": 1.0942625597248028e-06, "logits/chosen": -0.8230582475662231, "logits/rejected": 0.5852819681167603, "logps/chosen": -475.0633239746094, "logps/rejected": -559.2849731445312, "loss": 0.5008, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2042269706726074, "rewards/margins": 1.2232129573822021, "rewards/rejected": -3.4274401664733887, "step": 5510 }, { "epoch": 0.72, "learning_rate": 1.0848317357500854e-06, "logits/chosen": -0.4844978451728821, "logits/rejected": 0.25016266107559204, "logps/chosen": -517.09814453125, "logps/rejected": -525.1922607421875, "loss": 0.5246, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2888691425323486, "rewards/margins": 0.7657750248908997, "rewards/rejected": -3.0546441078186035, "step": 5520 }, { "epoch": 0.72, "learning_rate": 1.0754304534652404e-06, "logits/chosen": -0.05556102842092514, "logits/rejected": -0.6705011129379272, "logps/chosen": -480.46502685546875, "logps/rejected": -580.5036010742188, "loss": 0.5881, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2701926231384277, "rewards/margins": 0.6363264322280884, "rewards/rejected": -2.9065189361572266, "step": 5530 }, { "epoch": 0.72, "learning_rate": 1.0660589091223854e-06, "logits/chosen": -0.6355140209197998, "logits/rejected": -0.15657804906368256, "logps/chosen": -415.057373046875, "logps/rejected": -514.7366943359375, "loss": 0.4432, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.001784563064575, "rewards/margins": 1.1080721616744995, "rewards/rejected": -3.1098568439483643, "step": 5540 }, { "epoch": 0.73, "learning_rate": 1.0567172983528534e-06, "logits/chosen": -0.4708939492702484, "logits/rejected": 0.3893592357635498, "logps/chosen": -413.32177734375, "logps/rejected": -507.1968688964844, "loss": 0.4412, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0215418338775635, "rewards/margins": 0.9789319038391113, "rewards/rejected": -3.000473976135254, "step": 5550 }, { "epoch": 0.73, "learning_rate": 1.0474058161631168e-06, "logits/chosen": -0.2970888018608093, "logits/rejected": -0.11767357587814331, "logps/chosen": -529.619140625, "logps/rejected": -596.6094970703125, "loss": 0.5635, "rewards/accuracies": 0.625, "rewards/chosen": -2.1467816829681396, "rewards/margins": 0.8911802172660828, "rewards/rejected": -3.037961721420288, "step": 5560 }, { "epoch": 0.73, "learning_rate": 1.0381246569307077e-06, "logits/chosen": -0.535216212272644, "logits/rejected": 0.25071626901626587, "logps/chosen": -521.4472045898438, "logps/rejected": -551.7024536132812, "loss": 0.6022, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2295970916748047, "rewards/margins": 0.6778484582901001, "rewards/rejected": -2.9074459075927734, "step": 5570 }, { "epoch": 0.73, "learning_rate": 1.0288740144001722e-06, "logits/chosen": -1.0160603523254395, "logits/rejected": 0.10044028609991074, "logps/chosen": -462.79730224609375, "logps/rejected": -512.1188354492188, "loss": 0.57, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9899721145629883, "rewards/margins": 0.9382216334342957, "rewards/rejected": -2.928194046020508, "step": 5580 }, { "epoch": 0.73, "learning_rate": 1.0196540816790127e-06, "logits/chosen": -0.79753178358078, "logits/rejected": 0.11815383285284042, "logps/chosen": -427.5205078125, "logps/rejected": -464.82562255859375, "loss": 0.5304, "rewards/accuracies": 0.75, "rewards/chosen": -1.933569312095642, "rewards/margins": 0.8004889488220215, "rewards/rejected": -2.7340588569641113, "step": 5590 }, { "epoch": 0.73, "learning_rate": 1.0104650512336679e-06, "logits/chosen": -0.9227102994918823, "logits/rejected": 0.0573890320956707, "logps/chosen": -458.4912109375, "logps/rejected": -493.012939453125, "loss": 0.5924, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8620121479034424, "rewards/margins": 0.8347771763801575, "rewards/rejected": -2.696789264678955, "step": 5600 }, { "epoch": 0.73, "eval_logits/chosen": 0.8215131759643555, "eval_logits/rejected": 1.5324963331222534, "eval_logps/chosen": -450.29345703125, "eval_logps/rejected": -527.0138549804688, "eval_loss": 0.5064495801925659, "eval_rewards/accuracies": 0.7319999933242798, "eval_rewards/chosen": -1.8180345296859741, "eval_rewards/margins": 0.9662933349609375, "eval_rewards/rejected": -2.784327745437622, "eval_runtime": 1192.2692, "eval_samples_per_second": 1.677, "eval_steps_per_second": 0.839, "step": 5600 }, { "epoch": 0.73, "learning_rate": 1.0013071148854861e-06, "logits/chosen": -0.46502774953842163, "logits/rejected": -0.10884647071361542, "logps/chosen": -406.987060546875, "logps/rejected": -543.9107055664062, "loss": 0.4149, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.725001573562622, "rewards/margins": 1.3317546844482422, "rewards/rejected": -3.0567562580108643, "step": 5610 }, { "epoch": 0.74, "learning_rate": 9.921804638067292e-07, "logits/chosen": -0.9193164110183716, "logits/rejected": 0.3256033957004547, "logps/chosen": -473.353759765625, "logps/rejected": -545.8274536132812, "loss": 0.4718, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0329298973083496, "rewards/margins": 1.113075852394104, "rewards/rejected": -3.146005868911743, "step": 5620 }, { "epoch": 0.74, "learning_rate": 9.830852885165749e-07, "logits/chosen": -0.1020597368478775, "logits/rejected": -0.6451666355133057, "logps/chosen": -406.77081298828125, "logps/rejected": -512.661865234375, "loss": 0.5619, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9078305959701538, "rewards/margins": 0.6681450605392456, "rewards/rejected": -2.5759758949279785, "step": 5630 }, { "epoch": 0.74, "learning_rate": 9.740217788771453e-07, "logits/chosen": -0.8085994720458984, "logits/rejected": 0.037676215171813965, "logps/chosen": -455.20208740234375, "logps/rejected": -509.16680908203125, "loss": 0.4926, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.7756178379058838, "rewards/margins": 0.8752728700637817, "rewards/rejected": -2.650890588760376, "step": 5640 }, { "epoch": 0.74, "learning_rate": 9.649901240895374e-07, "logits/chosen": -0.11386583000421524, "logits/rejected": 0.1979323923587799, "logps/chosen": -440.43438720703125, "logps/rejected": -523.9313354492188, "loss": 0.5482, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9702411890029907, "rewards/margins": 0.900561511516571, "rewards/rejected": -2.870802640914917, "step": 5650 }, { "epoch": 0.74, "learning_rate": 9.559905126898803e-07, "logits/chosen": -1.057208776473999, "logits/rejected": 0.41706523299217224, "logps/chosen": -467.60906982421875, "logps/rejected": -559.2456665039062, "loss": 0.3784, "rewards/accuracies": 0.875, "rewards/chosen": -1.945220947265625, "rewards/margins": 1.2823781967163086, "rewards/rejected": -3.2275986671447754, "step": 5660 }, { "epoch": 0.74, "learning_rate": 9.470231325453958e-07, "logits/chosen": -0.6564953923225403, "logits/rejected": 0.41940754652023315, "logps/chosen": -467.0869140625, "logps/rejected": -520.7767944335938, "loss": 0.5324, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.068354368209839, "rewards/margins": 0.8983513116836548, "rewards/rejected": -2.966705799102783, "step": 5670 }, { "epoch": 0.74, "learning_rate": 9.380881708504741e-07, "logits/chosen": -0.29646626114845276, "logits/rejected": 0.6381570100784302, "logps/chosen": -402.63690185546875, "logps/rejected": -467.7684020996094, "loss": 0.4885, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7983324527740479, "rewards/margins": 0.9997227787971497, "rewards/rejected": -2.798055410385132, "step": 5680 }, { "epoch": 0.74, "learning_rate": 9.291858141227733e-07, "logits/chosen": -0.41397857666015625, "logits/rejected": -0.3768884539604187, "logps/chosen": -433.42816162109375, "logps/rejected": -586.0889892578125, "loss": 0.4259, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8530666828155518, "rewards/margins": 1.3527576923370361, "rewards/rejected": -3.205824375152588, "step": 5690 }, { "epoch": 0.75, "learning_rate": 9.203162481993175e-07, "logits/chosen": -0.9961411356925964, "logits/rejected": -0.5173764228820801, "logps/chosen": -484.4798278808594, "logps/rejected": -600.5260009765625, "loss": 0.4275, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.856370210647583, "rewards/margins": 1.2594194412231445, "rewards/rejected": -3.1157894134521484, "step": 5700 }, { "epoch": 0.75, "eval_logits/chosen": 0.995962917804718, "eval_logits/rejected": 1.7229074239730835, "eval_logps/chosen": -469.19317626953125, "eval_logps/rejected": -549.8818969726562, "eval_loss": 0.5055263042449951, "eval_rewards/accuracies": 0.734000027179718, "eval_rewards/chosen": -2.0070316791534424, "eval_rewards/margins": 1.0059758424758911, "eval_rewards/rejected": -3.013007402420044, "eval_runtime": 1227.9162, "eval_samples_per_second": 1.629, "eval_steps_per_second": 0.814, "step": 5700 }, { "epoch": 0.75, "learning_rate": 9.114796582326255e-07, "logits/chosen": -1.0405280590057373, "logits/rejected": 0.4770817756652832, "logps/chosen": -472.912109375, "logps/rejected": -529.1992797851562, "loss": 0.5751, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.202134609222412, "rewards/margins": 0.855462908744812, "rewards/rejected": -3.0575973987579346, "step": 5710 }, { "epoch": 0.75, "learning_rate": 9.026762286868373e-07, "logits/chosen": -0.797855794429779, "logits/rejected": -0.14750805497169495, "logps/chosen": -454.8798828125, "logps/rejected": -599.2950439453125, "loss": 0.418, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9340975284576416, "rewards/margins": 1.3620237112045288, "rewards/rejected": -3.2961208820343018, "step": 5720 }, { "epoch": 0.75, "learning_rate": 8.939061433338722e-07, "logits/chosen": -0.7406786680221558, "logits/rejected": -0.19209416210651398, "logps/chosen": -473.05560302734375, "logps/rejected": -562.7134399414062, "loss": 0.5286, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0807507038116455, "rewards/margins": 0.8903197050094604, "rewards/rejected": -2.9710705280303955, "step": 5730 }, { "epoch": 0.75, "learning_rate": 8.851695852495867e-07, "logits/chosen": -0.4927915632724762, "logits/rejected": -0.5158900618553162, "logps/chosen": -431.3685607910156, "logps/rejected": -555.056884765625, "loss": 0.5171, "rewards/accuracies": 0.75, "rewards/chosen": -2.0437633991241455, "rewards/margins": 1.1984370946884155, "rewards/rejected": -3.2422003746032715, "step": 5740 }, { "epoch": 0.75, "learning_rate": 8.764667368099525e-07, "logits/chosen": -0.2646760046482086, "logits/rejected": 0.0725071132183075, "logps/chosen": -458.35369873046875, "logps/rejected": -541.1822509765625, "loss": 0.5088, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1217246055603027, "rewards/margins": 1.0628745555877686, "rewards/rejected": -3.184598922729492, "step": 5750 }, { "epoch": 0.75, "learning_rate": 8.677977796872541e-07, "logits/chosen": -0.7068579792976379, "logits/rejected": 0.5747413039207458, "logps/chosen": -504.84881591796875, "logps/rejected": -543.8521728515625, "loss": 0.5338, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.211315870285034, "rewards/margins": 1.0215401649475098, "rewards/rejected": -3.232856273651123, "step": 5760 }, { "epoch": 0.76, "learning_rate": 8.591628948462913e-07, "logits/chosen": -0.13534298539161682, "logits/rejected": 0.21535761654376984, "logps/chosen": -481.2478942871094, "logps/rejected": -595.2032470703125, "loss": 0.4979, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.09712553024292, "rewards/margins": 1.1030064821243286, "rewards/rejected": -3.200131893157959, "step": 5770 }, { "epoch": 0.76, "learning_rate": 8.505622625406054e-07, "logits/chosen": -0.13635878264904022, "logits/rejected": -0.033559300005435944, "logps/chosen": -466.84637451171875, "logps/rejected": -594.5919189453125, "loss": 0.4774, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.1044180393218994, "rewards/margins": 1.287061095237732, "rewards/rejected": -3.391479015350342, "step": 5780 }, { "epoch": 0.76, "learning_rate": 8.419960623087129e-07, "logits/chosen": 0.0363604798913002, "logits/rejected": 0.037777043879032135, "logps/chosen": -419.9930114746094, "logps/rejected": -560.1504516601562, "loss": 0.5144, "rewards/accuracies": 0.75, "rewards/chosen": -2.1091957092285156, "rewards/margins": 1.1116769313812256, "rewards/rejected": -3.220872402191162, "step": 5790 }, { "epoch": 0.76, "learning_rate": 8.334644729703617e-07, "logits/chosen": -0.32428237795829773, "logits/rejected": -0.13345181941986084, "logps/chosen": -442.7701721191406, "logps/rejected": -567.7952270507812, "loss": 0.4746, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.127009391784668, "rewards/margins": 1.1997586488723755, "rewards/rejected": -3.326767683029175, "step": 5800 }, { "epoch": 0.76, "eval_logits/chosen": 1.1167664527893066, "eval_logits/rejected": 1.8506929874420166, "eval_logps/chosen": -489.1824951171875, "eval_logps/rejected": -573.2806396484375, "eval_loss": 0.5072213411331177, "eval_rewards/accuracies": 0.7300000190734863, "eval_rewards/chosen": -2.2069249153137207, "eval_rewards/margins": 1.0400696992874146, "eval_rewards/rejected": -3.2469944953918457, "eval_runtime": 1256.1768, "eval_samples_per_second": 1.592, "eval_steps_per_second": 0.796, "step": 5800 }, { "epoch": 0.76, "learning_rate": 8.249676726227931e-07, "logits/chosen": -0.2723647356033325, "logits/rejected": 0.3790988326072693, "logps/chosen": -546.9454345703125, "logps/rejected": -591.0533447265625, "loss": 0.5515, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.456743001937866, "rewards/margins": 0.8524373173713684, "rewards/rejected": -3.3091800212860107, "step": 5810 }, { "epoch": 0.76, "learning_rate": 8.165058386370314e-07, "logits/chosen": -0.2161664515733719, "logits/rejected": 0.19322898983955383, "logps/chosen": -473.7935485839844, "logps/rejected": -592.0934448242188, "loss": 0.488, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.100954055786133, "rewards/margins": 1.0307704210281372, "rewards/rejected": -3.1317245960235596, "step": 5820 }, { "epoch": 0.76, "learning_rate": 8.080791476541721e-07, "logits/chosen": -0.2463117390871048, "logits/rejected": 0.10424462705850601, "logps/chosen": -435.74786376953125, "logps/rejected": -540.4898681640625, "loss": 0.4526, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9524482488632202, "rewards/margins": 1.2612793445587158, "rewards/rejected": -3.2137274742126465, "step": 5830 }, { "epoch": 0.76, "learning_rate": 7.996877755817026e-07, "logits/chosen": -0.7921704053878784, "logits/rejected": 0.26169005036354065, "logps/chosen": -465.552490234375, "logps/rejected": -492.36956787109375, "loss": 0.5762, "rewards/accuracies": 0.6875, "rewards/chosen": -2.133111000061035, "rewards/margins": 0.5900529623031616, "rewards/rejected": -2.7231638431549072, "step": 5840 }, { "epoch": 0.77, "learning_rate": 7.913318975898238e-07, "logits/chosen": -0.8258234858512878, "logits/rejected": 0.3698134422302246, "logps/chosen": -555.0818481445312, "logps/rejected": -597.4561767578125, "loss": 0.5686, "rewards/accuracies": 0.75, "rewards/chosen": -2.23042631149292, "rewards/margins": 1.0009479522705078, "rewards/rejected": -3.2313742637634277, "step": 5850 }, { "epoch": 0.77, "learning_rate": 7.830116881077992e-07, "logits/chosen": -0.42663684487342834, "logits/rejected": 0.7298521399497986, "logps/chosen": -476.6568298339844, "logps/rejected": -559.68310546875, "loss": 0.4522, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9448516368865967, "rewards/margins": 1.070634126663208, "rewards/rejected": -3.015486001968384, "step": 5860 }, { "epoch": 0.77, "learning_rate": 7.747273208203096e-07, "logits/chosen": -0.327279657125473, "logits/rejected": 0.10556366294622421, "logps/chosen": -494.0747985839844, "logps/rejected": -612.4318237304688, "loss": 0.498, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2703728675842285, "rewards/margins": 1.0807685852050781, "rewards/rejected": -3.3511414527893066, "step": 5870 }, { "epoch": 0.77, "learning_rate": 7.664789686638272e-07, "logits/chosen": -0.753741443157196, "logits/rejected": 0.26916906237602234, "logps/chosen": -444.783447265625, "logps/rejected": -573.6693115234375, "loss": 0.4559, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.905149221420288, "rewards/margins": 1.2002140283584595, "rewards/rejected": -3.105363130569458, "step": 5880 }, { "epoch": 0.77, "learning_rate": 7.582668038230089e-07, "logits/chosen": -0.6280697584152222, "logits/rejected": 0.23410716652870178, "logps/chosen": -483.71844482421875, "logps/rejected": -548.9215087890625, "loss": 0.6371, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.051710367202759, "rewards/margins": 0.8779205083847046, "rewards/rejected": -2.929630756378174, "step": 5890 }, { "epoch": 0.77, "learning_rate": 7.500909977271007e-07, "logits/chosen": -0.55290687084198, "logits/rejected": -0.04884564131498337, "logps/chosen": -483.20465087890625, "logps/rejected": -569.9694213867188, "loss": 0.5033, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9960193634033203, "rewards/margins": 1.1004588603973389, "rewards/rejected": -3.096478223800659, "step": 5900 }, { "epoch": 0.77, "eval_logits/chosen": 0.9675183892250061, "eval_logits/rejected": 1.7070553302764893, "eval_logps/chosen": -458.106201171875, "eval_logps/rejected": -536.0161743164062, "eval_loss": 0.5060694217681885, "eval_rewards/accuracies": 0.7275000214576721, "eval_rewards/chosen": -1.896161675453186, "eval_rewards/margins": 0.9781885147094727, "eval_rewards/rejected": -2.874350070953369, "eval_runtime": 1176.3994, "eval_samples_per_second": 1.7, "eval_steps_per_second": 0.85, "step": 5900 }, { "epoch": 0.77, "learning_rate": 7.41951721046357e-07, "logits/chosen": -0.6494289040565491, "logits/rejected": 0.32405179738998413, "logps/chosen": -428.957275390625, "logps/rejected": -514.9093017578125, "loss": 0.4849, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.763302206993103, "rewards/margins": 0.9204890131950378, "rewards/rejected": -2.683791399002075, "step": 5910 }, { "epoch": 0.77, "learning_rate": 7.338491436884787e-07, "logits/chosen": -0.2216249257326126, "logits/rejected": -0.0639793649315834, "logps/chosen": -431.1812438964844, "logps/rejected": -554.4732666015625, "loss": 0.4382, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.016435146331787, "rewards/margins": 1.169579267501831, "rewards/rejected": -3.1860146522521973, "step": 5920 }, { "epoch": 0.78, "learning_rate": 7.257834347950693e-07, "logits/chosen": -0.5539714097976685, "logits/rejected": 0.5835639238357544, "logps/chosen": -446.4956970214844, "logps/rejected": -492.2032165527344, "loss": 0.5289, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8876628875732422, "rewards/margins": 0.7628009915351868, "rewards/rejected": -2.650463819503784, "step": 5930 }, { "epoch": 0.78, "learning_rate": 7.177547627380987e-07, "logits/chosen": -0.4449450373649597, "logits/rejected": -0.001817166805267334, "logps/chosen": -462.5804748535156, "logps/rejected": -559.3094482421875, "loss": 0.4137, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7668288946151733, "rewards/margins": 1.125380277633667, "rewards/rejected": -2.89220929145813, "step": 5940 }, { "epoch": 0.78, "learning_rate": 7.097632951163949e-07, "logits/chosen": -0.49515801668167114, "logits/rejected": 0.028278637677431107, "logps/chosen": -472.94647216796875, "logps/rejected": -543.9949340820312, "loss": 0.5109, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7570574283599854, "rewards/margins": 1.0789839029312134, "rewards/rejected": -2.8360414505004883, "step": 5950 }, { "epoch": 0.78, "learning_rate": 7.018091987521386e-07, "logits/chosen": -0.8645265698432922, "logits/rejected": 0.3468485176563263, "logps/chosen": -471.080810546875, "logps/rejected": -537.1405029296875, "loss": 0.5746, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.022472381591797, "rewards/margins": 0.8672344088554382, "rewards/rejected": -2.889706611633301, "step": 5960 }, { "epoch": 0.78, "learning_rate": 6.93892639687386e-07, "logits/chosen": -0.655937671661377, "logits/rejected": -0.05244150012731552, "logps/chosen": -465.4962463378906, "logps/rejected": -507.5498046875, "loss": 0.5212, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7037566900253296, "rewards/margins": 0.9643377065658569, "rewards/rejected": -2.6680946350097656, "step": 5970 }, { "epoch": 0.78, "learning_rate": 6.860137831806018e-07, "logits/chosen": -0.5556502938270569, "logits/rejected": -0.3047857880592346, "logps/chosen": -465.64697265625, "logps/rejected": -526.3079223632812, "loss": 0.5657, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8184341192245483, "rewards/margins": 0.9852198362350464, "rewards/rejected": -2.8036537170410156, "step": 5980 }, { "epoch": 0.78, "learning_rate": 6.781727937032054e-07, "logits/chosen": -0.3701472878456116, "logits/rejected": 0.17485050857067108, "logps/chosen": -410.296630859375, "logps/rejected": -538.2269897460938, "loss": 0.3979, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5407458543777466, "rewards/margins": 1.3232797384262085, "rewards/rejected": -2.864025354385376, "step": 5990 }, { "epoch": 0.79, "learning_rate": 6.703698349361437e-07, "logits/chosen": -0.6748332977294922, "logits/rejected": 0.417569637298584, "logps/chosen": -427.6536560058594, "logps/rejected": -490.7718200683594, "loss": 0.4517, "rewards/accuracies": 0.75, "rewards/chosen": -1.7474530935287476, "rewards/margins": 0.9970604777336121, "rewards/rejected": -2.744513988494873, "step": 6000 }, { "epoch": 0.79, "eval_logits/chosen": 0.8156449198722839, "eval_logits/rejected": 1.5612982511520386, "eval_logps/chosen": -441.7278747558594, "eval_logps/rejected": -516.7131958007812, "eval_loss": 0.5105239152908325, "eval_rewards/accuracies": 0.7264999747276306, "eval_rewards/chosen": -1.7323784828186035, "eval_rewards/margins": 0.9489423632621765, "eval_rewards/rejected": -2.6813206672668457, "eval_runtime": 1173.428, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.852, "step": 6000 }, { "epoch": 0.79, "learning_rate": 6.626050697664682e-07, "logits/chosen": -0.5815194249153137, "logits/rejected": 0.18539538979530334, "logps/chosen": -448.04583740234375, "logps/rejected": -505.4095153808594, "loss": 0.4312, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7882267236709595, "rewards/margins": 1.0168880224227905, "rewards/rejected": -2.80511474609375, "step": 6010 }, { "epoch": 0.79, "learning_rate": 6.548786602839404e-07, "logits/chosen": -0.5067921876907349, "logits/rejected": 0.06481216102838516, "logps/chosen": -400.51556396484375, "logps/rejected": -506.417236328125, "loss": 0.4459, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6396009922027588, "rewards/margins": 1.3462693691253662, "rewards/rejected": -2.985870361328125, "step": 6020 }, { "epoch": 0.79, "learning_rate": 6.471907677776426e-07, "logits/chosen": -0.8723716735839844, "logits/rejected": 0.1587451696395874, "logps/chosen": -461.92376708984375, "logps/rejected": -495.30029296875, "loss": 0.5527, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7867259979248047, "rewards/margins": 0.7085933685302734, "rewards/rejected": -2.495319366455078, "step": 6030 }, { "epoch": 0.79, "learning_rate": 6.39541552732617e-07, "logits/chosen": -0.41061049699783325, "logits/rejected": -0.26837730407714844, "logps/chosen": -455.27044677734375, "logps/rejected": -573.69091796875, "loss": 0.5308, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.901277780532837, "rewards/margins": 0.9423490762710571, "rewards/rejected": -2.8436269760131836, "step": 6040 }, { "epoch": 0.79, "learning_rate": 6.319311748265086e-07, "logits/chosen": -0.3957150876522064, "logits/rejected": 0.8088628053665161, "logps/chosen": -577.0421752929688, "logps/rejected": -617.0074462890625, "loss": 0.5536, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2539684772491455, "rewards/margins": 1.012986660003662, "rewards/rejected": -3.2669551372528076, "step": 6050 }, { "epoch": 0.79, "learning_rate": 6.243597929262404e-07, "logits/chosen": -0.17666508257389069, "logits/rejected": 0.17739292979240417, "logps/chosen": -379.2519836425781, "logps/rejected": -559.8756103515625, "loss": 0.492, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7480781078338623, "rewards/margins": 1.3037656545639038, "rewards/rejected": -3.0518438816070557, "step": 6060 }, { "epoch": 0.79, "learning_rate": 6.168275650846875e-07, "logits/chosen": -0.7394101619720459, "logits/rejected": -0.0025915740989148617, "logps/chosen": -452.69830322265625, "logps/rejected": -493.15216064453125, "loss": 0.5109, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.5727591514587402, "rewards/margins": 0.9079796671867371, "rewards/rejected": -2.480738878250122, "step": 6070 }, { "epoch": 0.8, "learning_rate": 6.093346485373863e-07, "logits/chosen": -0.5971829295158386, "logits/rejected": 0.7547621130943298, "logps/chosen": -476.29693603515625, "logps/rejected": -533.7948608398438, "loss": 0.5113, "rewards/accuracies": 0.75, "rewards/chosen": -1.9149093627929688, "rewards/margins": 0.8861688375473022, "rewards/rejected": -2.8010783195495605, "step": 6080 }, { "epoch": 0.8, "learning_rate": 6.018811996992455e-07, "logits/chosen": -0.6492999792098999, "logits/rejected": 0.5133501291275024, "logps/chosen": -446.62933349609375, "logps/rejected": -528.8438110351562, "loss": 0.3894, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6136525869369507, "rewards/margins": 1.323101282119751, "rewards/rejected": -2.936753749847412, "step": 6090 }, { "epoch": 0.8, "learning_rate": 5.944673741612866e-07, "logits/chosen": -0.5191225409507751, "logits/rejected": -0.2703114449977875, "logps/chosen": -488.6419372558594, "logps/rejected": -571.1328125, "loss": 0.5071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.037747859954834, "rewards/margins": 0.8893720507621765, "rewards/rejected": -2.9271202087402344, "step": 6100 }, { "epoch": 0.8, "eval_logits/chosen": 0.9369565844535828, "eval_logits/rejected": 1.6894502639770508, "eval_logps/chosen": -454.8271789550781, "eval_logps/rejected": -534.7506103515625, "eval_loss": 0.5116304159164429, "eval_rewards/accuracies": 0.7275000214576721, "eval_rewards/chosen": -1.8633716106414795, "eval_rewards/margins": 0.9983232617378235, "eval_rewards/rejected": -2.861694574356079, "eval_runtime": 1179.4032, "eval_samples_per_second": 1.696, "eval_steps_per_second": 0.848, "step": 6100 }, { "epoch": 0.8, "learning_rate": 5.870933266873916e-07, "logits/chosen": -0.28973495960235596, "logits/rejected": 0.016529571264982224, "logps/chosen": -409.6407775878906, "logps/rejected": -498.581787109375, "loss": 0.5488, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7589409351348877, "rewards/margins": 0.8432859182357788, "rewards/rejected": -2.602226734161377, "step": 6110 }, { "epoch": 0.8, "learning_rate": 5.797592112110734e-07, "logits/chosen": -0.19786319136619568, "logits/rejected": 0.2796550989151001, "logps/chosen": -398.9677734375, "logps/rejected": -460.76220703125, "loss": 0.5853, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8883498907089233, "rewards/margins": 0.7973678708076477, "rewards/rejected": -2.685717821121216, "step": 6120 }, { "epoch": 0.8, "learning_rate": 5.724651808322645e-07, "logits/chosen": -0.3768353760242462, "logits/rejected": 0.02244797721505165, "logps/chosen": -426.26446533203125, "logps/rejected": -552.0750122070312, "loss": 0.4562, "rewards/accuracies": 0.75, "rewards/chosen": -1.7751553058624268, "rewards/margins": 1.0461262464523315, "rewards/rejected": -2.821281671524048, "step": 6130 }, { "epoch": 0.8, "learning_rate": 5.652113878141194e-07, "logits/chosen": -0.6332122087478638, "logits/rejected": 0.4572317600250244, "logps/chosen": -385.82476806640625, "logps/rejected": -482.82281494140625, "loss": 0.5261, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.835436463356018, "rewards/margins": 0.9859844446182251, "rewards/rejected": -2.821420669555664, "step": 6140 }, { "epoch": 0.8, "learning_rate": 5.579979835798361e-07, "logits/chosen": -0.4648275375366211, "logits/rejected": 0.25945791602134705, "logps/chosen": -436.0567321777344, "logps/rejected": -553.8167114257812, "loss": 0.4439, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9036706686019897, "rewards/margins": 1.2614033222198486, "rewards/rejected": -3.165074110031128, "step": 6150 }, { "epoch": 0.81, "learning_rate": 5.508251187094932e-07, "logits/chosen": -0.7267307043075562, "logits/rejected": 0.3853886127471924, "logps/chosen": -506.763427734375, "logps/rejected": -537.0343017578125, "loss": 0.6427, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1635117530822754, "rewards/margins": 0.7928264737129211, "rewards/rejected": -2.9563381671905518, "step": 6160 }, { "epoch": 0.81, "learning_rate": 5.436929429369122e-07, "logits/chosen": -0.4642343521118164, "logits/rejected": 0.12719163298606873, "logps/chosen": -440.3096618652344, "logps/rejected": -517.3118286132812, "loss": 0.5682, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9598861932754517, "rewards/margins": 0.9272395968437195, "rewards/rejected": -2.8871257305145264, "step": 6170 }, { "epoch": 0.81, "learning_rate": 5.366016051465245e-07, "logits/chosen": -0.5060332417488098, "logits/rejected": 0.35478395223617554, "logps/chosen": -435.00872802734375, "logps/rejected": -564.3742065429688, "loss": 0.4017, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.828107476234436, "rewards/margins": 1.3373243808746338, "rewards/rejected": -3.1654322147369385, "step": 6180 }, { "epoch": 0.81, "learning_rate": 5.295512533702701e-07, "logits/chosen": -0.12531735002994537, "logits/rejected": 0.5097149014472961, "logps/chosen": -410.29974365234375, "logps/rejected": -509.9228515625, "loss": 0.532, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8042405843734741, "rewards/margins": 1.0363777875900269, "rewards/rejected": -2.840618371963501, "step": 6190 }, { "epoch": 0.81, "learning_rate": 5.225420347845023e-07, "logits/chosen": -0.7511438131332397, "logits/rejected": 0.019868087023496628, "logps/chosen": -490.9335021972656, "logps/rejected": -552.3634033203125, "loss": 0.6455, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0654990673065186, "rewards/margins": 0.7708456516265869, "rewards/rejected": -2.8363449573516846, "step": 6200 }, { "epoch": 0.81, "eval_logits/chosen": 0.9542487263679504, "eval_logits/rejected": 1.7120394706726074, "eval_logps/chosen": -456.4508056640625, "eval_logps/rejected": -536.0125732421875, "eval_loss": 0.5110178589820862, "eval_rewards/accuracies": 0.7250000238418579, "eval_rewards/chosen": -1.8796085119247437, "eval_rewards/margins": 0.994705319404602, "eval_rewards/rejected": -2.8743135929107666, "eval_runtime": 1204.1855, "eval_samples_per_second": 1.661, "eval_steps_per_second": 0.83, "step": 6200 }, { "epoch": 0.81, "learning_rate": 5.155740957069186e-07, "logits/chosen": -0.6087840795516968, "logits/rejected": 0.2238648384809494, "logps/chosen": -457.75323486328125, "logps/rejected": -550.1719970703125, "loss": 0.4296, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8891584873199463, "rewards/margins": 1.2166895866394043, "rewards/rejected": -3.1058480739593506, "step": 6210 }, { "epoch": 0.81, "learning_rate": 5.08647581593506e-07, "logits/chosen": -0.24337653815746307, "logits/rejected": 0.19924645125865936, "logps/chosen": -430.85003662109375, "logps/rejected": -523.4949340820312, "loss": 0.4476, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.682952880859375, "rewards/margins": 1.0686895847320557, "rewards/rejected": -2.7516424655914307, "step": 6220 }, { "epoch": 0.82, "learning_rate": 5.017626370355014e-07, "logits/chosen": -0.8117885589599609, "logits/rejected": 0.5791851878166199, "logps/chosen": -433.12237548828125, "logps/rejected": -513.7730712890625, "loss": 0.4269, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7210042476654053, "rewards/margins": 1.2424871921539307, "rewards/rejected": -2.963491678237915, "step": 6230 }, { "epoch": 0.82, "learning_rate": 4.949194057563783e-07, "logits/chosen": -0.7460896372795105, "logits/rejected": 0.34127911925315857, "logps/chosen": -460.87713623046875, "logps/rejected": -520.8004150390625, "loss": 0.4998, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9422998428344727, "rewards/margins": 1.0103784799575806, "rewards/rejected": -2.9526784420013428, "step": 6240 }, { "epoch": 0.82, "learning_rate": 4.881180306088418e-07, "logits/chosen": -0.7913476228713989, "logits/rejected": 0.48180437088012695, "logps/chosen": -464.13397216796875, "logps/rejected": -523.867919921875, "loss": 0.4899, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8360408544540405, "rewards/margins": 1.0949831008911133, "rewards/rejected": -2.9310240745544434, "step": 6250 }, { "epoch": 0.82, "learning_rate": 4.813586535718512e-07, "logits/chosen": -0.7295901775360107, "logits/rejected": 1.1600463390350342, "logps/chosen": -491.99139404296875, "logps/rejected": -539.6505126953125, "loss": 0.472, "rewards/accuracies": 0.75, "rewards/chosen": -1.8388845920562744, "rewards/margins": 1.2725069522857666, "rewards/rejected": -3.111391544342041, "step": 6260 }, { "epoch": 0.82, "learning_rate": 4.746414157476506e-07, "logits/chosen": -1.1962846517562866, "logits/rejected": 0.240956112742424, "logps/chosen": -408.2303771972656, "logps/rejected": -522.8681640625, "loss": 0.4429, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7451229095458984, "rewards/margins": 1.4104244709014893, "rewards/rejected": -3.1555473804473877, "step": 6270 }, { "epoch": 0.82, "learning_rate": 4.679664573588294e-07, "logits/chosen": -0.38419827818870544, "logits/rejected": 0.4673822522163391, "logps/chosen": -407.20782470703125, "logps/rejected": -497.8232421875, "loss": 0.4743, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7693662643432617, "rewards/margins": 1.090094804763794, "rewards/rejected": -2.8594608306884766, "step": 6280 }, { "epoch": 0.82, "learning_rate": 4.6133391774538903e-07, "logits/chosen": -0.8859823942184448, "logits/rejected": 0.3551492989063263, "logps/chosen": -481.630615234375, "logps/rejected": -541.5256958007812, "loss": 0.5493, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8950306177139282, "rewards/margins": 1.0568093061447144, "rewards/rejected": -2.9518399238586426, "step": 6290 }, { "epoch": 0.82, "learning_rate": 4.5474393536184214e-07, "logits/chosen": -0.8629859685897827, "logits/rejected": 0.4089199900627136, "logps/chosen": -455.08062744140625, "logps/rejected": -525.6192626953125, "loss": 0.4796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9132658243179321, "rewards/margins": 1.0315920114517212, "rewards/rejected": -2.9448578357696533, "step": 6300 }, { "epoch": 0.82, "eval_logits/chosen": 1.020259976387024, "eval_logits/rejected": 1.7784451246261597, "eval_logps/chosen": -460.98785400390625, "eval_logps/rejected": -543.0519409179688, "eval_loss": 0.5111602544784546, "eval_rewards/accuracies": 0.7260000109672546, "eval_rewards/chosen": -1.924978494644165, "eval_rewards/margins": 1.019729495048523, "eval_rewards/rejected": -2.9447081089019775, "eval_runtime": 1194.9348, "eval_samples_per_second": 1.674, "eval_steps_per_second": 0.837, "step": 6300 }, { "epoch": 0.83, "learning_rate": 4.4819664777431243e-07, "logits/chosen": -0.18434950709342957, "logits/rejected": 0.1741354763507843, "logps/chosen": -412.4391174316406, "logps/rejected": -475.8851013183594, "loss": 0.5578, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9152721166610718, "rewards/margins": 0.8140000104904175, "rewards/rejected": -2.72927188873291, "step": 6310 }, { "epoch": 0.83, "learning_rate": 4.416921916576722e-07, "logits/chosen": -0.6540490388870239, "logits/rejected": 0.48029765486717224, "logps/chosen": -492.7520446777344, "logps/rejected": -564.4111328125, "loss": 0.5657, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.991019606590271, "rewards/margins": 0.8219622373580933, "rewards/rejected": -2.8129818439483643, "step": 6320 }, { "epoch": 0.83, "learning_rate": 4.352307027926828e-07, "logits/chosen": -0.6802606582641602, "logits/rejected": 0.12222106754779816, "logps/chosen": -441.7935485839844, "logps/rejected": -548.2308349609375, "loss": 0.4019, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.7764034271240234, "rewards/margins": 1.371129035949707, "rewards/rejected": -3.1475327014923096, "step": 6330 }, { "epoch": 0.83, "learning_rate": 4.288123160631624e-07, "logits/chosen": 0.02034485712647438, "logits/rejected": 0.16564354300498962, "logps/chosen": -435.69488525390625, "logps/rejected": -519.12060546875, "loss": 0.5531, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9254554510116577, "rewards/margins": 0.9142245054244995, "rewards/rejected": -2.839679718017578, "step": 6340 }, { "epoch": 0.83, "learning_rate": 4.224371654531731e-07, "logits/chosen": -0.42781931161880493, "logits/rejected": 0.401696115732193, "logps/chosen": -449.87701416015625, "logps/rejected": -493.78302001953125, "loss": 0.5935, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9719626903533936, "rewards/margins": 0.7792240381240845, "rewards/rejected": -2.7511868476867676, "step": 6350 }, { "epoch": 0.83, "learning_rate": 4.1610538404421837e-07, "logits/chosen": -0.12257415056228638, "logits/rejected": -0.3362657427787781, "logps/chosen": -416.6104431152344, "logps/rejected": -562.0303344726562, "loss": 0.4317, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.695469617843628, "rewards/margins": 1.2062984704971313, "rewards/rejected": -2.901768207550049, "step": 6360 }, { "epoch": 0.83, "learning_rate": 4.098171040124699e-07, "logits/chosen": -0.9566270112991333, "logits/rejected": 0.6268806457519531, "logps/chosen": -514.7496337890625, "logps/rejected": -540.041748046875, "loss": 0.6099, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.057263135910034, "rewards/margins": 0.8319833874702454, "rewards/rejected": -2.889246702194214, "step": 6370 }, { "epoch": 0.83, "learning_rate": 4.03572456626006e-07, "logits/chosen": -0.10828417539596558, "logits/rejected": -0.13383126258850098, "logps/chosen": -456.984130859375, "logps/rejected": -528.4546508789062, "loss": 0.4909, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.877745270729065, "rewards/margins": 0.9030615091323853, "rewards/rejected": -2.7808070182800293, "step": 6380 }, { "epoch": 0.84, "learning_rate": 3.9737157224207265e-07, "logits/chosen": -0.623379647731781, "logits/rejected": 0.0637415200471878, "logps/chosen": -429.89349365234375, "logps/rejected": -523.8753051757812, "loss": 0.5763, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.002406597137451, "rewards/margins": 0.9444215893745422, "rewards/rejected": -2.9468283653259277, "step": 6390 }, { "epoch": 0.84, "learning_rate": 3.912145803043596e-07, "logits/chosen": -0.6023394465446472, "logits/rejected": 0.09874238818883896, "logps/chosen": -470.8539123535156, "logps/rejected": -520.394287109375, "loss": 0.5568, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9863427877426147, "rewards/margins": 0.7905007600784302, "rewards/rejected": -2.776843309402466, "step": 6400 }, { "epoch": 0.84, "eval_logits/chosen": 1.115229845046997, "eval_logits/rejected": 1.8763548135757446, "eval_logps/chosen": -463.8809509277344, "eval_logps/rejected": -545.5327758789062, "eval_loss": 0.5085515379905701, "eval_rewards/accuracies": 0.7275000214576721, "eval_rewards/chosen": -1.953909158706665, "eval_rewards/margins": 1.0156067609786987, "eval_rewards/rejected": -2.969515800476074, "eval_runtime": 1193.0934, "eval_samples_per_second": 1.676, "eval_steps_per_second": 0.838, "step": 6400 }, { "epoch": 0.84, "learning_rate": 3.851016093403023e-07, "logits/chosen": -0.14243006706237793, "logits/rejected": 0.18332967162132263, "logps/chosen": -435.7933044433594, "logps/rejected": -534.3563842773438, "loss": 0.5032, "rewards/accuracies": 0.75, "rewards/chosen": -2.0880608558654785, "rewards/margins": 1.0540637969970703, "rewards/rejected": -3.142124652862549, "step": 6410 }, { "epoch": 0.84, "learning_rate": 3.7903278695839456e-07, "logits/chosen": -0.13955774903297424, "logits/rejected": -0.20281191170215607, "logps/chosen": -465.5072326660156, "logps/rejected": -532.3043212890625, "loss": 0.5635, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9887361526489258, "rewards/margins": 0.9076420068740845, "rewards/rejected": -2.8963780403137207, "step": 6420 }, { "epoch": 0.84, "learning_rate": 3.7300823984552983e-07, "logits/chosen": -0.5505388975143433, "logits/rejected": -0.24104097485542297, "logps/chosen": -418.98516845703125, "logps/rejected": -541.846923828125, "loss": 0.4913, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.963343620300293, "rewards/margins": 1.0439479351043701, "rewards/rejected": -3.007291793823242, "step": 6430 }, { "epoch": 0.84, "learning_rate": 3.670280937643503e-07, "logits/chosen": -0.5132700204849243, "logits/rejected": 0.6271919012069702, "logps/chosen": -444.8404846191406, "logps/rejected": -527.2811279296875, "loss": 0.4893, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.886971116065979, "rewards/margins": 1.1429336071014404, "rewards/rejected": -3.029904842376709, "step": 6440 }, { "epoch": 0.84, "learning_rate": 3.610924735506274e-07, "logits/chosen": -0.8992152214050293, "logits/rejected": 0.8895284533500671, "logps/chosen": -502.09051513671875, "logps/rejected": -532.7025146484375, "loss": 0.5641, "rewards/accuracies": 0.75, "rewards/chosen": -2.00895094871521, "rewards/margins": 0.9643281102180481, "rewards/rejected": -2.9732792377471924, "step": 6450 }, { "epoch": 0.85, "learning_rate": 3.5520150311065316e-07, "logits/chosen": -0.26017525792121887, "logits/rejected": 0.5120875239372253, "logps/chosen": -459.91436767578125, "logps/rejected": -549.1633911132812, "loss": 0.4416, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8477706909179688, "rewards/margins": 1.1013177633285522, "rewards/rejected": -2.9490883350372314, "step": 6460 }, { "epoch": 0.85, "learning_rate": 3.493553054186527e-07, "logits/chosen": -0.6854721307754517, "logits/rejected": 0.3193429112434387, "logps/chosen": -469.05230712890625, "logps/rejected": -552.3963623046875, "loss": 0.5335, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.117194652557373, "rewards/margins": 0.8773033022880554, "rewards/rejected": -2.994497776031494, "step": 6470 }, { "epoch": 0.85, "learning_rate": 3.4355400251421977e-07, "logits/chosen": -0.15283913910388947, "logits/rejected": 0.5103310346603394, "logps/chosen": -456.74993896484375, "logps/rejected": -499.0576171875, "loss": 0.6538, "rewards/accuracies": 0.6875, "rewards/chosen": -2.07171368598938, "rewards/margins": 0.6756519079208374, "rewards/rejected": -2.747365713119507, "step": 6480 }, { "epoch": 0.85, "learning_rate": 3.3779771549976637e-07, "logits/chosen": -0.47592344880104065, "logits/rejected": 0.43167513608932495, "logps/chosen": -438.9248962402344, "logps/rejected": -505.8505859375, "loss": 0.5395, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9432265758514404, "rewards/margins": 0.8998059034347534, "rewards/rejected": -2.8430323600769043, "step": 6490 }, { "epoch": 0.85, "learning_rate": 3.3208656453799783e-07, "logits/chosen": -0.7136383056640625, "logits/rejected": 0.3548789918422699, "logps/chosen": -434.717529296875, "logps/rejected": -514.3789672851562, "loss": 0.4335, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8354284763336182, "rewards/margins": 1.087013840675354, "rewards/rejected": -2.9224421977996826, "step": 6500 }, { "epoch": 0.85, "eval_logits/chosen": 1.1822218894958496, "eval_logits/rejected": 1.9424830675125122, "eval_logps/chosen": -468.96807861328125, "eval_logps/rejected": -550.4982299804688, "eval_loss": 0.5067179203033447, "eval_rewards/accuracies": 0.7294999957084656, "eval_rewards/chosen": -2.0047807693481445, "eval_rewards/margins": 1.014390230178833, "eval_rewards/rejected": -3.0191712379455566, "eval_runtime": 1181.0567, "eval_samples_per_second": 1.693, "eval_steps_per_second": 0.847, "step": 6500 }, { "epoch": 0.85, "learning_rate": 3.2642066884940064e-07, "logits/chosen": -0.5502122640609741, "logits/rejected": 0.032435137778520584, "logps/chosen": -474.4972229003906, "logps/rejected": -589.9862670898438, "loss": 0.594, "rewards/accuracies": 0.75, "rewards/chosen": -2.066648006439209, "rewards/margins": 1.129963755607605, "rewards/rejected": -3.1966123580932617, "step": 6510 }, { "epoch": 0.85, "learning_rate": 3.2080014670975825e-07, "logits/chosen": -0.5147031545639038, "logits/rejected": -0.046147845685482025, "logps/chosen": -433.7167053222656, "logps/rejected": -496.712646484375, "loss": 0.5683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8814866542816162, "rewards/margins": 0.8404488563537598, "rewards/rejected": -2.721935510635376, "step": 6520 }, { "epoch": 0.85, "learning_rate": 3.152251154476765e-07, "logits/chosen": -0.5666553378105164, "logits/rejected": 0.2234136164188385, "logps/chosen": -429.125732421875, "logps/rejected": -543.9380493164062, "loss": 0.4476, "rewards/accuracies": 0.75, "rewards/chosen": -1.934643030166626, "rewards/margins": 1.1717562675476074, "rewards/rejected": -3.1063990592956543, "step": 6530 }, { "epoch": 0.86, "learning_rate": 3.0969569144214147e-07, "logits/chosen": -0.9073036313056946, "logits/rejected": 0.5169941186904907, "logps/chosen": -476.5718688964844, "logps/rejected": -547.4609985351562, "loss": 0.4676, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0425972938537598, "rewards/margins": 0.9983150362968445, "rewards/rejected": -3.040912628173828, "step": 6540 }, { "epoch": 0.86, "learning_rate": 3.042119901200824e-07, "logits/chosen": -0.1191103607416153, "logits/rejected": 0.11132284253835678, "logps/chosen": -444.5992736816406, "logps/rejected": -555.7691650390625, "loss": 0.5972, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1316585540771484, "rewards/margins": 0.7841309309005737, "rewards/rejected": -2.915789842605591, "step": 6550 }, { "epoch": 0.86, "learning_rate": 2.9877412595396726e-07, "logits/chosen": -0.8357526063919067, "logits/rejected": -0.019801050424575806, "logps/chosen": -504.7652893066406, "logps/rejected": -582.4708251953125, "loss": 0.4626, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9639135599136353, "rewards/margins": 1.1901118755340576, "rewards/rejected": -3.1540255546569824, "step": 6560 }, { "epoch": 0.86, "learning_rate": 2.933822124594124e-07, "logits/chosen": -0.2868785560131073, "logits/rejected": 0.4834575057029724, "logps/chosen": -471.68603515625, "logps/rejected": -518.7160034179688, "loss": 0.5799, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0963294506073, "rewards/margins": 0.8177496194839478, "rewards/rejected": -2.914079189300537, "step": 6570 }, { "epoch": 0.86, "learning_rate": 2.880363621928106e-07, "logits/chosen": -0.4423336982727051, "logits/rejected": 0.6005432605743408, "logps/chosen": -483.3235778808594, "logps/rejected": -539.4915771484375, "loss": 0.5047, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.052248239517212, "rewards/margins": 0.9304612874984741, "rewards/rejected": -2.9827091693878174, "step": 6580 }, { "epoch": 0.86, "learning_rate": 2.82736686748985e-07, "logits/chosen": -0.6261542439460754, "logits/rejected": 0.38417965173721313, "logps/chosen": -470.73077392578125, "logps/rejected": -533.3613891601562, "loss": 0.4692, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9056422710418701, "rewards/margins": 1.2279062271118164, "rewards/rejected": -3.1335487365722656, "step": 6590 }, { "epoch": 0.86, "learning_rate": 2.774832967588556e-07, "logits/chosen": -0.7487810850143433, "logits/rejected": 0.46375662088394165, "logps/chosen": -487.33544921875, "logps/rejected": -587.102783203125, "loss": 0.5263, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.039247512817383, "rewards/margins": 1.248397707939148, "rewards/rejected": -3.2876453399658203, "step": 6600 }, { "epoch": 0.86, "eval_logits/chosen": 1.180580735206604, "eval_logits/rejected": 1.9389982223510742, "eval_logps/chosen": -465.3099060058594, "eval_logps/rejected": -546.2759399414062, "eval_loss": 0.5066229104995728, "eval_rewards/accuracies": 0.7310000061988831, "eval_rewards/chosen": -1.9681991338729858, "eval_rewards/margins": 1.0087487697601318, "eval_rewards/rejected": -2.976947784423828, "eval_runtime": 1275.913, "eval_samples_per_second": 1.568, "eval_steps_per_second": 0.784, "step": 6600 }, { "epoch": 0.86, "learning_rate": 2.7227630188713326e-07, "logits/chosen": -0.889278769493103, "logits/rejected": 0.699964702129364, "logps/chosen": -502.218017578125, "logps/rejected": -545.4527587890625, "loss": 0.504, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0107197761535645, "rewards/margins": 1.0104124546051025, "rewards/rejected": -3.021131992340088, "step": 6610 }, { "epoch": 0.87, "learning_rate": 2.671158108300284e-07, "logits/chosen": -0.6139100790023804, "logits/rejected": -0.24325446784496307, "logps/chosen": -461.9278259277344, "logps/rejected": -540.6260986328125, "loss": 0.5506, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.021791934967041, "rewards/margins": 0.7798693776130676, "rewards/rejected": -2.801661252975464, "step": 6620 }, { "epoch": 0.87, "learning_rate": 2.6200193131298376e-07, "logits/chosen": -0.6526001691818237, "logits/rejected": -0.10106471925973892, "logps/chosen": -480.90618896484375, "logps/rejected": -579.097900390625, "loss": 0.4102, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9294904470443726, "rewards/margins": 1.2845728397369385, "rewards/rejected": -3.2140636444091797, "step": 6630 }, { "epoch": 0.87, "learning_rate": 2.569347700884217e-07, "logits/chosen": -0.69781893491745, "logits/rejected": 0.686926543712616, "logps/chosen": -466.5616149902344, "logps/rejected": -538.9299926757812, "loss": 0.473, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9398361444473267, "rewards/margins": 1.1922200918197632, "rewards/rejected": -3.13205623626709, "step": 6640 }, { "epoch": 0.87, "learning_rate": 2.5191443293352186e-07, "logits/chosen": -0.3311184346675873, "logits/rejected": 0.31631073355674744, "logps/chosen": -480.02655029296875, "logps/rejected": -576.1092529296875, "loss": 0.5448, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.015752077102661, "rewards/margins": 0.9452459216117859, "rewards/rejected": -2.960998058319092, "step": 6650 }, { "epoch": 0.87, "learning_rate": 2.469410246480067e-07, "logits/chosen": -0.21244564652442932, "logits/rejected": 0.6931090950965881, "logps/chosen": -439.51812744140625, "logps/rejected": -541.0764770507812, "loss": 0.4631, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.069284439086914, "rewards/margins": 1.1991032361984253, "rewards/rejected": -3.26838755607605, "step": 6660 }, { "epoch": 0.87, "learning_rate": 2.4201464905195955e-07, "logits/chosen": -0.42047929763793945, "logits/rejected": -0.40434974431991577, "logps/chosen": -454.3592224121094, "logps/rejected": -524.0152587890625, "loss": 0.5885, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9843008518218994, "rewards/margins": 0.6817992925643921, "rewards/rejected": -2.666100025177002, "step": 6670 }, { "epoch": 0.87, "learning_rate": 2.3713540898365196e-07, "logits/chosen": -0.5671381950378418, "logits/rejected": 0.32992106676101685, "logps/chosen": -449.9239196777344, "logps/rejected": -534.2097778320312, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": -1.7972313165664673, "rewards/margins": 1.1687263250350952, "rewards/rejected": -2.9659574031829834, "step": 6680 }, { "epoch": 0.88, "learning_rate": 2.3230340629740166e-07, "logits/chosen": -0.6218796968460083, "logits/rejected": -0.1357102394104004, "logps/chosen": -465.1273498535156, "logps/rejected": -495.3822326660156, "loss": 0.6258, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9846065044403076, "rewards/margins": 0.5649687647819519, "rewards/rejected": -2.5495753288269043, "step": 6690 }, { "epoch": 0.88, "learning_rate": 2.2751874186144357e-07, "logits/chosen": -1.0036194324493408, "logits/rejected": 0.0544402189552784, "logps/chosen": -472.434326171875, "logps/rejected": -515.3161010742188, "loss": 0.5263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8698005676269531, "rewards/margins": 0.915500819683075, "rewards/rejected": -2.7853012084960938, "step": 6700 }, { "epoch": 0.88, "eval_logits/chosen": 1.1793560981750488, "eval_logits/rejected": 1.9366413354873657, "eval_logps/chosen": -465.6783752441406, "eval_logps/rejected": -546.6119384765625, "eval_loss": 0.5065844058990479, "eval_rewards/accuracies": 0.7319999933242798, "eval_rewards/chosen": -1.9718834161758423, "eval_rewards/margins": 1.0084247589111328, "eval_rewards/rejected": -2.9803082942962646, "eval_runtime": 1228.3254, "eval_samples_per_second": 1.628, "eval_steps_per_second": 0.814, "step": 6700 }, { "epoch": 0.88, "learning_rate": 2.227815155558241e-07, "logits/chosen": -0.715009331703186, "logits/rejected": 0.11820618808269501, "logps/chosen": -483.3104553222656, "logps/rejected": -584.90478515625, "loss": 0.5043, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.07963228225708, "rewards/margins": 1.144766092300415, "rewards/rejected": -3.224398136138916, "step": 6710 }, { "epoch": 0.88, "learning_rate": 2.1809182627031883e-07, "logits/chosen": -0.9190937280654907, "logits/rejected": 0.36419257521629333, "logps/chosen": -471.60467529296875, "logps/rejected": -543.3637084960938, "loss": 0.5222, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8738210201263428, "rewards/margins": 0.989733099937439, "rewards/rejected": -2.8635544776916504, "step": 6720 }, { "epoch": 0.88, "learning_rate": 2.1344977190236372e-07, "logits/chosen": 0.27573806047439575, "logits/rejected": 0.18469476699829102, "logps/chosen": -439.60107421875, "logps/rejected": -560.4583740234375, "loss": 0.4557, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9545615911483765, "rewards/margins": 1.2339239120483398, "rewards/rejected": -3.188485622406006, "step": 6730 }, { "epoch": 0.88, "learning_rate": 2.0885544935501656e-07, "logits/chosen": -0.5300592184066772, "logits/rejected": -0.35172906517982483, "logps/chosen": -451.72027587890625, "logps/rejected": -578.4183349609375, "loss": 0.4219, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9533073902130127, "rewards/margins": 1.2389609813690186, "rewards/rejected": -3.1922686100006104, "step": 6740 }, { "epoch": 0.88, "learning_rate": 2.0430895453492944e-07, "logits/chosen": -0.5228718519210815, "logits/rejected": -0.06577786058187485, "logps/chosen": -495.9405212402344, "logps/rejected": -537.6466674804688, "loss": 0.5773, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0149998664855957, "rewards/margins": 0.7745741605758667, "rewards/rejected": -2.789574384689331, "step": 6750 }, { "epoch": 0.88, "learning_rate": 1.9981038235035111e-07, "logits/chosen": -0.12807337939739227, "logits/rejected": -0.08245428651571274, "logps/chosen": -441.65008544921875, "logps/rejected": -548.439697265625, "loss": 0.3802, "rewards/accuracies": 0.8125, "rewards/chosen": -1.779802680015564, "rewards/margins": 1.3215081691741943, "rewards/rejected": -3.101310968399048, "step": 6760 }, { "epoch": 0.89, "learning_rate": 1.9535982670914112e-07, "logits/chosen": -0.55552738904953, "logits/rejected": 0.4736878275871277, "logps/chosen": -500.26171875, "logps/rejected": -571.901611328125, "loss": 0.5424, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.030512571334839, "rewards/margins": 0.9621774554252625, "rewards/rejected": -2.9926905632019043, "step": 6770 }, { "epoch": 0.89, "learning_rate": 1.9095738051681412e-07, "logits/chosen": -0.19979307055473328, "logits/rejected": 0.008223796263337135, "logps/chosen": -459.77935791015625, "logps/rejected": -547.7391357421875, "loss": 0.5411, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.210814952850342, "rewards/margins": 0.9136184453964233, "rewards/rejected": -3.1244330406188965, "step": 6780 }, { "epoch": 0.89, "learning_rate": 1.8660313567459703e-07, "logits/chosen": -0.09574131667613983, "logits/rejected": -0.14765064418315887, "logps/chosen": -426.57159423828125, "logps/rejected": -542.1318359375, "loss": 0.5432, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9452426433563232, "rewards/margins": 1.1776217222213745, "rewards/rejected": -3.1228644847869873, "step": 6790 }, { "epoch": 0.89, "learning_rate": 1.8229718307751165e-07, "logits/chosen": -0.45521458983421326, "logits/rejected": 0.7231898903846741, "logps/chosen": -494.8271484375, "logps/rejected": -564.8556518554688, "loss": 0.4939, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0975873470306396, "rewards/margins": 1.2642689943313599, "rewards/rejected": -3.361856460571289, "step": 6800 }, { "epoch": 0.89, "eval_logits/chosen": 1.2238198518753052, "eval_logits/rejected": 1.979459524154663, "eval_logps/chosen": -470.53741455078125, "eval_logps/rejected": -551.8628540039062, "eval_loss": 0.5062793493270874, "eval_rewards/accuracies": 0.7325000166893005, "eval_rewards/chosen": -2.0204737186431885, "eval_rewards/margins": 1.0123436450958252, "eval_rewards/rejected": -3.0328176021575928, "eval_runtime": 1180.1566, "eval_samples_per_second": 1.695, "eval_steps_per_second": 0.847, "step": 6800 }, { "epoch": 0.89, "learning_rate": 1.7803961261247864e-07, "logits/chosen": -0.2857457995414734, "logits/rejected": 0.201734259724617, "logps/chosen": -467.9620666503906, "logps/rejected": -601.2489013671875, "loss": 0.4212, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9273033142089844, "rewards/margins": 1.3749698400497437, "rewards/rejected": -3.3022735118865967, "step": 6810 }, { "epoch": 0.89, "learning_rate": 1.7383051315643772e-07, "logits/chosen": -0.7285705804824829, "logits/rejected": 0.5277493000030518, "logps/chosen": -507.9571228027344, "logps/rejected": -551.97607421875, "loss": 0.58, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2432968616485596, "rewards/margins": 0.8464199304580688, "rewards/rejected": -3.0897164344787598, "step": 6820 }, { "epoch": 0.89, "learning_rate": 1.6966997257449685e-07, "logits/chosen": -0.5658080577850342, "logits/rejected": 0.22322329878807068, "logps/chosen": -476.03692626953125, "logps/rejected": -543.3971557617188, "loss": 0.526, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0512733459472656, "rewards/margins": 0.8895019292831421, "rewards/rejected": -2.9407753944396973, "step": 6830 }, { "epoch": 0.9, "learning_rate": 1.6555807771809375e-07, "logits/chosen": -0.6819769144058228, "logits/rejected": 0.3543395698070526, "logps/chosen": -454.736328125, "logps/rejected": -513.91650390625, "loss": 0.4701, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9653873443603516, "rewards/margins": 1.1591241359710693, "rewards/rejected": -3.124511241912842, "step": 6840 }, { "epoch": 0.9, "learning_rate": 1.6149491442318617e-07, "logits/chosen": -0.37542515993118286, "logits/rejected": -0.02834094688296318, "logps/chosen": -455.55657958984375, "logps/rejected": -529.7952880859375, "loss": 0.5826, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.912413239479065, "rewards/margins": 0.8809484243392944, "rewards/rejected": -2.7933619022369385, "step": 6850 }, { "epoch": 0.9, "learning_rate": 1.5748056750845786e-07, "logits/chosen": -0.7669013738632202, "logits/rejected": 0.44335660338401794, "logps/chosen": -499.58685302734375, "logps/rejected": -512.37353515625, "loss": 0.5071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2117278575897217, "rewards/margins": 0.8127554655075073, "rewards/rejected": -3.0244839191436768, "step": 6860 }, { "epoch": 0.9, "learning_rate": 1.5351512077355024e-07, "logits/chosen": -0.5717117786407471, "logits/rejected": 0.31644728779792786, "logps/chosen": -493.24652099609375, "logps/rejected": -646.7489624023438, "loss": 0.4034, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9442113637924194, "rewards/margins": 1.276631236076355, "rewards/rejected": -3.2208428382873535, "step": 6870 }, { "epoch": 0.9, "learning_rate": 1.4959865699730902e-07, "logits/chosen": -0.39092275500297546, "logits/rejected": 0.6129297614097595, "logps/chosen": -439.7764587402344, "logps/rejected": -524.3287353515625, "loss": 0.4971, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0712218284606934, "rewards/margins": 1.1431468725204468, "rewards/rejected": -3.214369297027588, "step": 6880 }, { "epoch": 0.9, "learning_rate": 1.4573125793606202e-07, "logits/chosen": -0.4366453289985657, "logits/rejected": 0.452403724193573, "logps/chosen": -431.4530334472656, "logps/rejected": -524.9496459960938, "loss": 0.5224, "rewards/accuracies": 0.75, "rewards/chosen": -2.097776174545288, "rewards/margins": 1.0594654083251953, "rewards/rejected": -3.1572415828704834, "step": 6890 }, { "epoch": 0.9, "learning_rate": 1.4191300432190634e-07, "logits/chosen": -0.45784568786621094, "logits/rejected": 0.9001695513725281, "logps/chosen": -504.549072265625, "logps/rejected": -562.7247314453125, "loss": 0.5763, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.3490102291107178, "rewards/margins": 0.8059114217758179, "rewards/rejected": -3.154921770095825, "step": 6900 }, { "epoch": 0.9, "eval_logits/chosen": 1.202736258506775, "eval_logits/rejected": 1.9579252004623413, "eval_logps/chosen": -469.47125244140625, "eval_logps/rejected": -550.4862670898438, "eval_loss": 0.5060390830039978, "eval_rewards/accuracies": 0.7329999804496765, "eval_rewards/chosen": -2.009812116622925, "eval_rewards/margins": 1.009238600730896, "eval_rewards/rejected": -3.0190508365631104, "eval_runtime": 1171.5664, "eval_samples_per_second": 1.707, "eval_steps_per_second": 0.854, "step": 6900 }, { "epoch": 0.9, "learning_rate": 1.381439758610284e-07, "logits/chosen": -0.6321390271186829, "logits/rejected": 0.20315834879875183, "logps/chosen": -462.337158203125, "logps/rejected": -521.3242797851562, "loss": 0.5273, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.02864408493042, "rewards/margins": 0.7589557766914368, "rewards/rejected": -2.787600040435791, "step": 6910 }, { "epoch": 0.91, "learning_rate": 1.3442425123203596e-07, "logits/chosen": -0.7073062658309937, "logits/rejected": 0.02772808074951172, "logps/chosen": -461.3926696777344, "logps/rejected": -560.2494506835938, "loss": 0.5085, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.046745538711548, "rewards/margins": 1.014560580253601, "rewards/rejected": -3.0613059997558594, "step": 6920 }, { "epoch": 0.91, "learning_rate": 1.3075390808431897e-07, "logits/chosen": -0.37788501381874084, "logits/rejected": 0.6019797325134277, "logps/chosen": -435.75726318359375, "logps/rejected": -511.6985778808594, "loss": 0.4686, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9663892984390259, "rewards/margins": 1.0337203741073608, "rewards/rejected": -3.0001096725463867, "step": 6930 }, { "epoch": 0.91, "learning_rate": 1.271330230364262e-07, "logits/chosen": -0.14386829733848572, "logits/rejected": 0.21935054659843445, "logps/chosen": -460.65582275390625, "logps/rejected": -620.8492431640625, "loss": 0.4858, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.145711898803711, "rewards/margins": 1.1036556959152222, "rewards/rejected": -3.2493674755096436, "step": 6940 }, { "epoch": 0.91, "learning_rate": 1.2356167167446698e-07, "logits/chosen": -0.04140012338757515, "logits/rejected": 0.4257062077522278, "logps/chosen": -467.8836975097656, "logps/rejected": -582.3963012695312, "loss": 0.5375, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.3314528465270996, "rewards/margins": 1.0109634399414062, "rewards/rejected": -3.342416286468506, "step": 6950 }, { "epoch": 0.91, "learning_rate": 1.2003992855053326e-07, "logits/chosen": -0.09045709669589996, "logits/rejected": 0.434389591217041, "logps/chosen": -432.42889404296875, "logps/rejected": -556.8145751953125, "loss": 0.5105, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9629653692245483, "rewards/margins": 1.2737910747528076, "rewards/rejected": -3.2367560863494873, "step": 6960 }, { "epoch": 0.91, "learning_rate": 1.1656786718114239e-07, "logits/chosen": -0.14372889697551727, "logits/rejected": 0.17852702736854553, "logps/chosen": -464.07659912109375, "logps/rejected": -544.1351318359375, "loss": 0.5259, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1138415336608887, "rewards/margins": 0.9006859064102173, "rewards/rejected": -3.0145275592803955, "step": 6970 }, { "epoch": 0.91, "learning_rate": 1.1314556004570487e-07, "logits/chosen": -0.35672527551651, "logits/rejected": -0.19655278325080872, "logps/chosen": -405.0440979003906, "logps/rejected": -522.1256713867188, "loss": 0.5576, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9034448862075806, "rewards/margins": 0.9194092750549316, "rewards/rejected": -2.8228540420532227, "step": 6980 }, { "epoch": 0.91, "learning_rate": 1.0977307858500818e-07, "logits/chosen": -0.7549588084220886, "logits/rejected": 0.3601745367050171, "logps/chosen": -438.78955078125, "logps/rejected": -511.4344787597656, "loss": 0.4611, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8593467473983765, "rewards/margins": 1.0313808917999268, "rewards/rejected": -2.8907275199890137, "step": 6990 }, { "epoch": 0.92, "learning_rate": 1.0645049319972789e-07, "logits/chosen": -0.11657045036554337, "logits/rejected": 0.3493362367153168, "logps/chosen": -482.05938720703125, "logps/rejected": -546.6885375976562, "loss": 0.5062, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1746017932891846, "rewards/margins": 1.1042107343673706, "rewards/rejected": -3.278812885284424, "step": 7000 }, { "epoch": 0.92, "eval_logits/chosen": 1.2018110752105713, "eval_logits/rejected": 1.957441806793213, "eval_logps/chosen": -468.7945556640625, "eval_logps/rejected": -549.6513671875, "eval_loss": 0.5059376955032349, "eval_rewards/accuracies": 0.7319999933242798, "eval_rewards/chosen": -2.0030453205108643, "eval_rewards/margins": 1.0076566934585571, "eval_rewards/rejected": -3.010701894760132, "eval_runtime": 1176.4736, "eval_samples_per_second": 1.7, "eval_steps_per_second": 0.85, "step": 7000 }, { "epoch": 0.92, "learning_rate": 1.0317787324895634e-07, "logits/chosen": -0.35694876313209534, "logits/rejected": 0.610944926738739, "logps/chosen": -489.6722717285156, "logps/rejected": -578.5293579101562, "loss": 0.3914, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.016587495803833, "rewards/margins": 1.268135666847229, "rewards/rejected": -3.2847228050231934, "step": 7010 }, { "epoch": 0.92, "learning_rate": 9.995528704875635e-08, "logits/chosen": -0.07247890532016754, "logits/rejected": -0.2989567816257477, "logps/chosen": -438.46728515625, "logps/rejected": -547.0955810546875, "loss": 0.5225, "rewards/accuracies": 0.75, "rewards/chosen": -2.047827959060669, "rewards/margins": 0.896246075630188, "rewards/rejected": -2.9440741539001465, "step": 7020 }, { "epoch": 0.92, "learning_rate": 9.678280187073452e-08, "logits/chosen": -0.27913758158683777, "logits/rejected": 0.5736603140830994, "logps/chosen": -463.31243896484375, "logps/rejected": -556.4307861328125, "loss": 0.3972, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7716821432113647, "rewards/margins": 1.4218103885650635, "rewards/rejected": -3.1934924125671387, "step": 7030 }, { "epoch": 0.92, "learning_rate": 9.366048394063549e-08, "logits/chosen": -0.41250452399253845, "logits/rejected": -0.0662340372800827, "logps/chosen": -456.4169006347656, "logps/rejected": -561.72021484375, "loss": 0.5114, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.850750207901001, "rewards/margins": 0.9853949546813965, "rewards/rejected": -2.8361451625823975, "step": 7040 }, { "epoch": 0.92, "learning_rate": 9.058839843696237e-08, "logits/chosen": -0.4489797055721283, "logits/rejected": 0.25591760873794556, "logps/chosen": -483.81707763671875, "logps/rejected": -559.30908203125, "loss": 0.4706, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0449726581573486, "rewards/margins": 1.0600306987762451, "rewards/rejected": -3.1050033569335938, "step": 7050 }, { "epoch": 0.92, "learning_rate": 8.756660948961299e-08, "logits/chosen": -0.4514777660369873, "logits/rejected": -0.20608548820018768, "logps/chosen": -442.100341796875, "logps/rejected": -545.4368286132812, "loss": 0.5293, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.016775608062744, "rewards/margins": 0.8498009443283081, "rewards/rejected": -2.866576671600342, "step": 7060 }, { "epoch": 0.93, "learning_rate": 8.459518017854412e-08, "logits/chosen": -0.6188634634017944, "logits/rejected": 0.048554904758930206, "logps/chosen": -459.34503173828125, "logps/rejected": -503.36541748046875, "loss": 0.5722, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9700435400009155, "rewards/margins": 0.6844844818115234, "rewards/rejected": -2.6545281410217285, "step": 7070 }, { "epoch": 0.93, "learning_rate": 8.167417253245213e-08, "logits/chosen": -0.7391661405563354, "logits/rejected": 0.666763424873352, "logps/chosen": -460.7491760253906, "logps/rejected": -533.9959716796875, "loss": 0.5173, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.060835123062134, "rewards/margins": 0.9698716998100281, "rewards/rejected": -3.0307066440582275, "step": 7080 }, { "epoch": 0.93, "learning_rate": 7.880364752747948e-08, "logits/chosen": -0.42400145530700684, "logits/rejected": -0.08642569929361343, "logps/chosen": -453.1158142089844, "logps/rejected": -535.9979248046875, "loss": 0.5474, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1991636753082275, "rewards/margins": 0.8412439227104187, "rewards/rejected": -3.04040789604187, "step": 7090 }, { "epoch": 0.93, "learning_rate": 7.598366508594245e-08, "logits/chosen": -0.19622935354709625, "logits/rejected": -0.13577821850776672, "logps/chosen": -504.1654357910156, "logps/rejected": -597.3096313476562, "loss": 0.4432, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.007800340652466, "rewards/margins": 1.198746919631958, "rewards/rejected": -3.2065467834472656, "step": 7100 }, { "epoch": 0.93, "eval_logits/chosen": 1.2115496397018433, "eval_logits/rejected": 1.9674791097640991, "eval_logps/chosen": -469.8140869140625, "eval_logps/rejected": -550.7593994140625, "eval_loss": 0.5058996081352234, "eval_rewards/accuracies": 0.7329999804496765, "eval_rewards/chosen": -2.0132408142089844, "eval_rewards/margins": 1.0085415840148926, "eval_rewards/rejected": -3.021782398223877, "eval_runtime": 1186.4135, "eval_samples_per_second": 1.686, "eval_steps_per_second": 0.843, "step": 7100 }, { "epoch": 0.93, "learning_rate": 7.32142840750788e-08, "logits/chosen": -0.5224785804748535, "logits/rejected": 0.3103681802749634, "logps/chosen": -489.0027770996094, "logps/rejected": -570.6793212890625, "loss": 0.4273, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8569695949554443, "rewards/margins": 1.2805449962615967, "rewards/rejected": -3.137514591217041, "step": 7110 }, { "epoch": 0.93, "learning_rate": 7.049556230581872e-08, "logits/chosen": -0.19431808590888977, "logits/rejected": 0.6167198419570923, "logps/chosen": -450.33013916015625, "logps/rejected": -534.5645751953125, "loss": 0.5363, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.142000675201416, "rewards/margins": 1.0167112350463867, "rewards/rejected": -3.1587119102478027, "step": 7120 }, { "epoch": 0.93, "learning_rate": 6.782755653158085e-08, "logits/chosen": -0.4115025997161865, "logits/rejected": 0.14585857093334198, "logps/chosen": -475.48480224609375, "logps/rejected": -537.6339111328125, "loss": 0.517, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.024827480316162, "rewards/margins": 0.8768397569656372, "rewards/rejected": -2.901667356491089, "step": 7130 }, { "epoch": 0.93, "learning_rate": 6.521032244708375e-08, "logits/chosen": -0.2687646448612213, "logits/rejected": 0.2106965035200119, "logps/chosen": -456.90948486328125, "logps/rejected": -554.6942138671875, "loss": 0.5336, "rewards/accuracies": 0.75, "rewards/chosen": -1.990630865097046, "rewards/margins": 0.9876958727836609, "rewards/rejected": -2.9783265590667725, "step": 7140 }, { "epoch": 0.94, "learning_rate": 6.264391468718628e-08, "logits/chosen": -0.7374650239944458, "logits/rejected": 0.049775220453739166, "logps/chosen": -459.86181640625, "logps/rejected": -549.9932250976562, "loss": 0.4637, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.8901011943817139, "rewards/margins": 1.0680241584777832, "rewards/rejected": -2.958125591278076, "step": 7150 }, { "epoch": 0.94, "learning_rate": 6.012838682574462e-08, "logits/chosen": -0.6180638074874878, "logits/rejected": 0.6540043950080872, "logps/chosen": -475.832763671875, "logps/rejected": -497.9046325683594, "loss": 0.5072, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0510411262512207, "rewards/margins": 0.8002778887748718, "rewards/rejected": -2.851318836212158, "step": 7160 }, { "epoch": 0.94, "learning_rate": 5.766379137449624e-08, "logits/chosen": -0.36336010694503784, "logits/rejected": 0.0748773068189621, "logps/chosen": -423.7010192871094, "logps/rejected": -554.7673950195312, "loss": 0.4622, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9527654647827148, "rewards/margins": 1.0933210849761963, "rewards/rejected": -3.0460867881774902, "step": 7170 }, { "epoch": 0.94, "learning_rate": 5.525017978196295e-08, "logits/chosen": -0.5020288825035095, "logits/rejected": 0.6828367114067078, "logps/chosen": -488.1974182128906, "logps/rejected": -566.2771606445312, "loss": 0.4981, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0206267833709717, "rewards/margins": 1.190063714981079, "rewards/rejected": -3.21069073677063, "step": 7180 }, { "epoch": 0.94, "learning_rate": 5.288760243237545e-08, "logits/chosen": -0.7169966697692871, "logits/rejected": 0.34568727016448975, "logps/chosen": -526.5534057617188, "logps/rejected": -571.4498291015625, "loss": 0.5102, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1718175411224365, "rewards/margins": 0.957715630531311, "rewards/rejected": -3.129533052444458, "step": 7190 }, { "epoch": 0.94, "learning_rate": 5.0576108644623536e-08, "logits/chosen": -0.5569435954093933, "logits/rejected": 0.4495469927787781, "logps/chosen": -521.062255859375, "logps/rejected": -563.0753173828125, "loss": 0.5294, "rewards/accuracies": 0.75, "rewards/chosen": -2.2122786045074463, "rewards/margins": 1.0275580883026123, "rewards/rejected": -3.2398364543914795, "step": 7200 }, { "epoch": 0.94, "eval_logits/chosen": 1.2122623920440674, "eval_logits/rejected": 1.967880129814148, "eval_logps/chosen": -469.9013671875, "eval_logps/rejected": -550.8819580078125, "eval_loss": 0.5059316158294678, "eval_rewards/accuracies": 0.7315000295639038, "eval_rewards/chosen": -2.014113426208496, "eval_rewards/margins": 1.0088937282562256, "eval_rewards/rejected": -3.0230071544647217, "eval_runtime": 1186.2494, "eval_samples_per_second": 1.686, "eval_steps_per_second": 0.843, "step": 7200 }, { "epoch": 0.94, "learning_rate": 4.8315746671225296e-08, "logits/chosen": -0.5705938339233398, "logits/rejected": 0.5357488989830017, "logps/chosen": -488.63177490234375, "logps/rejected": -576.7957763671875, "loss": 0.4553, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8494971990585327, "rewards/margins": 1.1614658832550049, "rewards/rejected": -3.0109634399414062, "step": 7210 }, { "epoch": 0.94, "learning_rate": 4.6106563697320695e-08, "logits/chosen": -0.6351941823959351, "logits/rejected": 0.735435962677002, "logps/chosen": -432.3277282714844, "logps/rejected": -521.2852783203125, "loss": 0.4985, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9638311862945557, "rewards/margins": 1.1676876544952393, "rewards/rejected": -3.131518840789795, "step": 7220 }, { "epoch": 0.95, "learning_rate": 4.394860583968624e-08, "logits/chosen": 0.21465528011322021, "logits/rejected": -0.21870703995227814, "logps/chosen": -392.111328125, "logps/rejected": -517.2110595703125, "loss": 0.5165, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.8918077945709229, "rewards/margins": 1.0091055631637573, "rewards/rejected": -2.9009132385253906, "step": 7230 }, { "epoch": 0.95, "learning_rate": 4.1841918145771874e-08, "logits/chosen": -0.24777980148792267, "logits/rejected": 0.0036219656467437744, "logps/chosen": -458.74249267578125, "logps/rejected": -550.5509033203125, "loss": 0.4325, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8298200368881226, "rewards/margins": 1.063430905342102, "rewards/rejected": -2.8932509422302246, "step": 7240 }, { "epoch": 0.95, "learning_rate": 3.978654459276088e-08, "logits/chosen": -0.7501617670059204, "logits/rejected": 0.33591216802597046, "logps/chosen": -512.9222412109375, "logps/rejected": -573.7879028320312, "loss": 0.4945, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9614841938018799, "rewards/margins": 1.2372934818267822, "rewards/rejected": -3.198777437210083, "step": 7250 }, { "epoch": 0.95, "learning_rate": 3.778252808665284e-08, "logits/chosen": -0.8246575593948364, "logits/rejected": 0.21241407096385956, "logps/chosen": -523.8837890625, "logps/rejected": -519.209716796875, "loss": 0.5693, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0833640098571777, "rewards/margins": 0.7381225824356079, "rewards/rejected": -2.821486711502075, "step": 7260 }, { "epoch": 0.95, "learning_rate": 3.5829910461366023e-08, "logits/chosen": 0.01369396410882473, "logits/rejected": 0.08776617050170898, "logps/chosen": -458.39581298828125, "logps/rejected": -524.14013671875, "loss": 0.6843, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1614108085632324, "rewards/margins": 0.7248591780662537, "rewards/rejected": -2.8862698078155518, "step": 7270 }, { "epoch": 0.95, "learning_rate": 3.39287324778656e-08, "logits/chosen": -0.7124800086021423, "logits/rejected": 0.3487294912338257, "logps/chosen": -543.2281494140625, "logps/rejected": -586.5289916992188, "loss": 0.6138, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2240188121795654, "rewards/margins": 0.852017879486084, "rewards/rejected": -3.0760366916656494, "step": 7280 }, { "epoch": 0.95, "learning_rate": 3.207903382331262e-08, "logits/chosen": -0.7476862072944641, "logits/rejected": 0.375562846660614, "logps/chosen": -478.54052734375, "logps/rejected": -548.2950439453125, "loss": 0.4735, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7987592220306396, "rewards/margins": 1.1323232650756836, "rewards/rejected": -2.9310824871063232, "step": 7290 }, { "epoch": 0.96, "learning_rate": 3.028085311023443e-08, "logits/chosen": -0.5295546054840088, "logits/rejected": 0.3997403085231781, "logps/chosen": -470.9478454589844, "logps/rejected": -556.5192260742188, "loss": 0.4488, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9478727579116821, "rewards/margins": 1.1733306646347046, "rewards/rejected": -3.1212034225463867, "step": 7300 }, { "epoch": 0.96, "eval_logits/chosen": 1.2130078077316284, "eval_logits/rejected": 1.9687855243682861, "eval_logps/chosen": -469.9289245605469, "eval_logps/rejected": -550.9682006835938, "eval_loss": 0.5058298707008362, "eval_rewards/accuracies": 0.7319999933242798, "eval_rewards/chosen": -2.0143890380859375, "eval_rewards/margins": 1.00948166847229, "eval_rewards/rejected": -3.0238709449768066, "eval_runtime": 1188.4057, "eval_samples_per_second": 1.683, "eval_steps_per_second": 0.841, "step": 7300 }, { "epoch": 0.96, "learning_rate": 2.8534227875720576e-08, "logits/chosen": -0.3639126420021057, "logits/rejected": -0.1453981250524521, "logps/chosen": -465.8466796875, "logps/rejected": -564.0140991210938, "loss": 0.5208, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.014688014984131, "rewards/margins": 1.0618035793304443, "rewards/rejected": -3.0764918327331543, "step": 7310 }, { "epoch": 0.96, "learning_rate": 2.683919458063705e-08, "logits/chosen": -0.6341809630393982, "logits/rejected": 0.7493041753768921, "logps/chosen": -391.2576599121094, "logps/rejected": -444.69024658203125, "loss": 0.5043, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.8518688678741455, "rewards/margins": 0.9767493009567261, "rewards/rejected": -2.828618288040161, "step": 7320 }, { "epoch": 0.96, "learning_rate": 2.5195788608866345e-08, "logits/chosen": -0.28447026014328003, "logits/rejected": 0.6122376322746277, "logps/chosen": -529.1468505859375, "logps/rejected": -554.7265625, "loss": 0.5484, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.015683889389038, "rewards/margins": 0.9786075353622437, "rewards/rejected": -2.994291305541992, "step": 7330 }, { "epoch": 0.96, "learning_rate": 2.3604044266569426e-08, "logits/chosen": -0.8208459615707397, "logits/rejected": 0.9798334836959839, "logps/chosen": -484.0049743652344, "logps/rejected": -558.9443969726562, "loss": 0.4936, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.064974546432495, "rewards/margins": 1.0787862539291382, "rewards/rejected": -3.1437606811523438, "step": 7340 }, { "epoch": 0.96, "learning_rate": 2.2063994781468256e-08, "logits/chosen": -0.2842608094215393, "logits/rejected": 0.27055567502975464, "logps/chosen": -452.8561096191406, "logps/rejected": -518.8817749023438, "loss": 0.5172, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8908157348632812, "rewards/margins": 1.0217288732528687, "rewards/rejected": -2.9125447273254395, "step": 7350 }, { "epoch": 0.96, "learning_rate": 2.057567230215246e-08, "logits/chosen": -0.3838343322277069, "logits/rejected": -0.392334908246994, "logps/chosen": -468.7618103027344, "logps/rejected": -558.9362182617188, "loss": 0.5131, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0254180431365967, "rewards/margins": 0.84428870677948, "rewards/rejected": -2.869706630706787, "step": 7360 }, { "epoch": 0.96, "learning_rate": 1.9139107897409303e-08, "logits/chosen": -0.3951881229877472, "logits/rejected": 0.7453802824020386, "logps/chosen": -481.59222412109375, "logps/rejected": -544.9617919921875, "loss": 0.4279, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9218416213989258, "rewards/margins": 1.2567031383514404, "rewards/rejected": -3.178544521331787, "step": 7370 }, { "epoch": 0.97, "learning_rate": 1.7754331555573656e-08, "logits/chosen": -0.5559927821159363, "logits/rejected": -0.07539238035678864, "logps/chosen": -494.11053466796875, "logps/rejected": -615.8850708007812, "loss": 0.4835, "rewards/accuracies": 0.75, "rewards/chosen": -2.090358018875122, "rewards/margins": 0.9464312791824341, "rewards/rejected": -3.0367894172668457, "step": 7380 }, { "epoch": 0.97, "learning_rate": 1.642137218390294e-08, "logits/chosen": -0.41712522506713867, "logits/rejected": 0.7344776391983032, "logps/chosen": -474.60247802734375, "logps/rejected": -519.6890869140625, "loss": 0.5292, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9687159061431885, "rewards/margins": 0.937299370765686, "rewards/rejected": -2.906015396118164, "step": 7390 }, { "epoch": 0.97, "learning_rate": 1.514025760797344e-08, "logits/chosen": -0.8390815854072571, "logits/rejected": 0.6691681742668152, "logps/chosen": -529.1636962890625, "logps/rejected": -570.968017578125, "loss": 0.4747, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.024813175201416, "rewards/margins": 1.0949513912200928, "rewards/rejected": -3.119764804840088, "step": 7400 }, { "epoch": 0.97, "eval_logits/chosen": 1.2122125625610352, "eval_logits/rejected": 1.9678871631622314, "eval_logps/chosen": -469.9052429199219, "eval_logps/rejected": -550.9177856445312, "eval_loss": 0.5057068467140198, "eval_rewards/accuracies": 0.7325000166893005, "eval_rewards/chosen": -2.0141522884368896, "eval_rewards/margins": 1.0092144012451172, "eval_rewards/rejected": -3.023366689682007, "eval_runtime": 1179.1327, "eval_samples_per_second": 1.696, "eval_steps_per_second": 0.848, "step": 7400 }, { "epoch": 0.97, "learning_rate": 1.3911014571098835e-08, "logits/chosen": -0.4136602282524109, "logits/rejected": -0.011446630582213402, "logps/chosen": -441.872314453125, "logps/rejected": -560.9342041015625, "loss": 0.4565, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9357706308364868, "rewards/margins": 1.1018954515457153, "rewards/rejected": -3.037665843963623, "step": 7410 }, { "epoch": 0.97, "learning_rate": 1.2733668733773685e-08, "logits/chosen": -0.6516170501708984, "logits/rejected": 0.346381813287735, "logps/chosen": -461.1044006347656, "logps/rejected": -534.1959228515625, "loss": 0.4793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.893280029296875, "rewards/margins": 1.116193175315857, "rewards/rejected": -3.0094730854034424, "step": 7420 }, { "epoch": 0.97, "learning_rate": 1.160824467313526e-08, "logits/chosen": -0.6387825608253479, "logits/rejected": 0.31129929423332214, "logps/chosen": -525.2669067382812, "logps/rejected": -617.6837158203125, "loss": 0.4894, "rewards/accuracies": 0.75, "rewards/chosen": -2.132877826690674, "rewards/margins": 1.1318700313568115, "rewards/rejected": -3.2647480964660645, "step": 7430 }, { "epoch": 0.97, "learning_rate": 1.0534765882453113e-08, "logits/chosen": -0.9205204248428345, "logits/rejected": 0.434356689453125, "logps/chosen": -449.462890625, "logps/rejected": -524.9171752929688, "loss": 0.5114, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9196815490722656, "rewards/margins": 0.9198969602584839, "rewards/rejected": -2.83957839012146, "step": 7440 }, { "epoch": 0.97, "learning_rate": 9.513254770636138e-09, "logits/chosen": -0.3660666048526764, "logits/rejected": 0.5724458694458008, "logps/chosen": -530.5885009765625, "logps/rejected": -592.3338623046875, "loss": 0.5655, "rewards/accuracies": 0.6875, "rewards/chosen": -2.310126543045044, "rewards/margins": 0.7468729019165039, "rewards/rejected": -3.056999444961548, "step": 7450 }, { "epoch": 0.98, "learning_rate": 8.543732661767113e-09, "logits/chosen": -0.14984729886054993, "logits/rejected": 0.1948297917842865, "logps/chosen": -473.29302978515625, "logps/rejected": -562.1162109375, "loss": 0.5471, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9029672145843506, "rewards/margins": 0.876223087310791, "rewards/rejected": -2.7791905403137207, "step": 7460 }, { "epoch": 0.98, "learning_rate": 7.626219794655553e-09, "logits/chosen": -0.7110680937767029, "logits/rejected": 0.13748976588249207, "logps/chosen": -441.283203125, "logps/rejected": -585.7564086914062, "loss": 0.4372, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9341871738433838, "rewards/margins": 1.3025288581848145, "rewards/rejected": -3.236715793609619, "step": 7470 }, { "epoch": 0.98, "learning_rate": 6.7607353224163896e-09, "logits/chosen": -0.5482068657875061, "logits/rejected": 0.41012755036354065, "logps/chosen": -485.97540283203125, "logps/rejected": -520.5703125, "loss": 0.5626, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1319339275360107, "rewards/margins": 0.7751830220222473, "rewards/rejected": -2.907116413116455, "step": 7480 }, { "epoch": 0.98, "learning_rate": 5.947297312070554e-09, "logits/chosen": -0.6903416514396667, "logits/rejected": 0.9713506698608398, "logps/chosen": -506.7986755371094, "logps/rejected": -553.6888427734375, "loss": 0.4687, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.060804843902588, "rewards/margins": 1.1317999362945557, "rewards/rejected": -3.1926045417785645, "step": 7490 }, { "epoch": 0.98, "learning_rate": 5.185922744166128e-09, "logits/chosen": -0.4292621612548828, "logits/rejected": 0.4434017539024353, "logps/chosen": -476.03125, "logps/rejected": -570.34375, "loss": 0.4494, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9747473001480103, "rewards/margins": 1.1069953441619873, "rewards/rejected": -3.081742525100708, "step": 7500 }, { "epoch": 0.98, "eval_logits/chosen": 1.212050437927246, "eval_logits/rejected": 1.967947244644165, "eval_logps/chosen": -469.93450927734375, "eval_logps/rejected": -550.9584350585938, "eval_loss": 0.5057631134986877, "eval_rewards/accuracies": 0.7350000143051147, "eval_rewards/chosen": -2.0144448280334473, "eval_rewards/margins": 1.0093281269073486, "eval_rewards/rejected": -3.023772954940796, "eval_runtime": 1173.1475, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.852, "step": 7500 }, { "epoch": 0.98, "learning_rate": 4.476627512425558e-09, "logits/chosen": -0.2599974274635315, "logits/rejected": -0.058055657893419266, "logps/chosen": -464.0921936035156, "logps/rejected": -562.7515869140625, "loss": 0.4728, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9272934198379517, "rewards/margins": 1.0648444890975952, "rewards/rejected": -2.992137908935547, "step": 7510 }, { "epoch": 0.98, "learning_rate": 3.819426423412875e-09, "logits/chosen": -0.6782014966011047, "logits/rejected": 0.14074628055095673, "logps/chosen": -494.22979736328125, "logps/rejected": -555.0218505859375, "loss": 0.6022, "rewards/accuracies": 0.75, "rewards/chosen": -2.09846830368042, "rewards/margins": 0.9207183122634888, "rewards/rejected": -3.0191867351531982, "step": 7520 }, { "epoch": 0.99, "learning_rate": 3.2143331962256053e-09, "logits/chosen": -0.24794983863830566, "logits/rejected": 0.10213349014520645, "logps/chosen": -464.79620361328125, "logps/rejected": -570.9036865234375, "loss": 0.4865, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.8833863735198975, "rewards/margins": 1.067216157913208, "rewards/rejected": -2.9506025314331055, "step": 7530 }, { "epoch": 0.99, "learning_rate": 2.6613604622066635e-09, "logits/chosen": -0.35508936643600464, "logits/rejected": -0.39137864112854004, "logps/chosen": -444.24786376953125, "logps/rejected": -548.8897705078125, "loss": 0.5311, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8555755615234375, "rewards/margins": 0.892242431640625, "rewards/rejected": -2.7478179931640625, "step": 7540 }, { "epoch": 0.99, "learning_rate": 2.1605197646826228e-09, "logits/chosen": -0.4565364718437195, "logits/rejected": 0.6792299151420593, "logps/chosen": -427.64044189453125, "logps/rejected": -537.2177734375, "loss": 0.4213, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -1.849523901939392, "rewards/margins": 1.3361475467681885, "rewards/rejected": -3.185671806335449, "step": 7550 }, { "epoch": 0.99, "learning_rate": 1.711821558721405e-09, "logits/chosen": -0.9418695569038391, "logits/rejected": 0.49443039298057556, "logps/chosen": -510.28033447265625, "logps/rejected": -537.0841674804688, "loss": 0.4911, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0567002296447754, "rewards/margins": 0.8696410059928894, "rewards/rejected": -2.9263408184051514, "step": 7560 }, { "epoch": 0.99, "learning_rate": 1.3152752109149569e-09, "logits/chosen": -0.4310362935066223, "logits/rejected": 0.2431652992963791, "logps/chosen": -481.66925048828125, "logps/rejected": -550.9615478515625, "loss": 0.5462, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9845142364501953, "rewards/margins": 0.914382815361023, "rewards/rejected": -2.898897171020508, "step": 7570 }, { "epoch": 0.99, "learning_rate": 9.708889991830173e-10, "logits/chosen": -0.8171736001968384, "logits/rejected": 0.5985888242721558, "logps/chosen": -484.5865783691406, "logps/rejected": -540.4600219726562, "loss": 0.4686, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.115016460418701, "rewards/margins": 1.2058589458465576, "rewards/rejected": -3.3208751678466797, "step": 7580 }, { "epoch": 0.99, "learning_rate": 6.786701125999218e-10, "logits/chosen": 0.09515878558158875, "logits/rejected": 0.33796870708465576, "logps/chosen": -481.95538330078125, "logps/rejected": -555.1029052734375, "loss": 0.572, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.285205364227295, "rewards/margins": 0.8154329061508179, "rewards/rejected": -3.100637912750244, "step": 7590 }, { "epoch": 0.99, "learning_rate": 4.3862465124638873e-10, "logits/chosen": -0.09601716697216034, "logits/rejected": -0.01322057843208313, "logps/chosen": -466.84674072265625, "logps/rejected": -536.1382446289062, "loss": 0.5319, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0459887981414795, "rewards/margins": 0.8657206296920776, "rewards/rejected": -2.9117093086242676, "step": 7600 }, { "epoch": 0.99, "eval_logits/chosen": 1.212050437927246, "eval_logits/rejected": 1.967947244644165, "eval_logps/chosen": -469.93450927734375, "eval_logps/rejected": -550.9584350585938, "eval_loss": 0.5057631134986877, "eval_rewards/accuracies": 0.7350000143051147, "eval_rewards/chosen": -2.0144448280334473, "eval_rewards/margins": 1.0093281269073486, "eval_rewards/rejected": -3.023772954940796, "eval_runtime": 1169.363, "eval_samples_per_second": 1.71, "eval_steps_per_second": 0.855, "step": 7600 }, { "epoch": 1.0, "learning_rate": 2.507576260799005e-10, "logits/chosen": -0.826134979724884, "logits/rejected": 0.014235076494514942, "logps/chosen": -475.70306396484375, "logps/rejected": -575.2286376953125, "loss": 0.4462, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.839888334274292, "rewards/margins": 1.1129884719848633, "rewards/rejected": -2.952876567840576, "step": 7610 }, { "epoch": 1.0, "learning_rate": 1.1507295883145253e-10, "logits/chosen": -0.5861895680427551, "logits/rejected": 0.36244645714759827, "logps/chosen": -472.48651123046875, "logps/rejected": -567.5523681640625, "loss": 0.5132, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.945277214050293, "rewards/margins": 0.905789852142334, "rewards/rejected": -2.851067543029785, "step": 7620 }, { "epoch": 1.0, "learning_rate": 3.1573481923952156e-11, "logits/chosen": -0.40739935636520386, "logits/rejected": 0.2489284723997116, "logps/chosen": -516.2659912109375, "logps/rejected": -590.5760498046875, "loss": 0.5475, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.035932779312134, "rewards/margins": 1.092670202255249, "rewards/rejected": -3.1286027431488037, "step": 7630 }, { "epoch": 1.0, "learning_rate": 2.609384119889313e-13, "logits/chosen": -0.07999588549137115, "logits/rejected": 0.025267338380217552, "logps/chosen": -458.60418701171875, "logps/rejected": -581.3901977539062, "loss": 0.4471, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0217761993408203, "rewards/margins": 1.1485462188720703, "rewards/rejected": -3.1703224182128906, "step": 7640 }, { "epoch": 1.0, "step": 7641, "total_flos": 0.0, "train_loss": 0.0026284047499827543, "train_runtime": 361.3652, "train_samples_per_second": 169.178, "train_steps_per_second": 21.145 } ], "logging_steps": 10, "max_steps": 7641, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }