{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9992404101785035, "eval_steps": 500, "global_step": 1316, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.575757575757576e-09, "logits/chosen": -1.3807122707366943, "logits/rejected": -1.4181761741638184, "logps/chosen": -67.83101654052734, "logps/rejected": -72.67066955566406, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.5151515151515152e-08, "logits/chosen": -1.5273715257644653, "logits/rejected": -1.59983491897583, "logps/chosen": -63.50884246826172, "logps/rejected": -72.82698059082031, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.2727272727272725e-08, "logits/chosen": -1.293489933013916, "logits/rejected": -1.282623529434204, "logps/chosen": -57.460914611816406, "logps/rejected": -81.09254455566406, "loss": 0.6937, "rewards/accuracies": 0.625, "rewards/chosen": -0.0016430141404271126, "rewards/margins": -0.0033949974458664656, "rewards/rejected": 0.0017519830726087093, "step": 3 }, { "epoch": 0.01, "learning_rate": 3.0303030303030305e-08, "logits/chosen": -1.381151556968689, "logits/rejected": -1.3468399047851562, "logps/chosen": -52.592010498046875, "logps/rejected": -66.39985656738281, "loss": 0.6944, "rewards/accuracies": 0.625, "rewards/chosen": -0.002350753638893366, "rewards/margins": -0.0003365101292729378, "rewards/rejected": -0.002014243509620428, "step": 4 }, { "epoch": 0.01, "learning_rate": 3.787878787878788e-08, "logits/chosen": -1.365487813949585, "logits/rejected": -1.4028127193450928, "logps/chosen": -48.654415130615234, "logps/rejected": -48.227760314941406, "loss": 0.694, "rewards/accuracies": 0.4375, "rewards/chosen": -0.004889738745987415, "rewards/margins": -0.00731813907623291, "rewards/rejected": 0.002428400330245495, "step": 5 }, { "epoch": 0.01, "learning_rate": 4.545454545454545e-08, "logits/chosen": -1.4787240028381348, "logits/rejected": -1.6369270086288452, "logps/chosen": -55.18429183959961, "logps/rejected": -50.92931365966797, "loss": 0.6947, "rewards/accuracies": 0.375, "rewards/chosen": -0.0007439616019837558, "rewards/margins": -0.0010132314637303352, "rewards/rejected": 0.0002692697453312576, "step": 6 }, { "epoch": 0.01, "learning_rate": 5.303030303030303e-08, "logits/chosen": -1.3732951879501343, "logits/rejected": -1.3683916330337524, "logps/chosen": -75.06265258789062, "logps/rejected": -93.20909118652344, "loss": 0.6926, "rewards/accuracies": 0.5625, "rewards/chosen": 2.2769207134842873e-05, "rewards/margins": 0.0028623221442103386, "rewards/rejected": -0.0028395531699061394, "step": 7 }, { "epoch": 0.01, "learning_rate": 6.060606060606061e-08, "logits/chosen": -1.5228297710418701, "logits/rejected": -1.6090095043182373, "logps/chosen": -63.288326263427734, "logps/rejected": -85.08869171142578, "loss": 0.6934, "rewards/accuracies": 0.625, "rewards/chosen": -0.004945838823914528, "rewards/margins": -6.294244667515159e-05, "rewards/rejected": -0.004882895387709141, "step": 8 }, { "epoch": 0.01, "learning_rate": 6.818181818181817e-08, "logits/chosen": -1.41131591796875, "logits/rejected": -1.5036777257919312, "logps/chosen": -66.2701644897461, "logps/rejected": -61.75237274169922, "loss": 0.6937, "rewards/accuracies": 0.625, "rewards/chosen": 0.009852385148406029, "rewards/margins": 0.008082007989287376, "rewards/rejected": 0.0017703771591186523, "step": 9 }, { "epoch": 0.02, "learning_rate": 7.575757575757576e-08, "logits/chosen": -1.3245943784713745, "logits/rejected": -1.297118067741394, "logps/chosen": -62.03919219970703, "logps/rejected": -86.58907318115234, "loss": 0.6927, "rewards/accuracies": 0.625, "rewards/chosen": 0.0011534811928868294, "rewards/margins": -0.004392742644995451, "rewards/rejected": 0.005546224303543568, "step": 10 }, { "epoch": 0.02, "learning_rate": 8.333333333333333e-08, "logits/chosen": -1.374742031097412, "logits/rejected": -1.3923227787017822, "logps/chosen": -63.650299072265625, "logps/rejected": -75.1951904296875, "loss": 0.6913, "rewards/accuracies": 0.375, "rewards/chosen": 1.7667189240455627e-05, "rewards/margins": -0.0065226079896092415, "rewards/rejected": 0.0065402742475271225, "step": 11 }, { "epoch": 0.02, "learning_rate": 9.09090909090909e-08, "logits/chosen": -1.3470630645751953, "logits/rejected": -1.3567157983779907, "logps/chosen": -58.94744873046875, "logps/rejected": -63.96583557128906, "loss": 0.6928, "rewards/accuracies": 0.375, "rewards/chosen": -0.005938088521361351, "rewards/margins": -0.006357109639793634, "rewards/rejected": 0.00041902053635567427, "step": 12 }, { "epoch": 0.02, "learning_rate": 9.848484848484848e-08, "logits/chosen": -1.3704334497451782, "logits/rejected": -1.4031784534454346, "logps/chosen": -64.4774169921875, "logps/rejected": -74.84626770019531, "loss": 0.6925, "rewards/accuracies": 0.375, "rewards/chosen": -0.0072515010833740234, "rewards/margins": -0.0038907527923583984, "rewards/rejected": -0.0033607487566769123, "step": 13 }, { "epoch": 0.02, "learning_rate": 1.0606060606060605e-07, "logits/chosen": -1.2817316055297852, "logits/rejected": -1.2899329662322998, "logps/chosen": -76.54801177978516, "logps/rejected": -71.5182113647461, "loss": 0.6941, "rewards/accuracies": 0.3125, "rewards/chosen": -0.005866062827408314, "rewards/margins": -0.009477593004703522, "rewards/rejected": 0.0036115292459726334, "step": 14 }, { "epoch": 0.02, "learning_rate": 1.1363636363636363e-07, "logits/chosen": -1.212456226348877, "logits/rejected": -1.2076374292373657, "logps/chosen": -44.21965789794922, "logps/rejected": -67.9637680053711, "loss": 0.6938, "rewards/accuracies": 0.375, "rewards/chosen": 0.0028303861618041992, "rewards/margins": -0.004396808333694935, "rewards/rejected": 0.007227194495499134, "step": 15 }, { "epoch": 0.02, "learning_rate": 1.2121212121212122e-07, "logits/chosen": -1.4582772254943848, "logits/rejected": -1.4182686805725098, "logps/chosen": -45.72659683227539, "logps/rejected": -59.42350769042969, "loss": 0.6925, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0005197167047299445, "rewards/margins": -0.0064195385202765465, "rewards/rejected": 0.0058998228050768375, "step": 16 }, { "epoch": 0.03, "learning_rate": 1.2878787878787877e-07, "logits/chosen": -1.4755264520645142, "logits/rejected": -1.50910222530365, "logps/chosen": -58.174442291259766, "logps/rejected": -64.99958801269531, "loss": 0.6939, "rewards/accuracies": 0.375, "rewards/chosen": 0.0004933952586725354, "rewards/margins": -0.0005321386270225048, "rewards/rejected": 0.0010255335364490747, "step": 17 }, { "epoch": 0.03, "learning_rate": 1.3636363636363635e-07, "logits/chosen": -1.495653510093689, "logits/rejected": -1.5463732481002808, "logps/chosen": -47.88037872314453, "logps/rejected": -57.69328308105469, "loss": 0.6927, "rewards/accuracies": 0.5, "rewards/chosen": 0.0021576883736997843, "rewards/margins": -0.004288875963538885, "rewards/rejected": 0.006446564570069313, "step": 18 }, { "epoch": 0.03, "learning_rate": 1.4393939393939395e-07, "logits/chosen": -1.1813757419586182, "logits/rejected": -1.181291103363037, "logps/chosen": -45.04245376586914, "logps/rejected": -59.69392776489258, "loss": 0.6924, "rewards/accuracies": 0.625, "rewards/chosen": 0.0029491903260350227, "rewards/margins": 0.003839898156002164, "rewards/rejected": -0.0008907080627977848, "step": 19 }, { "epoch": 0.03, "learning_rate": 1.5151515151515152e-07, "logits/chosen": -1.2665643692016602, "logits/rejected": -1.3037891387939453, "logps/chosen": -55.89977264404297, "logps/rejected": -67.01914978027344, "loss": 0.6908, "rewards/accuracies": 0.625, "rewards/chosen": 0.01057122927159071, "rewards/margins": 0.008913875557482243, "rewards/rejected": 0.0016573548782616854, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.5909090909090907e-07, "logits/chosen": -1.468496322631836, "logits/rejected": -1.4428430795669556, "logps/chosen": -60.451072692871094, "logps/rejected": -83.61201477050781, "loss": 0.6912, "rewards/accuracies": 0.5, "rewards/chosen": 0.01200780924409628, "rewards/margins": 0.004002022091299295, "rewards/rejected": 0.008005785755813122, "step": 21 }, { "epoch": 0.03, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -1.2354681491851807, "logits/rejected": -1.279544711112976, "logps/chosen": -64.74099731445312, "logps/rejected": -64.46685791015625, "loss": 0.6939, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0025492431595921516, "rewards/margins": 9.844335727393627e-05, "rewards/rejected": 0.002450800035148859, "step": 22 }, { "epoch": 0.03, "learning_rate": 1.7424242424242425e-07, "logits/chosen": -1.4172277450561523, "logits/rejected": -1.4344230890274048, "logps/chosen": -65.05791473388672, "logps/rejected": -69.5328140258789, "loss": 0.6918, "rewards/accuracies": 0.4375, "rewards/chosen": 0.004820013418793678, "rewards/margins": 0.0006602166686207056, "rewards/rejected": 0.004159796051681042, "step": 23 }, { "epoch": 0.04, "learning_rate": 1.818181818181818e-07, "logits/chosen": -1.2062236070632935, "logits/rejected": -1.2450206279754639, "logps/chosen": -50.588783264160156, "logps/rejected": -53.00873565673828, "loss": 0.6918, "rewards/accuracies": 0.625, "rewards/chosen": 0.000991773558780551, "rewards/margins": 0.0025939582847058773, "rewards/rejected": -0.0016021848423406482, "step": 24 }, { "epoch": 0.04, "learning_rate": 1.8939393939393938e-07, "logits/chosen": -1.419810175895691, "logits/rejected": -1.4961423873901367, "logps/chosen": -64.14460754394531, "logps/rejected": -82.07677459716797, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": 0.011395109817385674, "rewards/margins": 0.0041154976934194565, "rewards/rejected": 0.007279610726982355, "step": 25 }, { "epoch": 0.04, "learning_rate": 1.9696969696969696e-07, "logits/chosen": -1.3785347938537598, "logits/rejected": -1.447749376296997, "logps/chosen": -55.51921844482422, "logps/rejected": -60.01416778564453, "loss": 0.6921, "rewards/accuracies": 0.5625, "rewards/chosen": 0.012294232845306396, "rewards/margins": 0.0012017728295177221, "rewards/rejected": 0.01109245978295803, "step": 26 }, { "epoch": 0.04, "learning_rate": 2.0454545454545456e-07, "logits/chosen": -1.3943463563919067, "logits/rejected": -1.5082389116287231, "logps/chosen": -63.7392578125, "logps/rejected": -64.67830657958984, "loss": 0.6895, "rewards/accuracies": 0.5, "rewards/chosen": 0.015080487355589867, "rewards/margins": 0.004277646541595459, "rewards/rejected": 0.010802840813994408, "step": 27 }, { "epoch": 0.04, "learning_rate": 2.121212121212121e-07, "logits/chosen": -1.3473079204559326, "logits/rejected": -1.3881864547729492, "logps/chosen": -62.74684143066406, "logps/rejected": -65.70709228515625, "loss": 0.6891, "rewards/accuracies": 0.625, "rewards/chosen": 0.01610391214489937, "rewards/margins": 0.006725156679749489, "rewards/rejected": 0.00937875546514988, "step": 28 }, { "epoch": 0.04, "learning_rate": 2.1969696969696968e-07, "logits/chosen": -1.2148323059082031, "logits/rejected": -1.2324485778808594, "logps/chosen": -52.017539978027344, "logps/rejected": -69.51564025878906, "loss": 0.6887, "rewards/accuracies": 0.5625, "rewards/chosen": 0.011382735334336758, "rewards/margins": -0.00423963088542223, "rewards/rejected": 0.015622366219758987, "step": 29 }, { "epoch": 0.05, "learning_rate": 2.2727272727272726e-07, "logits/chosen": -1.3029567003250122, "logits/rejected": -1.3896591663360596, "logps/chosen": -63.54433059692383, "logps/rejected": -73.599365234375, "loss": 0.6866, "rewards/accuracies": 0.75, "rewards/chosen": 0.030646946281194687, "rewards/margins": 0.020649565383791924, "rewards/rejected": 0.009997379966080189, "step": 30 }, { "epoch": 0.05, "learning_rate": 2.3484848484848486e-07, "logits/chosen": -1.0819051265716553, "logits/rejected": -1.1202231645584106, "logps/chosen": -49.7504997253418, "logps/rejected": -48.87813949584961, "loss": 0.6898, "rewards/accuracies": 0.5625, "rewards/chosen": 0.003466451307758689, "rewards/margins": 0.0005712390411645174, "rewards/rejected": 0.002895212033763528, "step": 31 }, { "epoch": 0.05, "learning_rate": 2.4242424242424244e-07, "logits/chosen": -1.436554193496704, "logits/rejected": -1.4304763078689575, "logps/chosen": -66.21983337402344, "logps/rejected": -80.84318542480469, "loss": 0.686, "rewards/accuracies": 0.5625, "rewards/chosen": 0.04641828313469887, "rewards/margins": 0.012152241542935371, "rewards/rejected": 0.034266043454408646, "step": 32 }, { "epoch": 0.05, "learning_rate": 2.5e-07, "logits/chosen": -1.416285753250122, "logits/rejected": -1.388656497001648, "logps/chosen": -47.95892333984375, "logps/rejected": -57.33323287963867, "loss": 0.6873, "rewards/accuracies": 0.6875, "rewards/chosen": 0.021166132763028145, "rewards/margins": 0.012613797560334206, "rewards/rejected": 0.008552337065339088, "step": 33 }, { "epoch": 0.05, "learning_rate": 2.5757575757575754e-07, "logits/chosen": -1.4820764064788818, "logits/rejected": -1.4750490188598633, "logps/chosen": -57.51579284667969, "logps/rejected": -61.07271194458008, "loss": 0.6807, "rewards/accuracies": 0.8125, "rewards/chosen": 0.04040107876062393, "rewards/margins": 0.02762734889984131, "rewards/rejected": 0.012773728929460049, "step": 34 }, { "epoch": 0.05, "learning_rate": 2.6515151515151514e-07, "logits/chosen": -1.3491145372390747, "logits/rejected": -1.3379892110824585, "logps/chosen": -54.85727310180664, "logps/rejected": -59.90010070800781, "loss": 0.6894, "rewards/accuracies": 0.5625, "rewards/chosen": 0.030424263328313828, "rewards/margins": 0.008832884952425957, "rewards/rejected": 0.02159137651324272, "step": 35 }, { "epoch": 0.05, "learning_rate": 2.727272727272727e-07, "logits/chosen": -1.4148902893066406, "logits/rejected": -1.3810207843780518, "logps/chosen": -45.91991424560547, "logps/rejected": -46.03016662597656, "loss": 0.6874, "rewards/accuracies": 0.625, "rewards/chosen": 0.03214998543262482, "rewards/margins": 0.01864071935415268, "rewards/rejected": 0.013509261421859264, "step": 36 }, { "epoch": 0.06, "learning_rate": 2.8030303030303024e-07, "logits/chosen": -1.4646170139312744, "logits/rejected": -1.4692356586456299, "logps/chosen": -61.599639892578125, "logps/rejected": -69.2748794555664, "loss": 0.6861, "rewards/accuracies": 0.75, "rewards/chosen": 0.05117490142583847, "rewards/margins": 0.03509356081485748, "rewards/rejected": 0.016081344336271286, "step": 37 }, { "epoch": 0.06, "learning_rate": 2.878787878787879e-07, "logits/chosen": -1.5223915576934814, "logits/rejected": -1.633446216583252, "logps/chosen": -54.41788101196289, "logps/rejected": -71.25310516357422, "loss": 0.6828, "rewards/accuracies": 0.6875, "rewards/chosen": 0.054975785315036774, "rewards/margins": 0.015454876236617565, "rewards/rejected": 0.03952091187238693, "step": 38 }, { "epoch": 0.06, "learning_rate": 2.9545454545454545e-07, "logits/chosen": -1.29225754737854, "logits/rejected": -1.3325889110565186, "logps/chosen": -60.84377670288086, "logps/rejected": -72.92223358154297, "loss": 0.6801, "rewards/accuracies": 0.6875, "rewards/chosen": 0.06242671236395836, "rewards/margins": 0.02597186714410782, "rewards/rejected": 0.03645484521985054, "step": 39 }, { "epoch": 0.06, "learning_rate": 3.0303030303030305e-07, "logits/chosen": -1.478752851486206, "logits/rejected": -1.4469019174575806, "logps/chosen": -58.46867370605469, "logps/rejected": -72.66609191894531, "loss": 0.677, "rewards/accuracies": 0.625, "rewards/chosen": 0.08628889918327332, "rewards/margins": 0.0161750428378582, "rewards/rejected": 0.07011385262012482, "step": 40 }, { "epoch": 0.06, "learning_rate": 3.106060606060606e-07, "logits/chosen": -1.3904973268508911, "logits/rejected": -1.4015754461288452, "logps/chosen": -54.29767990112305, "logps/rejected": -68.59740447998047, "loss": 0.6778, "rewards/accuracies": 0.625, "rewards/chosen": 0.0754782184958458, "rewards/margins": 0.036283429712057114, "rewards/rejected": 0.03919479250907898, "step": 41 }, { "epoch": 0.06, "learning_rate": 3.1818181818181815e-07, "logits/chosen": -1.5571492910385132, "logits/rejected": -1.5671147108078003, "logps/chosen": -55.728492736816406, "logps/rejected": -67.99396514892578, "loss": 0.6832, "rewards/accuracies": 0.5, "rewards/chosen": 0.06205673888325691, "rewards/margins": -0.0022810909431427717, "rewards/rejected": 0.0643378272652626, "step": 42 }, { "epoch": 0.07, "learning_rate": 3.2575757575757575e-07, "logits/chosen": -1.3594117164611816, "logits/rejected": -1.3907921314239502, "logps/chosen": -58.898040771484375, "logps/rejected": -68.76325988769531, "loss": 0.6672, "rewards/accuracies": 0.75, "rewards/chosen": 0.07264361530542374, "rewards/margins": 0.025914786383509636, "rewards/rejected": 0.04672882333397865, "step": 43 }, { "epoch": 0.07, "learning_rate": 3.333333333333333e-07, "logits/chosen": -1.4709270000457764, "logits/rejected": -1.5028572082519531, "logps/chosen": -62.69089889526367, "logps/rejected": -77.4356918334961, "loss": 0.6712, "rewards/accuracies": 0.75, "rewards/chosen": 0.08955918252468109, "rewards/margins": 0.03507918864488602, "rewards/rejected": 0.054479993879795074, "step": 44 }, { "epoch": 0.07, "learning_rate": 3.4090909090909085e-07, "logits/chosen": -1.2753313779830933, "logits/rejected": -1.3156425952911377, "logps/chosen": -49.9275016784668, "logps/rejected": -63.02009582519531, "loss": 0.6723, "rewards/accuracies": 0.625, "rewards/chosen": 0.07214108854532242, "rewards/margins": 0.01590968668460846, "rewards/rejected": 0.05623140186071396, "step": 45 }, { "epoch": 0.07, "learning_rate": 3.484848484848485e-07, "logits/chosen": -1.5745067596435547, "logits/rejected": -1.5774452686309814, "logps/chosen": -67.59754943847656, "logps/rejected": -87.30855560302734, "loss": 0.6665, "rewards/accuracies": 0.6875, "rewards/chosen": 0.12661635875701904, "rewards/margins": 0.0753166526556015, "rewards/rejected": 0.051299698650836945, "step": 46 }, { "epoch": 0.07, "learning_rate": 3.5606060606060606e-07, "logits/chosen": -1.285461664199829, "logits/rejected": -1.3261513710021973, "logps/chosen": -44.92738342285156, "logps/rejected": -51.15544891357422, "loss": 0.6695, "rewards/accuracies": 0.75, "rewards/chosen": 0.08344868570566177, "rewards/margins": 0.04822618141770363, "rewards/rejected": 0.035222504287958145, "step": 47 }, { "epoch": 0.07, "learning_rate": 3.636363636363636e-07, "logits/chosen": -1.2779335975646973, "logits/rejected": -1.413496494293213, "logps/chosen": -55.0052490234375, "logps/rejected": -67.77876281738281, "loss": 0.6682, "rewards/accuracies": 0.6875, "rewards/chosen": 0.14483140408992767, "rewards/margins": 0.09184764325618744, "rewards/rejected": 0.052983760833740234, "step": 48 }, { "epoch": 0.07, "learning_rate": 3.712121212121212e-07, "logits/chosen": -1.3702609539031982, "logits/rejected": -1.4781056642532349, "logps/chosen": -77.20408630371094, "logps/rejected": -74.00489807128906, "loss": 0.6635, "rewards/accuracies": 0.875, "rewards/chosen": 0.14278045296669006, "rewards/margins": 0.061780668795108795, "rewards/rejected": 0.08099978417158127, "step": 49 }, { "epoch": 0.08, "learning_rate": 3.7878787878787876e-07, "logits/chosen": -1.3604373931884766, "logits/rejected": -1.378310203552246, "logps/chosen": -58.75284194946289, "logps/rejected": -82.84333038330078, "loss": 0.6589, "rewards/accuracies": 0.6875, "rewards/chosen": 0.19303418695926666, "rewards/margins": 0.09257876873016357, "rewards/rejected": 0.10045541822910309, "step": 50 }, { "epoch": 0.08, "learning_rate": 3.8636363636363636e-07, "logits/chosen": -1.3076860904693604, "logits/rejected": -1.4157476425170898, "logps/chosen": -47.252445220947266, "logps/rejected": -48.10454559326172, "loss": 0.6674, "rewards/accuracies": 0.8125, "rewards/chosen": 0.11059942096471786, "rewards/margins": 0.06811001151800156, "rewards/rejected": 0.04248940199613571, "step": 51 }, { "epoch": 0.08, "learning_rate": 3.939393939393939e-07, "logits/chosen": -1.503462791442871, "logits/rejected": -1.5425465106964111, "logps/chosen": -60.388336181640625, "logps/rejected": -70.0484619140625, "loss": 0.6614, "rewards/accuracies": 0.5625, "rewards/chosen": 0.16037140786647797, "rewards/margins": 0.019282342866063118, "rewards/rejected": 0.1410890817642212, "step": 52 }, { "epoch": 0.08, "learning_rate": 4.0151515151515146e-07, "logits/chosen": -1.3380749225616455, "logits/rejected": -1.3209658861160278, "logps/chosen": -52.98939514160156, "logps/rejected": -68.4948501586914, "loss": 0.6593, "rewards/accuracies": 0.5625, "rewards/chosen": 0.19923309981822968, "rewards/margins": 0.01286761462688446, "rewards/rejected": 0.18636548519134521, "step": 53 }, { "epoch": 0.08, "learning_rate": 4.090909090909091e-07, "logits/chosen": -1.282020092010498, "logits/rejected": -1.3592822551727295, "logps/chosen": -48.89101028442383, "logps/rejected": -62.19300079345703, "loss": 0.6654, "rewards/accuracies": 0.625, "rewards/chosen": 0.17862869799137115, "rewards/margins": 0.048022858798503876, "rewards/rejected": 0.13060584664344788, "step": 54 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -1.3051087856292725, "logits/rejected": -1.3205571174621582, "logps/chosen": -59.4260368347168, "logps/rejected": -80.87752532958984, "loss": 0.6517, "rewards/accuracies": 0.625, "rewards/chosen": 0.21322180330753326, "rewards/margins": 0.056215398013591766, "rewards/rejected": 0.1570064127445221, "step": 55 }, { "epoch": 0.09, "learning_rate": 4.242424242424242e-07, "logits/chosen": -1.3351942300796509, "logits/rejected": -1.404450535774231, "logps/chosen": -59.53591537475586, "logps/rejected": -56.089290618896484, "loss": 0.6681, "rewards/accuracies": 0.5625, "rewards/chosen": 0.12059932202100754, "rewards/margins": 0.06928794831037521, "rewards/rejected": 0.051311373710632324, "step": 56 }, { "epoch": 0.09, "learning_rate": 4.318181818181818e-07, "logits/chosen": -1.3024204969406128, "logits/rejected": -1.389835238456726, "logps/chosen": -53.57666778564453, "logps/rejected": -51.41597366333008, "loss": 0.6459, "rewards/accuracies": 0.5, "rewards/chosen": 0.16042464971542358, "rewards/margins": 0.10563376545906067, "rewards/rejected": 0.05479089915752411, "step": 57 }, { "epoch": 0.09, "learning_rate": 4.3939393939393937e-07, "logits/chosen": -1.2929675579071045, "logits/rejected": -1.3011419773101807, "logps/chosen": -53.317405700683594, "logps/rejected": -68.7337875366211, "loss": 0.6455, "rewards/accuracies": 0.4375, "rewards/chosen": 0.1711544543504715, "rewards/margins": 0.006437983829528093, "rewards/rejected": 0.16471648216247559, "step": 58 }, { "epoch": 0.09, "learning_rate": 4.469696969696969e-07, "logits/chosen": -1.5048664808273315, "logits/rejected": -1.5094908475875854, "logps/chosen": -64.76486206054688, "logps/rejected": -78.0581283569336, "loss": 0.641, "rewards/accuracies": 0.625, "rewards/chosen": 0.24148711562156677, "rewards/margins": 0.10250819474458694, "rewards/rejected": 0.13897892832756042, "step": 59 }, { "epoch": 0.09, "learning_rate": 4.545454545454545e-07, "logits/chosen": -1.3591467142105103, "logits/rejected": -1.4223358631134033, "logps/chosen": -52.74089813232422, "logps/rejected": -76.95367431640625, "loss": 0.642, "rewards/accuracies": 0.6875, "rewards/chosen": 0.2069980502128601, "rewards/margins": 0.2083958089351654, "rewards/rejected": -0.001397751271724701, "step": 60 }, { "epoch": 0.09, "learning_rate": 4.6212121212121207e-07, "logits/chosen": -1.4500656127929688, "logits/rejected": -1.4579286575317383, "logps/chosen": -61.141475677490234, "logps/rejected": -67.77528381347656, "loss": 0.6357, "rewards/accuracies": 0.875, "rewards/chosen": 0.1464468240737915, "rewards/margins": 0.12135310471057892, "rewards/rejected": 0.02509371004998684, "step": 61 }, { "epoch": 0.09, "learning_rate": 4.696969696969697e-07, "logits/chosen": -1.3917747735977173, "logits/rejected": -1.3550325632095337, "logps/chosen": -52.82176971435547, "logps/rejected": -73.30123138427734, "loss": 0.6488, "rewards/accuracies": 0.625, "rewards/chosen": 0.14380736649036407, "rewards/margins": 0.0667564794421196, "rewards/rejected": 0.07705089449882507, "step": 62 }, { "epoch": 0.1, "learning_rate": 4.772727272727273e-07, "logits/chosen": -1.5208649635314941, "logits/rejected": -1.4892252683639526, "logps/chosen": -55.34352111816406, "logps/rejected": -67.55403137207031, "loss": 0.6419, "rewards/accuracies": 0.6875, "rewards/chosen": 0.27572011947631836, "rewards/margins": 0.14066900312900543, "rewards/rejected": 0.13505114614963531, "step": 63 }, { "epoch": 0.1, "learning_rate": 4.848484848484849e-07, "logits/chosen": -1.4660046100616455, "logits/rejected": -1.6109917163848877, "logps/chosen": -62.14973449707031, "logps/rejected": -81.52274322509766, "loss": 0.6379, "rewards/accuracies": 0.625, "rewards/chosen": 0.20028699934482574, "rewards/margins": 0.19850493967533112, "rewards/rejected": 0.001782064326107502, "step": 64 }, { "epoch": 0.1, "learning_rate": 4.924242424242424e-07, "logits/chosen": -1.290839672088623, "logits/rejected": -1.2918977737426758, "logps/chosen": -62.30453872680664, "logps/rejected": -71.33212280273438, "loss": 0.6337, "rewards/accuracies": 0.5625, "rewards/chosen": 0.1621263325214386, "rewards/margins": 0.04839283600449562, "rewards/rejected": 0.11373350024223328, "step": 65 }, { "epoch": 0.1, "learning_rate": 5e-07, "logits/chosen": -1.2274879217147827, "logits/rejected": -1.2406153678894043, "logps/chosen": -57.76105499267578, "logps/rejected": -83.968994140625, "loss": 0.6177, "rewards/accuracies": 0.6875, "rewards/chosen": 0.23292019963264465, "rewards/margins": 0.2149038016796112, "rewards/rejected": 0.018016403540968895, "step": 66 }, { "epoch": 0.1, "learning_rate": 5.075757575757576e-07, "logits/chosen": -1.1510586738586426, "logits/rejected": -1.186632513999939, "logps/chosen": -54.71442413330078, "logps/rejected": -70.42817687988281, "loss": 0.6217, "rewards/accuracies": 0.6875, "rewards/chosen": 0.09175221621990204, "rewards/margins": -0.007499314844608307, "rewards/rejected": 0.09925152361392975, "step": 67 }, { "epoch": 0.1, "learning_rate": 5.151515151515151e-07, "logits/chosen": -1.2038687467575073, "logits/rejected": -1.2474325895309448, "logps/chosen": -61.707618713378906, "logps/rejected": -67.5180892944336, "loss": 0.6296, "rewards/accuracies": 0.8125, "rewards/chosen": 0.11510595679283142, "rewards/margins": 0.09858663380146027, "rewards/rejected": 0.016519328579306602, "step": 68 }, { "epoch": 0.1, "learning_rate": 5.227272727272727e-07, "logits/chosen": -1.5192761421203613, "logits/rejected": -1.531144618988037, "logps/chosen": -60.95773696899414, "logps/rejected": -63.483619689941406, "loss": 0.6137, "rewards/accuracies": 0.6875, "rewards/chosen": 0.1562405526638031, "rewards/margins": 0.13872182369232178, "rewards/rejected": 0.01751871407032013, "step": 69 }, { "epoch": 0.11, "learning_rate": 5.303030303030303e-07, "logits/chosen": -1.46200430393219, "logits/rejected": -1.4206883907318115, "logps/chosen": -53.62306594848633, "logps/rejected": -73.84425354003906, "loss": 0.6008, "rewards/accuracies": 0.75, "rewards/chosen": 0.08211082965135574, "rewards/margins": 0.17571085691452026, "rewards/rejected": -0.09360002726316452, "step": 70 }, { "epoch": 0.11, "learning_rate": 5.378787878787878e-07, "logits/chosen": -1.3949741125106812, "logits/rejected": -1.3646326065063477, "logps/chosen": -50.04539108276367, "logps/rejected": -71.26206970214844, "loss": 0.6139, "rewards/accuracies": 0.875, "rewards/chosen": 0.14944177865982056, "rewards/margins": 0.17371398210525513, "rewards/rejected": -0.02427222579717636, "step": 71 }, { "epoch": 0.11, "learning_rate": 5.454545454545454e-07, "logits/chosen": -1.2625566720962524, "logits/rejected": -1.30435049533844, "logps/chosen": -42.66155242919922, "logps/rejected": -49.39188003540039, "loss": 0.6212, "rewards/accuracies": 0.625, "rewards/chosen": 0.04858655110001564, "rewards/margins": 0.08578862994909286, "rewards/rejected": -0.03720208257436752, "step": 72 }, { "epoch": 0.11, "learning_rate": 5.53030303030303e-07, "logits/chosen": -1.5173357725143433, "logits/rejected": -1.532628059387207, "logps/chosen": -67.30522918701172, "logps/rejected": -71.20864868164062, "loss": 0.592, "rewards/accuracies": 0.875, "rewards/chosen": 0.016212619841098785, "rewards/margins": 0.17678993940353394, "rewards/rejected": -0.16057732701301575, "step": 73 }, { "epoch": 0.11, "learning_rate": 5.606060606060605e-07, "logits/chosen": -1.314016342163086, "logits/rejected": -1.355907917022705, "logps/chosen": -77.79779815673828, "logps/rejected": -84.5552978515625, "loss": 0.5903, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0052999164909124374, "rewards/margins": 0.1510595828294754, "rewards/rejected": -0.1563594937324524, "step": 74 }, { "epoch": 0.11, "learning_rate": 5.681818181818182e-07, "logits/chosen": -1.2979378700256348, "logits/rejected": -1.4032480716705322, "logps/chosen": -54.55912780761719, "logps/rejected": -66.57144165039062, "loss": 0.6116, "rewards/accuracies": 0.75, "rewards/chosen": 0.04821896553039551, "rewards/margins": 0.2483121156692505, "rewards/rejected": -0.2000931352376938, "step": 75 }, { "epoch": 0.12, "learning_rate": 5.757575757575758e-07, "logits/chosen": -1.4066622257232666, "logits/rejected": -1.4482512474060059, "logps/chosen": -60.11941909790039, "logps/rejected": -67.46295928955078, "loss": 0.6201, "rewards/accuracies": 0.6875, "rewards/chosen": 0.03395650535821915, "rewards/margins": 0.29136669635772705, "rewards/rejected": -0.2574101984500885, "step": 76 }, { "epoch": 0.12, "learning_rate": 5.833333333333334e-07, "logits/chosen": -1.3306366205215454, "logits/rejected": -1.3925280570983887, "logps/chosen": -52.87839889526367, "logps/rejected": -55.65316390991211, "loss": 0.6075, "rewards/accuracies": 0.6875, "rewards/chosen": -0.050175078213214874, "rewards/margins": 0.07563228905200958, "rewards/rejected": -0.12580737471580505, "step": 77 }, { "epoch": 0.12, "learning_rate": 5.909090909090909e-07, "logits/chosen": -1.249314546585083, "logits/rejected": -1.247490644454956, "logps/chosen": -54.777767181396484, "logps/rejected": -61.90538024902344, "loss": 0.5993, "rewards/accuracies": 0.6875, "rewards/chosen": 0.041874658316373825, "rewards/margins": 0.25838491320610046, "rewards/rejected": -0.21651026606559753, "step": 78 }, { "epoch": 0.12, "learning_rate": 5.984848484848485e-07, "logits/chosen": -1.4066355228424072, "logits/rejected": -1.3875881433486938, "logps/chosen": -55.322410583496094, "logps/rejected": -82.0425033569336, "loss": 0.5885, "rewards/accuracies": 0.6875, "rewards/chosen": -0.033765971660614014, "rewards/margins": 0.2818932831287384, "rewards/rejected": -0.3156592845916748, "step": 79 }, { "epoch": 0.12, "learning_rate": 6.060606060606061e-07, "logits/chosen": -1.3568596839904785, "logits/rejected": -1.355711579322815, "logps/chosen": -63.79819869995117, "logps/rejected": -87.84752655029297, "loss": 0.5669, "rewards/accuracies": 0.875, "rewards/chosen": -0.0628647729754448, "rewards/margins": 0.43628817796707153, "rewards/rejected": -0.49915292859077454, "step": 80 }, { "epoch": 0.12, "learning_rate": 6.136363636363636e-07, "logits/chosen": -1.2920787334442139, "logits/rejected": -1.2909367084503174, "logps/chosen": -72.8036880493164, "logps/rejected": -84.17581176757812, "loss": 0.5831, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06573189795017242, "rewards/margins": 0.2303919941186905, "rewards/rejected": -0.2961239218711853, "step": 81 }, { "epoch": 0.12, "learning_rate": 6.212121212121212e-07, "logits/chosen": -1.2058041095733643, "logits/rejected": -1.2265359163284302, "logps/chosen": -54.95501708984375, "logps/rejected": -68.9476318359375, "loss": 0.5531, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1221294105052948, "rewards/margins": 0.22339659929275513, "rewards/rejected": -0.3455260097980499, "step": 82 }, { "epoch": 0.13, "learning_rate": 6.287878787878788e-07, "logits/chosen": -1.2530492544174194, "logits/rejected": -1.2605494260787964, "logps/chosen": -50.06599807739258, "logps/rejected": -68.54713439941406, "loss": 0.5978, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0020802952349185944, "rewards/margins": 0.3581036925315857, "rewards/rejected": -0.3601840138435364, "step": 83 }, { "epoch": 0.13, "learning_rate": 6.363636363636363e-07, "logits/chosen": -1.2875895500183105, "logits/rejected": -1.2904086112976074, "logps/chosen": -63.59492111206055, "logps/rejected": -92.8702163696289, "loss": 0.5374, "rewards/accuracies": 1.0, "rewards/chosen": -0.033722538501024246, "rewards/margins": 0.7385894656181335, "rewards/rejected": -0.7723120450973511, "step": 84 }, { "epoch": 0.13, "learning_rate": 6.439393939393939e-07, "logits/chosen": -1.319128155708313, "logits/rejected": -1.2768744230270386, "logps/chosen": -65.39422607421875, "logps/rejected": -85.60679626464844, "loss": 0.5433, "rewards/accuracies": 0.75, "rewards/chosen": -0.2017182856798172, "rewards/margins": 0.40415698289871216, "rewards/rejected": -0.6058752536773682, "step": 85 }, { "epoch": 0.13, "learning_rate": 6.515151515151515e-07, "logits/chosen": -1.2891089916229248, "logits/rejected": -1.3245259523391724, "logps/chosen": -70.21843719482422, "logps/rejected": -87.76618194580078, "loss": 0.598, "rewards/accuracies": 0.75, "rewards/chosen": -0.12800543010234833, "rewards/margins": 0.27882272005081177, "rewards/rejected": -0.4068281352519989, "step": 86 }, { "epoch": 0.13, "learning_rate": 6.59090909090909e-07, "logits/chosen": -1.4151794910430908, "logits/rejected": -1.401626467704773, "logps/chosen": -59.638519287109375, "logps/rejected": -68.63182830810547, "loss": 0.5671, "rewards/accuracies": 0.6875, "rewards/chosen": -0.18449069559574127, "rewards/margins": 0.26218655705451965, "rewards/rejected": -0.4466772973537445, "step": 87 }, { "epoch": 0.13, "learning_rate": 6.666666666666666e-07, "logits/chosen": -1.4876341819763184, "logits/rejected": -1.5497742891311646, "logps/chosen": -53.505035400390625, "logps/rejected": -62.61443328857422, "loss": 0.5365, "rewards/accuracies": 0.875, "rewards/chosen": 0.03885147348046303, "rewards/margins": 0.4145755469799042, "rewards/rejected": -0.37572407722473145, "step": 88 }, { "epoch": 0.14, "learning_rate": 6.742424242424242e-07, "logits/chosen": -1.4708367586135864, "logits/rejected": -1.3771567344665527, "logps/chosen": -61.228538513183594, "logps/rejected": -93.87561798095703, "loss": 0.545, "rewards/accuracies": 0.8125, "rewards/chosen": 0.0057802870869636536, "rewards/margins": 0.9394412040710449, "rewards/rejected": -0.9336608648300171, "step": 89 }, { "epoch": 0.14, "learning_rate": 6.818181818181817e-07, "logits/chosen": -1.2658331394195557, "logits/rejected": -1.2438105344772339, "logps/chosen": -64.95509338378906, "logps/rejected": -86.05477905273438, "loss": 0.5244, "rewards/accuracies": 0.75, "rewards/chosen": -0.18022720515727997, "rewards/margins": 0.41284486651420593, "rewards/rejected": -0.5930720567703247, "step": 90 }, { "epoch": 0.14, "learning_rate": 6.893939393939394e-07, "logits/chosen": -1.3612754344940186, "logits/rejected": -1.4404058456420898, "logps/chosen": -59.00148391723633, "logps/rejected": -73.94998931884766, "loss": 0.5279, "rewards/accuracies": 0.6875, "rewards/chosen": -0.15143747627735138, "rewards/margins": 0.48363757133483887, "rewards/rejected": -0.635075032711029, "step": 91 }, { "epoch": 0.14, "learning_rate": 6.96969696969697e-07, "logits/chosen": -1.200008511543274, "logits/rejected": -1.2430647611618042, "logps/chosen": -67.3596420288086, "logps/rejected": -78.98017883300781, "loss": 0.5235, "rewards/accuracies": 0.75, "rewards/chosen": -0.24770724773406982, "rewards/margins": 0.5027315020561218, "rewards/rejected": -0.750438928604126, "step": 92 }, { "epoch": 0.14, "learning_rate": 7.045454545454545e-07, "logits/chosen": -1.2663078308105469, "logits/rejected": -1.3646279573440552, "logps/chosen": -56.52938461303711, "logps/rejected": -52.08761215209961, "loss": 0.5377, "rewards/accuracies": 0.75, "rewards/chosen": -0.09041959047317505, "rewards/margins": 0.3694082498550415, "rewards/rejected": -0.45982789993286133, "step": 93 }, { "epoch": 0.14, "learning_rate": 7.121212121212121e-07, "logits/chosen": -1.483896017074585, "logits/rejected": -1.491857647895813, "logps/chosen": -73.71580505371094, "logps/rejected": -99.14268493652344, "loss": 0.5347, "rewards/accuracies": 0.875, "rewards/chosen": -0.40936753153800964, "rewards/margins": 0.4666964113712311, "rewards/rejected": -0.876063883304596, "step": 94 }, { "epoch": 0.14, "learning_rate": 7.196969696969697e-07, "logits/chosen": -1.2688690423965454, "logits/rejected": -1.2628036737442017, "logps/chosen": -54.340267181396484, "logps/rejected": -68.0820083618164, "loss": 0.5163, "rewards/accuracies": 0.75, "rewards/chosen": -0.13473013043403625, "rewards/margins": 0.5201348662376404, "rewards/rejected": -0.654865026473999, "step": 95 }, { "epoch": 0.15, "learning_rate": 7.272727272727272e-07, "logits/chosen": -1.4142446517944336, "logits/rejected": -1.478228211402893, "logps/chosen": -52.192970275878906, "logps/rejected": -60.232112884521484, "loss": 0.5442, "rewards/accuracies": 0.5625, "rewards/chosen": 0.002001902088522911, "rewards/margins": 0.3312196135520935, "rewards/rejected": -0.32921773195266724, "step": 96 }, { "epoch": 0.15, "learning_rate": 7.348484848484848e-07, "logits/chosen": -1.3240199089050293, "logits/rejected": -1.3259228467941284, "logps/chosen": -55.829490661621094, "logps/rejected": -55.56840896606445, "loss": 0.5668, "rewards/accuracies": 0.5625, "rewards/chosen": -0.15988041460514069, "rewards/margins": 0.2161427140235901, "rewards/rejected": -0.3760231137275696, "step": 97 }, { "epoch": 0.15, "learning_rate": 7.424242424242424e-07, "logits/chosen": -1.319766879081726, "logits/rejected": -1.4090697765350342, "logps/chosen": -73.19944763183594, "logps/rejected": -93.69942474365234, "loss": 0.5177, "rewards/accuracies": 0.9375, "rewards/chosen": -0.215353861451149, "rewards/margins": 0.9013051390647888, "rewards/rejected": -1.1166590452194214, "step": 98 }, { "epoch": 0.15, "learning_rate": 7.5e-07, "logits/chosen": -1.4314652681350708, "logits/rejected": -1.3855012655258179, "logps/chosen": -48.45336151123047, "logps/rejected": -68.33036041259766, "loss": 0.5303, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10106388479471207, "rewards/margins": 0.4430334270000458, "rewards/rejected": -0.5440973043441772, "step": 99 }, { "epoch": 0.15, "learning_rate": 7.575757575757575e-07, "logits/chosen": -1.2473658323287964, "logits/rejected": -1.2665302753448486, "logps/chosen": -55.45205307006836, "logps/rejected": -78.337158203125, "loss": 0.4852, "rewards/accuracies": 0.875, "rewards/chosen": -0.011083535850048065, "rewards/margins": 0.746026337146759, "rewards/rejected": -0.7571098208427429, "step": 100 }, { "epoch": 0.15, "learning_rate": 7.651515151515151e-07, "logits/chosen": -1.236082911491394, "logits/rejected": -1.2414617538452148, "logps/chosen": -53.64671325683594, "logps/rejected": -63.89655685424805, "loss": 0.4884, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08944252133369446, "rewards/margins": 0.36017340421676636, "rewards/rejected": -0.4496158957481384, "step": 101 }, { "epoch": 0.15, "learning_rate": 7.727272727272727e-07, "logits/chosen": -1.2842726707458496, "logits/rejected": -1.2782001495361328, "logps/chosen": -60.47846221923828, "logps/rejected": -79.14984130859375, "loss": 0.5645, "rewards/accuracies": 0.6875, "rewards/chosen": -0.32786887884140015, "rewards/margins": 0.5022405982017517, "rewards/rejected": -0.8301095366477966, "step": 102 }, { "epoch": 0.16, "learning_rate": 7.803030303030302e-07, "logits/chosen": -1.2620081901550293, "logits/rejected": -1.2633405923843384, "logps/chosen": -55.8270263671875, "logps/rejected": -70.67143249511719, "loss": 0.4981, "rewards/accuracies": 0.625, "rewards/chosen": -0.3648158013820648, "rewards/margins": 0.2358836680650711, "rewards/rejected": -0.6006994843482971, "step": 103 }, { "epoch": 0.16, "learning_rate": 7.878787878787878e-07, "logits/chosen": -1.277204990386963, "logits/rejected": -1.1961885690689087, "logps/chosen": -82.07122039794922, "logps/rejected": -129.94085693359375, "loss": 0.4846, "rewards/accuracies": 0.8125, "rewards/chosen": -0.860465407371521, "rewards/margins": 1.5867725610733032, "rewards/rejected": -2.447237730026245, "step": 104 }, { "epoch": 0.16, "learning_rate": 7.954545454545454e-07, "logits/chosen": -1.3882551193237305, "logits/rejected": -1.36410391330719, "logps/chosen": -65.95565795898438, "logps/rejected": -90.56163787841797, "loss": 0.5046, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6373096704483032, "rewards/margins": 0.6799200177192688, "rewards/rejected": -1.3172297477722168, "step": 105 }, { "epoch": 0.16, "learning_rate": 8.030303030303029e-07, "logits/chosen": -1.3937575817108154, "logits/rejected": -1.3328791856765747, "logps/chosen": -57.98670959472656, "logps/rejected": -79.21855926513672, "loss": 0.4929, "rewards/accuracies": 0.875, "rewards/chosen": -0.134065181016922, "rewards/margins": 0.8324288725852966, "rewards/rejected": -0.966494083404541, "step": 106 }, { "epoch": 0.16, "learning_rate": 8.106060606060605e-07, "logits/chosen": -1.4467005729675293, "logits/rejected": -1.4381572008132935, "logps/chosen": -55.658119201660156, "logps/rejected": -83.31585693359375, "loss": 0.4194, "rewards/accuracies": 0.75, "rewards/chosen": -0.19678352773189545, "rewards/margins": 0.6790809035301208, "rewards/rejected": -0.8758644461631775, "step": 107 }, { "epoch": 0.16, "learning_rate": 8.181818181818182e-07, "logits/chosen": -1.5325909852981567, "logits/rejected": -1.5394513607025146, "logps/chosen": -65.08736419677734, "logps/rejected": -81.30486297607422, "loss": 0.48, "rewards/accuracies": 0.8125, "rewards/chosen": -0.41833633184432983, "rewards/margins": 0.7128567099571228, "rewards/rejected": -1.1311931610107422, "step": 108 }, { "epoch": 0.17, "learning_rate": 8.257575757575757e-07, "logits/chosen": -1.427512526512146, "logits/rejected": -1.4086472988128662, "logps/chosen": -54.339439392089844, "logps/rejected": -65.26164245605469, "loss": 0.4509, "rewards/accuracies": 0.6875, "rewards/chosen": -0.36659955978393555, "rewards/margins": 0.5181149840354919, "rewards/rejected": -0.8847146034240723, "step": 109 }, { "epoch": 0.17, "learning_rate": 8.333333333333333e-07, "logits/chosen": -1.4420437812805176, "logits/rejected": -1.508358120918274, "logps/chosen": -61.45826721191406, "logps/rejected": -67.23968505859375, "loss": 0.5122, "rewards/accuracies": 0.875, "rewards/chosen": -0.21704250574111938, "rewards/margins": 0.7157589197158813, "rewards/rejected": -0.9328014850616455, "step": 110 }, { "epoch": 0.17, "learning_rate": 8.409090909090909e-07, "logits/chosen": -1.3607494831085205, "logits/rejected": -1.3761709928512573, "logps/chosen": -69.81553649902344, "logps/rejected": -89.93431091308594, "loss": 0.5065, "rewards/accuracies": 0.8125, "rewards/chosen": -0.39086639881134033, "rewards/margins": 0.9802790880203247, "rewards/rejected": -1.3711453676223755, "step": 111 }, { "epoch": 0.17, "learning_rate": 8.484848484848484e-07, "logits/chosen": -1.4498722553253174, "logits/rejected": -1.448058843612671, "logps/chosen": -74.12974548339844, "logps/rejected": -102.22052764892578, "loss": 0.4436, "rewards/accuracies": 0.875, "rewards/chosen": -0.5492932796478271, "rewards/margins": 1.652042031288147, "rewards/rejected": -2.2013354301452637, "step": 112 }, { "epoch": 0.17, "learning_rate": 8.56060606060606e-07, "logits/chosen": -1.2922266721725464, "logits/rejected": -1.374589204788208, "logps/chosen": -63.6357421875, "logps/rejected": -59.48933029174805, "loss": 0.5046, "rewards/accuracies": 0.6875, "rewards/chosen": -0.13962438702583313, "rewards/margins": 0.5601056814193726, "rewards/rejected": -0.6997300982475281, "step": 113 }, { "epoch": 0.17, "learning_rate": 8.636363636363636e-07, "logits/chosen": -1.510749101638794, "logits/rejected": -1.550467848777771, "logps/chosen": -81.38545227050781, "logps/rejected": -88.50811004638672, "loss": 0.451, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6597554087638855, "rewards/margins": 1.0560970306396484, "rewards/rejected": -1.7158524990081787, "step": 114 }, { "epoch": 0.17, "learning_rate": 8.712121212121211e-07, "logits/chosen": -1.3277736902236938, "logits/rejected": -1.3583914041519165, "logps/chosen": -78.43363952636719, "logps/rejected": -99.82984924316406, "loss": 0.5054, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9456002712249756, "rewards/margins": 1.1029608249664307, "rewards/rejected": -2.0485610961914062, "step": 115 }, { "epoch": 0.18, "learning_rate": 8.787878787878787e-07, "logits/chosen": -1.2548108100891113, "logits/rejected": -1.3178308010101318, "logps/chosen": -62.914791107177734, "logps/rejected": -77.83401489257812, "loss": 0.4835, "rewards/accuracies": 0.75, "rewards/chosen": -0.17691561579704285, "rewards/margins": 1.2272669076919556, "rewards/rejected": -1.4041826725006104, "step": 116 }, { "epoch": 0.18, "learning_rate": 8.863636363636363e-07, "logits/chosen": -1.2925139665603638, "logits/rejected": -1.2939532995224, "logps/chosen": -71.66761779785156, "logps/rejected": -99.22976684570312, "loss": 0.4363, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5545389652252197, "rewards/margins": 1.1594491004943848, "rewards/rejected": -1.713987946510315, "step": 117 }, { "epoch": 0.18, "learning_rate": 8.939393939393938e-07, "logits/chosen": -1.3311680555343628, "logits/rejected": -1.3106731176376343, "logps/chosen": -86.64567565917969, "logps/rejected": -128.3566436767578, "loss": 0.4361, "rewards/accuracies": 0.875, "rewards/chosen": -1.0433001518249512, "rewards/margins": 1.8462791442871094, "rewards/rejected": -2.8895792961120605, "step": 118 }, { "epoch": 0.18, "learning_rate": 9.015151515151514e-07, "logits/chosen": -1.1761585474014282, "logits/rejected": -1.1626079082489014, "logps/chosen": -83.92056274414062, "logps/rejected": -114.5244140625, "loss": 0.4723, "rewards/accuracies": 0.75, "rewards/chosen": -1.011547327041626, "rewards/margins": 1.5983021259307861, "rewards/rejected": -2.609849691390991, "step": 119 }, { "epoch": 0.18, "learning_rate": 9.09090909090909e-07, "logits/chosen": -1.3708627223968506, "logits/rejected": -1.380934715270996, "logps/chosen": -51.2702751159668, "logps/rejected": -69.57611083984375, "loss": 0.4129, "rewards/accuracies": 0.9375, "rewards/chosen": 0.15028327703475952, "rewards/margins": 1.162183165550232, "rewards/rejected": -1.0118999481201172, "step": 120 }, { "epoch": 0.18, "learning_rate": 9.166666666666665e-07, "logits/chosen": -1.3708436489105225, "logits/rejected": -1.4203503131866455, "logps/chosen": -84.83291625976562, "logps/rejected": -139.04513549804688, "loss": 0.3989, "rewards/accuracies": 0.875, "rewards/chosen": -1.1144614219665527, "rewards/margins": 2.0489420890808105, "rewards/rejected": -3.1634035110473633, "step": 121 }, { "epoch": 0.19, "learning_rate": 9.242424242424241e-07, "logits/chosen": -1.3400905132293701, "logits/rejected": -1.424918532371521, "logps/chosen": -82.42213439941406, "logps/rejected": -95.20804595947266, "loss": 0.5044, "rewards/accuracies": 0.625, "rewards/chosen": -1.1995664834976196, "rewards/margins": 1.1138718128204346, "rewards/rejected": -2.3134384155273438, "step": 122 }, { "epoch": 0.19, "learning_rate": 9.318181818181817e-07, "logits/chosen": -1.2418166399002075, "logits/rejected": -1.2201536893844604, "logps/chosen": -46.085845947265625, "logps/rejected": -55.85955810546875, "loss": 0.4017, "rewards/accuracies": 0.8125, "rewards/chosen": 0.09070870280265808, "rewards/margins": 0.5683165192604065, "rewards/rejected": -0.477607786655426, "step": 123 }, { "epoch": 0.19, "learning_rate": 9.393939393939395e-07, "logits/chosen": -1.3297370672225952, "logits/rejected": -1.3739547729492188, "logps/chosen": -66.36666107177734, "logps/rejected": -73.92923736572266, "loss": 0.4433, "rewards/accuracies": 0.625, "rewards/chosen": -0.41327372193336487, "rewards/margins": 0.7713428735733032, "rewards/rejected": -1.1846165657043457, "step": 124 }, { "epoch": 0.19, "learning_rate": 9.46969696969697e-07, "logits/chosen": -1.3880527019500732, "logits/rejected": -1.376543402671814, "logps/chosen": -65.552734375, "logps/rejected": -87.94761657714844, "loss": 0.4418, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7102217674255371, "rewards/margins": 1.1007181406021118, "rewards/rejected": -1.810939908027649, "step": 125 }, { "epoch": 0.19, "learning_rate": 9.545454545454546e-07, "logits/chosen": -1.6728534698486328, "logits/rejected": -1.6981301307678223, "logps/chosen": -69.73593139648438, "logps/rejected": -79.36700439453125, "loss": 0.5261, "rewards/accuracies": 0.625, "rewards/chosen": -0.649253785610199, "rewards/margins": 0.627336859703064, "rewards/rejected": -1.2765905857086182, "step": 126 }, { "epoch": 0.19, "learning_rate": 9.62121212121212e-07, "logits/chosen": -1.3037967681884766, "logits/rejected": -1.2259622812271118, "logps/chosen": -60.86170959472656, "logps/rejected": -105.67593383789062, "loss": 0.4194, "rewards/accuracies": 0.8125, "rewards/chosen": -0.348906934261322, "rewards/margins": 2.006357431411743, "rewards/rejected": -2.355264186859131, "step": 127 }, { "epoch": 0.19, "learning_rate": 9.696969696969698e-07, "logits/chosen": -1.259075403213501, "logits/rejected": -1.2268812656402588, "logps/chosen": -58.85130310058594, "logps/rejected": -86.45140838623047, "loss": 0.4037, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3437346816062927, "rewards/margins": 1.4063847064971924, "rewards/rejected": -1.7501193284988403, "step": 128 }, { "epoch": 0.2, "learning_rate": 9.772727272727273e-07, "logits/chosen": -1.3443701267242432, "logits/rejected": -1.3386574983596802, "logps/chosen": -57.95923614501953, "logps/rejected": -102.79905700683594, "loss": 0.3809, "rewards/accuracies": 0.8125, "rewards/chosen": -0.22556929290294647, "rewards/margins": 2.10101580619812, "rewards/rejected": -2.326585292816162, "step": 129 }, { "epoch": 0.2, "learning_rate": 9.848484848484847e-07, "logits/chosen": -1.279806137084961, "logits/rejected": -1.2831989526748657, "logps/chosen": -64.24227905273438, "logps/rejected": -79.32403564453125, "loss": 0.3785, "rewards/accuracies": 0.875, "rewards/chosen": -0.04765484482049942, "rewards/margins": 1.1074268817901611, "rewards/rejected": -1.155081868171692, "step": 130 }, { "epoch": 0.2, "learning_rate": 9.924242424242425e-07, "logits/chosen": -1.3265643119812012, "logits/rejected": -1.3697947263717651, "logps/chosen": -94.21955871582031, "logps/rejected": -132.9693603515625, "loss": 0.3656, "rewards/accuracies": 1.0, "rewards/chosen": -0.6402519941329956, "rewards/margins": 2.4263713359832764, "rewards/rejected": -3.0666234493255615, "step": 131 }, { "epoch": 0.2, "learning_rate": 1e-06, "logits/chosen": -1.4270625114440918, "logits/rejected": -1.464454174041748, "logps/chosen": -71.64118194580078, "logps/rejected": -91.30876159667969, "loss": 0.4357, "rewards/accuracies": 0.875, "rewards/chosen": -0.22757545113563538, "rewards/margins": 1.7800164222717285, "rewards/rejected": -2.007591962814331, "step": 132 }, { "epoch": 0.2, "learning_rate": 9.999982399050598e-07, "logits/chosen": -1.386523723602295, "logits/rejected": -1.3936899900436401, "logps/chosen": -49.9768180847168, "logps/rejected": -69.42765045166016, "loss": 0.4158, "rewards/accuracies": 0.75, "rewards/chosen": 0.28144755959510803, "rewards/margins": 1.190410852432251, "rewards/rejected": -0.9089633822441101, "step": 133 }, { "epoch": 0.2, "learning_rate": 9.999929596326304e-07, "logits/chosen": -1.3149131536483765, "logits/rejected": -1.3466674089431763, "logps/chosen": -68.77780151367188, "logps/rejected": -82.54281616210938, "loss": 0.4099, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05727508291602135, "rewards/margins": 1.5080410242080688, "rewards/rejected": -1.5653159618377686, "step": 134 }, { "epoch": 0.21, "learning_rate": 9.999841592198874e-07, "logits/chosen": -1.215659260749817, "logits/rejected": -1.192206621170044, "logps/chosen": -60.977264404296875, "logps/rejected": -91.90313720703125, "loss": 0.3569, "rewards/accuracies": 0.9375, "rewards/chosen": 0.03573242574930191, "rewards/margins": 1.7384833097457886, "rewards/rejected": -1.7027509212493896, "step": 135 }, { "epoch": 0.21, "learning_rate": 9.99971838728789e-07, "logits/chosen": -1.2210043668746948, "logits/rejected": -1.244004249572754, "logps/chosen": -73.441650390625, "logps/rejected": -119.06057739257812, "loss": 0.3778, "rewards/accuracies": 0.875, "rewards/chosen": -0.45418426394462585, "rewards/margins": 2.3686211109161377, "rewards/rejected": -2.822805643081665, "step": 136 }, { "epoch": 0.21, "learning_rate": 9.99955998246076e-07, "logits/chosen": -1.4085693359375, "logits/rejected": -1.4438612461090088, "logps/chosen": -51.40415954589844, "logps/rejected": -74.1273193359375, "loss": 0.3579, "rewards/accuracies": 0.875, "rewards/chosen": 0.07564006000757217, "rewards/margins": 1.2299882173538208, "rewards/rejected": -1.1543481349945068, "step": 137 }, { "epoch": 0.21, "learning_rate": 9.99936637883271e-07, "logits/chosen": -1.30047607421875, "logits/rejected": -1.3149194717407227, "logps/chosen": -62.247467041015625, "logps/rejected": -81.31385803222656, "loss": 0.3915, "rewards/accuracies": 0.6875, "rewards/chosen": 0.048665329813957214, "rewards/margins": 1.2062422037124634, "rewards/rejected": -1.1575767993927002, "step": 138 }, { "epoch": 0.21, "learning_rate": 9.999137577766792e-07, "logits/chosen": -1.3509010076522827, "logits/rejected": -1.3184211254119873, "logps/chosen": -56.17451095581055, "logps/rejected": -91.11962890625, "loss": 0.3801, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08695222437381744, "rewards/margins": 1.9988842010498047, "rewards/rejected": -2.085836410522461, "step": 139 }, { "epoch": 0.21, "learning_rate": 9.998873580873846e-07, "logits/chosen": -1.379082202911377, "logits/rejected": -1.3597187995910645, "logps/chosen": -68.40926361083984, "logps/rejected": -92.97391510009766, "loss": 0.4229, "rewards/accuracies": 0.75, "rewards/chosen": -0.08268654346466064, "rewards/margins": 0.9664369225502014, "rewards/rejected": -1.0491235256195068, "step": 140 }, { "epoch": 0.21, "learning_rate": 9.998574390012513e-07, "logits/chosen": -1.411400556564331, "logits/rejected": -1.4529073238372803, "logps/chosen": -61.78650665283203, "logps/rejected": -81.70208740234375, "loss": 0.3474, "rewards/accuracies": 0.625, "rewards/chosen": -0.1299721598625183, "rewards/margins": 1.5512878894805908, "rewards/rejected": -1.6812599897384644, "step": 141 }, { "epoch": 0.22, "learning_rate": 9.99824000728921e-07, "logits/chosen": -1.2770847082138062, "logits/rejected": -1.2510144710540771, "logps/chosen": -39.36260986328125, "logps/rejected": -66.36067199707031, "loss": 0.3939, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3309634327888489, "rewards/margins": 1.3281357288360596, "rewards/rejected": -0.9971722364425659, "step": 142 }, { "epoch": 0.22, "learning_rate": 9.997870435058115e-07, "logits/chosen": -1.340492606163025, "logits/rejected": -1.3173463344573975, "logps/chosen": -49.027099609375, "logps/rejected": -73.99095153808594, "loss": 0.3731, "rewards/accuracies": 0.75, "rewards/chosen": 0.4593149721622467, "rewards/margins": 1.2830872535705566, "rewards/rejected": -0.8237722516059875, "step": 143 }, { "epoch": 0.22, "learning_rate": 9.997465675921162e-07, "logits/chosen": -1.1999183893203735, "logits/rejected": -1.1477478742599487, "logps/chosen": -68.66267395019531, "logps/rejected": -112.85301971435547, "loss": 0.3905, "rewards/accuracies": 0.6875, "rewards/chosen": -0.46990731358528137, "rewards/margins": 2.243889808654785, "rewards/rejected": -2.713797092437744, "step": 144 }, { "epoch": 0.22, "learning_rate": 9.997025732728006e-07, "logits/chosen": -1.4426615238189697, "logits/rejected": -1.471621036529541, "logps/chosen": -81.21881103515625, "logps/rejected": -91.69154357910156, "loss": 0.4525, "rewards/accuracies": 0.625, "rewards/chosen": -1.194870114326477, "rewards/margins": 0.6878187656402588, "rewards/rejected": -1.8826889991760254, "step": 145 }, { "epoch": 0.22, "learning_rate": 9.996550608576013e-07, "logits/chosen": -1.2865259647369385, "logits/rejected": -1.3044980764389038, "logps/chosen": -53.92206954956055, "logps/rejected": -68.4139175415039, "loss": 0.3055, "rewards/accuracies": 0.75, "rewards/chosen": 0.4023975133895874, "rewards/margins": 1.186065673828125, "rewards/rejected": -0.7836681604385376, "step": 146 }, { "epoch": 0.22, "learning_rate": 9.996040306810242e-07, "logits/chosen": -1.2497471570968628, "logits/rejected": -1.3371587991714478, "logps/chosen": -59.97561264038086, "logps/rejected": -72.31210327148438, "loss": 0.3395, "rewards/accuracies": 0.75, "rewards/chosen": 0.3099679946899414, "rewards/margins": 1.4178704023361206, "rewards/rejected": -1.1079025268554688, "step": 147 }, { "epoch": 0.22, "learning_rate": 9.995494831023408e-07, "logits/chosen": -1.3360226154327393, "logits/rejected": -1.3658947944641113, "logps/chosen": -55.79327392578125, "logps/rejected": -78.41800689697266, "loss": 0.3534, "rewards/accuracies": 0.6875, "rewards/chosen": 0.2654246985912323, "rewards/margins": 1.3563873767852783, "rewards/rejected": -1.0909627676010132, "step": 148 }, { "epoch": 0.23, "learning_rate": 9.994914185055867e-07, "logits/chosen": -1.3814332485198975, "logits/rejected": -1.3996297121047974, "logps/chosen": -65.52349853515625, "logps/rejected": -92.29167175292969, "loss": 0.372, "rewards/accuracies": 0.875, "rewards/chosen": 0.14941172301769257, "rewards/margins": 1.968096137046814, "rewards/rejected": -1.8186845779418945, "step": 149 }, { "epoch": 0.23, "learning_rate": 9.99429837299559e-07, "logits/chosen": -1.2469031810760498, "logits/rejected": -1.2378968000411987, "logps/chosen": -55.9729118347168, "logps/rejected": -81.20050811767578, "loss": 0.3292, "rewards/accuracies": 0.8125, "rewards/chosen": -0.061600759625434875, "rewards/margins": 1.8637723922729492, "rewards/rejected": -1.9253731966018677, "step": 150 }, { "epoch": 0.23, "learning_rate": 9.993647399178123e-07, "logits/chosen": -1.1675657033920288, "logits/rejected": -1.0918349027633667, "logps/chosen": -59.07560348510742, "logps/rejected": -112.66256713867188, "loss": 0.368, "rewards/accuracies": 0.875, "rewards/chosen": -0.7189707159996033, "rewards/margins": 2.3366379737854004, "rewards/rejected": -3.0556085109710693, "step": 151 }, { "epoch": 0.23, "learning_rate": 9.992961268186572e-07, "logits/chosen": -1.3458789587020874, "logits/rejected": -1.322076439857483, "logps/chosen": -50.53932571411133, "logps/rejected": -84.67821502685547, "loss": 0.3689, "rewards/accuracies": 0.875, "rewards/chosen": 0.14475339651107788, "rewards/margins": 1.5784586668014526, "rewards/rejected": -1.43370521068573, "step": 152 }, { "epoch": 0.23, "learning_rate": 9.992239984851562e-07, "logits/chosen": -1.5444601774215698, "logits/rejected": -1.5224881172180176, "logps/chosen": -65.60322570800781, "logps/rejected": -96.27873229980469, "loss": 0.3173, "rewards/accuracies": 0.875, "rewards/chosen": -0.181745707988739, "rewards/margins": 1.8834576606750488, "rewards/rejected": -2.0652034282684326, "step": 153 }, { "epoch": 0.23, "learning_rate": 9.9914835542512e-07, "logits/chosen": -1.2116072177886963, "logits/rejected": -1.2211272716522217, "logps/chosen": -66.64263916015625, "logps/rejected": -97.93750762939453, "loss": 0.3044, "rewards/accuracies": 0.875, "rewards/chosen": 0.26799696683883667, "rewards/margins": 2.5864644050598145, "rewards/rejected": -2.318467617034912, "step": 154 }, { "epoch": 0.24, "learning_rate": 9.990691981711042e-07, "logits/chosen": -1.2259119749069214, "logits/rejected": -1.3609488010406494, "logps/chosen": -55.120872497558594, "logps/rejected": -86.47576141357422, "loss": 0.3215, "rewards/accuracies": 0.875, "rewards/chosen": 0.20785972476005554, "rewards/margins": 2.307239532470703, "rewards/rejected": -2.0993800163269043, "step": 155 }, { "epoch": 0.24, "learning_rate": 9.989865272804063e-07, "logits/chosen": -1.2843170166015625, "logits/rejected": -1.2973507642745972, "logps/chosen": -68.40337371826172, "logps/rejected": -103.91558074951172, "loss": 0.2876, "rewards/accuracies": 0.875, "rewards/chosen": -0.2823537588119507, "rewards/margins": 2.520174026489258, "rewards/rejected": -2.802527666091919, "step": 156 }, { "epoch": 0.24, "learning_rate": 9.989003433350606e-07, "logits/chosen": -1.3156423568725586, "logits/rejected": -1.2813544273376465, "logps/chosen": -49.433929443359375, "logps/rejected": -66.11580657958984, "loss": 0.4019, "rewards/accuracies": 0.6875, "rewards/chosen": 0.211796373128891, "rewards/margins": 1.0712887048721313, "rewards/rejected": -0.859492301940918, "step": 157 }, { "epoch": 0.24, "learning_rate": 9.988106469418345e-07, "logits/chosen": -1.396899700164795, "logits/rejected": -1.3826349973678589, "logps/chosen": -49.73061752319336, "logps/rejected": -65.69740295410156, "loss": 0.3342, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2982715368270874, "rewards/margins": 1.3624143600463867, "rewards/rejected": -1.0641428232192993, "step": 158 }, { "epoch": 0.24, "learning_rate": 9.98717438732225e-07, "logits/chosen": -1.414467692375183, "logits/rejected": -1.382720708847046, "logps/chosen": -55.65098571777344, "logps/rejected": -86.2964859008789, "loss": 0.2899, "rewards/accuracies": 1.0, "rewards/chosen": -0.11677652597427368, "rewards/margins": 2.016666889190674, "rewards/rejected": -2.1334433555603027, "step": 159 }, { "epoch": 0.24, "learning_rate": 9.986207193624536e-07, "logits/chosen": -1.189499020576477, "logits/rejected": -1.2207542657852173, "logps/chosen": -56.323951721191406, "logps/rejected": -104.06111145019531, "loss": 0.2893, "rewards/accuracies": 0.75, "rewards/chosen": 0.07249424606561661, "rewards/margins": 2.821072816848755, "rewards/rejected": -2.7485785484313965, "step": 160 }, { "epoch": 0.24, "learning_rate": 9.985204895134607e-07, "logits/chosen": -1.0789204835891724, "logits/rejected": -1.0679121017456055, "logps/chosen": -60.86206817626953, "logps/rejected": -104.61509704589844, "loss": 0.3352, "rewards/accuracies": 0.8125, "rewards/chosen": 0.19751030206680298, "rewards/margins": 2.8937127590179443, "rewards/rejected": -2.696202516555786, "step": 161 }, { "epoch": 0.25, "learning_rate": 9.98416749890903e-07, "logits/chosen": -1.1677380800247192, "logits/rejected": -1.162474274635315, "logps/chosen": -74.80203247070312, "logps/rejected": -125.43745422363281, "loss": 0.3155, "rewards/accuracies": 0.75, "rewards/chosen": -0.34360015392303467, "rewards/margins": 3.467597723007202, "rewards/rejected": -3.8111977577209473, "step": 162 }, { "epoch": 0.25, "learning_rate": 9.983095012251467e-07, "logits/chosen": -1.2328966856002808, "logits/rejected": -1.223034381866455, "logps/chosen": -50.8116455078125, "logps/rejected": -91.66698455810547, "loss": 0.276, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16697055101394653, "rewards/margins": 2.390049695968628, "rewards/rejected": -2.223079204559326, "step": 163 }, { "epoch": 0.25, "learning_rate": 9.98198744271263e-07, "logits/chosen": -1.2168715000152588, "logits/rejected": -1.3405088186264038, "logps/chosen": -74.32440185546875, "logps/rejected": -107.49734497070312, "loss": 0.3153, "rewards/accuracies": 0.9375, "rewards/chosen": -0.514243483543396, "rewards/margins": 3.4297971725463867, "rewards/rejected": -3.9440410137176514, "step": 164 }, { "epoch": 0.25, "learning_rate": 9.980844798090233e-07, "logits/chosen": -1.2657420635223389, "logits/rejected": -1.2185579538345337, "logps/chosen": -87.26468658447266, "logps/rejected": -117.57492065429688, "loss": 0.3192, "rewards/accuracies": 0.9375, "rewards/chosen": -1.79978346824646, "rewards/margins": 2.257918357849121, "rewards/rejected": -4.05770206451416, "step": 165 }, { "epoch": 0.25, "learning_rate": 9.979667086428925e-07, "logits/chosen": -1.0757566690444946, "logits/rejected": -1.0354247093200684, "logps/chosen": -57.32990646362305, "logps/rejected": -102.35804748535156, "loss": 0.3785, "rewards/accuracies": 0.75, "rewards/chosen": -0.5685438513755798, "rewards/margins": 2.6311147212982178, "rewards/rejected": -3.1996583938598633, "step": 166 }, { "epoch": 0.25, "learning_rate": 9.978454316020244e-07, "logits/chosen": -1.0912585258483887, "logits/rejected": -1.1047651767730713, "logps/chosen": -78.06692504882812, "logps/rejected": -97.17657470703125, "loss": 0.3359, "rewards/accuracies": 0.75, "rewards/chosen": -1.1147944927215576, "rewards/margins": 1.6447203159332275, "rewards/rejected": -2.759514808654785, "step": 167 }, { "epoch": 0.26, "learning_rate": 9.977206495402552e-07, "logits/chosen": -1.2980186939239502, "logits/rejected": -1.243241786956787, "logps/chosen": -57.57561111450195, "logps/rejected": -105.88298034667969, "loss": 0.2606, "rewards/accuracies": 0.875, "rewards/chosen": -0.19986918568611145, "rewards/margins": 2.2533702850341797, "rewards/rejected": -2.453239679336548, "step": 168 }, { "epoch": 0.26, "learning_rate": 9.975923633360984e-07, "logits/chosen": -1.2510402202606201, "logits/rejected": -1.2807258367538452, "logps/chosen": -72.90382385253906, "logps/rejected": -104.51939392089844, "loss": 0.3472, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4487581253051758, "rewards/margins": 3.244266986846924, "rewards/rejected": -3.6930251121520996, "step": 169 }, { "epoch": 0.26, "learning_rate": 9.974605738927374e-07, "logits/chosen": -1.0293492078781128, "logits/rejected": -1.0240594148635864, "logps/chosen": -54.199405670166016, "logps/rejected": -85.39093780517578, "loss": 0.3387, "rewards/accuracies": 0.875, "rewards/chosen": 0.00886218249797821, "rewards/margins": 2.4028754234313965, "rewards/rejected": -2.3940131664276123, "step": 170 }, { "epoch": 0.26, "learning_rate": 9.973252821380198e-07, "logits/chosen": -1.195522665977478, "logits/rejected": -1.2614532709121704, "logps/chosen": -57.30488967895508, "logps/rejected": -89.09839630126953, "loss": 0.3472, "rewards/accuracies": 0.875, "rewards/chosen": 0.08645845949649811, "rewards/margins": 2.9136736392974854, "rewards/rejected": -2.8272151947021484, "step": 171 }, { "epoch": 0.26, "learning_rate": 9.971864890244513e-07, "logits/chosen": -1.032387137413025, "logits/rejected": -0.9830933809280396, "logps/chosen": -81.36624145507812, "logps/rejected": -131.3253936767578, "loss": 0.276, "rewards/accuracies": 0.875, "rewards/chosen": -1.3707561492919922, "rewards/margins": 2.856058359146118, "rewards/rejected": -4.226814270019531, "step": 172 }, { "epoch": 0.26, "learning_rate": 9.970441955291877e-07, "logits/chosen": -1.1625866889953613, "logits/rejected": -1.2023017406463623, "logps/chosen": -70.9615707397461, "logps/rejected": -101.77046966552734, "loss": 0.2518, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08144064992666245, "rewards/margins": 2.5768473148345947, "rewards/rejected": -2.65828800201416, "step": 173 }, { "epoch": 0.26, "learning_rate": 9.968984026540296e-07, "logits/chosen": -1.0575189590454102, "logits/rejected": -1.1296477317810059, "logps/chosen": -44.96015167236328, "logps/rejected": -86.93184661865234, "loss": 0.2624, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8304497599601746, "rewards/margins": 3.159740686416626, "rewards/rejected": -2.3292911052703857, "step": 174 }, { "epoch": 0.27, "learning_rate": 9.96749111425414e-07, "logits/chosen": -1.0364952087402344, "logits/rejected": -1.1027450561523438, "logps/chosen": -69.6294937133789, "logps/rejected": -111.92215728759766, "loss": 0.2836, "rewards/accuracies": 0.875, "rewards/chosen": -0.2097761183977127, "rewards/margins": 2.9586079120635986, "rewards/rejected": -3.168384313583374, "step": 175 }, { "epoch": 0.27, "learning_rate": 9.965963228944076e-07, "logits/chosen": -1.1743711233139038, "logits/rejected": -1.1658631563186646, "logps/chosen": -57.486873626708984, "logps/rejected": -104.13549041748047, "loss": 0.2484, "rewards/accuracies": 1.0, "rewards/chosen": 0.3816438317298889, "rewards/margins": 3.8634872436523438, "rewards/rejected": -3.4818437099456787, "step": 176 }, { "epoch": 0.27, "learning_rate": 9.964400381367002e-07, "logits/chosen": -1.299519658088684, "logits/rejected": -1.297895908355713, "logps/chosen": -53.962806701660156, "logps/rejected": -98.18833923339844, "loss": 0.2612, "rewards/accuracies": 0.875, "rewards/chosen": 0.29737111926078796, "rewards/margins": 3.225770950317383, "rewards/rejected": -2.9283993244171143, "step": 177 }, { "epoch": 0.27, "learning_rate": 9.962802582525957e-07, "logits/chosen": -1.2027112245559692, "logits/rejected": -1.1979259252548218, "logps/chosen": -52.533470153808594, "logps/rejected": -99.93737030029297, "loss": 0.3, "rewards/accuracies": 0.875, "rewards/chosen": 0.5650640726089478, "rewards/margins": 3.342776298522949, "rewards/rejected": -2.777712106704712, "step": 178 }, { "epoch": 0.27, "learning_rate": 9.96116984367005e-07, "logits/chosen": -1.3352652788162231, "logits/rejected": -1.313057541847229, "logps/chosen": -60.1168327331543, "logps/rejected": -139.58270263671875, "loss": 0.376, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5508058667182922, "rewards/margins": 4.521890163421631, "rewards/rejected": -3.9710841178894043, "step": 179 }, { "epoch": 0.27, "learning_rate": 9.959502176294382e-07, "logits/chosen": -1.3684498071670532, "logits/rejected": -1.4441702365875244, "logps/chosen": -47.63343811035156, "logps/rejected": -90.16297912597656, "loss": 0.2907, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8288148641586304, "rewards/margins": 3.3544418811798096, "rewards/rejected": -2.525627374649048, "step": 180 }, { "epoch": 0.27, "learning_rate": 9.95779959213997e-07, "logits/chosen": -1.2334184646606445, "logits/rejected": -1.2272242307662964, "logps/chosen": -55.64075469970703, "logps/rejected": -104.79720306396484, "loss": 0.2692, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4517117738723755, "rewards/margins": 3.394285202026367, "rewards/rejected": -2.9425735473632812, "step": 181 }, { "epoch": 0.28, "learning_rate": 9.956062103193646e-07, "logits/chosen": -1.1894168853759766, "logits/rejected": -1.1877219676971436, "logps/chosen": -70.23963928222656, "logps/rejected": -126.3914794921875, "loss": 0.2532, "rewards/accuracies": 0.875, "rewards/chosen": -0.06568023562431335, "rewards/margins": 3.421466588973999, "rewards/rejected": -3.487147092819214, "step": 182 }, { "epoch": 0.28, "learning_rate": 9.954289721687996e-07, "logits/chosen": -0.8898589015007019, "logits/rejected": -0.928949773311615, "logps/chosen": -75.32648468017578, "logps/rejected": -101.60359191894531, "loss": 0.2379, "rewards/accuracies": 0.875, "rewards/chosen": -0.34705063700675964, "rewards/margins": 2.680817127227783, "rewards/rejected": -3.0278680324554443, "step": 183 }, { "epoch": 0.28, "learning_rate": 9.95248246010126e-07, "logits/chosen": -1.1582351922988892, "logits/rejected": -1.1300815343856812, "logps/chosen": -68.10409545898438, "logps/rejected": -137.97059631347656, "loss": 0.2815, "rewards/accuracies": 0.9375, "rewards/chosen": -0.38671061396598816, "rewards/margins": 4.390896797180176, "rewards/rejected": -4.777607440948486, "step": 184 }, { "epoch": 0.28, "learning_rate": 9.95064033115724e-07, "logits/chosen": -1.1521022319793701, "logits/rejected": -1.104382872581482, "logps/chosen": -68.8747329711914, "logps/rejected": -105.67164611816406, "loss": 0.3112, "rewards/accuracies": 0.75, "rewards/chosen": -0.5325960516929626, "rewards/margins": 2.1952645778656006, "rewards/rejected": -2.727860689163208, "step": 185 }, { "epoch": 0.28, "learning_rate": 9.948763347825228e-07, "logits/chosen": -0.9579401016235352, "logits/rejected": -0.9352121949195862, "logps/chosen": -47.36273193359375, "logps/rejected": -93.3355941772461, "loss": 0.3195, "rewards/accuracies": 0.8125, "rewards/chosen": -0.1721753031015396, "rewards/margins": 3.135554313659668, "rewards/rejected": -3.307729721069336, "step": 186 }, { "epoch": 0.28, "learning_rate": 9.946851523319902e-07, "logits/chosen": -1.2986584901809692, "logits/rejected": -1.3009908199310303, "logps/chosen": -62.94734573364258, "logps/rejected": -113.10549926757812, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": -0.2780383825302124, "rewards/margins": 3.869349241256714, "rewards/rejected": -4.147387504577637, "step": 187 }, { "epoch": 0.29, "learning_rate": 9.944904871101226e-07, "logits/chosen": -1.2574782371520996, "logits/rejected": -1.2606135606765747, "logps/chosen": -70.70069885253906, "logps/rejected": -127.10391998291016, "loss": 0.1957, "rewards/accuracies": 1.0, "rewards/chosen": -0.3717065453529358, "rewards/margins": 3.6318910121917725, "rewards/rejected": -4.003597259521484, "step": 188 }, { "epoch": 0.29, "learning_rate": 9.942923404874375e-07, "logits/chosen": -1.2451214790344238, "logits/rejected": -1.2099590301513672, "logps/chosen": -77.06783294677734, "logps/rejected": -159.39584350585938, "loss": 0.2788, "rewards/accuracies": 0.875, "rewards/chosen": -0.39847332239151, "rewards/margins": 5.332844257354736, "rewards/rejected": -5.731317520141602, "step": 189 }, { "epoch": 0.29, "learning_rate": 9.940907138589622e-07, "logits/chosen": -1.1888874769210815, "logits/rejected": -1.2535089254379272, "logps/chosen": -50.92986297607422, "logps/rejected": -92.7094955444336, "loss": 0.2835, "rewards/accuracies": 0.9375, "rewards/chosen": -0.09499190747737885, "rewards/margins": 3.2975873947143555, "rewards/rejected": -3.3925790786743164, "step": 190 }, { "epoch": 0.29, "learning_rate": 9.93885608644225e-07, "logits/chosen": -1.2434340715408325, "logits/rejected": -1.249043583869934, "logps/chosen": -60.137359619140625, "logps/rejected": -101.47135925292969, "loss": 0.2708, "rewards/accuracies": 0.6875, "rewards/chosen": 0.4408572316169739, "rewards/margins": 3.2096920013427734, "rewards/rejected": -2.768834352493286, "step": 191 }, { "epoch": 0.29, "learning_rate": 9.936770262872443e-07, "logits/chosen": -1.2576887607574463, "logits/rejected": -1.2570699453353882, "logps/chosen": -64.34721374511719, "logps/rejected": -108.09801483154297, "loss": 0.3193, "rewards/accuracies": 0.875, "rewards/chosen": 0.3119131922721863, "rewards/margins": 3.3604087829589844, "rewards/rejected": -3.048495292663574, "step": 192 }, { "epoch": 0.29, "learning_rate": 9.934649682565191e-07, "logits/chosen": -1.1765260696411133, "logits/rejected": -1.2171003818511963, "logps/chosen": -54.831790924072266, "logps/rejected": -84.8888931274414, "loss": 0.2554, "rewards/accuracies": 0.75, "rewards/chosen": 0.1674397885799408, "rewards/margins": 2.4152469635009766, "rewards/rejected": -2.247807264328003, "step": 193 }, { "epoch": 0.29, "learning_rate": 9.932494360450184e-07, "logits/chosen": -1.1340199708938599, "logits/rejected": -1.1504125595092773, "logps/chosen": -70.6257095336914, "logps/rejected": -103.02034759521484, "loss": 0.242, "rewards/accuracies": 0.9375, "rewards/chosen": 0.10550439357757568, "rewards/margins": 3.1961519718170166, "rewards/rejected": -3.0906476974487305, "step": 194 }, { "epoch": 0.3, "learning_rate": 9.930304311701708e-07, "logits/chosen": -1.1937754154205322, "logits/rejected": -1.2025905847549438, "logps/chosen": -50.76273727416992, "logps/rejected": -83.76286315917969, "loss": 0.2905, "rewards/accuracies": 0.875, "rewards/chosen": 0.6456944346427917, "rewards/margins": 2.7552313804626465, "rewards/rejected": -2.109536647796631, "step": 195 }, { "epoch": 0.3, "learning_rate": 9.928079551738541e-07, "logits/chosen": -1.2389689683914185, "logits/rejected": -1.2017418146133423, "logps/chosen": -63.504573822021484, "logps/rejected": -107.82794952392578, "loss": 0.2614, "rewards/accuracies": 0.875, "rewards/chosen": 0.2145373821258545, "rewards/margins": 2.6952617168426514, "rewards/rejected": -2.480724334716797, "step": 196 }, { "epoch": 0.3, "learning_rate": 9.925820096223836e-07, "logits/chosen": -1.2189017534255981, "logits/rejected": -1.194325566291809, "logps/chosen": -69.2046127319336, "logps/rejected": -106.79684448242188, "loss": 0.3245, "rewards/accuracies": 0.875, "rewards/chosen": -0.10513018071651459, "rewards/margins": 2.890178918838501, "rewards/rejected": -2.9953088760375977, "step": 197 }, { "epoch": 0.3, "learning_rate": 9.923525961065017e-07, "logits/chosen": -1.2128559350967407, "logits/rejected": -1.1879463195800781, "logps/chosen": -47.020294189453125, "logps/rejected": -67.4661636352539, "loss": 0.2451, "rewards/accuracies": 0.875, "rewards/chosen": -0.01765884831547737, "rewards/margins": 1.2542365789413452, "rewards/rejected": -1.271895408630371, "step": 198 }, { "epoch": 0.3, "learning_rate": 9.92119716241367e-07, "logits/chosen": -1.1332924365997314, "logits/rejected": -1.2118947505950928, "logps/chosen": -74.53262329101562, "logps/rejected": -114.64993286132812, "loss": 0.2726, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5381104946136475, "rewards/margins": 3.5477006435394287, "rewards/rejected": -4.085811138153076, "step": 199 }, { "epoch": 0.3, "learning_rate": 9.918833716665418e-07, "logits/chosen": -1.3250250816345215, "logits/rejected": -1.3644132614135742, "logps/chosen": -57.79987335205078, "logps/rejected": -91.33990478515625, "loss": 0.2522, "rewards/accuracies": 0.6875, "rewards/chosen": -0.40653082728385925, "rewards/margins": 2.3988020420074463, "rewards/rejected": -2.805332899093628, "step": 200 }, { "epoch": 0.31, "learning_rate": 9.916435640459816e-07, "logits/chosen": -1.116808295249939, "logits/rejected": -1.1277410984039307, "logps/chosen": -58.26860427856445, "logps/rejected": -125.79907989501953, "loss": 0.1899, "rewards/accuracies": 1.0, "rewards/chosen": 0.07275482267141342, "rewards/margins": 4.3385233879089355, "rewards/rejected": -4.265768051147461, "step": 201 }, { "epoch": 0.31, "learning_rate": 9.914002950680238e-07, "logits/chosen": -1.174288272857666, "logits/rejected": -1.2112677097320557, "logps/chosen": -67.45306396484375, "logps/rejected": -127.85252380371094, "loss": 0.243, "rewards/accuracies": 1.0, "rewards/chosen": -0.22681477665901184, "rewards/margins": 4.279499053955078, "rewards/rejected": -4.506314277648926, "step": 202 }, { "epoch": 0.31, "learning_rate": 9.911535664453736e-07, "logits/chosen": -1.1215676069259644, "logits/rejected": -1.160021185874939, "logps/chosen": -74.76897430419922, "logps/rejected": -114.86557006835938, "loss": 0.224, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1561341285705566, "rewards/margins": 3.272386074066162, "rewards/rejected": -4.428520202636719, "step": 203 }, { "epoch": 0.31, "learning_rate": 9.909033799150946e-07, "logits/chosen": -1.0842782258987427, "logits/rejected": -1.1059097051620483, "logps/chosen": -53.11720657348633, "logps/rejected": -105.79139709472656, "loss": 0.3057, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6020134687423706, "rewards/margins": 4.452319622039795, "rewards/rejected": -3.850306272506714, "step": 204 }, { "epoch": 0.31, "learning_rate": 9.906497372385948e-07, "logits/chosen": -1.105309247970581, "logits/rejected": -1.138174057006836, "logps/chosen": -44.412696838378906, "logps/rejected": -71.90165710449219, "loss": 0.2139, "rewards/accuracies": 1.0, "rewards/chosen": 0.4434084892272949, "rewards/margins": 2.3051414489746094, "rewards/rejected": -1.8617329597473145, "step": 205 }, { "epoch": 0.31, "learning_rate": 9.90392640201615e-07, "logits/chosen": -1.3656604290008545, "logits/rejected": -1.3901586532592773, "logps/chosen": -75.88610076904297, "logps/rejected": -128.65650939941406, "loss": 0.2505, "rewards/accuracies": 0.875, "rewards/chosen": -0.6848592162132263, "rewards/margins": 3.7529115676879883, "rewards/rejected": -4.437770843505859, "step": 206 }, { "epoch": 0.31, "learning_rate": 9.901320906142164e-07, "logits/chosen": -1.0678926706314087, "logits/rejected": -1.1116093397140503, "logps/chosen": -58.641170501708984, "logps/rejected": -92.04139709472656, "loss": 0.3181, "rewards/accuracies": 1.0, "rewards/chosen": 0.1833564043045044, "rewards/margins": 3.2483644485473633, "rewards/rejected": -3.0650081634521484, "step": 207 }, { "epoch": 0.32, "learning_rate": 9.898680903107666e-07, "logits/chosen": -1.4462071657180786, "logits/rejected": -1.4025518894195557, "logps/chosen": -52.31578826904297, "logps/rejected": -99.5600357055664, "loss": 0.2927, "rewards/accuracies": 0.875, "rewards/chosen": -0.09301090240478516, "rewards/margins": 3.4032158851623535, "rewards/rejected": -3.4962263107299805, "step": 208 }, { "epoch": 0.32, "learning_rate": 9.89600641149928e-07, "logits/chosen": -1.1321766376495361, "logits/rejected": -1.1258957386016846, "logps/chosen": -60.00716018676758, "logps/rejected": -103.17889404296875, "loss": 0.2202, "rewards/accuracies": 0.875, "rewards/chosen": 0.041563332080841064, "rewards/margins": 3.4386918544769287, "rewards/rejected": -3.3971283435821533, "step": 209 }, { "epoch": 0.32, "learning_rate": 9.893297450146444e-07, "logits/chosen": -1.1479976177215576, "logits/rejected": -1.25742506980896, "logps/chosen": -47.2739372253418, "logps/rejected": -67.57801818847656, "loss": 0.2437, "rewards/accuracies": 0.875, "rewards/chosen": 0.7823903560638428, "rewards/margins": 2.870223045349121, "rewards/rejected": -2.0878329277038574, "step": 210 }, { "epoch": 0.32, "learning_rate": 9.890554038121272e-07, "logits/chosen": -1.3039093017578125, "logits/rejected": -1.329890489578247, "logps/chosen": -83.64131164550781, "logps/rejected": -140.93768310546875, "loss": 0.1856, "rewards/accuracies": 1.0, "rewards/chosen": -0.3537253439426422, "rewards/margins": 4.876173973083496, "rewards/rejected": -5.229898929595947, "step": 211 }, { "epoch": 0.32, "learning_rate": 9.887776194738431e-07, "logits/chosen": -1.1970943212509155, "logits/rejected": -1.1890867948532104, "logps/chosen": -50.33533477783203, "logps/rejected": -102.55012512207031, "loss": 0.2216, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2218342274427414, "rewards/margins": 4.181835174560547, "rewards/rejected": -3.960000991821289, "step": 212 }, { "epoch": 0.32, "learning_rate": 9.88496393955499e-07, "logits/chosen": -1.1353914737701416, "logits/rejected": -1.090192198753357, "logps/chosen": -60.373779296875, "logps/rejected": -134.13621520996094, "loss": 0.2127, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05712069571018219, "rewards/margins": 5.0528154373168945, "rewards/rejected": -4.995695114135742, "step": 213 }, { "epoch": 0.33, "learning_rate": 9.882117292370295e-07, "logits/chosen": -1.2194281816482544, "logits/rejected": -1.2036105394363403, "logps/chosen": -57.46979904174805, "logps/rejected": -101.6165542602539, "loss": 0.2892, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1329529583454132, "rewards/margins": 3.2106685638427734, "rewards/rejected": -3.0777151584625244, "step": 214 }, { "epoch": 0.33, "learning_rate": 9.87923627322582e-07, "logits/chosen": -1.1778866052627563, "logits/rejected": -1.2296476364135742, "logps/chosen": -68.89556121826172, "logps/rejected": -125.4426498413086, "loss": 0.2999, "rewards/accuracies": 0.9375, "rewards/chosen": -0.49690526723861694, "rewards/margins": 4.3306884765625, "rewards/rejected": -4.827593803405762, "step": 215 }, { "epoch": 0.33, "learning_rate": 9.87632090240504e-07, "logits/chosen": -1.0935524702072144, "logits/rejected": -1.1171512603759766, "logps/chosen": -55.67235565185547, "logps/rejected": -103.5525131225586, "loss": 0.2469, "rewards/accuracies": 0.875, "rewards/chosen": 0.23011675477027893, "rewards/margins": 4.043491840362549, "rewards/rejected": -3.8133747577667236, "step": 216 }, { "epoch": 0.33, "learning_rate": 9.873371200433268e-07, "logits/chosen": -1.3669008016586304, "logits/rejected": -1.4004020690917969, "logps/chosen": -75.74365234375, "logps/rejected": -142.23971557617188, "loss": 0.2561, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0013476610183716, "rewards/margins": 4.953007221221924, "rewards/rejected": -5.954354763031006, "step": 217 }, { "epoch": 0.33, "learning_rate": 9.87038718807753e-07, "logits/chosen": -1.1259562969207764, "logits/rejected": -1.133103370666504, "logps/chosen": -50.491188049316406, "logps/rejected": -103.2459716796875, "loss": 0.2994, "rewards/accuracies": 1.0, "rewards/chosen": -0.14921081066131592, "rewards/margins": 3.5174503326416016, "rewards/rejected": -3.666661024093628, "step": 218 }, { "epoch": 0.33, "learning_rate": 9.867368886346399e-07, "logits/chosen": -1.2843149900436401, "logits/rejected": -1.2164344787597656, "logps/chosen": -56.01287078857422, "logps/rejected": -117.17066955566406, "loss": 0.2595, "rewards/accuracies": 0.8125, "rewards/chosen": 0.04519526660442352, "rewards/margins": 3.4883525371551514, "rewards/rejected": -3.443157196044922, "step": 219 }, { "epoch": 0.33, "learning_rate": 9.864316316489872e-07, "logits/chosen": -1.1004031896591187, "logits/rejected": -1.053981065750122, "logps/chosen": -57.81025314331055, "logps/rejected": -111.32896423339844, "loss": 0.3343, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6295335292816162, "rewards/margins": 3.3364522457122803, "rewards/rejected": -3.9659860134124756, "step": 220 }, { "epoch": 0.34, "learning_rate": 9.8612294999992e-07, "logits/chosen": -1.2155131101608276, "logits/rejected": -1.3413316011428833, "logps/chosen": -44.69001770019531, "logps/rejected": -102.89868927001953, "loss": 0.2707, "rewards/accuracies": 1.0, "rewards/chosen": 0.5029670000076294, "rewards/margins": 4.66603422164917, "rewards/rejected": -4.16306734085083, "step": 221 }, { "epoch": 0.34, "learning_rate": 9.858108458606738e-07, "logits/chosen": -1.024707317352295, "logits/rejected": -1.0007303953170776, "logps/chosen": -50.13225173950195, "logps/rejected": -92.38772583007812, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": 0.6516220569610596, "rewards/margins": 3.045931816101074, "rewards/rejected": -2.3943097591400146, "step": 222 }, { "epoch": 0.34, "learning_rate": 9.854953214285807e-07, "logits/chosen": -1.240480899810791, "logits/rejected": -1.207828402519226, "logps/chosen": -60.037818908691406, "logps/rejected": -124.46925354003906, "loss": 0.3404, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1628638654947281, "rewards/margins": 4.1099772453308105, "rewards/rejected": -4.272841453552246, "step": 223 }, { "epoch": 0.34, "learning_rate": 9.851763789250525e-07, "logits/chosen": -1.2094461917877197, "logits/rejected": -1.18070650100708, "logps/chosen": -48.78962326049805, "logps/rejected": -94.73063659667969, "loss": 0.1944, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5968643426895142, "rewards/margins": 3.2663774490356445, "rewards/rejected": -2.669512987136841, "step": 224 }, { "epoch": 0.34, "learning_rate": 9.848540205955653e-07, "logits/chosen": -1.097152590751648, "logits/rejected": -1.0911446809768677, "logps/chosen": -64.8131103515625, "logps/rejected": -99.00874328613281, "loss": 0.2257, "rewards/accuracies": 0.875, "rewards/chosen": -0.23178008198738098, "rewards/margins": 3.4839677810668945, "rewards/rejected": -3.7157483100891113, "step": 225 }, { "epoch": 0.34, "learning_rate": 9.845282487096447e-07, "logits/chosen": -1.266671061515808, "logits/rejected": -1.2435628175735474, "logps/chosen": -64.22138977050781, "logps/rejected": -128.06129455566406, "loss": 0.2491, "rewards/accuracies": 1.0, "rewards/chosen": 0.2566155195236206, "rewards/margins": 4.87740421295166, "rewards/rejected": -4.620787620544434, "step": 226 }, { "epoch": 0.34, "learning_rate": 9.841990655608478e-07, "logits/chosen": -1.2105844020843506, "logits/rejected": -1.2728626728057861, "logps/chosen": -55.29273986816406, "logps/rejected": -86.1402359008789, "loss": 0.2898, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6476824283599854, "rewards/margins": 3.0936102867126465, "rewards/rejected": -2.445927858352661, "step": 227 }, { "epoch": 0.35, "learning_rate": 9.838664734667495e-07, "logits/chosen": -1.390692114830017, "logits/rejected": -1.3265589475631714, "logps/chosen": -64.66275024414062, "logps/rejected": -111.31877136230469, "loss": 0.2887, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8149236440658569, "rewards/margins": 2.563906192779541, "rewards/rejected": -3.3788299560546875, "step": 228 }, { "epoch": 0.35, "learning_rate": 9.83530474768924e-07, "logits/chosen": -1.4756298065185547, "logits/rejected": -1.4475144147872925, "logps/chosen": -56.14897155761719, "logps/rejected": -102.84004974365234, "loss": 0.2724, "rewards/accuracies": 0.875, "rewards/chosen": -0.16208699345588684, "rewards/margins": 3.177507162094116, "rewards/rejected": -3.3395941257476807, "step": 229 }, { "epoch": 0.35, "learning_rate": 9.831910718329301e-07, "logits/chosen": -1.2161978483200073, "logits/rejected": -1.305418848991394, "logps/chosen": -67.6595687866211, "logps/rejected": -103.6363754272461, "loss": 0.3052, "rewards/accuracies": 0.625, "rewards/chosen": -0.19699662923812866, "rewards/margins": 3.100790500640869, "rewards/rejected": -3.2977874279022217, "step": 230 }, { "epoch": 0.35, "learning_rate": 9.828482670482934e-07, "logits/chosen": -1.1499532461166382, "logits/rejected": -1.1783616542816162, "logps/chosen": -52.1451416015625, "logps/rejected": -112.85889434814453, "loss": 0.2487, "rewards/accuracies": 1.0, "rewards/chosen": 0.501251757144928, "rewards/margins": 4.617308139801025, "rewards/rejected": -4.116055965423584, "step": 231 }, { "epoch": 0.35, "learning_rate": 9.825020628284895e-07, "logits/chosen": -1.0289162397384644, "logits/rejected": -1.109811544418335, "logps/chosen": -46.92026138305664, "logps/rejected": -62.872901916503906, "loss": 0.2997, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0598130077123642, "rewards/margins": 1.7254122495651245, "rewards/rejected": -1.78522527217865, "step": 232 }, { "epoch": 0.35, "learning_rate": 9.821524616109275e-07, "logits/chosen": -1.3550623655319214, "logits/rejected": -1.3795663118362427, "logps/chosen": -51.85411071777344, "logps/rejected": -72.90492248535156, "loss": 0.2706, "rewards/accuracies": 0.875, "rewards/chosen": -0.14976592361927032, "rewards/margins": 2.097384214401245, "rewards/rejected": -2.24714994430542, "step": 233 }, { "epoch": 0.36, "learning_rate": 9.817994658569332e-07, "logits/chosen": -1.3676050901412964, "logits/rejected": -1.3363038301467896, "logps/chosen": -64.43699645996094, "logps/rejected": -100.00003814697266, "loss": 0.2658, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08322476595640182, "rewards/margins": 2.7351748943328857, "rewards/rejected": -2.818399429321289, "step": 234 }, { "epoch": 0.36, "learning_rate": 9.814430780517304e-07, "logits/chosen": -1.149186372756958, "logits/rejected": -1.2141780853271484, "logps/chosen": -55.30868911743164, "logps/rejected": -103.93698120117188, "loss": 0.1667, "rewards/accuracies": 1.0, "rewards/chosen": 0.1783062219619751, "rewards/margins": 3.7370057106018066, "rewards/rejected": -3.558699131011963, "step": 235 }, { "epoch": 0.36, "learning_rate": 9.810833007044246e-07, "logits/chosen": -1.3775757551193237, "logits/rejected": -1.3689650297164917, "logps/chosen": -47.52128601074219, "logps/rejected": -105.66326904296875, "loss": 0.2027, "rewards/accuracies": 0.9375, "rewards/chosen": 0.23522144556045532, "rewards/margins": 3.8973565101623535, "rewards/rejected": -3.662134885787964, "step": 236 }, { "epoch": 0.36, "learning_rate": 9.80720136347985e-07, "logits/chosen": -1.1896703243255615, "logits/rejected": -1.1428377628326416, "logps/chosen": -67.61991119384766, "logps/rejected": -140.35009765625, "loss": 0.2339, "rewards/accuracies": 0.9375, "rewards/chosen": -0.19219180941581726, "rewards/margins": 5.137165546417236, "rewards/rejected": -5.329357147216797, "step": 237 }, { "epoch": 0.36, "learning_rate": 9.80353587539227e-07, "logits/chosen": -1.1677360534667969, "logits/rejected": -1.2059762477874756, "logps/chosen": -45.55229568481445, "logps/rejected": -69.67662048339844, "loss": 0.2635, "rewards/accuracies": 0.875, "rewards/chosen": 0.6372277736663818, "rewards/margins": 2.0313403606414795, "rewards/rejected": -1.394112467765808, "step": 238 }, { "epoch": 0.36, "learning_rate": 9.799836568587927e-07, "logits/chosen": -1.1546509265899658, "logits/rejected": -1.1933931112289429, "logps/chosen": -43.91341781616211, "logps/rejected": -63.79443359375, "loss": 0.2546, "rewards/accuracies": 0.75, "rewards/chosen": 0.706580400466919, "rewards/margins": 1.6172070503234863, "rewards/rejected": -0.9106266498565674, "step": 239 }, { "epoch": 0.36, "learning_rate": 9.796103469111349e-07, "logits/chosen": -1.25909423828125, "logits/rejected": -1.2822978496551514, "logps/chosen": -71.47257232666016, "logps/rejected": -104.23622131347656, "loss": 0.2619, "rewards/accuracies": 0.875, "rewards/chosen": -0.5179034471511841, "rewards/margins": 2.7368874549865723, "rewards/rejected": -3.254790782928467, "step": 240 }, { "epoch": 0.37, "learning_rate": 9.792336603244977e-07, "logits/chosen": -1.4345574378967285, "logits/rejected": -1.4878321886062622, "logps/chosen": -83.87324523925781, "logps/rejected": -128.76010131835938, "loss": 0.2103, "rewards/accuracies": 0.6875, "rewards/chosen": -0.596457839012146, "rewards/margins": 3.8281967639923096, "rewards/rejected": -4.424654960632324, "step": 241 }, { "epoch": 0.37, "learning_rate": 9.78853599750897e-07, "logits/chosen": -1.3332566022872925, "logits/rejected": -1.3457403182983398, "logps/chosen": -73.25218963623047, "logps/rejected": -116.97953033447266, "loss": 0.2221, "rewards/accuracies": 0.9375, "rewards/chosen": -0.44591355323791504, "rewards/margins": 4.094986915588379, "rewards/rejected": -4.540900707244873, "step": 242 }, { "epoch": 0.37, "learning_rate": 9.784701678661044e-07, "logits/chosen": -1.2335759401321411, "logits/rejected": -1.1885710954666138, "logps/chosen": -60.02530288696289, "logps/rejected": -103.60508728027344, "loss": 0.2076, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3031735420227051, "rewards/margins": 3.3130042552948, "rewards/rejected": -3.616178035736084, "step": 243 }, { "epoch": 0.37, "learning_rate": 9.780833673696254e-07, "logits/chosen": -1.1043503284454346, "logits/rejected": -1.105530023574829, "logps/chosen": -71.57035064697266, "logps/rejected": -129.08958435058594, "loss": 0.2174, "rewards/accuracies": 0.875, "rewards/chosen": -0.9568031430244446, "rewards/margins": 4.0563225746154785, "rewards/rejected": -5.013125419616699, "step": 244 }, { "epoch": 0.37, "learning_rate": 9.776932009846824e-07, "logits/chosen": -1.1673836708068848, "logits/rejected": -1.2674115896224976, "logps/chosen": -68.6380615234375, "logps/rejected": -120.55182647705078, "loss": 0.3264, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9497154951095581, "rewards/margins": 4.242981433868408, "rewards/rejected": -5.192696571350098, "step": 245 }, { "epoch": 0.37, "learning_rate": 9.772996714581956e-07, "logits/chosen": -1.362168788909912, "logits/rejected": -1.3981695175170898, "logps/chosen": -58.71257781982422, "logps/rejected": -120.30799865722656, "loss": 0.2208, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5725867748260498, "rewards/margins": 5.25115442276001, "rewards/rejected": -4.678566932678223, "step": 246 }, { "epoch": 0.38, "learning_rate": 9.769027815607614e-07, "logits/chosen": -1.2044932842254639, "logits/rejected": -1.2403883934020996, "logps/chosen": -82.71932983398438, "logps/rejected": -135.8838348388672, "loss": 0.223, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8911706209182739, "rewards/margins": 4.8175129890441895, "rewards/rejected": -5.708683967590332, "step": 247 }, { "epoch": 0.38, "learning_rate": 9.76502534086636e-07, "logits/chosen": -1.0574793815612793, "logits/rejected": -1.0156506299972534, "logps/chosen": -56.54512405395508, "logps/rejected": -107.8660659790039, "loss": 0.2031, "rewards/accuracies": 1.0, "rewards/chosen": 0.5758581161499023, "rewards/margins": 3.545283317565918, "rewards/rejected": -2.9694252014160156, "step": 248 }, { "epoch": 0.38, "learning_rate": 9.760989318537132e-07, "logits/chosen": -1.0622340440750122, "logits/rejected": -1.083043098449707, "logps/chosen": -57.89778137207031, "logps/rejected": -117.26451873779297, "loss": 0.2621, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18930283188819885, "rewards/margins": 4.901935577392578, "rewards/rejected": -5.091238021850586, "step": 249 }, { "epoch": 0.38, "learning_rate": 9.756919777035064e-07, "logits/chosen": -1.177422285079956, "logits/rejected": -1.177398920059204, "logps/chosen": -57.54464340209961, "logps/rejected": -103.27391052246094, "loss": 0.2271, "rewards/accuracies": 0.75, "rewards/chosen": -0.6480403542518616, "rewards/margins": 3.5536599159240723, "rewards/rejected": -4.201700210571289, "step": 250 }, { "epoch": 0.38, "learning_rate": 9.752816745011272e-07, "logits/chosen": -1.1077170372009277, "logits/rejected": -1.1722453832626343, "logps/chosen": -40.07292938232422, "logps/rejected": -72.1737060546875, "loss": 0.1764, "rewards/accuracies": 1.0, "rewards/chosen": 0.33403611183166504, "rewards/margins": 3.103440999984741, "rewards/rejected": -2.769404888153076, "step": 251 }, { "epoch": 0.38, "learning_rate": 9.748680251352658e-07, "logits/chosen": -1.0602819919586182, "logits/rejected": -1.024991750717163, "logps/chosen": -69.75623321533203, "logps/rejected": -145.09066772460938, "loss": 0.201, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4222402274608612, "rewards/margins": 5.389873027801514, "rewards/rejected": -5.812112808227539, "step": 252 }, { "epoch": 0.38, "learning_rate": 9.744510325181711e-07, "logits/chosen": -1.0655499696731567, "logits/rejected": -1.152011513710022, "logps/chosen": -72.12405395507812, "logps/rejected": -142.640625, "loss": 0.2077, "rewards/accuracies": 1.0, "rewards/chosen": -0.5751222372055054, "rewards/margins": 5.4383440017700195, "rewards/rejected": -6.0134663581848145, "step": 253 }, { "epoch": 0.39, "learning_rate": 9.740306995856293e-07, "logits/chosen": -1.2082499265670776, "logits/rejected": -1.2432973384857178, "logps/chosen": -50.88384246826172, "logps/rejected": -86.19475555419922, "loss": 0.2242, "rewards/accuracies": 0.75, "rewards/chosen": 0.03699183464050293, "rewards/margins": 3.2802982330322266, "rewards/rejected": -3.2433063983917236, "step": 254 }, { "epoch": 0.39, "learning_rate": 9.73607029296944e-07, "logits/chosen": -1.1245856285095215, "logits/rejected": -1.0869303941726685, "logps/chosen": -47.463680267333984, "logps/rejected": -108.2414779663086, "loss": 0.3035, "rewards/accuracies": 0.875, "rewards/chosen": 0.35966920852661133, "rewards/margins": 4.96986722946167, "rewards/rejected": -4.610198020935059, "step": 255 }, { "epoch": 0.39, "learning_rate": 9.731800246349147e-07, "logits/chosen": -1.1222176551818848, "logits/rejected": -1.070405125617981, "logps/chosen": -62.034725189208984, "logps/rejected": -150.425537109375, "loss": 0.2123, "rewards/accuracies": 0.875, "rewards/chosen": 0.1317824423313141, "rewards/margins": 5.905609130859375, "rewards/rejected": -5.773827075958252, "step": 256 }, { "epoch": 0.39, "learning_rate": 9.727496886058167e-07, "logits/chosen": -1.0427227020263672, "logits/rejected": -1.0754176378250122, "logps/chosen": -69.98577117919922, "logps/rejected": -121.99186706542969, "loss": 0.2175, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5424370765686035, "rewards/margins": 4.675653457641602, "rewards/rejected": -5.218091011047363, "step": 257 }, { "epoch": 0.39, "learning_rate": 9.723160242393786e-07, "logits/chosen": -1.2104133367538452, "logits/rejected": -1.2432409524917603, "logps/chosen": -60.75482940673828, "logps/rejected": -103.19210052490234, "loss": 0.1689, "rewards/accuracies": 0.9375, "rewards/chosen": -0.21256500482559204, "rewards/margins": 3.6828160285949707, "rewards/rejected": -3.895380973815918, "step": 258 }, { "epoch": 0.39, "learning_rate": 9.718790345887628e-07, "logits/chosen": -1.1556942462921143, "logits/rejected": -1.2230334281921387, "logps/chosen": -57.88835144042969, "logps/rejected": -95.50018310546875, "loss": 0.2567, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3873271942138672, "rewards/margins": 3.189028739929199, "rewards/rejected": -3.5763559341430664, "step": 259 }, { "epoch": 0.39, "learning_rate": 9.71438722730542e-07, "logits/chosen": -1.127741813659668, "logits/rejected": -1.088086485862732, "logps/chosen": -68.16934204101562, "logps/rejected": -144.72586059570312, "loss": 0.2004, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0546388179063797, "rewards/margins": 6.129822731018066, "rewards/rejected": -6.1844611167907715, "step": 260 }, { "epoch": 0.4, "learning_rate": 9.70995091764679e-07, "logits/chosen": -1.1576268672943115, "logits/rejected": -1.1194590330123901, "logps/chosen": -62.47630310058594, "logps/rejected": -138.0134735107422, "loss": 0.1726, "rewards/accuracies": 0.9375, "rewards/chosen": 0.17312532663345337, "rewards/margins": 5.465775489807129, "rewards/rejected": -5.29265022277832, "step": 261 }, { "epoch": 0.4, "learning_rate": 9.705481448145044e-07, "logits/chosen": -1.1115840673446655, "logits/rejected": -1.0342121124267578, "logps/chosen": -70.50397491455078, "logps/rejected": -139.21095275878906, "loss": 0.214, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6507506966590881, "rewards/margins": 5.1096391677856445, "rewards/rejected": -5.760389804840088, "step": 262 }, { "epoch": 0.4, "learning_rate": 9.700978850266943e-07, "logits/chosen": -1.100415825843811, "logits/rejected": -1.117796540260315, "logps/chosen": -35.40311050415039, "logps/rejected": -65.78343200683594, "loss": 0.2342, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6401301622390747, "rewards/margins": 2.996103286743164, "rewards/rejected": -2.355973243713379, "step": 263 }, { "epoch": 0.4, "learning_rate": 9.696443155712487e-07, "logits/chosen": -0.9698693752288818, "logits/rejected": -0.9955704212188721, "logps/chosen": -51.29049301147461, "logps/rejected": -91.20393371582031, "loss": 0.2086, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1662522405385971, "rewards/margins": 3.645151138305664, "rewards/rejected": -3.478898763656616, "step": 264 }, { "epoch": 0.4, "learning_rate": 9.691874396414685e-07, "logits/chosen": -0.9918010234832764, "logits/rejected": -0.9242657423019409, "logps/chosen": -60.58504104614258, "logps/rejected": -118.39722442626953, "loss": 0.2594, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3300020694732666, "rewards/margins": 4.74561071395874, "rewards/rejected": -5.075613021850586, "step": 265 }, { "epoch": 0.4, "learning_rate": 9.687272604539342e-07, "logits/chosen": -1.218865990638733, "logits/rejected": -1.1760666370391846, "logps/chosen": -59.44304656982422, "logps/rejected": -107.35780334472656, "loss": 0.2932, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07953006029129028, "rewards/margins": 3.563913345336914, "rewards/rejected": -3.6434435844421387, "step": 266 }, { "epoch": 0.41, "learning_rate": 9.68263781248482e-07, "logits/chosen": -1.1206682920455933, "logits/rejected": -1.141585350036621, "logps/chosen": -69.39141845703125, "logps/rejected": -125.5979232788086, "loss": 0.2859, "rewards/accuracies": 0.8125, "rewards/chosen": -0.17774266004562378, "rewards/margins": 3.902479410171509, "rewards/rejected": -4.080222129821777, "step": 267 }, { "epoch": 0.41, "learning_rate": 9.67797005288181e-07, "logits/chosen": -1.0456727743148804, "logits/rejected": -1.0675475597381592, "logps/chosen": -90.54875183105469, "logps/rejected": -148.25794982910156, "loss": 0.2292, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8046572208404541, "rewards/margins": 5.290049076080322, "rewards/rejected": -6.094706058502197, "step": 268 }, { "epoch": 0.41, "learning_rate": 9.67326935859312e-07, "logits/chosen": -1.1662824153900146, "logits/rejected": -1.1143728494644165, "logps/chosen": -57.08251953125, "logps/rejected": -104.96754455566406, "loss": 0.2181, "rewards/accuracies": 0.8125, "rewards/chosen": -0.577634334564209, "rewards/margins": 3.61334228515625, "rewards/rejected": -4.190977096557617, "step": 269 }, { "epoch": 0.41, "learning_rate": 9.668535762713415e-07, "logits/chosen": -1.2060467004776, "logits/rejected": -1.277174472808838, "logps/chosen": -64.08255004882812, "logps/rejected": -100.80244445800781, "loss": 0.1997, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6261399388313293, "rewards/margins": 3.175387144088745, "rewards/rejected": -3.8015270233154297, "step": 270 }, { "epoch": 0.41, "learning_rate": 9.663769298569013e-07, "logits/chosen": -1.2676234245300293, "logits/rejected": -1.3310580253601074, "logps/chosen": -70.11847686767578, "logps/rejected": -106.50775909423828, "loss": 0.2802, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3052479028701782, "rewards/margins": 3.024759531021118, "rewards/rejected": -4.330007553100586, "step": 271 }, { "epoch": 0.41, "learning_rate": 9.65896999971763e-07, "logits/chosen": -1.2232937812805176, "logits/rejected": -1.1774309873580933, "logps/chosen": -81.23110961914062, "logps/rejected": -145.268798828125, "loss": 0.2673, "rewards/accuracies": 0.875, "rewards/chosen": -0.7276835441589355, "rewards/margins": 4.985037326812744, "rewards/rejected": -5.71272087097168, "step": 272 }, { "epoch": 0.41, "learning_rate": 9.654137899948155e-07, "logits/chosen": -1.0879088640213013, "logits/rejected": -1.1381860971450806, "logps/chosen": -50.63553237915039, "logps/rejected": -112.82023620605469, "loss": 0.3102, "rewards/accuracies": 0.9375, "rewards/chosen": 0.28322744369506836, "rewards/margins": 4.86517858505249, "rewards/rejected": -4.581951141357422, "step": 273 }, { "epoch": 0.42, "learning_rate": 9.649273033280399e-07, "logits/chosen": -0.9488300085067749, "logits/rejected": -0.9138250350952148, "logps/chosen": -42.005889892578125, "logps/rejected": -77.50165557861328, "loss": 0.2053, "rewards/accuracies": 1.0, "rewards/chosen": 0.3961612582206726, "rewards/margins": 2.9810876846313477, "rewards/rejected": -2.584926128387451, "step": 274 }, { "epoch": 0.42, "learning_rate": 9.644375433964878e-07, "logits/chosen": -1.3720015287399292, "logits/rejected": -1.371315360069275, "logps/chosen": -63.39384078979492, "logps/rejected": -117.76875305175781, "loss": 0.2601, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0266212597489357, "rewards/margins": 3.921591281890869, "rewards/rejected": -3.894970417022705, "step": 275 }, { "epoch": 0.42, "learning_rate": 9.639445136482546e-07, "logits/chosen": -1.2813271284103394, "logits/rejected": -1.2421196699142456, "logps/chosen": -49.05527877807617, "logps/rejected": -90.07084655761719, "loss": 0.2312, "rewards/accuracies": 0.9375, "rewards/chosen": 0.08839581906795502, "rewards/margins": 3.386387825012207, "rewards/rejected": -3.2979917526245117, "step": 276 }, { "epoch": 0.42, "learning_rate": 9.634482175544572e-07, "logits/chosen": -1.1708801984786987, "logits/rejected": -1.0792722702026367, "logps/chosen": -59.987640380859375, "logps/rejected": -119.44989776611328, "loss": 0.1914, "rewards/accuracies": 0.9375, "rewards/chosen": 0.23230049014091492, "rewards/margins": 3.9318790435791016, "rewards/rejected": -3.6995785236358643, "step": 277 }, { "epoch": 0.42, "learning_rate": 9.629486586092086e-07, "logits/chosen": -1.2014925479888916, "logits/rejected": -1.2414860725402832, "logps/chosen": -69.78931427001953, "logps/rejected": -132.54397583007812, "loss": 0.2746, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3791479468345642, "rewards/margins": 4.3308329582214355, "rewards/rejected": -4.7099809646606445, "step": 278 }, { "epoch": 0.42, "learning_rate": 9.624458403295934e-07, "logits/chosen": -1.213075876235962, "logits/rejected": -1.19390070438385, "logps/chosen": -43.44424057006836, "logps/rejected": -76.5701675415039, "loss": 0.2144, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0699257180094719, "rewards/margins": 1.9740471839904785, "rewards/rejected": -2.0439724922180176, "step": 279 }, { "epoch": 0.43, "learning_rate": 9.619397662556433e-07, "logits/chosen": -0.9972809553146362, "logits/rejected": -1.01755952835083, "logps/chosen": -40.813270568847656, "logps/rejected": -62.96757507324219, "loss": 0.2012, "rewards/accuracies": 0.8125, "rewards/chosen": 0.38587141036987305, "rewards/margins": 2.0120716094970703, "rewards/rejected": -1.6262001991271973, "step": 280 }, { "epoch": 0.43, "learning_rate": 9.614304399503119e-07, "logits/chosen": -1.2082014083862305, "logits/rejected": -1.1905056238174438, "logps/chosen": -66.61445617675781, "logps/rejected": -118.85020446777344, "loss": 0.1423, "rewards/accuracies": 1.0, "rewards/chosen": 0.19420230388641357, "rewards/margins": 4.226605415344238, "rewards/rejected": -4.032402515411377, "step": 281 }, { "epoch": 0.43, "learning_rate": 9.609178649994497e-07, "logits/chosen": -1.3044588565826416, "logits/rejected": -1.3541276454925537, "logps/chosen": -52.766082763671875, "logps/rejected": -90.2623519897461, "loss": 0.1909, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12392090260982513, "rewards/margins": 3.198810577392578, "rewards/rejected": -3.3227314949035645, "step": 282 }, { "epoch": 0.43, "learning_rate": 9.604020450117795e-07, "logits/chosen": -1.1252713203430176, "logits/rejected": -1.1225484609603882, "logps/chosen": -54.23025131225586, "logps/rejected": -95.14872741699219, "loss": 0.21, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1901187002658844, "rewards/margins": 2.9272286891937256, "rewards/rejected": -2.737109899520874, "step": 283 }, { "epoch": 0.43, "learning_rate": 9.598829836188693e-07, "logits/chosen": -1.232951283454895, "logits/rejected": -1.172524333000183, "logps/chosen": -41.288700103759766, "logps/rejected": -89.16956329345703, "loss": 0.2321, "rewards/accuracies": 1.0, "rewards/chosen": 0.7929279804229736, "rewards/margins": 3.3749027252197266, "rewards/rejected": -2.581974983215332, "step": 284 }, { "epoch": 0.43, "learning_rate": 9.593606844751088e-07, "logits/chosen": -1.143338918685913, "logits/rejected": -1.110314965248108, "logps/chosen": -53.824607849121094, "logps/rejected": -98.65978240966797, "loss": 0.1841, "rewards/accuracies": 0.9375, "rewards/chosen": 0.462774395942688, "rewards/margins": 3.597409248352051, "rewards/rejected": -3.1346347332000732, "step": 285 }, { "epoch": 0.43, "learning_rate": 9.588351512576822e-07, "logits/chosen": -1.1007797718048096, "logits/rejected": -1.1043678522109985, "logps/chosen": -46.744407653808594, "logps/rejected": -114.91055297851562, "loss": 0.1739, "rewards/accuracies": 0.875, "rewards/chosen": 0.7569597959518433, "rewards/margins": 5.05133581161499, "rewards/rejected": -4.294375896453857, "step": 286 }, { "epoch": 0.44, "learning_rate": 9.583063876665427e-07, "logits/chosen": -0.8524032831192017, "logits/rejected": -0.8362274169921875, "logps/chosen": -47.458534240722656, "logps/rejected": -105.9702377319336, "loss": 0.2093, "rewards/accuracies": 1.0, "rewards/chosen": 1.144188404083252, "rewards/margins": 5.193797588348389, "rewards/rejected": -4.049609184265137, "step": 287 }, { "epoch": 0.44, "learning_rate": 9.577743974243872e-07, "logits/chosen": -1.138148546218872, "logits/rejected": -1.0631217956542969, "logps/chosen": -50.755393981933594, "logps/rejected": -98.93721771240234, "loss": 0.1719, "rewards/accuracies": 0.875, "rewards/chosen": -0.38429662585258484, "rewards/margins": 3.2172207832336426, "rewards/rejected": -3.6015172004699707, "step": 288 }, { "epoch": 0.44, "learning_rate": 9.572391842766289e-07, "logits/chosen": -1.3621189594268799, "logits/rejected": -1.4565434455871582, "logps/chosen": -43.73139572143555, "logps/rejected": -75.05216217041016, "loss": 0.1819, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4174540638923645, "rewards/margins": 3.1869609355926514, "rewards/rejected": -2.7695069313049316, "step": 289 }, { "epoch": 0.44, "learning_rate": 9.567007519913716e-07, "logits/chosen": -1.2581207752227783, "logits/rejected": -1.2855815887451172, "logps/chosen": -57.83710861206055, "logps/rejected": -90.186279296875, "loss": 0.1771, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07886456698179245, "rewards/margins": 2.7510035037994385, "rewards/rejected": -2.8298683166503906, "step": 290 }, { "epoch": 0.44, "learning_rate": 9.561591043593827e-07, "logits/chosen": -1.5122599601745605, "logits/rejected": -1.458139181137085, "logps/chosen": -64.88407135009766, "logps/rejected": -127.58905029296875, "loss": 0.2801, "rewards/accuracies": 0.9375, "rewards/chosen": -0.14722773432731628, "rewards/margins": 4.665119647979736, "rewards/rejected": -4.812347888946533, "step": 291 }, { "epoch": 0.44, "learning_rate": 9.556142451940679e-07, "logits/chosen": -1.184052586555481, "logits/rejected": -1.2342268228530884, "logps/chosen": -89.00273895263672, "logps/rejected": -147.20021057128906, "loss": 0.1953, "rewards/accuracies": 0.875, "rewards/chosen": -0.8906569480895996, "rewards/margins": 5.703664779663086, "rewards/rejected": -6.594321250915527, "step": 292 }, { "epoch": 0.45, "learning_rate": 9.55066178331442e-07, "logits/chosen": -1.000017523765564, "logits/rejected": -0.9489910006523132, "logps/chosen": -66.79763793945312, "logps/rejected": -114.90792083740234, "loss": 0.2076, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5531197786331177, "rewards/margins": 3.8519601821899414, "rewards/rejected": -4.4050798416137695, "step": 293 }, { "epoch": 0.45, "learning_rate": 9.545149076301043e-07, "logits/chosen": -1.0007249116897583, "logits/rejected": -1.0427497625350952, "logps/chosen": -57.55965805053711, "logps/rejected": -101.47200012207031, "loss": 0.2055, "rewards/accuracies": 0.8125, "rewards/chosen": -0.12303817272186279, "rewards/margins": 4.29061222076416, "rewards/rejected": -4.413650035858154, "step": 294 }, { "epoch": 0.45, "learning_rate": 9.539604369712098e-07, "logits/chosen": -1.0703601837158203, "logits/rejected": -1.0638940334320068, "logps/chosen": -82.14090728759766, "logps/rejected": -144.56829833984375, "loss": 0.2249, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1805057525634766, "rewards/margins": 5.812443733215332, "rewards/rejected": -6.992949485778809, "step": 295 }, { "epoch": 0.45, "learning_rate": 9.534027702584424e-07, "logits/chosen": -1.241573691368103, "logits/rejected": -1.092793345451355, "logps/chosen": -51.69071960449219, "logps/rejected": -119.88025665283203, "loss": 0.229, "rewards/accuracies": 0.875, "rewards/chosen": -0.14432412385940552, "rewards/margins": 4.62017297744751, "rewards/rejected": -4.76449728012085, "step": 296 }, { "epoch": 0.45, "learning_rate": 9.528419114179876e-07, "logits/chosen": -1.3773949146270752, "logits/rejected": -1.326578140258789, "logps/chosen": -66.81124114990234, "logps/rejected": -117.5950927734375, "loss": 0.2108, "rewards/accuracies": 0.9375, "rewards/chosen": -1.232625961303711, "rewards/margins": 3.5316824913024902, "rewards/rejected": -4.764308452606201, "step": 297 }, { "epoch": 0.45, "learning_rate": 9.522778643985044e-07, "logits/chosen": -1.1971187591552734, "logits/rejected": -1.2293506860733032, "logps/chosen": -73.28286743164062, "logps/rejected": -128.9477081298828, "loss": 0.2332, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6712610721588135, "rewards/margins": 5.412903308868408, "rewards/rejected": -6.084164619445801, "step": 298 }, { "epoch": 0.45, "learning_rate": 9.517106331710984e-07, "logits/chosen": -1.168115496635437, "logits/rejected": -0.9988553524017334, "logps/chosen": -62.709564208984375, "logps/rejected": -160.54025268554688, "loss": 0.1577, "rewards/accuracies": 0.875, "rewards/chosen": 0.40043070912361145, "rewards/margins": 6.569068908691406, "rewards/rejected": -6.168639183044434, "step": 299 }, { "epoch": 0.46, "learning_rate": 9.511402217292925e-07, "logits/chosen": -1.0160331726074219, "logits/rejected": -0.9988321661949158, "logps/chosen": -58.68785095214844, "logps/rejected": -117.34202575683594, "loss": 0.1469, "rewards/accuracies": 1.0, "rewards/chosen": -0.07157469540834427, "rewards/margins": 5.047296047210693, "rewards/rejected": -5.118870735168457, "step": 300 }, { "epoch": 0.46, "learning_rate": 9.505666340890002e-07, "logits/chosen": -1.2377873659133911, "logits/rejected": -1.3541501760482788, "logps/chosen": -52.6754264831543, "logps/rejected": -105.81607818603516, "loss": 0.2249, "rewards/accuracies": 0.875, "rewards/chosen": 0.6247602701187134, "rewards/margins": 4.983515739440918, "rewards/rejected": -4.358755588531494, "step": 301 }, { "epoch": 0.46, "learning_rate": 9.499898742884962e-07, "logits/chosen": -1.3011903762817383, "logits/rejected": -1.27763831615448, "logps/chosen": -57.895233154296875, "logps/rejected": -112.72341918945312, "loss": 0.228, "rewards/accuracies": 1.0, "rewards/chosen": -0.05821986496448517, "rewards/margins": 4.705685138702393, "rewards/rejected": -4.763904571533203, "step": 302 }, { "epoch": 0.46, "learning_rate": 9.494099463883884e-07, "logits/chosen": -1.1723133325576782, "logits/rejected": -1.2371587753295898, "logps/chosen": -77.86163330078125, "logps/rejected": -140.04586791992188, "loss": 0.1893, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4797315001487732, "rewards/margins": 5.164061546325684, "rewards/rejected": -5.643793106079102, "step": 303 }, { "epoch": 0.46, "learning_rate": 9.488268544715895e-07, "logits/chosen": -1.2617186307907104, "logits/rejected": -1.282090187072754, "logps/chosen": -67.55493927001953, "logps/rejected": -151.58358764648438, "loss": 0.219, "rewards/accuracies": 0.875, "rewards/chosen": -0.05920267105102539, "rewards/margins": 6.089982509613037, "rewards/rejected": -6.149186134338379, "step": 304 }, { "epoch": 0.46, "learning_rate": 9.48240602643288e-07, "logits/chosen": -1.101471185684204, "logits/rejected": -1.0918614864349365, "logps/chosen": -65.73365783691406, "logps/rejected": -136.63345336914062, "loss": 0.1837, "rewards/accuracies": 1.0, "rewards/chosen": -0.6864421367645264, "rewards/margins": 5.417891502380371, "rewards/rejected": -6.104333877563477, "step": 305 }, { "epoch": 0.46, "learning_rate": 9.476511950309197e-07, "logits/chosen": -1.332780122756958, "logits/rejected": -1.2241826057434082, "logps/chosen": -93.1464614868164, "logps/rejected": -171.5055389404297, "loss": 0.2151, "rewards/accuracies": 0.875, "rewards/chosen": -1.9534928798675537, "rewards/margins": 5.41372537612915, "rewards/rejected": -7.367218494415283, "step": 306 }, { "epoch": 0.47, "learning_rate": 9.470586357841377e-07, "logits/chosen": -1.3806670904159546, "logits/rejected": -1.364810585975647, "logps/chosen": -80.15160369873047, "logps/rejected": -118.8318862915039, "loss": 0.1924, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4739937782287598, "rewards/margins": 3.5889570713043213, "rewards/rejected": -5.06295108795166, "step": 307 }, { "epoch": 0.47, "learning_rate": 9.464629290747842e-07, "logits/chosen": -1.0424054861068726, "logits/rejected": -1.0304687023162842, "logps/chosen": -57.2526969909668, "logps/rejected": -138.89817810058594, "loss": 0.1773, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1174016147851944, "rewards/margins": 6.3627142906188965, "rewards/rejected": -6.2453131675720215, "step": 308 }, { "epoch": 0.47, "learning_rate": 9.458640790968606e-07, "logits/chosen": -1.050306797027588, "logits/rejected": -0.9519209265708923, "logps/chosen": -81.77310180664062, "logps/rejected": -163.30325317382812, "loss": 0.2278, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8570660948753357, "rewards/margins": 6.205819606781006, "rewards/rejected": -7.0628862380981445, "step": 309 }, { "epoch": 0.47, "learning_rate": 9.452620900664985e-07, "logits/chosen": -1.0299391746520996, "logits/rejected": -0.9257128238677979, "logps/chosen": -52.693416595458984, "logps/rejected": -125.26481628417969, "loss": 0.1896, "rewards/accuracies": 1.0, "rewards/chosen": -0.04881918430328369, "rewards/margins": 5.693671703338623, "rewards/rejected": -5.742491722106934, "step": 310 }, { "epoch": 0.47, "learning_rate": 9.446569662219288e-07, "logits/chosen": -1.1465742588043213, "logits/rejected": -1.0597347021102905, "logps/chosen": -73.81082153320312, "logps/rejected": -138.83419799804688, "loss": 0.2467, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1154011487960815, "rewards/margins": 4.6926374435424805, "rewards/rejected": -5.80803918838501, "step": 311 }, { "epoch": 0.47, "learning_rate": 9.440487118234534e-07, "logits/chosen": -1.2268579006195068, "logits/rejected": -1.2430446147918701, "logps/chosen": -67.38180541992188, "logps/rejected": -122.89826965332031, "loss": 0.1632, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8113257884979248, "rewards/margins": 4.62271785736084, "rewards/rejected": -5.434043884277344, "step": 312 }, { "epoch": 0.48, "learning_rate": 9.434373311534145e-07, "logits/chosen": -1.173991084098816, "logits/rejected": -1.0881710052490234, "logps/chosen": -60.490787506103516, "logps/rejected": -127.25591278076172, "loss": 0.2422, "rewards/accuracies": 0.9375, "rewards/chosen": -1.091408371925354, "rewards/margins": 4.285440921783447, "rewards/rejected": -5.3768486976623535, "step": 313 }, { "epoch": 0.48, "learning_rate": 9.428228285161638e-07, "logits/chosen": -1.2301355600357056, "logits/rejected": -1.164600133895874, "logps/chosen": -70.10157012939453, "logps/rejected": -118.09928131103516, "loss": 0.2747, "rewards/accuracies": 0.875, "rewards/chosen": -0.4453577995300293, "rewards/margins": 4.48175573348999, "rewards/rejected": -4.9271135330200195, "step": 314 }, { "epoch": 0.48, "learning_rate": 9.422052082380334e-07, "logits/chosen": -1.1066889762878418, "logits/rejected": -1.1147856712341309, "logps/chosen": -62.315975189208984, "logps/rejected": -130.76109313964844, "loss": 0.1691, "rewards/accuracies": 0.875, "rewards/chosen": -0.3953535556793213, "rewards/margins": 5.75650691986084, "rewards/rejected": -6.151860237121582, "step": 315 }, { "epoch": 0.48, "learning_rate": 9.415844746673046e-07, "logits/chosen": -1.182931661605835, "logits/rejected": -1.141818881034851, "logps/chosen": -64.85267639160156, "logps/rejected": -149.100830078125, "loss": 0.2965, "rewards/accuracies": 0.9375, "rewards/chosen": -0.32952946424484253, "rewards/margins": 6.445108413696289, "rewards/rejected": -6.7746381759643555, "step": 316 }, { "epoch": 0.48, "learning_rate": 9.409606321741774e-07, "logits/chosen": -0.9379570484161377, "logits/rejected": -0.9440093636512756, "logps/chosen": -55.83263397216797, "logps/rejected": -94.1610336303711, "loss": 0.272, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3002450466156006, "rewards/margins": 3.7583513259887695, "rewards/rejected": -4.058596134185791, "step": 317 }, { "epoch": 0.48, "learning_rate": 9.4033368515074e-07, "logits/chosen": -0.9900854825973511, "logits/rejected": -1.0676978826522827, "logps/chosen": -59.72868347167969, "logps/rejected": -94.28669738769531, "loss": 0.1464, "rewards/accuracies": 0.875, "rewards/chosen": -0.35381263494491577, "rewards/margins": 3.47701358795166, "rewards/rejected": -3.8308260440826416, "step": 318 }, { "epoch": 0.48, "learning_rate": 9.397036380109376e-07, "logits/chosen": -1.237237572669983, "logits/rejected": -1.2699791193008423, "logps/chosen": -62.2976188659668, "logps/rejected": -114.91215515136719, "loss": 0.2888, "rewards/accuracies": 0.875, "rewards/chosen": -0.7755670547485352, "rewards/margins": 4.2638468742370605, "rewards/rejected": -5.039413928985596, "step": 319 }, { "epoch": 0.49, "learning_rate": 9.390704951905411e-07, "logits/chosen": -1.0435210466384888, "logits/rejected": -1.0626487731933594, "logps/chosen": -54.04743957519531, "logps/rejected": -99.78398132324219, "loss": 0.2092, "rewards/accuracies": 0.9375, "rewards/chosen": -0.44715026021003723, "rewards/margins": 4.157877445220947, "rewards/rejected": -4.60502815246582, "step": 320 }, { "epoch": 0.49, "learning_rate": 9.384342611471164e-07, "logits/chosen": -1.2864059209823608, "logits/rejected": -1.2670793533325195, "logps/chosen": -58.702476501464844, "logps/rejected": -120.93059539794922, "loss": 0.1706, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18780693411827087, "rewards/margins": 4.7762651443481445, "rewards/rejected": -4.964072227478027, "step": 321 }, { "epoch": 0.49, "learning_rate": 9.377949403599927e-07, "logits/chosen": -1.1650375127792358, "logits/rejected": -1.25117826461792, "logps/chosen": -62.39760971069336, "logps/rejected": -106.74618530273438, "loss": 0.2156, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0015542954206466675, "rewards/margins": 3.9890871047973633, "rewards/rejected": -3.9906415939331055, "step": 322 }, { "epoch": 0.49, "learning_rate": 9.371525373302316e-07, "logits/chosen": -1.234785556793213, "logits/rejected": -1.229178547859192, "logps/chosen": -36.128318786621094, "logps/rejected": -83.70571899414062, "loss": 0.1872, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3476123809814453, "rewards/margins": 3.839707612991333, "rewards/rejected": -3.4920952320098877, "step": 323 }, { "epoch": 0.49, "learning_rate": 9.36507056580594e-07, "logits/chosen": -1.111757516860962, "logits/rejected": -1.0463680028915405, "logps/chosen": -60.20932388305664, "logps/rejected": -113.43062591552734, "loss": 0.1969, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5552247762680054, "rewards/margins": 4.3409104347229, "rewards/rejected": -4.896135330200195, "step": 324 }, { "epoch": 0.49, "learning_rate": 9.358585026555097e-07, "logits/chosen": -0.9682977199554443, "logits/rejected": -1.0178247690200806, "logps/chosen": -41.273834228515625, "logps/rejected": -77.8062515258789, "loss": 0.2324, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5879894495010376, "rewards/margins": 3.407402992248535, "rewards/rejected": -2.819413661956787, "step": 325 }, { "epoch": 0.5, "learning_rate": 9.352068801210444e-07, "logits/chosen": -1.2803910970687866, "logits/rejected": -1.3069097995758057, "logps/chosen": -64.29356384277344, "logps/rejected": -111.56470489501953, "loss": 0.2316, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3384580612182617, "rewards/margins": 4.075242519378662, "rewards/rejected": -4.413699626922607, "step": 326 }, { "epoch": 0.5, "learning_rate": 9.345521935648684e-07, "logits/chosen": -1.156507968902588, "logits/rejected": -1.0567623376846313, "logps/chosen": -62.299598693847656, "logps/rejected": -154.32818603515625, "loss": 0.2469, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4583524465560913, "rewards/margins": 6.333713054656982, "rewards/rejected": -6.792065620422363, "step": 327 }, { "epoch": 0.5, "learning_rate": 9.338944475962236e-07, "logits/chosen": -1.2198809385299683, "logits/rejected": -1.1411675214767456, "logps/chosen": -68.92489624023438, "logps/rejected": -138.6844940185547, "loss": 0.1705, "rewards/accuracies": 1.0, "rewards/chosen": -0.7334094643592834, "rewards/margins": 5.524423599243164, "rewards/rejected": -6.257833480834961, "step": 328 }, { "epoch": 0.5, "learning_rate": 9.332336468458913e-07, "logits/chosen": -1.2047687768936157, "logits/rejected": -1.1440612077713013, "logps/chosen": -69.50259399414062, "logps/rejected": -136.95941162109375, "loss": 0.2702, "rewards/accuracies": 0.875, "rewards/chosen": -1.1296145915985107, "rewards/margins": 4.767106533050537, "rewards/rejected": -5.896721363067627, "step": 329 }, { "epoch": 0.5, "learning_rate": 9.325697959661601e-07, "logits/chosen": -1.1211073398590088, "logits/rejected": -1.1430073976516724, "logps/chosen": -68.41262817382812, "logps/rejected": -147.62721252441406, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": -0.4298883080482483, "rewards/margins": 6.240133762359619, "rewards/rejected": -6.670022487640381, "step": 330 }, { "epoch": 0.5, "learning_rate": 9.319028996307918e-07, "logits/chosen": -1.2578020095825195, "logits/rejected": -1.253657579421997, "logps/chosen": -68.27984619140625, "logps/rejected": -114.69712829589844, "loss": 0.1736, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4018743932247162, "rewards/margins": 4.116807460784912, "rewards/rejected": -4.518681526184082, "step": 331 }, { "epoch": 0.5, "learning_rate": 9.312329625349901e-07, "logits/chosen": -1.149940848350525, "logits/rejected": -1.1540448665618896, "logps/chosen": -74.48561096191406, "logps/rejected": -133.59381103515625, "loss": 0.1977, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7744425535202026, "rewards/margins": 4.682015419006348, "rewards/rejected": -5.45645809173584, "step": 332 }, { "epoch": 0.51, "learning_rate": 9.305599893953669e-07, "logits/chosen": -1.065702199935913, "logits/rejected": -0.961828887462616, "logps/chosen": -75.67964172363281, "logps/rejected": -139.41537475585938, "loss": 0.2296, "rewards/accuracies": 0.75, "rewards/chosen": -1.7512010335922241, "rewards/margins": 4.466395378112793, "rewards/rejected": -6.217596530914307, "step": 333 }, { "epoch": 0.51, "learning_rate": 9.298839849499081e-07, "logits/chosen": -1.1605356931686401, "logits/rejected": -1.2287828922271729, "logps/chosen": -92.33887481689453, "logps/rejected": -138.165771484375, "loss": 0.285, "rewards/accuracies": 0.9375, "rewards/chosen": -1.864888072013855, "rewards/margins": 4.075357437133789, "rewards/rejected": -5.940246105194092, "step": 334 }, { "epoch": 0.51, "learning_rate": 9.29204953957942e-07, "logits/chosen": -0.9757486581802368, "logits/rejected": -0.9516485333442688, "logps/chosen": -70.66392517089844, "logps/rejected": -120.23057556152344, "loss": 0.1818, "rewards/accuracies": 0.875, "rewards/chosen": -1.0067274570465088, "rewards/margins": 4.125641822814941, "rewards/rejected": -5.1323699951171875, "step": 335 }, { "epoch": 0.51, "learning_rate": 9.285229012001046e-07, "logits/chosen": -1.2529933452606201, "logits/rejected": -1.1360479593276978, "logps/chosen": -66.08387756347656, "logps/rejected": -158.76080322265625, "loss": 0.1779, "rewards/accuracies": 1.0, "rewards/chosen": -0.6589143872261047, "rewards/margins": 6.218050479888916, "rewards/rejected": -6.876964092254639, "step": 336 }, { "epoch": 0.51, "learning_rate": 9.278378314783064e-07, "logits/chosen": -1.018735647201538, "logits/rejected": -0.9996989369392395, "logps/chosen": -62.99300003051758, "logps/rejected": -109.50791931152344, "loss": 0.221, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2736590504646301, "rewards/margins": 4.201366424560547, "rewards/rejected": -4.4750261306762695, "step": 337 }, { "epoch": 0.51, "learning_rate": 9.271497496156983e-07, "logits/chosen": -1.1342700719833374, "logits/rejected": -1.0868401527404785, "logps/chosen": -76.54375457763672, "logps/rejected": -150.23513793945312, "loss": 0.2337, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5463071465492249, "rewards/margins": 6.337887763977051, "rewards/rejected": -6.884194850921631, "step": 338 }, { "epoch": 0.52, "learning_rate": 9.26458660456638e-07, "logits/chosen": -1.1688416004180908, "logits/rejected": -1.167934536933899, "logps/chosen": -51.82984161376953, "logps/rejected": -99.10894012451172, "loss": 0.2227, "rewards/accuracies": 1.0, "rewards/chosen": 0.23706474900245667, "rewards/margins": 3.779388904571533, "rewards/rejected": -3.5423243045806885, "step": 339 }, { "epoch": 0.52, "learning_rate": 9.257645688666555e-07, "logits/chosen": -1.1728771924972534, "logits/rejected": -1.1348259449005127, "logps/chosen": -74.75570678710938, "logps/rejected": -150.23471069335938, "loss": 0.1724, "rewards/accuracies": 1.0, "rewards/chosen": -1.3358078002929688, "rewards/margins": 5.848766803741455, "rewards/rejected": -7.184574604034424, "step": 340 }, { "epoch": 0.52, "learning_rate": 9.250674797324196e-07, "logits/chosen": -1.222875714302063, "logits/rejected": -1.204736590385437, "logps/chosen": -71.1811752319336, "logps/rejected": -137.2476806640625, "loss": 0.1718, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6591771245002747, "rewards/margins": 5.725841522216797, "rewards/rejected": -6.385018825531006, "step": 341 }, { "epoch": 0.52, "learning_rate": 9.243673979617019e-07, "logits/chosen": -1.2629741430282593, "logits/rejected": -1.2538414001464844, "logps/chosen": -85.21867370605469, "logps/rejected": -159.30917358398438, "loss": 0.2231, "rewards/accuracies": 0.875, "rewards/chosen": -0.6282932758331299, "rewards/margins": 6.88715124130249, "rewards/rejected": -7.515444278717041, "step": 342 }, { "epoch": 0.52, "learning_rate": 9.236643284833445e-07, "logits/chosen": -1.2181004285812378, "logits/rejected": -1.2108968496322632, "logps/chosen": -68.36640930175781, "logps/rejected": -135.7064208984375, "loss": 0.1703, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9516810178756714, "rewards/margins": 5.099062442779541, "rewards/rejected": -6.050743103027344, "step": 343 }, { "epoch": 0.52, "learning_rate": 9.22958276247223e-07, "logits/chosen": -1.1332156658172607, "logits/rejected": -1.049139380455017, "logps/chosen": -74.32099914550781, "logps/rejected": -163.8959503173828, "loss": 0.1956, "rewards/accuracies": 1.0, "rewards/chosen": -0.6016435623168945, "rewards/margins": 7.112662315368652, "rewards/rejected": -7.714305877685547, "step": 344 }, { "epoch": 0.52, "learning_rate": 9.222492462242137e-07, "logits/chosen": -1.0189480781555176, "logits/rejected": -0.9224266409873962, "logps/chosen": -85.83133697509766, "logps/rejected": -165.11000061035156, "loss": 0.2467, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4347052574157715, "rewards/margins": 6.352794647216797, "rewards/rejected": -7.787499904632568, "step": 345 }, { "epoch": 0.53, "learning_rate": 9.215372434061572e-07, "logits/chosen": -0.9589766263961792, "logits/rejected": -0.9576123952865601, "logps/chosen": -51.86228561401367, "logps/rejected": -117.89081573486328, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": 0.08749858289957047, "rewards/margins": 5.508365154266357, "rewards/rejected": -5.420866966247559, "step": 346 }, { "epoch": 0.53, "learning_rate": 9.208222728058235e-07, "logits/chosen": -1.132300615310669, "logits/rejected": -1.0416042804718018, "logps/chosen": -65.39353942871094, "logps/rejected": -144.50401306152344, "loss": 0.2118, "rewards/accuracies": 0.9375, "rewards/chosen": -0.678921103477478, "rewards/margins": 5.8557209968566895, "rewards/rejected": -6.534642219543457, "step": 347 }, { "epoch": 0.53, "learning_rate": 9.201043394568771e-07, "logits/chosen": -1.2169915437698364, "logits/rejected": -1.121902346611023, "logps/chosen": -68.50205993652344, "logps/rejected": -144.74172973632812, "loss": 0.2686, "rewards/accuracies": 0.875, "rewards/chosen": -0.5129822492599487, "rewards/margins": 6.310784339904785, "rewards/rejected": -6.823767185211182, "step": 348 }, { "epoch": 0.53, "learning_rate": 9.193834484138417e-07, "logits/chosen": -1.0599150657653809, "logits/rejected": -1.107330083847046, "logps/chosen": -62.24093246459961, "logps/rejected": -143.25433349609375, "loss": 0.251, "rewards/accuracies": 1.0, "rewards/chosen": -0.33035796880722046, "rewards/margins": 6.537755489349365, "rewards/rejected": -6.8681135177612305, "step": 349 }, { "epoch": 0.53, "learning_rate": 9.186596047520638e-07, "logits/chosen": -1.3434659242630005, "logits/rejected": -1.2892396450042725, "logps/chosen": -60.81199264526367, "logps/rejected": -114.70281982421875, "loss": 0.2041, "rewards/accuracies": 0.875, "rewards/chosen": -0.15349704027175903, "rewards/margins": 4.696380615234375, "rewards/rejected": -4.84987735748291, "step": 350 }, { "epoch": 0.53, "learning_rate": 9.179328135676778e-07, "logits/chosen": -1.1484850645065308, "logits/rejected": -1.020574927330017, "logps/chosen": -59.90319061279297, "logps/rejected": -120.53746795654297, "loss": 0.1556, "rewards/accuracies": 1.0, "rewards/chosen": -0.6321442723274231, "rewards/margins": 4.3459978103637695, "rewards/rejected": -4.978141784667969, "step": 351 }, { "epoch": 0.53, "learning_rate": 9.172030799775698e-07, "logits/chosen": -1.0592190027236938, "logits/rejected": -1.0199298858642578, "logps/chosen": -53.4327392578125, "logps/rejected": -110.98649597167969, "loss": 0.2109, "rewards/accuracies": 0.9375, "rewards/chosen": 0.19057221710681915, "rewards/margins": 5.060029983520508, "rewards/rejected": -4.869457721710205, "step": 352 }, { "epoch": 0.54, "learning_rate": 9.16470409119341e-07, "logits/chosen": -0.8089007139205933, "logits/rejected": -0.7608518004417419, "logps/chosen": -88.42681884765625, "logps/rejected": -136.05258178710938, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": -2.1394758224487305, "rewards/margins": 4.462666988372803, "rewards/rejected": -6.602142333984375, "step": 353 }, { "epoch": 0.54, "learning_rate": 9.157348061512726e-07, "logits/chosen": -1.0326448678970337, "logits/rejected": -0.95633465051651, "logps/chosen": -50.31575012207031, "logps/rejected": -92.37877655029297, "loss": 0.228, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07662263512611389, "rewards/margins": 3.4256300926208496, "rewards/rejected": -3.5022528171539307, "step": 354 }, { "epoch": 0.54, "learning_rate": 9.149962762522889e-07, "logits/chosen": -1.1547660827636719, "logits/rejected": -1.166226863861084, "logps/chosen": -68.40359497070312, "logps/rejected": -98.83624267578125, "loss": 0.2265, "rewards/accuracies": 0.75, "rewards/chosen": -0.9433976411819458, "rewards/margins": 3.1959171295166016, "rewards/rejected": -4.139314651489258, "step": 355 }, { "epoch": 0.54, "learning_rate": 9.14254824621921e-07, "logits/chosen": -1.0819573402404785, "logits/rejected": -1.0202829837799072, "logps/chosen": -44.65376281738281, "logps/rejected": -94.65969848632812, "loss": 0.2218, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07252389192581177, "rewards/margins": 3.7984731197357178, "rewards/rejected": -3.8709967136383057, "step": 356 }, { "epoch": 0.54, "learning_rate": 9.135104564802698e-07, "logits/chosen": -1.1751701831817627, "logits/rejected": -1.098080039024353, "logps/chosen": -84.35014343261719, "logps/rejected": -157.07864379882812, "loss": 0.2225, "rewards/accuracies": 0.875, "rewards/chosen": -1.8365371227264404, "rewards/margins": 5.801826477050781, "rewards/rejected": -7.638364315032959, "step": 357 }, { "epoch": 0.54, "learning_rate": 9.127631770679697e-07, "logits/chosen": -0.8695126175880432, "logits/rejected": -0.8332204818725586, "logps/chosen": -78.99563598632812, "logps/rejected": -179.3597412109375, "loss": 0.1488, "rewards/accuracies": 0.875, "rewards/chosen": -1.2126437425613403, "rewards/margins": 7.655306339263916, "rewards/rejected": -8.867949485778809, "step": 358 }, { "epoch": 0.55, "learning_rate": 9.120129916461516e-07, "logits/chosen": -1.0260288715362549, "logits/rejected": -0.9362490773200989, "logps/chosen": -70.77383422851562, "logps/rejected": -133.34637451171875, "loss": 0.2292, "rewards/accuracies": 1.0, "rewards/chosen": -1.0889973640441895, "rewards/margins": 5.2399444580078125, "rewards/rejected": -6.328941822052002, "step": 359 }, { "epoch": 0.55, "learning_rate": 9.112599054964057e-07, "logits/chosen": -1.1575307846069336, "logits/rejected": -1.1207451820373535, "logps/chosen": -61.183433532714844, "logps/rejected": -120.67449188232422, "loss": 0.1292, "rewards/accuracies": 0.875, "rewards/chosen": -0.23736169934272766, "rewards/margins": 4.948629379272461, "rewards/rejected": -5.185991287231445, "step": 360 }, { "epoch": 0.55, "learning_rate": 9.105039239207446e-07, "logits/chosen": -0.8983496427536011, "logits/rejected": -0.8002632260322571, "logps/chosen": -60.88401794433594, "logps/rejected": -127.92584228515625, "loss": 0.249, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4497146010398865, "rewards/margins": 5.143093109130859, "rewards/rejected": -5.592807769775391, "step": 361 }, { "epoch": 0.55, "learning_rate": 9.097450522415655e-07, "logits/chosen": -1.0423619747161865, "logits/rejected": -1.0030453205108643, "logps/chosen": -69.3251953125, "logps/rejected": -130.1721954345703, "loss": 0.2145, "rewards/accuracies": 1.0, "rewards/chosen": -1.3751425743103027, "rewards/margins": 4.741582870483398, "rewards/rejected": -6.116725444793701, "step": 362 }, { "epoch": 0.55, "learning_rate": 9.089832958016135e-07, "logits/chosen": -0.9802379608154297, "logits/rejected": -0.7710241079330444, "logps/chosen": -54.990028381347656, "logps/rejected": -149.1652374267578, "loss": 0.1517, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02962431311607361, "rewards/margins": 7.011379718780518, "rewards/rejected": -7.041004657745361, "step": 363 }, { "epoch": 0.55, "learning_rate": 9.082186599639427e-07, "logits/chosen": -1.2180869579315186, "logits/rejected": -1.288204550743103, "logps/chosen": -63.243385314941406, "logps/rejected": -113.26071166992188, "loss": 0.1491, "rewards/accuracies": 1.0, "rewards/chosen": -0.6312990784645081, "rewards/margins": 4.588357448577881, "rewards/rejected": -5.219655990600586, "step": 364 }, { "epoch": 0.55, "learning_rate": 9.074511501118805e-07, "logits/chosen": -1.1787549257278442, "logits/rejected": -1.2597904205322266, "logps/chosen": -68.79399871826172, "logps/rejected": -132.29443359375, "loss": 0.1933, "rewards/accuracies": 0.9375, "rewards/chosen": -0.25832653045654297, "rewards/margins": 5.924032211303711, "rewards/rejected": -6.182358741760254, "step": 365 }, { "epoch": 0.56, "learning_rate": 9.066807716489871e-07, "logits/chosen": -0.8737149238586426, "logits/rejected": -0.8693994283676147, "logps/chosen": -47.5757942199707, "logps/rejected": -100.08030700683594, "loss": 0.1697, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3265417218208313, "rewards/margins": 4.437649726867676, "rewards/rejected": -4.11110782623291, "step": 366 }, { "epoch": 0.56, "learning_rate": 9.059075299990197e-07, "logits/chosen": -0.7666571140289307, "logits/rejected": -0.7947217226028442, "logps/chosen": -50.94587707519531, "logps/rejected": -99.97550201416016, "loss": 0.1475, "rewards/accuracies": 1.0, "rewards/chosen": 0.01705726981163025, "rewards/margins": 4.673468589782715, "rewards/rejected": -4.656411647796631, "step": 367 }, { "epoch": 0.56, "learning_rate": 9.051314306058933e-07, "logits/chosen": -1.194947600364685, "logits/rejected": -1.2012661695480347, "logps/chosen": -57.371673583984375, "logps/rejected": -103.62713623046875, "loss": 0.2171, "rewards/accuracies": 0.9375, "rewards/chosen": 0.020657628774642944, "rewards/margins": 4.762262344360352, "rewards/rejected": -4.741604804992676, "step": 368 }, { "epoch": 0.56, "learning_rate": 9.043524789336422e-07, "logits/chosen": -1.0432727336883545, "logits/rejected": -1.1021490097045898, "logps/chosen": -55.0340690612793, "logps/rejected": -96.28125, "loss": 0.1971, "rewards/accuracies": 0.875, "rewards/chosen": -0.2624248266220093, "rewards/margins": 4.110570907592773, "rewards/rejected": -4.372995853424072, "step": 369 }, { "epoch": 0.56, "learning_rate": 9.035706804663818e-07, "logits/chosen": -1.3030118942260742, "logits/rejected": -1.3033980131149292, "logps/chosen": -53.38115310668945, "logps/rejected": -101.26620483398438, "loss": 0.2095, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3574654459953308, "rewards/margins": 3.6292953491210938, "rewards/rejected": -3.271829605102539, "step": 370 }, { "epoch": 0.56, "learning_rate": 9.027860407082706e-07, "logits/chosen": -1.0482661724090576, "logits/rejected": -1.0609327554702759, "logps/chosen": -43.941986083984375, "logps/rejected": -121.45335388183594, "loss": 0.1873, "rewards/accuracies": 0.8125, "rewards/chosen": 0.4450610280036926, "rewards/margins": 6.240961074829102, "rewards/rejected": -5.795900344848633, "step": 371 }, { "epoch": 0.57, "learning_rate": 9.019985651834703e-07, "logits/chosen": -1.0152562856674194, "logits/rejected": -1.011940360069275, "logps/chosen": -67.2203140258789, "logps/rejected": -114.62171936035156, "loss": 0.1769, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6607855558395386, "rewards/margins": 4.153834819793701, "rewards/rejected": -4.814620018005371, "step": 372 }, { "epoch": 0.57, "learning_rate": 9.012082594361075e-07, "logits/chosen": -0.9807397127151489, "logits/rejected": -1.0203466415405273, "logps/chosen": -56.298583984375, "logps/rejected": -98.74402618408203, "loss": 0.171, "rewards/accuracies": 0.875, "rewards/chosen": 0.09569351375102997, "rewards/margins": 4.102729797363281, "rewards/rejected": -4.0070366859436035, "step": 373 }, { "epoch": 0.57, "learning_rate": 9.004151290302349e-07, "logits/chosen": -1.163325548171997, "logits/rejected": -1.0914274454116821, "logps/chosen": -68.55766296386719, "logps/rejected": -156.38412475585938, "loss": 0.1497, "rewards/accuracies": 0.9375, "rewards/chosen": -0.25641486048698425, "rewards/margins": 7.406113624572754, "rewards/rejected": -7.6625285148620605, "step": 374 }, { "epoch": 0.57, "learning_rate": 8.996191795497919e-07, "logits/chosen": -1.2768843173980713, "logits/rejected": -1.240263819694519, "logps/chosen": -68.02412414550781, "logps/rejected": -153.4176788330078, "loss": 0.1802, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3844533860683441, "rewards/margins": 7.109435558319092, "rewards/rejected": -7.493888854980469, "step": 375 }, { "epoch": 0.57, "learning_rate": 8.988204165985649e-07, "logits/chosen": -1.1378958225250244, "logits/rejected": -1.1830037832260132, "logps/chosen": -70.71237182617188, "logps/rejected": -103.13880920410156, "loss": 0.2277, "rewards/accuracies": 0.875, "rewards/chosen": -0.7702474594116211, "rewards/margins": 3.580688238143921, "rewards/rejected": -4.350935459136963, "step": 376 }, { "epoch": 0.57, "learning_rate": 8.980188458001485e-07, "logits/chosen": -1.1306016445159912, "logits/rejected": -1.0990478992462158, "logps/chosen": -55.12686538696289, "logps/rejected": -116.69119262695312, "loss": 0.1967, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9413877129554749, "rewards/margins": 5.052735328674316, "rewards/rejected": -5.9941229820251465, "step": 377 }, { "epoch": 0.57, "learning_rate": 8.972144727979055e-07, "logits/chosen": -1.2199490070343018, "logits/rejected": -1.1264431476593018, "logps/chosen": -72.9341049194336, "logps/rejected": -156.02508544921875, "loss": 0.2169, "rewards/accuracies": 1.0, "rewards/chosen": -1.3027689456939697, "rewards/margins": 6.640481948852539, "rewards/rejected": -7.943251609802246, "step": 378 }, { "epoch": 0.58, "learning_rate": 8.964073032549274e-07, "logits/chosen": -1.0452485084533691, "logits/rejected": -0.9996328949928284, "logps/chosen": -82.88663482666016, "logps/rejected": -162.11898803710938, "loss": 0.267, "rewards/accuracies": 0.75, "rewards/chosen": -1.3536241054534912, "rewards/margins": 6.493758678436279, "rewards/rejected": -7.847382545471191, "step": 379 }, { "epoch": 0.58, "learning_rate": 8.955973428539942e-07, "logits/chosen": -0.8747669458389282, "logits/rejected": -0.786665678024292, "logps/chosen": -77.19548797607422, "logps/rejected": -172.11715698242188, "loss": 0.1487, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6060906648635864, "rewards/margins": 6.885627269744873, "rewards/rejected": -8.491718292236328, "step": 380 }, { "epoch": 0.58, "learning_rate": 8.947845972975347e-07, "logits/chosen": -1.1507608890533447, "logits/rejected": -1.093747615814209, "logps/chosen": -45.77164077758789, "logps/rejected": -102.840576171875, "loss": 0.1672, "rewards/accuracies": 1.0, "rewards/chosen": 0.4861528277397156, "rewards/margins": 4.780185699462891, "rewards/rejected": -4.294033050537109, "step": 381 }, { "epoch": 0.58, "learning_rate": 8.939690723075864e-07, "logits/chosen": -0.8670116066932678, "logits/rejected": -0.8109041452407837, "logps/chosen": -90.49297332763672, "logps/rejected": -167.0618896484375, "loss": 0.1867, "rewards/accuracies": 0.75, "rewards/chosen": -1.614911437034607, "rewards/margins": 5.28792142868042, "rewards/rejected": -6.902832508087158, "step": 382 }, { "epoch": 0.58, "learning_rate": 8.931507736257548e-07, "logits/chosen": -1.2159194946289062, "logits/rejected": -1.1381936073303223, "logps/chosen": -58.98343276977539, "logps/rejected": -132.68360900878906, "loss": 0.2547, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4426393508911133, "rewards/margins": 6.3477582931518555, "rewards/rejected": -6.790397644042969, "step": 383 }, { "epoch": 0.58, "learning_rate": 8.923297070131737e-07, "logits/chosen": -1.3936126232147217, "logits/rejected": -1.4558279514312744, "logps/chosen": -61.366458892822266, "logps/rejected": -142.0255584716797, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": -0.5890704989433289, "rewards/margins": 6.175658226013184, "rewards/rejected": -6.764728546142578, "step": 384 }, { "epoch": 0.58, "learning_rate": 8.915058782504634e-07, "logits/chosen": -0.9364544749259949, "logits/rejected": -0.9187269806861877, "logps/chosen": -51.528404235839844, "logps/rejected": -127.87124633789062, "loss": 0.2382, "rewards/accuracies": 1.0, "rewards/chosen": 0.18791763484477997, "rewards/margins": 5.724534034729004, "rewards/rejected": -5.536615371704102, "step": 385 }, { "epoch": 0.59, "learning_rate": 8.906792931376914e-07, "logits/chosen": -0.9947543740272522, "logits/rejected": -1.000620722770691, "logps/chosen": -73.4270248413086, "logps/rejected": -113.23756408691406, "loss": 0.1668, "rewards/accuracies": 0.9375, "rewards/chosen": -0.42094066739082336, "rewards/margins": 4.028453350067139, "rewards/rejected": -4.449394226074219, "step": 386 }, { "epoch": 0.59, "learning_rate": 8.898499574943309e-07, "logits/chosen": -1.025214672088623, "logits/rejected": -0.9233087301254272, "logps/chosen": -49.42487335205078, "logps/rejected": -108.94894409179688, "loss": 0.1782, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6033812761306763, "rewards/margins": 5.041341304779053, "rewards/rejected": -4.437960147857666, "step": 387 }, { "epoch": 0.59, "learning_rate": 8.890178771592197e-07, "logits/chosen": -1.1705193519592285, "logits/rejected": -1.122748851776123, "logps/chosen": -36.10163879394531, "logps/rejected": -83.32039642333984, "loss": 0.1825, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7399885654449463, "rewards/margins": 3.629817247390747, "rewards/rejected": -2.889828681945801, "step": 388 }, { "epoch": 0.59, "learning_rate": 8.881830579905194e-07, "logits/chosen": -1.4119915962219238, "logits/rejected": -1.378150224685669, "logps/chosen": -82.31924438476562, "logps/rejected": -147.86798095703125, "loss": 0.1762, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6388192176818848, "rewards/margins": 6.267736434936523, "rewards/rejected": -6.906556129455566, "step": 389 }, { "epoch": 0.59, "learning_rate": 8.87345505865674e-07, "logits/chosen": -1.1888000965118408, "logits/rejected": -1.082751750946045, "logps/chosen": -69.81549835205078, "logps/rejected": -129.64540100097656, "loss": 0.2374, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8034672737121582, "rewards/margins": 4.446656703948975, "rewards/rejected": -5.250123977661133, "step": 390 }, { "epoch": 0.59, "learning_rate": 8.865052266813685e-07, "logits/chosen": -1.2450262308120728, "logits/rejected": -1.1116446256637573, "logps/chosen": -82.40213775634766, "logps/rejected": -175.6051788330078, "loss": 0.2482, "rewards/accuracies": 0.875, "rewards/chosen": -1.3452680110931396, "rewards/margins": 6.234772682189941, "rewards/rejected": -7.580041408538818, "step": 391 }, { "epoch": 0.6, "learning_rate": 8.856622263534874e-07, "logits/chosen": -1.3854156732559204, "logits/rejected": -1.3954724073410034, "logps/chosen": -50.992088317871094, "logps/rejected": -120.94493865966797, "loss": 0.18, "rewards/accuracies": 1.0, "rewards/chosen": 0.28074389696121216, "rewards/margins": 6.036993980407715, "rewards/rejected": -5.75624942779541, "step": 392 }, { "epoch": 0.6, "learning_rate": 8.848165108170731e-07, "logits/chosen": -1.0285769701004028, "logits/rejected": -0.9848490357398987, "logps/chosen": -77.45350646972656, "logps/rejected": -174.41880798339844, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": 0.0908023938536644, "rewards/margins": 8.113433837890625, "rewards/rejected": -8.02263069152832, "step": 393 }, { "epoch": 0.6, "learning_rate": 8.839680860262844e-07, "logits/chosen": -0.9170817136764526, "logits/rejected": -0.8319286108016968, "logps/chosen": -41.26301193237305, "logps/rejected": -123.33838653564453, "loss": 0.1444, "rewards/accuracies": 1.0, "rewards/chosen": 0.7206428647041321, "rewards/margins": 5.508768081665039, "rewards/rejected": -4.788125514984131, "step": 394 }, { "epoch": 0.6, "learning_rate": 8.831169579543538e-07, "logits/chosen": -1.1199142932891846, "logits/rejected": -1.2598261833190918, "logps/chosen": -62.73654556274414, "logps/rejected": -113.8770523071289, "loss": 0.208, "rewards/accuracies": 0.875, "rewards/chosen": -0.5435269474983215, "rewards/margins": 4.305499076843262, "rewards/rejected": -4.849026679992676, "step": 395 }, { "epoch": 0.6, "learning_rate": 8.822631325935463e-07, "logits/chosen": -1.354875087738037, "logits/rejected": -1.3106560707092285, "logps/chosen": -56.36314010620117, "logps/rejected": -105.28909301757812, "loss": 0.1578, "rewards/accuracies": 1.0, "rewards/chosen": 0.7352412939071655, "rewards/margins": 4.004746437072754, "rewards/rejected": -3.269505262374878, "step": 396 }, { "epoch": 0.6, "learning_rate": 8.814066159551165e-07, "logits/chosen": -1.1495639085769653, "logits/rejected": -1.042298436164856, "logps/chosen": -66.9743881225586, "logps/rejected": -137.67288208007812, "loss": 0.2452, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3965058922767639, "rewards/margins": 5.131819725036621, "rewards/rejected": -5.52832555770874, "step": 397 }, { "epoch": 0.6, "learning_rate": 8.805474140692669e-07, "logits/chosen": -1.2664752006530762, "logits/rejected": -1.214312195777893, "logps/chosen": -73.468017578125, "logps/rejected": -154.4588165283203, "loss": 0.1813, "rewards/accuracies": 0.9375, "rewards/chosen": -0.47094786167144775, "rewards/margins": 6.118612766265869, "rewards/rejected": -6.589560508728027, "step": 398 }, { "epoch": 0.61, "learning_rate": 8.796855329851052e-07, "logits/chosen": -1.1437548398971558, "logits/rejected": -1.1186609268188477, "logps/chosen": -52.121925354003906, "logps/rejected": -108.3409652709961, "loss": 0.1657, "rewards/accuracies": 1.0, "rewards/chosen": 0.2030365765094757, "rewards/margins": 4.836932182312012, "rewards/rejected": -4.633895397186279, "step": 399 }, { "epoch": 0.61, "learning_rate": 8.788209787706014e-07, "logits/chosen": -1.1624184846878052, "logits/rejected": -1.1892285346984863, "logps/chosen": -50.03237533569336, "logps/rejected": -90.02861022949219, "loss": 0.2179, "rewards/accuracies": 0.9375, "rewards/chosen": -0.05295214056968689, "rewards/margins": 4.0794854164123535, "rewards/rejected": -4.132437705993652, "step": 400 }, { "epoch": 0.61, "learning_rate": 8.779537575125455e-07, "logits/chosen": -1.2393620014190674, "logits/rejected": -1.1940300464630127, "logps/chosen": -49.26935958862305, "logps/rejected": -111.75259399414062, "loss": 0.2443, "rewards/accuracies": 0.875, "rewards/chosen": 0.5827302932739258, "rewards/margins": 4.7527031898498535, "rewards/rejected": -4.1699724197387695, "step": 401 }, { "epoch": 0.61, "learning_rate": 8.770838753165044e-07, "logits/chosen": -0.9424704313278198, "logits/rejected": -0.8532572984695435, "logps/chosen": -51.71316146850586, "logps/rejected": -123.20469665527344, "loss": 0.196, "rewards/accuracies": 0.875, "rewards/chosen": 0.06543193012475967, "rewards/margins": 4.748517036437988, "rewards/rejected": -4.6830854415893555, "step": 402 }, { "epoch": 0.61, "learning_rate": 8.762113383067793e-07, "logits/chosen": -1.0254884958267212, "logits/rejected": -0.8957661390304565, "logps/chosen": -66.71129608154297, "logps/rejected": -159.77406311035156, "loss": 0.2056, "rewards/accuracies": 0.875, "rewards/chosen": -0.3826755881309509, "rewards/margins": 6.118356704711914, "rewards/rejected": -6.501031875610352, "step": 403 }, { "epoch": 0.61, "learning_rate": 8.753361526263621e-07, "logits/chosen": -1.2008506059646606, "logits/rejected": -1.0892599821090698, "logps/chosen": -67.92991638183594, "logps/rejected": -161.24533081054688, "loss": 0.1915, "rewards/accuracies": 0.875, "rewards/chosen": -0.2895081639289856, "rewards/margins": 5.8394365310668945, "rewards/rejected": -6.128944396972656, "step": 404 }, { "epoch": 0.62, "learning_rate": 8.744583244368923e-07, "logits/chosen": -1.0438123941421509, "logits/rejected": -1.0480939149856567, "logps/chosen": -70.23876953125, "logps/rejected": -138.24124145507812, "loss": 0.2158, "rewards/accuracies": 1.0, "rewards/chosen": -0.04963761568069458, "rewards/margins": 4.248196125030518, "rewards/rejected": -4.297833442687988, "step": 405 }, { "epoch": 0.62, "learning_rate": 8.735778599186136e-07, "logits/chosen": -1.226418137550354, "logits/rejected": -1.176148533821106, "logps/chosen": -50.929847717285156, "logps/rejected": -96.35321807861328, "loss": 0.1878, "rewards/accuracies": 0.75, "rewards/chosen": 0.22736504673957825, "rewards/margins": 3.7616052627563477, "rewards/rejected": -3.5342397689819336, "step": 406 }, { "epoch": 0.62, "learning_rate": 8.726947652703307e-07, "logits/chosen": -1.170417308807373, "logits/rejected": -1.1636468172073364, "logps/chosen": -57.47322082519531, "logps/rejected": -143.91566467285156, "loss": 0.2255, "rewards/accuracies": 1.0, "rewards/chosen": 0.058366671204566956, "rewards/margins": 5.934195518493652, "rewards/rejected": -5.875828742980957, "step": 407 }, { "epoch": 0.62, "learning_rate": 8.718090467093653e-07, "logits/chosen": -1.2647299766540527, "logits/rejected": -1.3104499578475952, "logps/chosen": -68.82608032226562, "logps/rejected": -103.34474182128906, "loss": 0.1692, "rewards/accuracies": 0.875, "rewards/chosen": 0.005170680582523346, "rewards/margins": 3.650045394897461, "rewards/rejected": -3.6448748111724854, "step": 408 }, { "epoch": 0.62, "learning_rate": 8.709207104715124e-07, "logits/chosen": -1.369813084602356, "logits/rejected": -1.238029956817627, "logps/chosen": -51.8563346862793, "logps/rejected": -129.11083984375, "loss": 0.2409, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6820074319839478, "rewards/margins": 4.948735237121582, "rewards/rejected": -4.266727924346924, "step": 409 }, { "epoch": 0.62, "learning_rate": 8.700297628109964e-07, "logits/chosen": -1.0370895862579346, "logits/rejected": -0.9580932259559631, "logps/chosen": -70.1031723022461, "logps/rejected": -165.79930114746094, "loss": 0.2278, "rewards/accuracies": 0.875, "rewards/chosen": 0.26600658893585205, "rewards/margins": 6.668997764587402, "rewards/rejected": -6.40299129486084, "step": 410 }, { "epoch": 0.62, "learning_rate": 8.691362100004273e-07, "logits/chosen": -1.1846193075180054, "logits/rejected": -1.2501457929611206, "logps/chosen": -75.08912658691406, "logps/rejected": -120.98992919921875, "loss": 0.2238, "rewards/accuracies": 0.8125, "rewards/chosen": -0.41689085960388184, "rewards/margins": 4.603246212005615, "rewards/rejected": -5.020136833190918, "step": 411 }, { "epoch": 0.63, "learning_rate": 8.68240058330756e-07, "logits/chosen": -1.1766867637634277, "logits/rejected": -1.1819841861724854, "logps/chosen": -40.93233108520508, "logps/rejected": -90.56710052490234, "loss": 0.2187, "rewards/accuracies": 0.875, "rewards/chosen": 0.7101311683654785, "rewards/margins": 4.047136306762695, "rewards/rejected": -3.337005376815796, "step": 412 }, { "epoch": 0.63, "learning_rate": 8.673413141112309e-07, "logits/chosen": -1.2025368213653564, "logits/rejected": -1.137385606765747, "logps/chosen": -53.99886703491211, "logps/rejected": -119.89103698730469, "loss": 0.1404, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3699154257774353, "rewards/margins": 5.138401985168457, "rewards/rejected": -4.768486499786377, "step": 413 }, { "epoch": 0.63, "learning_rate": 8.664399836693525e-07, "logits/chosen": -1.2454607486724854, "logits/rejected": -1.167482852935791, "logps/chosen": -60.050811767578125, "logps/rejected": -140.74850463867188, "loss": 0.1651, "rewards/accuracies": 1.0, "rewards/chosen": -0.5211348533630371, "rewards/margins": 5.840010643005371, "rewards/rejected": -6.361145973205566, "step": 414 }, { "epoch": 0.63, "learning_rate": 8.655360733508292e-07, "logits/chosen": -1.2281296253204346, "logits/rejected": -1.205501675605774, "logps/chosen": -54.05847930908203, "logps/rejected": -114.76741027832031, "loss": 0.2068, "rewards/accuracies": 1.0, "rewards/chosen": 0.048486582934856415, "rewards/margins": 4.931014060974121, "rewards/rejected": -4.882527828216553, "step": 415 }, { "epoch": 0.63, "learning_rate": 8.646295895195333e-07, "logits/chosen": -1.1683878898620605, "logits/rejected": -1.1041896343231201, "logps/chosen": -48.70958709716797, "logps/rejected": -115.05414581298828, "loss": 0.1353, "rewards/accuracies": 0.875, "rewards/chosen": 0.47555655241012573, "rewards/margins": 5.370617389678955, "rewards/rejected": -4.895061016082764, "step": 416 }, { "epoch": 0.63, "learning_rate": 8.637205385574547e-07, "logits/chosen": -1.1590019464492798, "logits/rejected": -1.2345454692840576, "logps/chosen": -70.25973510742188, "logps/rejected": -153.3055877685547, "loss": 0.1685, "rewards/accuracies": 1.0, "rewards/chosen": 0.06854680180549622, "rewards/margins": 7.001974582672119, "rewards/rejected": -6.933426856994629, "step": 417 }, { "epoch": 0.64, "learning_rate": 8.628089268646579e-07, "logits/chosen": -0.9455069303512573, "logits/rejected": -0.9382145404815674, "logps/chosen": -60.832069396972656, "logps/rejected": -111.65571594238281, "loss": 0.1859, "rewards/accuracies": 0.9375, "rewards/chosen": -0.29948708415031433, "rewards/margins": 3.8152897357940674, "rewards/rejected": -4.114776611328125, "step": 418 }, { "epoch": 0.64, "learning_rate": 8.618947608592351e-07, "logits/chosen": -1.022441029548645, "logits/rejected": -1.0866788625717163, "logps/chosen": -69.27745056152344, "logps/rejected": -123.41693115234375, "loss": 0.112, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3073914349079132, "rewards/margins": 5.595177173614502, "rewards/rejected": -5.902567386627197, "step": 419 }, { "epoch": 0.64, "learning_rate": 8.609780469772621e-07, "logits/chosen": -1.0906612873077393, "logits/rejected": -1.0917662382125854, "logps/chosen": -84.9109115600586, "logps/rejected": -181.5916748046875, "loss": 0.2012, "rewards/accuracies": 0.875, "rewards/chosen": -0.4173294007778168, "rewards/margins": 8.144667625427246, "rewards/rejected": -8.561997413635254, "step": 420 }, { "epoch": 0.64, "learning_rate": 8.600587916727532e-07, "logits/chosen": -0.818186342716217, "logits/rejected": -0.8237495422363281, "logps/chosen": -46.085514068603516, "logps/rejected": -100.67369842529297, "loss": 0.1745, "rewards/accuracies": 0.9375, "rewards/chosen": 0.13958895206451416, "rewards/margins": 4.293861389160156, "rewards/rejected": -4.15427303314209, "step": 421 }, { "epoch": 0.64, "learning_rate": 8.591370014176144e-07, "logits/chosen": -1.2785382270812988, "logits/rejected": -1.270959496498108, "logps/chosen": -81.02368927001953, "logps/rejected": -142.10545349121094, "loss": 0.1524, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0255403518676758, "rewards/margins": 5.374872207641602, "rewards/rejected": -6.4004130363464355, "step": 422 }, { "epoch": 0.64, "learning_rate": 8.582126827015992e-07, "logits/chosen": -1.2582919597625732, "logits/rejected": -1.2366271018981934, "logps/chosen": -90.80106353759766, "logps/rejected": -185.97265625, "loss": 0.1604, "rewards/accuracies": 0.875, "rewards/chosen": -1.2223063707351685, "rewards/margins": 8.304044723510742, "rewards/rejected": -9.526350975036621, "step": 423 }, { "epoch": 0.64, "learning_rate": 8.572858420322627e-07, "logits/chosen": -0.9271082878112793, "logits/rejected": -0.9362425804138184, "logps/chosen": -77.71623992919922, "logps/rejected": -125.13803100585938, "loss": 0.2093, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0138859748840332, "rewards/margins": 4.141689777374268, "rewards/rejected": -5.155575752258301, "step": 424 }, { "epoch": 0.65, "learning_rate": 8.563564859349147e-07, "logits/chosen": -1.1701502799987793, "logits/rejected": -1.0111591815948486, "logps/chosen": -71.44900512695312, "logps/rejected": -142.65487670898438, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": -0.7668020725250244, "rewards/margins": 5.6033477783203125, "rewards/rejected": -6.3701491355896, "step": 425 }, { "epoch": 0.65, "learning_rate": 8.554246209525755e-07, "logits/chosen": -1.1665081977844238, "logits/rejected": -1.1051981449127197, "logps/chosen": -52.993370056152344, "logps/rejected": -123.57511138916016, "loss": 0.2014, "rewards/accuracies": 0.875, "rewards/chosen": -0.2413010597229004, "rewards/margins": 5.42091178894043, "rewards/rejected": -5.662213325500488, "step": 426 }, { "epoch": 0.65, "learning_rate": 8.544902536459283e-07, "logits/chosen": -1.0885193347930908, "logits/rejected": -1.0061079263687134, "logps/chosen": -78.6636962890625, "logps/rejected": -166.54043579101562, "loss": 0.1941, "rewards/accuracies": 1.0, "rewards/chosen": -1.6128672361373901, "rewards/margins": 6.642463207244873, "rewards/rejected": -8.255331039428711, "step": 427 }, { "epoch": 0.65, "learning_rate": 8.535533905932737e-07, "logits/chosen": -1.0796353816986084, "logits/rejected": -1.1003433465957642, "logps/chosen": -59.647865295410156, "logps/rejected": -116.34637451171875, "loss": 0.1723, "rewards/accuracies": 0.875, "rewards/chosen": -0.1189768984913826, "rewards/margins": 5.141149044036865, "rewards/rejected": -5.260126113891602, "step": 428 }, { "epoch": 0.65, "learning_rate": 8.526140383904836e-07, "logits/chosen": -0.939657986164093, "logits/rejected": -0.9173675775527954, "logps/chosen": -66.11197662353516, "logps/rejected": -147.4273681640625, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -0.6731550097465515, "rewards/margins": 6.669175148010254, "rewards/rejected": -7.342329978942871, "step": 429 }, { "epoch": 0.65, "learning_rate": 8.516722036509538e-07, "logits/chosen": -1.3927438259124756, "logits/rejected": -1.3029999732971191, "logps/chosen": -64.37356567382812, "logps/rejected": -149.1137237548828, "loss": 0.1832, "rewards/accuracies": 0.9375, "rewards/chosen": -0.45377394556999207, "rewards/margins": 6.534273147583008, "rewards/rejected": -6.988047122955322, "step": 430 }, { "epoch": 0.65, "learning_rate": 8.50727893005559e-07, "logits/chosen": -1.2839081287384033, "logits/rejected": -1.3670390844345093, "logps/chosen": -75.1611328125, "logps/rejected": -152.00440979003906, "loss": 0.1642, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9996129274368286, "rewards/margins": 6.434969425201416, "rewards/rejected": -7.434582710266113, "step": 431 }, { "epoch": 0.66, "learning_rate": 8.497811131026045e-07, "logits/chosen": -1.0323083400726318, "logits/rejected": -0.9453625679016113, "logps/chosen": -67.27543640136719, "logps/rejected": -137.97413635253906, "loss": 0.1446, "rewards/accuracies": 0.875, "rewards/chosen": -0.5392396450042725, "rewards/margins": 5.336440086364746, "rewards/rejected": -5.875679969787598, "step": 432 }, { "epoch": 0.66, "learning_rate": 8.488318706077805e-07, "logits/chosen": -1.1992932558059692, "logits/rejected": -1.1836185455322266, "logps/chosen": -57.8773307800293, "logps/rejected": -136.941162109375, "loss": 0.219, "rewards/accuracies": 0.9375, "rewards/chosen": -0.13526064157485962, "rewards/margins": 6.538956642150879, "rewards/rejected": -6.674217700958252, "step": 433 }, { "epoch": 0.66, "learning_rate": 8.478801722041146e-07, "logits/chosen": -1.2053327560424805, "logits/rejected": -1.054445743560791, "logps/chosen": -64.59292602539062, "logps/rejected": -153.5313262939453, "loss": 0.1623, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6657881140708923, "rewards/margins": 6.392370700836182, "rewards/rejected": -7.058159351348877, "step": 434 }, { "epoch": 0.66, "learning_rate": 8.46926024591925e-07, "logits/chosen": -0.9527780413627625, "logits/rejected": -0.9377985596656799, "logps/chosen": -68.6094970703125, "logps/rejected": -112.81304931640625, "loss": 0.1744, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9481337070465088, "rewards/margins": 4.392820358276367, "rewards/rejected": -5.340954303741455, "step": 435 }, { "epoch": 0.66, "learning_rate": 8.459694344887731e-07, "logits/chosen": -0.9847865104675293, "logits/rejected": -1.007370948791504, "logps/chosen": -53.81441116333008, "logps/rejected": -91.65478515625, "loss": 0.1789, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1929657459259033, "rewards/margins": 3.4950058460235596, "rewards/rejected": -4.687971591949463, "step": 436 }, { "epoch": 0.66, "learning_rate": 8.450104086294165e-07, "logits/chosen": -1.2699943780899048, "logits/rejected": -1.2473492622375488, "logps/chosen": -82.6616439819336, "logps/rejected": -169.1042938232422, "loss": 0.1898, "rewards/accuracies": 1.0, "rewards/chosen": -1.1959168910980225, "rewards/margins": 6.592803955078125, "rewards/rejected": -7.788720607757568, "step": 437 }, { "epoch": 0.67, "learning_rate": 8.440489537657618e-07, "logits/chosen": -1.339113473892212, "logits/rejected": -1.269339680671692, "logps/chosen": -55.6357307434082, "logps/rejected": -125.44326782226562, "loss": 0.2618, "rewards/accuracies": 0.8125, "rewards/chosen": -0.15911231935024261, "rewards/margins": 5.041744232177734, "rewards/rejected": -5.200855731964111, "step": 438 }, { "epoch": 0.67, "learning_rate": 8.430850766668161e-07, "logits/chosen": -1.1586527824401855, "logits/rejected": -1.1263474225997925, "logps/chosen": -59.57755661010742, "logps/rejected": -128.5335693359375, "loss": 0.2132, "rewards/accuracies": 0.875, "rewards/chosen": -0.4526219069957733, "rewards/margins": 4.85505485534668, "rewards/rejected": -5.307676315307617, "step": 439 }, { "epoch": 0.67, "learning_rate": 8.421187841186401e-07, "logits/chosen": -1.1829062700271606, "logits/rejected": -1.114036202430725, "logps/chosen": -77.67008209228516, "logps/rejected": -159.51492309570312, "loss": 0.1727, "rewards/accuracies": 1.0, "rewards/chosen": -1.0863698720932007, "rewards/margins": 6.396143436431885, "rewards/rejected": -7.482513427734375, "step": 440 }, { "epoch": 0.67, "learning_rate": 8.411500829243005e-07, "logits/chosen": -1.3695244789123535, "logits/rejected": -1.2936193943023682, "logps/chosen": -57.488224029541016, "logps/rejected": -125.73988342285156, "loss": 0.1468, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4893732964992523, "rewards/margins": 4.295994281768799, "rewards/rejected": -4.785367012023926, "step": 441 }, { "epoch": 0.67, "learning_rate": 8.401789799038216e-07, "logits/chosen": -1.1487072706222534, "logits/rejected": -1.1367806196212769, "logps/chosen": -69.29662322998047, "logps/rejected": -139.33428955078125, "loss": 0.2547, "rewards/accuracies": 0.875, "rewards/chosen": -0.7461766004562378, "rewards/margins": 5.8961663246154785, "rewards/rejected": -6.642343044281006, "step": 442 }, { "epoch": 0.67, "learning_rate": 8.392054818941374e-07, "logits/chosen": -1.072195291519165, "logits/rejected": -1.1083810329437256, "logps/chosen": -51.24053192138672, "logps/rejected": -133.1757354736328, "loss": 0.2195, "rewards/accuracies": 1.0, "rewards/chosen": 0.6234368085861206, "rewards/margins": 6.912321090698242, "rewards/rejected": -6.288885116577148, "step": 443 }, { "epoch": 0.67, "learning_rate": 8.382295957490435e-07, "logits/chosen": -1.0317915678024292, "logits/rejected": -1.020350456237793, "logps/chosen": -54.91352844238281, "logps/rejected": -111.52494049072266, "loss": 0.184, "rewards/accuracies": 0.8125, "rewards/chosen": 0.05642195791006088, "rewards/margins": 5.561900615692139, "rewards/rejected": -5.505478382110596, "step": 444 }, { "epoch": 0.68, "learning_rate": 8.372513283391489e-07, "logits/chosen": -1.2246626615524292, "logits/rejected": -1.1847219467163086, "logps/chosen": -67.77139282226562, "logps/rejected": -144.07675170898438, "loss": 0.1231, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8761771321296692, "rewards/margins": 5.497477054595947, "rewards/rejected": -6.373654365539551, "step": 445 }, { "epoch": 0.68, "learning_rate": 8.36270686551828e-07, "logits/chosen": -1.3635679483413696, "logits/rejected": -1.4009004831314087, "logps/chosen": -57.39094924926758, "logps/rejected": -99.71837615966797, "loss": 0.1374, "rewards/accuracies": 0.8125, "rewards/chosen": -0.02806789055466652, "rewards/margins": 4.0962934494018555, "rewards/rejected": -4.124361515045166, "step": 446 }, { "epoch": 0.68, "learning_rate": 8.35287677291171e-07, "logits/chosen": -1.0316144227981567, "logits/rejected": -0.9815775156021118, "logps/chosen": -49.208290100097656, "logps/rejected": -99.20479583740234, "loss": 0.2081, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08192768692970276, "rewards/margins": 3.8505711555480957, "rewards/rejected": -3.9324989318847656, "step": 447 }, { "epoch": 0.68, "learning_rate": 8.343023074779368e-07, "logits/chosen": -0.8122367858886719, "logits/rejected": -0.8544483780860901, "logps/chosen": -42.28432846069336, "logps/rejected": -93.08744049072266, "loss": 0.1256, "rewards/accuracies": 0.9375, "rewards/chosen": 0.23971232771873474, "rewards/margins": 4.2626051902771, "rewards/rejected": -4.022892475128174, "step": 448 }, { "epoch": 0.68, "learning_rate": 8.333145840495027e-07, "logits/chosen": -1.1597957611083984, "logits/rejected": -1.127702236175537, "logps/chosen": -57.63410568237305, "logps/rejected": -108.86802673339844, "loss": 0.1385, "rewards/accuracies": 0.9375, "rewards/chosen": 0.15871790051460266, "rewards/margins": 4.300457000732422, "rewards/rejected": -4.141739845275879, "step": 449 }, { "epoch": 0.68, "learning_rate": 8.32324513959817e-07, "logits/chosen": -1.239582896232605, "logits/rejected": -1.27329683303833, "logps/chosen": -65.73753356933594, "logps/rejected": -123.59443664550781, "loss": 0.1486, "rewards/accuracies": 0.875, "rewards/chosen": -0.25332725048065186, "rewards/margins": 4.592188835144043, "rewards/rejected": -4.845516681671143, "step": 450 }, { "epoch": 0.69, "learning_rate": 8.313321041793491e-07, "logits/chosen": -1.1276596784591675, "logits/rejected": -1.1869280338287354, "logps/chosen": -61.36394119262695, "logps/rejected": -142.14089965820312, "loss": 0.1277, "rewards/accuracies": 1.0, "rewards/chosen": 0.573635995388031, "rewards/margins": 6.9199676513671875, "rewards/rejected": -6.346331596374512, "step": 451 }, { "epoch": 0.69, "learning_rate": 8.303373616950406e-07, "logits/chosen": -0.9326416254043579, "logits/rejected": -0.9587483406066895, "logps/chosen": -43.44063949584961, "logps/rejected": -98.64341735839844, "loss": 0.2942, "rewards/accuracies": 0.875, "rewards/chosen": 0.524234414100647, "rewards/margins": 4.821613311767578, "rewards/rejected": -4.297379016876221, "step": 452 }, { "epoch": 0.69, "learning_rate": 8.293402935102566e-07, "logits/chosen": -1.2515349388122559, "logits/rejected": -1.300333023071289, "logps/chosen": -76.35446166992188, "logps/rejected": -148.16094970703125, "loss": 0.1673, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3025237023830414, "rewards/margins": 6.630747318267822, "rewards/rejected": -6.933270454406738, "step": 453 }, { "epoch": 0.69, "learning_rate": 8.283409066447355e-07, "logits/chosen": -1.1542006731033325, "logits/rejected": -1.1590346097946167, "logps/chosen": -55.921749114990234, "logps/rejected": -125.4809799194336, "loss": 0.1627, "rewards/accuracies": 1.0, "rewards/chosen": 0.44540640711784363, "rewards/margins": 5.576838493347168, "rewards/rejected": -5.131432056427002, "step": 454 }, { "epoch": 0.69, "learning_rate": 8.273392081345404e-07, "logits/chosen": -1.192995548248291, "logits/rejected": -1.053298830986023, "logps/chosen": -66.12108612060547, "logps/rejected": -146.68898010253906, "loss": 0.1636, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0854855328798294, "rewards/margins": 5.814007759094238, "rewards/rejected": -5.728522300720215, "step": 455 }, { "epoch": 0.69, "learning_rate": 8.263352050320094e-07, "logits/chosen": -1.0546703338623047, "logits/rejected": -1.040149450302124, "logps/chosen": -54.151493072509766, "logps/rejected": -125.35000610351562, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": -0.0205656997859478, "rewards/margins": 5.513425827026367, "rewards/rejected": -5.53399133682251, "step": 456 }, { "epoch": 0.69, "learning_rate": 8.253289044057053e-07, "logits/chosen": -1.203826904296875, "logits/rejected": -1.198434829711914, "logps/chosen": -55.11236572265625, "logps/rejected": -112.63180541992188, "loss": 0.1271, "rewards/accuracies": 0.875, "rewards/chosen": -0.22854329645633698, "rewards/margins": 4.5432257652282715, "rewards/rejected": -4.771769046783447, "step": 457 }, { "epoch": 0.7, "learning_rate": 8.243203133403671e-07, "logits/chosen": -1.0231239795684814, "logits/rejected": -0.9769048094749451, "logps/chosen": -47.63315200805664, "logps/rejected": -90.92638397216797, "loss": 0.1778, "rewards/accuracies": 0.875, "rewards/chosen": 0.21240568161010742, "rewards/margins": 4.013316631317139, "rewards/rejected": -3.800910472869873, "step": 458 }, { "epoch": 0.7, "learning_rate": 8.233094389368584e-07, "logits/chosen": -1.0631383657455444, "logits/rejected": -1.0314054489135742, "logps/chosen": -59.08259582519531, "logps/rejected": -123.09644317626953, "loss": 0.1874, "rewards/accuracies": 0.875, "rewards/chosen": 0.03381906449794769, "rewards/margins": 5.111746788024902, "rewards/rejected": -5.077927589416504, "step": 459 }, { "epoch": 0.7, "learning_rate": 8.222962883121195e-07, "logits/chosen": -1.1790649890899658, "logits/rejected": -1.1812490224838257, "logps/chosen": -72.05509948730469, "logps/rejected": -149.14263916015625, "loss": 0.1478, "rewards/accuracies": 0.8125, "rewards/chosen": 0.024528905749320984, "rewards/margins": 6.314554214477539, "rewards/rejected": -6.290025234222412, "step": 460 }, { "epoch": 0.7, "learning_rate": 8.21280868599115e-07, "logits/chosen": -1.2503714561462402, "logits/rejected": -1.3049639463424683, "logps/chosen": -55.94298553466797, "logps/rejected": -105.9774169921875, "loss": 0.1861, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1556202620267868, "rewards/margins": 4.34453010559082, "rewards/rejected": -4.188909530639648, "step": 461 }, { "epoch": 0.7, "learning_rate": 8.202631869467858e-07, "logits/chosen": -1.1742231845855713, "logits/rejected": -1.0610723495483398, "logps/chosen": -61.1878662109375, "logps/rejected": -159.06466674804688, "loss": 0.1741, "rewards/accuracies": 1.0, "rewards/chosen": 0.2823966145515442, "rewards/margins": 7.47265625, "rewards/rejected": -7.190260410308838, "step": 462 }, { "epoch": 0.7, "learning_rate": 8.192432505199966e-07, "logits/chosen": -1.2517600059509277, "logits/rejected": -1.2136309146881104, "logps/chosen": -68.1708755493164, "logps/rejected": -165.7637939453125, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -0.06372582912445068, "rewards/margins": 7.516479969024658, "rewards/rejected": -7.580205917358398, "step": 463 }, { "epoch": 0.7, "learning_rate": 8.182210664994877e-07, "logits/chosen": -1.2376246452331543, "logits/rejected": -1.2937442064285278, "logps/chosen": -62.00231170654297, "logps/rejected": -131.11273193359375, "loss": 0.2543, "rewards/accuracies": 1.0, "rewards/chosen": 0.1723182648420334, "rewards/margins": 6.003687381744385, "rewards/rejected": -5.831368446350098, "step": 464 }, { "epoch": 0.71, "learning_rate": 8.171966420818227e-07, "logits/chosen": -1.1604633331298828, "logits/rejected": -1.1980036497116089, "logps/chosen": -68.14649963378906, "logps/rejected": -130.35452270507812, "loss": 0.1336, "rewards/accuracies": 1.0, "rewards/chosen": -0.35314807295799255, "rewards/margins": 5.461781978607178, "rewards/rejected": -5.814929962158203, "step": 465 }, { "epoch": 0.71, "learning_rate": 8.161699844793384e-07, "logits/chosen": -1.22269606590271, "logits/rejected": -1.1873986721038818, "logps/chosen": -58.53391647338867, "logps/rejected": -124.73702239990234, "loss": 0.2184, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0939812958240509, "rewards/margins": 4.914519309997559, "rewards/rejected": -4.82053804397583, "step": 466 }, { "epoch": 0.71, "learning_rate": 8.151411009200941e-07, "logits/chosen": -0.9256694316864014, "logits/rejected": -0.9075564742088318, "logps/chosen": -48.61707305908203, "logps/rejected": -124.52507019042969, "loss": 0.1166, "rewards/accuracies": 0.9375, "rewards/chosen": 0.005536448210477829, "rewards/margins": 5.7331953048706055, "rewards/rejected": -5.727659225463867, "step": 467 }, { "epoch": 0.71, "learning_rate": 8.141099986478212e-07, "logits/chosen": -1.1238139867782593, "logits/rejected": -1.0240187644958496, "logps/chosen": -69.22377014160156, "logps/rejected": -145.86741638183594, "loss": 0.1237, "rewards/accuracies": 0.9375, "rewards/chosen": -0.32074204087257385, "rewards/margins": 6.419075965881348, "rewards/rejected": -6.739818572998047, "step": 468 }, { "epoch": 0.71, "learning_rate": 8.130766849218708e-07, "logits/chosen": -1.5412321090698242, "logits/rejected": -1.4462655782699585, "logps/chosen": -47.1432991027832, "logps/rejected": -125.11244201660156, "loss": 0.1467, "rewards/accuracies": 0.8125, "rewards/chosen": 0.27659279108047485, "rewards/margins": 5.030959129333496, "rewards/rejected": -4.754365921020508, "step": 469 }, { "epoch": 0.71, "learning_rate": 8.120411670171642e-07, "logits/chosen": -1.2974660396575928, "logits/rejected": -1.2382835149765015, "logps/chosen": -49.07151412963867, "logps/rejected": -117.94173431396484, "loss": 0.1567, "rewards/accuracies": 1.0, "rewards/chosen": 0.8536134958267212, "rewards/margins": 5.4294915199279785, "rewards/rejected": -4.575877666473389, "step": 470 }, { "epoch": 0.72, "learning_rate": 8.110034522241407e-07, "logits/chosen": -1.2068498134613037, "logits/rejected": -1.191725254058838, "logps/chosen": -68.56617736816406, "logps/rejected": -155.53311157226562, "loss": 0.1059, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3536372780799866, "rewards/margins": 6.959292411804199, "rewards/rejected": -6.605655193328857, "step": 471 }, { "epoch": 0.72, "learning_rate": 8.099635478487064e-07, "logits/chosen": -1.2709250450134277, "logits/rejected": -1.2953561544418335, "logps/chosen": -58.28156280517578, "logps/rejected": -143.70582580566406, "loss": 0.1743, "rewards/accuracies": 1.0, "rewards/chosen": 0.09408218413591385, "rewards/margins": 7.253283977508545, "rewards/rejected": -7.1592020988464355, "step": 472 }, { "epoch": 0.72, "learning_rate": 8.08921461212183e-07, "logits/chosen": -1.327064037322998, "logits/rejected": -1.2830547094345093, "logps/chosen": -58.22392272949219, "logps/rejected": -138.6402130126953, "loss": 0.143, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7211057543754578, "rewards/margins": 5.940493106842041, "rewards/rejected": -5.219387531280518, "step": 473 }, { "epoch": 0.72, "learning_rate": 8.078771996512565e-07, "logits/chosen": -1.2072927951812744, "logits/rejected": -1.2078496217727661, "logps/chosen": -54.813236236572266, "logps/rejected": -107.05663299560547, "loss": 0.1953, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3342176079750061, "rewards/margins": 4.0993547439575195, "rewards/rejected": -4.433572292327881, "step": 474 }, { "epoch": 0.72, "learning_rate": 8.068307705179246e-07, "logits/chosen": -1.2271807193756104, "logits/rejected": -1.1826114654541016, "logps/chosen": -69.37525939941406, "logps/rejected": -181.13885498046875, "loss": 0.1718, "rewards/accuracies": 0.9375, "rewards/chosen": 0.07493260502815247, "rewards/margins": 8.193195343017578, "rewards/rejected": -8.118261337280273, "step": 475 }, { "epoch": 0.72, "learning_rate": 8.057821811794457e-07, "logits/chosen": -0.9722121357917786, "logits/rejected": -1.0049093961715698, "logps/chosen": -55.88921356201172, "logps/rejected": -112.35614013671875, "loss": 0.1317, "rewards/accuracies": 0.9375, "rewards/chosen": -0.43203961849212646, "rewards/margins": 4.577415943145752, "rewards/rejected": -5.009455680847168, "step": 476 }, { "epoch": 0.72, "learning_rate": 8.047314390182871e-07, "logits/chosen": -1.0967211723327637, "logits/rejected": -1.109215497970581, "logps/chosen": -47.45706558227539, "logps/rejected": -87.63819885253906, "loss": 0.2356, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3036121428012848, "rewards/margins": 4.191996097564697, "rewards/rejected": -3.8883838653564453, "step": 477 }, { "epoch": 0.73, "learning_rate": 8.036785514320725e-07, "logits/chosen": -1.1749989986419678, "logits/rejected": -1.1614140272140503, "logps/chosen": -41.67080307006836, "logps/rejected": -100.26565551757812, "loss": 0.1679, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3063531517982483, "rewards/margins": 4.6997246742248535, "rewards/rejected": -4.39337158203125, "step": 478 }, { "epoch": 0.73, "learning_rate": 8.026235258335306e-07, "logits/chosen": -1.225369930267334, "logits/rejected": -1.1531744003295898, "logps/chosen": -62.20580291748047, "logps/rejected": -149.44842529296875, "loss": 0.1963, "rewards/accuracies": 1.0, "rewards/chosen": 0.44672101736068726, "rewards/margins": 6.923643112182617, "rewards/rejected": -6.476922512054443, "step": 479 }, { "epoch": 0.73, "learning_rate": 8.015663696504423e-07, "logits/chosen": -1.3094971179962158, "logits/rejected": -1.230414628982544, "logps/chosen": -57.678443908691406, "logps/rejected": -137.21197509765625, "loss": 0.1987, "rewards/accuracies": 1.0, "rewards/chosen": -0.13374830782413483, "rewards/margins": 6.491249084472656, "rewards/rejected": -6.624998092651367, "step": 480 }, { "epoch": 0.73, "learning_rate": 8.005070903255881e-07, "logits/chosen": -1.253745436668396, "logits/rejected": -1.2687311172485352, "logps/chosen": -70.95086669921875, "logps/rejected": -124.78730773925781, "loss": 0.1432, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0652434229850769, "rewards/margins": 5.830631256103516, "rewards/rejected": -5.8958740234375, "step": 481 }, { "epoch": 0.73, "learning_rate": 7.994456953166972e-07, "logits/chosen": -1.1457329988479614, "logits/rejected": -1.1045362949371338, "logps/chosen": -56.931854248046875, "logps/rejected": -122.42547607421875, "loss": 0.1672, "rewards/accuracies": 1.0, "rewards/chosen": 0.08954182267189026, "rewards/margins": 5.752513408660889, "rewards/rejected": -5.662971496582031, "step": 482 }, { "epoch": 0.73, "learning_rate": 7.983821920963935e-07, "logits/chosen": -1.155022144317627, "logits/rejected": -1.2023171186447144, "logps/chosen": -52.90994644165039, "logps/rejected": -98.7842788696289, "loss": 0.1284, "rewards/accuracies": 0.9375, "rewards/chosen": -0.056086815893650055, "rewards/margins": 4.271111965179443, "rewards/rejected": -4.327198505401611, "step": 483 }, { "epoch": 0.74, "learning_rate": 7.973165881521433e-07, "logits/chosen": -1.0429362058639526, "logits/rejected": -1.0378212928771973, "logps/chosen": -73.509521484375, "logps/rejected": -155.39572143554688, "loss": 0.1305, "rewards/accuracies": 1.0, "rewards/chosen": -0.593109667301178, "rewards/margins": 7.16144323348999, "rewards/rejected": -7.754552841186523, "step": 484 }, { "epoch": 0.74, "learning_rate": 7.962488909862033e-07, "logits/chosen": -1.0125764608383179, "logits/rejected": -0.977193295955658, "logps/chosen": -48.4239616394043, "logps/rejected": -117.32792663574219, "loss": 0.1391, "rewards/accuracies": 1.0, "rewards/chosen": 0.21351009607315063, "rewards/margins": 6.008133411407471, "rewards/rejected": -5.794623374938965, "step": 485 }, { "epoch": 0.74, "learning_rate": 7.951791081155668e-07, "logits/chosen": -1.4310243129730225, "logits/rejected": -1.364517331123352, "logps/chosen": -66.78572082519531, "logps/rejected": -142.0985107421875, "loss": 0.1661, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2934994101524353, "rewards/margins": 5.637195110321045, "rewards/rejected": -5.930694103240967, "step": 486 }, { "epoch": 0.74, "learning_rate": 7.941072470719116e-07, "logits/chosen": -1.2644519805908203, "logits/rejected": -1.2363717555999756, "logps/chosen": -76.28678894042969, "logps/rejected": -141.6587677001953, "loss": 0.1902, "rewards/accuracies": 0.875, "rewards/chosen": -0.9511383175849915, "rewards/margins": 5.374536991119385, "rewards/rejected": -6.325675010681152, "step": 487 }, { "epoch": 0.74, "learning_rate": 7.930333154015465e-07, "logits/chosen": -1.2524796724319458, "logits/rejected": -1.2368062734603882, "logps/chosen": -55.79368209838867, "logps/rejected": -108.14014434814453, "loss": 0.123, "rewards/accuracies": 0.875, "rewards/chosen": -0.6211267113685608, "rewards/margins": 4.497018337249756, "rewards/rejected": -5.118144989013672, "step": 488 }, { "epoch": 0.74, "learning_rate": 7.919573206653582e-07, "logits/chosen": -1.0712414979934692, "logits/rejected": -1.0710827112197876, "logps/chosen": -52.496517181396484, "logps/rejected": -93.24199676513672, "loss": 0.2383, "rewards/accuracies": 0.875, "rewards/chosen": 0.07469668984413147, "rewards/margins": 3.50506854057312, "rewards/rejected": -3.4303717613220215, "step": 489 }, { "epoch": 0.74, "learning_rate": 7.908792704387583e-07, "logits/chosen": -1.2998534440994263, "logits/rejected": -1.2329963445663452, "logps/chosen": -59.98297882080078, "logps/rejected": -114.86883544921875, "loss": 0.1563, "rewards/accuracies": 0.875, "rewards/chosen": -0.42221516370773315, "rewards/margins": 4.252401351928711, "rewards/rejected": -4.674615859985352, "step": 490 }, { "epoch": 0.75, "learning_rate": 7.8979917231163e-07, "logits/chosen": -1.4007737636566162, "logits/rejected": -1.4391206502914429, "logps/chosen": -85.83271789550781, "logps/rejected": -160.2294921875, "loss": 0.1032, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0596859455108643, "rewards/margins": 5.792131423950195, "rewards/rejected": -6.8518171310424805, "step": 491 }, { "epoch": 0.75, "learning_rate": 7.88717033888274e-07, "logits/chosen": -1.094614863395691, "logits/rejected": -1.0967289209365845, "logps/chosen": -75.23745727539062, "logps/rejected": -125.14196014404297, "loss": 0.1518, "rewards/accuracies": 0.75, "rewards/chosen": -1.2838397026062012, "rewards/margins": 4.288742542266846, "rewards/rejected": -5.572582721710205, "step": 492 }, { "epoch": 0.75, "learning_rate": 7.876328627873561e-07, "logits/chosen": -1.2592332363128662, "logits/rejected": -1.2118233442306519, "logps/chosen": -60.95644760131836, "logps/rejected": -116.6041259765625, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": 0.2381046712398529, "rewards/margins": 4.668372631072998, "rewards/rejected": -4.430268287658691, "step": 493 }, { "epoch": 0.75, "learning_rate": 7.865466666418521e-07, "logits/chosen": -1.3860349655151367, "logits/rejected": -1.296807885169983, "logps/chosen": -66.50694274902344, "logps/rejected": -189.37033081054688, "loss": 0.1512, "rewards/accuracies": 1.0, "rewards/chosen": 0.0313073992729187, "rewards/margins": 9.400177955627441, "rewards/rejected": -9.36886978149414, "step": 494 }, { "epoch": 0.75, "learning_rate": 7.854584530989956e-07, "logits/chosen": -1.0123242139816284, "logits/rejected": -1.0580317974090576, "logps/chosen": -53.1533088684082, "logps/rejected": -86.56595611572266, "loss": 0.184, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8372935056686401, "rewards/margins": 3.333448648452759, "rewards/rejected": -4.170742034912109, "step": 495 }, { "epoch": 0.75, "learning_rate": 7.843682298202234e-07, "logits/chosen": -1.0129303932189941, "logits/rejected": -0.9923812747001648, "logps/chosen": -44.199432373046875, "logps/rejected": -97.03451538085938, "loss": 0.1203, "rewards/accuracies": 1.0, "rewards/chosen": 0.19193431735038757, "rewards/margins": 4.424201011657715, "rewards/rejected": -4.232266426086426, "step": 496 }, { "epoch": 0.76, "learning_rate": 7.83276004481121e-07, "logits/chosen": -1.1978256702423096, "logits/rejected": -1.222652554512024, "logps/chosen": -76.8789291381836, "logps/rejected": -130.401123046875, "loss": 0.201, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2580636739730835, "rewards/margins": 4.810042381286621, "rewards/rejected": -6.068106651306152, "step": 497 }, { "epoch": 0.76, "learning_rate": 7.821817847713701e-07, "logits/chosen": -1.1676831245422363, "logits/rejected": -1.07413649559021, "logps/chosen": -70.05974578857422, "logps/rejected": -140.12713623046875, "loss": 0.1508, "rewards/accuracies": 0.875, "rewards/chosen": -0.9246506690979004, "rewards/margins": 5.467977046966553, "rewards/rejected": -6.392627716064453, "step": 498 }, { "epoch": 0.76, "learning_rate": 7.810855783946926e-07, "logits/chosen": -1.2983942031860352, "logits/rejected": -1.2959508895874023, "logps/chosen": -61.566444396972656, "logps/rejected": -148.5758514404297, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": -0.5822048187255859, "rewards/margins": 7.1837873458862305, "rewards/rejected": -7.765992641448975, "step": 499 }, { "epoch": 0.76, "learning_rate": 7.799873930687977e-07, "logits/chosen": -1.300523042678833, "logits/rejected": -1.2762212753295898, "logps/chosen": -63.04548263549805, "logps/rejected": -120.72624206542969, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": -0.772101879119873, "rewards/margins": 4.946941375732422, "rewards/rejected": -5.719043254852295, "step": 500 }, { "epoch": 0.76, "learning_rate": 7.788872365253271e-07, "logits/chosen": -1.1984803676605225, "logits/rejected": -1.142225742340088, "logps/chosen": -57.733360290527344, "logps/rejected": -120.88352966308594, "loss": 0.1906, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4514312148094177, "rewards/margins": 4.9673752784729, "rewards/rejected": -5.418806076049805, "step": 501 }, { "epoch": 0.76, "learning_rate": 7.777851165098011e-07, "logits/chosen": -1.0005409717559814, "logits/rejected": -1.0943784713745117, "logps/chosen": -84.2463607788086, "logps/rejected": -187.10287475585938, "loss": 0.1315, "rewards/accuracies": 1.0, "rewards/chosen": -0.6938743591308594, "rewards/margins": 8.654955863952637, "rewards/rejected": -9.348830223083496, "step": 502 }, { "epoch": 0.76, "learning_rate": 7.766810407815628e-07, "logits/chosen": -1.0287072658538818, "logits/rejected": -1.0100324153900146, "logps/chosen": -42.83229446411133, "logps/rejected": -114.5429458618164, "loss": 0.1766, "rewards/accuracies": 0.875, "rewards/chosen": 0.15994618833065033, "rewards/margins": 5.648642539978027, "rewards/rejected": -5.488696575164795, "step": 503 }, { "epoch": 0.77, "learning_rate": 7.755750171137244e-07, "logits/chosen": -1.1835378408432007, "logits/rejected": -1.1462066173553467, "logps/chosen": -88.63304901123047, "logps/rejected": -169.634033203125, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": -1.4358795881271362, "rewards/margins": 6.794912338256836, "rewards/rejected": -8.230792045593262, "step": 504 }, { "epoch": 0.77, "learning_rate": 7.74467053293113e-07, "logits/chosen": -0.9608476161956787, "logits/rejected": -0.9537318348884583, "logps/chosen": -71.47065734863281, "logps/rejected": -114.87274169921875, "loss": 0.1906, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2017298936843872, "rewards/margins": 4.127964973449707, "rewards/rejected": -5.329694747924805, "step": 505 }, { "epoch": 0.77, "learning_rate": 7.733571571202144e-07, "logits/chosen": -1.047137975692749, "logits/rejected": -0.9563291072845459, "logps/chosen": -78.42756652832031, "logps/rejected": -176.69107055664062, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": -1.3879927396774292, "rewards/margins": 7.720418930053711, "rewards/rejected": -9.10841178894043, "step": 506 }, { "epoch": 0.77, "learning_rate": 7.722453364091193e-07, "logits/chosen": -1.3432402610778809, "logits/rejected": -1.3647172451019287, "logps/chosen": -54.534358978271484, "logps/rejected": -105.29655456542969, "loss": 0.1106, "rewards/accuracies": 0.9375, "rewards/chosen": -0.03609006106853485, "rewards/margins": 4.315974235534668, "rewards/rejected": -4.35206413269043, "step": 507 }, { "epoch": 0.77, "learning_rate": 7.711315989874676e-07, "logits/chosen": -1.2090696096420288, "logits/rejected": -1.1107300519943237, "logps/chosen": -72.83460998535156, "logps/rejected": -151.87924194335938, "loss": 0.1057, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6429700255393982, "rewards/margins": 5.898809432983398, "rewards/rejected": -6.5417799949646, "step": 508 }, { "epoch": 0.77, "learning_rate": 7.700159526963936e-07, "logits/chosen": -1.324645757675171, "logits/rejected": -1.3539719581604004, "logps/chosen": -85.37342834472656, "logps/rejected": -151.45509338378906, "loss": 0.1649, "rewards/accuracies": 1.0, "rewards/chosen": -1.5722832679748535, "rewards/margins": 5.511928081512451, "rewards/rejected": -7.0842108726501465, "step": 509 }, { "epoch": 0.77, "learning_rate": 7.688984053904713e-07, "logits/chosen": -1.3109506368637085, "logits/rejected": -1.2842274904251099, "logps/chosen": -60.5789680480957, "logps/rejected": -147.7485809326172, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -0.20602567493915558, "rewards/margins": 7.15122652053833, "rewards/rejected": -7.3572516441345215, "step": 510 }, { "epoch": 0.78, "learning_rate": 7.677789649376575e-07, "logits/chosen": -1.1558939218521118, "logits/rejected": -1.1040030717849731, "logps/chosen": -62.67195129394531, "logps/rejected": -129.86436462402344, "loss": 0.1495, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3074299693107605, "rewards/margins": 5.887986183166504, "rewards/rejected": -6.195416450500488, "step": 511 }, { "epoch": 0.78, "learning_rate": 7.666576392192388e-07, "logits/chosen": -1.19840669631958, "logits/rejected": -1.178692102432251, "logps/chosen": -58.70737838745117, "logps/rejected": -129.68954467773438, "loss": 0.1251, "rewards/accuracies": 0.875, "rewards/chosen": -0.15469014644622803, "rewards/margins": 6.070980072021484, "rewards/rejected": -6.225669860839844, "step": 512 }, { "epoch": 0.78, "learning_rate": 7.655344361297735e-07, "logits/chosen": -1.0739836692810059, "logits/rejected": -1.0780495405197144, "logps/chosen": -64.47042846679688, "logps/rejected": -133.5172576904297, "loss": 0.1737, "rewards/accuracies": 0.9375, "rewards/chosen": -0.27823784947395325, "rewards/margins": 6.497269630432129, "rewards/rejected": -6.775506973266602, "step": 513 }, { "epoch": 0.78, "learning_rate": 7.644093635770384e-07, "logits/chosen": -1.2686853408813477, "logits/rejected": -1.2785826921463013, "logps/chosen": -60.34698486328125, "logps/rejected": -119.00567626953125, "loss": 0.0953, "rewards/accuracies": 0.9375, "rewards/chosen": 0.033090412616729736, "rewards/margins": 4.969561576843262, "rewards/rejected": -4.936470985412598, "step": 514 }, { "epoch": 0.78, "learning_rate": 7.632824294819711e-07, "logits/chosen": -1.1431541442871094, "logits/rejected": -1.0436018705368042, "logps/chosen": -60.22837448120117, "logps/rejected": -147.8499755859375, "loss": 0.1022, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5924397706985474, "rewards/margins": 6.656856060028076, "rewards/rejected": -7.249295711517334, "step": 515 }, { "epoch": 0.78, "learning_rate": 7.621536417786158e-07, "logits/chosen": -1.0664671659469604, "logits/rejected": -1.052040696144104, "logps/chosen": -43.781002044677734, "logps/rejected": -87.1745834350586, "loss": 0.1714, "rewards/accuracies": 1.0, "rewards/chosen": 0.9652446508407593, "rewards/margins": 4.096286773681641, "rewards/rejected": -3.131042242050171, "step": 516 }, { "epoch": 0.79, "learning_rate": 7.610230084140667e-07, "logits/chosen": -1.2166173458099365, "logits/rejected": -1.1928117275238037, "logps/chosen": -68.93216705322266, "logps/rejected": -122.04391479492188, "loss": 0.1437, "rewards/accuracies": 0.875, "rewards/chosen": -1.0721417665481567, "rewards/margins": 4.350822925567627, "rewards/rejected": -5.422965049743652, "step": 517 }, { "epoch": 0.79, "learning_rate": 7.598905373484119e-07, "logits/chosen": -1.1036087274551392, "logits/rejected": -1.0387972593307495, "logps/chosen": -67.01268768310547, "logps/rejected": -168.45962524414062, "loss": 0.1378, "rewards/accuracies": 0.9375, "rewards/chosen": -0.06719581037759781, "rewards/margins": 7.95098876953125, "rewards/rejected": -8.018184661865234, "step": 518 }, { "epoch": 0.79, "learning_rate": 7.587562365546776e-07, "logits/chosen": -1.326158046722412, "logits/rejected": -1.3189661502838135, "logps/chosen": -66.62142944335938, "logps/rejected": -146.9497528076172, "loss": 0.2035, "rewards/accuracies": 0.875, "rewards/chosen": -0.548250675201416, "rewards/margins": 6.232966899871826, "rewards/rejected": -6.781217575073242, "step": 519 }, { "epoch": 0.79, "learning_rate": 7.576201140187725e-07, "logits/chosen": -0.9910123944282532, "logits/rejected": -1.0392197370529175, "logps/chosen": -69.34978485107422, "logps/rejected": -113.86421966552734, "loss": 0.1181, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5841732025146484, "rewards/margins": 4.638482093811035, "rewards/rejected": -5.222655296325684, "step": 520 }, { "epoch": 0.79, "learning_rate": 7.564821777394306e-07, "logits/chosen": -1.0627343654632568, "logits/rejected": -1.0298466682434082, "logps/chosen": -64.04617309570312, "logps/rejected": -119.627685546875, "loss": 0.1754, "rewards/accuracies": 0.875, "rewards/chosen": -0.22364619374275208, "rewards/margins": 4.598440170288086, "rewards/rejected": -4.822085857391357, "step": 521 }, { "epoch": 0.79, "learning_rate": 7.553424357281555e-07, "logits/chosen": -1.1028233766555786, "logits/rejected": -1.0683115720748901, "logps/chosen": -65.89503479003906, "logps/rejected": -132.91787719726562, "loss": 0.1563, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12011255323886871, "rewards/margins": 6.085408687591553, "rewards/rejected": -6.205521583557129, "step": 522 }, { "epoch": 0.79, "learning_rate": 7.542008960091635e-07, "logits/chosen": -1.1913988590240479, "logits/rejected": -1.1794086694717407, "logps/chosen": -68.01545715332031, "logps/rejected": -131.48208618164062, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -0.9747741222381592, "rewards/margins": 5.525202751159668, "rewards/rejected": -6.499977111816406, "step": 523 }, { "epoch": 0.8, "learning_rate": 7.530575666193282e-07, "logits/chosen": -1.0506024360656738, "logits/rejected": -1.0830491781234741, "logps/chosen": -70.56978607177734, "logps/rejected": -100.9091796875, "loss": 0.1489, "rewards/accuracies": 0.875, "rewards/chosen": -1.2066999673843384, "rewards/margins": 2.789134979248047, "rewards/rejected": -3.9958345890045166, "step": 524 }, { "epoch": 0.8, "learning_rate": 7.519124556081222e-07, "logits/chosen": -1.0187995433807373, "logits/rejected": -1.0285779237747192, "logps/chosen": -65.80451202392578, "logps/rejected": -122.89828491210938, "loss": 0.2987, "rewards/accuracies": 0.75, "rewards/chosen": -1.276435375213623, "rewards/margins": 4.4974870681762695, "rewards/rejected": -5.773921966552734, "step": 525 }, { "epoch": 0.8, "learning_rate": 7.507655710375621e-07, "logits/chosen": -1.194640874862671, "logits/rejected": -1.1428656578063965, "logps/chosen": -94.79371643066406, "logps/rejected": -182.0157470703125, "loss": 0.2202, "rewards/accuracies": 1.0, "rewards/chosen": -1.2120563983917236, "rewards/margins": 7.609585762023926, "rewards/rejected": -8.82164192199707, "step": 526 }, { "epoch": 0.8, "learning_rate": 7.49616920982151e-07, "logits/chosen": -1.3102431297302246, "logits/rejected": -1.3399966955184937, "logps/chosen": -63.27342224121094, "logps/rejected": -119.10459899902344, "loss": 0.2103, "rewards/accuracies": 0.9375, "rewards/chosen": -0.139730766415596, "rewards/margins": 5.59067964553833, "rewards/rejected": -5.730410575866699, "step": 527 }, { "epoch": 0.8, "learning_rate": 7.484665135288213e-07, "logits/chosen": -1.1653929948806763, "logits/rejected": -1.1708984375, "logps/chosen": -59.638404846191406, "logps/rejected": -116.32239532470703, "loss": 0.161, "rewards/accuracies": 0.8125, "rewards/chosen": -0.47278904914855957, "rewards/margins": 4.590404987335205, "rewards/rejected": -5.063194274902344, "step": 528 }, { "epoch": 0.8, "learning_rate": 7.473143567768785e-07, "logits/chosen": -1.0732041597366333, "logits/rejected": -1.0529849529266357, "logps/chosen": -59.536346435546875, "logps/rejected": -114.80162048339844, "loss": 0.2172, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2741459906101227, "rewards/margins": 4.777864933013916, "rewards/rejected": -5.052010536193848, "step": 529 }, { "epoch": 0.81, "learning_rate": 7.461604588379435e-07, "logits/chosen": -0.9924188852310181, "logits/rejected": -1.008417010307312, "logps/chosen": -64.02005004882812, "logps/rejected": -123.69132995605469, "loss": 0.0818, "rewards/accuracies": 0.875, "rewards/chosen": -1.6893237829208374, "rewards/margins": 4.819268703460693, "rewards/rejected": -6.50859260559082, "step": 530 }, { "epoch": 0.81, "learning_rate": 7.450048278358961e-07, "logits/chosen": -1.071373462677002, "logits/rejected": -1.0413519144058228, "logps/chosen": -56.579383850097656, "logps/rejected": -115.09782409667969, "loss": 0.198, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5916543006896973, "rewards/margins": 5.022871017456055, "rewards/rejected": -5.61452579498291, "step": 531 }, { "epoch": 0.81, "learning_rate": 7.438474719068173e-07, "logits/chosen": -1.0422636270523071, "logits/rejected": -0.933167576789856, "logps/chosen": -87.04025268554688, "logps/rejected": -203.26632690429688, "loss": 0.1469, "rewards/accuracies": 0.875, "rewards/chosen": -1.167875051498413, "rewards/margins": 8.060665130615234, "rewards/rejected": -9.22853946685791, "step": 532 }, { "epoch": 0.81, "learning_rate": 7.426883991989324e-07, "logits/chosen": -1.1454071998596191, "logits/rejected": -1.075371265411377, "logps/chosen": -72.74165344238281, "logps/rejected": -129.0819854736328, "loss": 0.0906, "rewards/accuracies": 0.875, "rewards/chosen": -0.2687061131000519, "rewards/margins": 5.253970623016357, "rewards/rejected": -5.522676467895508, "step": 533 }, { "epoch": 0.81, "learning_rate": 7.415276178725537e-07, "logits/chosen": -1.0539616346359253, "logits/rejected": -1.0729026794433594, "logps/chosen": -46.86689376831055, "logps/rejected": -89.4149398803711, "loss": 0.1801, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18019194900989532, "rewards/margins": 3.8524868488311768, "rewards/rejected": -4.032678604125977, "step": 534 }, { "epoch": 0.81, "learning_rate": 7.403651361000223e-07, "logits/chosen": -1.3698267936706543, "logits/rejected": -1.2763550281524658, "logps/chosen": -56.74170684814453, "logps/rejected": -132.7080535888672, "loss": 0.1605, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5413241386413574, "rewards/margins": 5.412938117980957, "rewards/rejected": -5.954262733459473, "step": 535 }, { "epoch": 0.81, "learning_rate": 7.392009620656511e-07, "logits/chosen": -1.1172040700912476, "logits/rejected": -1.0188947916030884, "logps/chosen": -50.54315948486328, "logps/rejected": -130.28665161132812, "loss": 0.0991, "rewards/accuracies": 0.875, "rewards/chosen": 0.25699636340141296, "rewards/margins": 6.52001953125, "rewards/rejected": -6.2630228996276855, "step": 536 }, { "epoch": 0.82, "learning_rate": 7.38035103965668e-07, "logits/chosen": -1.1840345859527588, "logits/rejected": -1.196291446685791, "logps/chosen": -39.160369873046875, "logps/rejected": -95.95783996582031, "loss": 0.1399, "rewards/accuracies": 1.0, "rewards/chosen": 0.5449946522712708, "rewards/margins": 5.224076271057129, "rewards/rejected": -4.679081916809082, "step": 537 }, { "epoch": 0.82, "learning_rate": 7.368675700081564e-07, "logits/chosen": -1.2831922769546509, "logits/rejected": -1.2046195268630981, "logps/chosen": -77.98712921142578, "logps/rejected": -171.9889678955078, "loss": 0.0773, "rewards/accuracies": 1.0, "rewards/chosen": -0.9342814683914185, "rewards/margins": 7.7829389572143555, "rewards/rejected": -8.717220306396484, "step": 538 }, { "epoch": 0.82, "learning_rate": 7.356983684129989e-07, "logits/chosen": -1.4122549295425415, "logits/rejected": -1.3278307914733887, "logps/chosen": -60.05818176269531, "logps/rejected": -172.47239685058594, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": -0.32834285497665405, "rewards/margins": 8.709205627441406, "rewards/rejected": -9.037548065185547, "step": 539 }, { "epoch": 0.82, "learning_rate": 7.345275074118185e-07, "logits/chosen": -0.9864742755889893, "logits/rejected": -1.0224814414978027, "logps/chosen": -55.72515106201172, "logps/rejected": -108.23019409179688, "loss": 0.205, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12911073863506317, "rewards/margins": 4.800266265869141, "rewards/rejected": -4.92937707901001, "step": 540 }, { "epoch": 0.82, "learning_rate": 7.333549952479214e-07, "logits/chosen": -1.0197805166244507, "logits/rejected": -0.9988608360290527, "logps/chosen": -77.59547424316406, "logps/rejected": -171.5909423828125, "loss": 0.1427, "rewards/accuracies": 1.0, "rewards/chosen": -0.20403766632080078, "rewards/margins": 7.874151229858398, "rewards/rejected": -8.078189849853516, "step": 541 }, { "epoch": 0.82, "learning_rate": 7.321808401762389e-07, "logits/chosen": -1.1090089082717896, "logits/rejected": -1.0911915302276611, "logps/chosen": -67.35092163085938, "logps/rejected": -138.5771942138672, "loss": 0.1552, "rewards/accuracies": 1.0, "rewards/chosen": -0.2441098392009735, "rewards/margins": 6.130369186401367, "rewards/rejected": -6.374478816986084, "step": 542 }, { "epoch": 0.82, "learning_rate": 7.310050504632679e-07, "logits/chosen": -1.143861174583435, "logits/rejected": -1.1029807329177856, "logps/chosen": -77.17582702636719, "logps/rejected": -171.35406494140625, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": -0.21549063920974731, "rewards/margins": 8.488594055175781, "rewards/rejected": -8.704084396362305, "step": 543 }, { "epoch": 0.83, "learning_rate": 7.298276343870151e-07, "logits/chosen": -1.391092300415039, "logits/rejected": -1.4528110027313232, "logps/chosen": -78.30352020263672, "logps/rejected": -143.43536376953125, "loss": 0.141, "rewards/accuracies": 0.9375, "rewards/chosen": 0.030500110238790512, "rewards/margins": 6.488924980163574, "rewards/rejected": -6.4584245681762695, "step": 544 }, { "epoch": 0.83, "learning_rate": 7.286486002369365e-07, "logits/chosen": -1.1510714292526245, "logits/rejected": -1.0716849565505981, "logps/chosen": -66.09734344482422, "logps/rejected": -164.27682495117188, "loss": 0.0946, "rewards/accuracies": 1.0, "rewards/chosen": -0.6337064504623413, "rewards/margins": 8.143341064453125, "rewards/rejected": -8.777047157287598, "step": 545 }, { "epoch": 0.83, "learning_rate": 7.274679563138804e-07, "logits/chosen": -1.1280611753463745, "logits/rejected": -1.1545814275741577, "logps/chosen": -53.178977966308594, "logps/rejected": -125.91136932373047, "loss": 0.1733, "rewards/accuracies": 0.875, "rewards/chosen": -0.28666895627975464, "rewards/margins": 5.748191833496094, "rewards/rejected": -6.034860134124756, "step": 546 }, { "epoch": 0.83, "learning_rate": 7.262857109300282e-07, "logits/chosen": -1.1145057678222656, "logits/rejected": -1.0593397617340088, "logps/chosen": -86.71113586425781, "logps/rejected": -195.0640869140625, "loss": 0.1717, "rewards/accuracies": 1.0, "rewards/chosen": -0.8704631924629211, "rewards/margins": 8.772705078125, "rewards/rejected": -9.643167495727539, "step": 547 }, { "epoch": 0.83, "learning_rate": 7.251018724088366e-07, "logits/chosen": -1.4092875719070435, "logits/rejected": -1.3813461065292358, "logps/chosen": -50.07902526855469, "logps/rejected": -97.85746765136719, "loss": 0.0668, "rewards/accuracies": 0.9375, "rewards/chosen": -0.31925973296165466, "rewards/margins": 4.2447614669799805, "rewards/rejected": -4.564021110534668, "step": 548 }, { "epoch": 0.83, "learning_rate": 7.239164490849783e-07, "logits/chosen": -1.1854861974716187, "logits/rejected": -1.1800771951675415, "logps/chosen": -80.69862365722656, "logps/rejected": -177.3069305419922, "loss": 0.1178, "rewards/accuracies": 1.0, "rewards/chosen": -1.1843868494033813, "rewards/margins": 7.85844087600708, "rewards/rejected": -9.042828559875488, "step": 549 }, { "epoch": 0.84, "learning_rate": 7.227294493042837e-07, "logits/chosen": -1.6006877422332764, "logits/rejected": -1.5721409320831299, "logps/chosen": -53.85042190551758, "logps/rejected": -107.69651794433594, "loss": 0.1282, "rewards/accuracies": 0.8125, "rewards/chosen": 0.15171164274215698, "rewards/margins": 4.337007522583008, "rewards/rejected": -4.185296535491943, "step": 550 }, { "epoch": 0.84, "learning_rate": 7.215408814236818e-07, "logits/chosen": -1.2671542167663574, "logits/rejected": -1.2413151264190674, "logps/chosen": -75.32283020019531, "logps/rejected": -147.53758239746094, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": -1.085512638092041, "rewards/margins": 6.20933723449707, "rewards/rejected": -7.294849395751953, "step": 551 }, { "epoch": 0.84, "learning_rate": 7.203507538111421e-07, "logits/chosen": -1.025465488433838, "logits/rejected": -1.0870991945266724, "logps/chosen": -59.177955627441406, "logps/rejected": -120.17818450927734, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": 0.023819267749786377, "rewards/margins": 5.563221454620361, "rewards/rejected": -5.539402008056641, "step": 552 }, { "epoch": 0.84, "learning_rate": 7.19159074845615e-07, "logits/chosen": -1.0638189315795898, "logits/rejected": -1.1864960193634033, "logps/chosen": -90.62945556640625, "logps/rejected": -163.56187438964844, "loss": 0.1703, "rewards/accuracies": 0.9375, "rewards/chosen": -1.929221510887146, "rewards/margins": 6.858667373657227, "rewards/rejected": -8.787888526916504, "step": 553 }, { "epoch": 0.84, "learning_rate": 7.179658529169727e-07, "logits/chosen": -1.1522774696350098, "logits/rejected": -1.1589596271514893, "logps/chosen": -48.853660583496094, "logps/rejected": -120.89286804199219, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": 0.2509755492210388, "rewards/margins": 6.310558795928955, "rewards/rejected": -6.05958366394043, "step": 554 }, { "epoch": 0.84, "learning_rate": 7.16771096425951e-07, "logits/chosen": -1.0318248271942139, "logits/rejected": -1.0054022073745728, "logps/chosen": -67.14990997314453, "logps/rejected": -145.93971252441406, "loss": 0.1306, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8901978731155396, "rewards/margins": 6.804965972900391, "rewards/rejected": -7.695164203643799, "step": 555 }, { "epoch": 0.84, "learning_rate": 7.155748137840892e-07, "logits/chosen": -0.9326344728469849, "logits/rejected": -0.9701443314552307, "logps/chosen": -74.25255584716797, "logps/rejected": -122.07107543945312, "loss": 0.1298, "rewards/accuracies": 1.0, "rewards/chosen": -0.6644315719604492, "rewards/margins": 4.801297187805176, "rewards/rejected": -5.465729236602783, "step": 556 }, { "epoch": 0.85, "learning_rate": 7.143770134136713e-07, "logits/chosen": -1.305212140083313, "logits/rejected": -1.285457968711853, "logps/chosen": -80.28656768798828, "logps/rejected": -155.32589721679688, "loss": 0.14, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7364399433135986, "rewards/margins": 5.878342628479004, "rewards/rejected": -7.614782333374023, "step": 557 }, { "epoch": 0.85, "learning_rate": 7.131777037476668e-07, "logits/chosen": -1.2526302337646484, "logits/rejected": -1.2162461280822754, "logps/chosen": -74.2842788696289, "logps/rejected": -152.82522583007812, "loss": 0.0943, "rewards/accuracies": 0.875, "rewards/chosen": -0.8585233688354492, "rewards/margins": 6.765040874481201, "rewards/rejected": -7.62356424331665, "step": 558 }, { "epoch": 0.85, "learning_rate": 7.119768932296715e-07, "logits/chosen": -1.1978999376296997, "logits/rejected": -1.1723815202713013, "logps/chosen": -50.21682357788086, "logps/rejected": -119.23377990722656, "loss": 0.1388, "rewards/accuracies": 1.0, "rewards/chosen": 0.23364275693893433, "rewards/margins": 5.576208591461182, "rewards/rejected": -5.342565059661865, "step": 559 }, { "epoch": 0.85, "learning_rate": 7.107745903138471e-07, "logits/chosen": -1.3260363340377808, "logits/rejected": -1.3038979768753052, "logps/chosen": -54.60324478149414, "logps/rejected": -108.67857360839844, "loss": 0.131, "rewards/accuracies": 0.875, "rewards/chosen": -0.049276888370513916, "rewards/margins": 5.071126461029053, "rewards/rejected": -5.120403289794922, "step": 560 }, { "epoch": 0.85, "learning_rate": 7.095708034648629e-07, "logits/chosen": -1.2655694484710693, "logits/rejected": -1.4480382204055786, "logps/chosen": -89.42362976074219, "logps/rejected": -167.7763671875, "loss": 0.0957, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0199987888336182, "rewards/margins": 7.234724521636963, "rewards/rejected": -8.25472354888916, "step": 561 }, { "epoch": 0.85, "learning_rate": 7.083655411578355e-07, "logits/chosen": -1.3500678539276123, "logits/rejected": -1.3585700988769531, "logps/chosen": -48.68550109863281, "logps/rejected": -109.5958480834961, "loss": 0.1312, "rewards/accuracies": 0.875, "rewards/chosen": 0.08222457766532898, "rewards/margins": 5.2090911865234375, "rewards/rejected": -5.126866817474365, "step": 562 }, { "epoch": 0.86, "learning_rate": 7.071588118782692e-07, "logits/chosen": -1.164905309677124, "logits/rejected": -1.1342130899429321, "logps/chosen": -64.54033660888672, "logps/rejected": -140.8212127685547, "loss": 0.1478, "rewards/accuracies": 0.9375, "rewards/chosen": -0.039550647139549255, "rewards/margins": 6.7797017097473145, "rewards/rejected": -6.819252014160156, "step": 563 }, { "epoch": 0.86, "learning_rate": 7.059506241219964e-07, "logits/chosen": -1.1916640996932983, "logits/rejected": -1.2581453323364258, "logps/chosen": -68.16947937011719, "logps/rejected": -125.39520263671875, "loss": 0.164, "rewards/accuracies": 1.0, "rewards/chosen": -0.5781932473182678, "rewards/margins": 5.795152187347412, "rewards/rejected": -6.373345375061035, "step": 564 }, { "epoch": 0.86, "learning_rate": 7.047409863951176e-07, "logits/chosen": -1.318346381187439, "logits/rejected": -1.313532829284668, "logps/chosen": -65.78179931640625, "logps/rejected": -132.06607055664062, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -0.21447968482971191, "rewards/margins": 6.131742477416992, "rewards/rejected": -6.346221923828125, "step": 565 }, { "epoch": 0.86, "learning_rate": 7.035299072139419e-07, "logits/chosen": -1.2092915773391724, "logits/rejected": -1.2645493745803833, "logps/chosen": -60.975196838378906, "logps/rejected": -91.39009094238281, "loss": 0.1434, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0362534523010254, "rewards/margins": 3.428194999694824, "rewards/rejected": -4.46444845199585, "step": 566 }, { "epoch": 0.86, "learning_rate": 7.023173951049267e-07, "logits/chosen": -1.1492639780044556, "logits/rejected": -1.1428110599517822, "logps/chosen": -89.39848327636719, "logps/rejected": -193.7574462890625, "loss": 0.1345, "rewards/accuracies": 1.0, "rewards/chosen": -1.329371690750122, "rewards/margins": 8.847994804382324, "rewards/rejected": -10.177366256713867, "step": 567 }, { "epoch": 0.86, "learning_rate": 7.011034586046176e-07, "logits/chosen": -1.1973826885223389, "logits/rejected": -1.2077950239181519, "logps/chosen": -59.40647888183594, "logps/rejected": -117.23844909667969, "loss": 0.1824, "rewards/accuracies": 0.9375, "rewards/chosen": -0.42822229862213135, "rewards/margins": 5.149479866027832, "rewards/rejected": -5.577702045440674, "step": 568 }, { "epoch": 0.86, "learning_rate": 6.998881062595886e-07, "logits/chosen": -1.154420018196106, "logits/rejected": -1.1807911396026611, "logps/chosen": -57.72307586669922, "logps/rejected": -118.18482208251953, "loss": 0.117, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6299920082092285, "rewards/margins": 5.129215240478516, "rewards/rejected": -5.759207248687744, "step": 569 }, { "epoch": 0.87, "learning_rate": 6.986713466263817e-07, "logits/chosen": -1.1156989336013794, "logits/rejected": -1.0888198614120483, "logps/chosen": -66.96090698242188, "logps/rejected": -145.86105346679688, "loss": 0.1571, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1332242488861084, "rewards/margins": 6.20496940612793, "rewards/rejected": -7.338193416595459, "step": 570 }, { "epoch": 0.87, "learning_rate": 6.974531882714471e-07, "logits/chosen": -1.1809208393096924, "logits/rejected": -1.129634141921997, "logps/chosen": -64.9968032836914, "logps/rejected": -133.65103149414062, "loss": 0.1651, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7019888758659363, "rewards/margins": 5.911318302154541, "rewards/rejected": -6.613307476043701, "step": 571 }, { "epoch": 0.87, "learning_rate": 6.962336397710819e-07, "logits/chosen": -1.1546497344970703, "logits/rejected": -1.1487202644348145, "logps/chosen": -78.28816223144531, "logps/rejected": -186.17295837402344, "loss": 0.1536, "rewards/accuracies": 1.0, "rewards/chosen": -1.483786940574646, "rewards/margins": 8.131475448608398, "rewards/rejected": -9.615262031555176, "step": 572 }, { "epoch": 0.87, "learning_rate": 6.950127097113707e-07, "logits/chosen": -0.9886271357536316, "logits/rejected": -1.040745735168457, "logps/chosen": -66.25126647949219, "logps/rejected": -147.1013946533203, "loss": 0.1091, "rewards/accuracies": 0.9375, "rewards/chosen": -0.27855655550956726, "rewards/margins": 7.534342288970947, "rewards/rejected": -7.812899112701416, "step": 573 }, { "epoch": 0.87, "learning_rate": 6.93790406688125e-07, "logits/chosen": -1.3215430974960327, "logits/rejected": -1.2974023818969727, "logps/chosen": -53.79179000854492, "logps/rejected": -101.81126403808594, "loss": 0.1484, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5321861505508423, "rewards/margins": 3.803168296813965, "rewards/rejected": -4.335354328155518, "step": 574 }, { "epoch": 0.87, "learning_rate": 6.92566739306822e-07, "logits/chosen": -1.151102900505066, "logits/rejected": -1.27048921585083, "logps/chosen": -72.40945434570312, "logps/rejected": -127.10850524902344, "loss": 0.1173, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7070590257644653, "rewards/margins": 4.9336981773376465, "rewards/rejected": -6.640757083892822, "step": 575 }, { "epoch": 0.88, "learning_rate": 6.913417161825449e-07, "logits/chosen": -1.1125006675720215, "logits/rejected": -1.107159972190857, "logps/chosen": -80.56257629394531, "logps/rejected": -132.2965087890625, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": -1.789968729019165, "rewards/margins": 5.229074001312256, "rewards/rejected": -7.019042491912842, "step": 576 }, { "epoch": 0.88, "learning_rate": 6.901153459399217e-07, "logits/chosen": -1.2071489095687866, "logits/rejected": -1.1959619522094727, "logps/chosen": -76.89067840576172, "logps/rejected": -159.0953826904297, "loss": 0.1308, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8559060096740723, "rewards/margins": 6.734124660491943, "rewards/rejected": -8.590030670166016, "step": 577 }, { "epoch": 0.88, "learning_rate": 6.888876372130646e-07, "logits/chosen": -1.2549490928649902, "logits/rejected": -1.1717318296432495, "logps/chosen": -84.55844116210938, "logps/rejected": -171.13616943359375, "loss": 0.1948, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0151565074920654, "rewards/margins": 6.452093601226807, "rewards/rejected": -8.467248916625977, "step": 578 }, { "epoch": 0.88, "learning_rate": 6.876585986455095e-07, "logits/chosen": -1.0063295364379883, "logits/rejected": -1.043433427810669, "logps/chosen": -61.25851058959961, "logps/rejected": -123.28639221191406, "loss": 0.1691, "rewards/accuracies": 1.0, "rewards/chosen": -1.2367548942565918, "rewards/margins": 5.889279842376709, "rewards/rejected": -7.126034736633301, "step": 579 }, { "epoch": 0.88, "learning_rate": 6.864282388901543e-07, "logits/chosen": -1.2562803030014038, "logits/rejected": -1.2214447259902954, "logps/chosen": -93.09823608398438, "logps/rejected": -180.29898071289062, "loss": 0.1201, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6010587215423584, "rewards/margins": 6.928165912628174, "rewards/rejected": -9.52922534942627, "step": 580 }, { "epoch": 0.88, "learning_rate": 6.851965666091992e-07, "logits/chosen": -1.1490026712417603, "logits/rejected": -1.1077204942703247, "logps/chosen": -71.9451904296875, "logps/rejected": -160.8985595703125, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": -1.9476263523101807, "rewards/margins": 6.7807416915893555, "rewards/rejected": -8.72836971282959, "step": 581 }, { "epoch": 0.88, "learning_rate": 6.839635904740845e-07, "logits/chosen": -1.1142712831497192, "logits/rejected": -1.0776047706604004, "logps/chosen": -75.88325500488281, "logps/rejected": -142.70265197753906, "loss": 0.0962, "rewards/accuracies": 1.0, "rewards/chosen": -1.5644887685775757, "rewards/margins": 5.492844581604004, "rewards/rejected": -7.057333469390869, "step": 582 }, { "epoch": 0.89, "learning_rate": 6.827293191654308e-07, "logits/chosen": -1.191672921180725, "logits/rejected": -1.1895473003387451, "logps/chosen": -63.94920349121094, "logps/rejected": -106.04363250732422, "loss": 0.1393, "rewards/accuracies": 0.9375, "rewards/chosen": -1.49468994140625, "rewards/margins": 3.7244133949279785, "rewards/rejected": -5.21910285949707, "step": 583 }, { "epoch": 0.89, "learning_rate": 6.814937613729765e-07, "logits/chosen": -1.0876585245132446, "logits/rejected": -1.0104507207870483, "logps/chosen": -69.12646484375, "logps/rejected": -184.89944458007812, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": -0.6190570592880249, "rewards/margins": 8.769519805908203, "rewards/rejected": -9.388575553894043, "step": 584 }, { "epoch": 0.89, "learning_rate": 6.80256925795518e-07, "logits/chosen": -1.4268258810043335, "logits/rejected": -1.5291452407836914, "logps/chosen": -67.29776763916016, "logps/rejected": -103.68289947509766, "loss": 0.1222, "rewards/accuracies": 1.0, "rewards/chosen": -0.23457276821136475, "rewards/margins": 4.427850723266602, "rewards/rejected": -4.662423610687256, "step": 585 }, { "epoch": 0.89, "learning_rate": 6.790188211408471e-07, "logits/chosen": -1.3198050260543823, "logits/rejected": -1.3658562898635864, "logps/chosen": -61.914459228515625, "logps/rejected": -129.01361083984375, "loss": 0.1319, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0598723888397217, "rewards/margins": 6.038536071777344, "rewards/rejected": -7.0984086990356445, "step": 586 }, { "epoch": 0.89, "learning_rate": 6.777794561256913e-07, "logits/chosen": -1.0327891111373901, "logits/rejected": -1.0176843404769897, "logps/chosen": -104.78642272949219, "logps/rejected": -210.1189422607422, "loss": 0.1325, "rewards/accuracies": 0.875, "rewards/chosen": -2.38815975189209, "rewards/margins": 9.216400146484375, "rewards/rejected": -11.604558944702148, "step": 587 }, { "epoch": 0.89, "learning_rate": 6.765388394756504e-07, "logits/chosen": -1.4385006427764893, "logits/rejected": -1.4017232656478882, "logps/chosen": -69.59347534179688, "logps/rejected": -148.25234985351562, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -0.3803633451461792, "rewards/margins": 6.397822380065918, "rewards/rejected": -6.7781853675842285, "step": 588 }, { "epoch": 0.89, "learning_rate": 6.75296979925137e-07, "logits/chosen": -1.3260376453399658, "logits/rejected": -1.3459455966949463, "logps/chosen": -75.15542602539062, "logps/rejected": -171.506103515625, "loss": 0.1724, "rewards/accuracies": 1.0, "rewards/chosen": -1.2072641849517822, "rewards/margins": 7.993558883666992, "rewards/rejected": -9.200822830200195, "step": 589 }, { "epoch": 0.9, "learning_rate": 6.740538862173139e-07, "logits/chosen": -0.9990830421447754, "logits/rejected": -0.9321877956390381, "logps/chosen": -80.32003021240234, "logps/rejected": -161.963134765625, "loss": 0.0994, "rewards/accuracies": 0.9375, "rewards/chosen": -2.020470142364502, "rewards/margins": 6.135712623596191, "rewards/rejected": -8.156183242797852, "step": 590 }, { "epoch": 0.9, "learning_rate": 6.728095671040329e-07, "logits/chosen": -1.2907447814941406, "logits/rejected": -1.4420808553695679, "logps/chosen": -89.08474731445312, "logps/rejected": -205.51153564453125, "loss": 0.1139, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2016139030456543, "rewards/margins": 10.155261993408203, "rewards/rejected": -11.356876373291016, "step": 591 }, { "epoch": 0.9, "learning_rate": 6.715640313457732e-07, "logits/chosen": -1.2335330247879028, "logits/rejected": -1.212240219116211, "logps/chosen": -75.88554382324219, "logps/rejected": -139.1956787109375, "loss": 0.1957, "rewards/accuracies": 0.9375, "rewards/chosen": -2.228327751159668, "rewards/margins": 5.448575496673584, "rewards/rejected": -7.676904201507568, "step": 592 }, { "epoch": 0.9, "learning_rate": 6.703172877115793e-07, "logits/chosen": -1.1978495121002197, "logits/rejected": -1.222267746925354, "logps/chosen": -78.69413757324219, "logps/rejected": -159.76504516601562, "loss": 0.2183, "rewards/accuracies": 0.9375, "rewards/chosen": -1.261566162109375, "rewards/margins": 7.104775428771973, "rewards/rejected": -8.366341590881348, "step": 593 }, { "epoch": 0.9, "learning_rate": 6.690693449790001e-07, "logits/chosen": -0.9641166925430298, "logits/rejected": -0.947120189666748, "logps/chosen": -66.4343032836914, "logps/rejected": -141.80267333984375, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": -0.8348309397697449, "rewards/margins": 6.30582332611084, "rewards/rejected": -7.140653610229492, "step": 594 }, { "epoch": 0.9, "learning_rate": 6.678202119340262e-07, "logits/chosen": -1.1618077754974365, "logits/rejected": -1.1629685163497925, "logps/chosen": -53.093605041503906, "logps/rejected": -144.32376098632812, "loss": 0.129, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05835476517677307, "rewards/margins": 7.923296928405762, "rewards/rejected": -7.8649420738220215, "step": 595 }, { "epoch": 0.91, "learning_rate": 6.665698973710288e-07, "logits/chosen": -1.1058599948883057, "logits/rejected": -1.0854295492172241, "logps/chosen": -49.76495361328125, "logps/rejected": -157.39927673339844, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": 0.4173562824726105, "rewards/margins": 9.607122421264648, "rewards/rejected": -9.189765930175781, "step": 596 }, { "epoch": 0.91, "learning_rate": 6.653184100926969e-07, "logits/chosen": -1.0652377605438232, "logits/rejected": -1.038432240486145, "logps/chosen": -65.50021362304688, "logps/rejected": -153.48757934570312, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -0.08607436716556549, "rewards/margins": 6.7374043464660645, "rewards/rejected": -6.823478698730469, "step": 597 }, { "epoch": 0.91, "learning_rate": 6.640657589099767e-07, "logits/chosen": -1.2282792329788208, "logits/rejected": -1.1877415180206299, "logps/chosen": -64.80270385742188, "logps/rejected": -132.14723205566406, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/chosen": -0.5253083109855652, "rewards/margins": 5.846732139587402, "rewards/rejected": -6.372040271759033, "step": 598 }, { "epoch": 0.91, "learning_rate": 6.628119526420078e-07, "logits/chosen": -1.1812118291854858, "logits/rejected": -1.1249544620513916, "logps/chosen": -69.10399627685547, "logps/rejected": -144.18438720703125, "loss": 0.1138, "rewards/accuracies": 0.875, "rewards/chosen": -0.5147157907485962, "rewards/margins": 6.449079990386963, "rewards/rejected": -6.963795185089111, "step": 599 }, { "epoch": 0.91, "learning_rate": 6.615570001160625e-07, "logits/chosen": -1.1095051765441895, "logits/rejected": -1.180436611175537, "logps/chosen": -53.987178802490234, "logps/rejected": -113.84768676757812, "loss": 0.1409, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4846702218055725, "rewards/margins": 5.244781970977783, "rewards/rejected": -5.729451656341553, "step": 600 }, { "epoch": 0.91, "learning_rate": 6.603009101674835e-07, "logits/chosen": -1.1934107542037964, "logits/rejected": -1.3291200399398804, "logps/chosen": -63.650306701660156, "logps/rejected": -107.93333435058594, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": -0.12526728212833405, "rewards/margins": 4.8406572341918945, "rewards/rejected": -4.9659247398376465, "step": 601 }, { "epoch": 0.91, "learning_rate": 6.590436916396207e-07, "logits/chosen": -1.046741008758545, "logits/rejected": -1.0239633321762085, "logps/chosen": -84.33003234863281, "logps/rejected": -186.45541381835938, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": -1.0560230016708374, "rewards/margins": 8.320999145507812, "rewards/rejected": -9.377021789550781, "step": 602 }, { "epoch": 0.92, "learning_rate": 6.577853533837703e-07, "logits/chosen": -1.112562656402588, "logits/rejected": -1.0928502082824707, "logps/chosen": -33.96232223510742, "logps/rejected": -91.48005676269531, "loss": 0.108, "rewards/accuracies": 0.875, "rewards/chosen": 0.8726045489311218, "rewards/margins": 5.010224342346191, "rewards/rejected": -4.137619972229004, "step": 603 }, { "epoch": 0.92, "learning_rate": 6.565259042591111e-07, "logits/chosen": -1.0827683210372925, "logits/rejected": -1.0390585660934448, "logps/chosen": -45.63076400756836, "logps/rejected": -153.36795043945312, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": 0.4685389995574951, "rewards/margins": 7.915752410888672, "rewards/rejected": -7.447213649749756, "step": 604 }, { "epoch": 0.92, "learning_rate": 6.552653531326436e-07, "logits/chosen": -1.059299349784851, "logits/rejected": -1.070773959159851, "logps/chosen": -40.399654388427734, "logps/rejected": -85.68155670166016, "loss": 0.0989, "rewards/accuracies": 0.9375, "rewards/chosen": 0.15366578102111816, "rewards/margins": 3.8557958602905273, "rewards/rejected": -3.70212984085083, "step": 605 }, { "epoch": 0.92, "learning_rate": 6.540037088791263e-07, "logits/chosen": -1.1014400720596313, "logits/rejected": -1.147748589515686, "logps/chosen": -55.5066032409668, "logps/rejected": -123.78807067871094, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": 0.33187153935432434, "rewards/margins": 6.414957523345947, "rewards/rejected": -6.083086967468262, "step": 606 }, { "epoch": 0.92, "learning_rate": 6.527409803810136e-07, "logits/chosen": -1.2319551706314087, "logits/rejected": -1.2486858367919922, "logps/chosen": -89.27584838867188, "logps/rejected": -172.9617919921875, "loss": 0.1595, "rewards/accuracies": 0.875, "rewards/chosen": -1.6647179126739502, "rewards/margins": 6.800714015960693, "rewards/rejected": -8.465432167053223, "step": 607 }, { "epoch": 0.92, "learning_rate": 6.514771765283942e-07, "logits/chosen": -1.3046828508377075, "logits/rejected": -1.3457309007644653, "logps/chosen": -59.46671676635742, "logps/rejected": -164.77854919433594, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": 0.5135937929153442, "rewards/margins": 8.90949821472168, "rewards/rejected": -8.395904541015625, "step": 608 }, { "epoch": 0.93, "learning_rate": 6.502123062189268e-07, "logits/chosen": -1.025585412979126, "logits/rejected": -0.9563760161399841, "logps/chosen": -65.9186019897461, "logps/rejected": -135.0490264892578, "loss": 0.1325, "rewards/accuracies": 1.0, "rewards/chosen": -0.41695117950439453, "rewards/margins": 6.163644790649414, "rewards/rejected": -6.580595970153809, "step": 609 }, { "epoch": 0.93, "learning_rate": 6.489463783577786e-07, "logits/chosen": -1.0755833387374878, "logits/rejected": -1.0082464218139648, "logps/chosen": -54.03449630737305, "logps/rejected": -155.23272705078125, "loss": 0.2115, "rewards/accuracies": 0.875, "rewards/chosen": 0.22527417540550232, "rewards/margins": 7.68045711517334, "rewards/rejected": -7.4551825523376465, "step": 610 }, { "epoch": 0.93, "learning_rate": 6.476794018575629e-07, "logits/chosen": -0.8818612694740295, "logits/rejected": -0.8964678645133972, "logps/chosen": -69.8212890625, "logps/rejected": -94.2854995727539, "loss": 0.1577, "rewards/accuracies": 0.875, "rewards/chosen": -1.6802802085876465, "rewards/margins": 3.0808987617492676, "rewards/rejected": -4.761178970336914, "step": 611 }, { "epoch": 0.93, "learning_rate": 6.464113856382751e-07, "logits/chosen": -1.3570644855499268, "logits/rejected": -1.4353338479995728, "logps/chosen": -44.93012619018555, "logps/rejected": -113.91537475585938, "loss": 0.123, "rewards/accuracies": 1.0, "rewards/chosen": 0.6995503902435303, "rewards/margins": 6.318906784057617, "rewards/rejected": -5.619356155395508, "step": 612 }, { "epoch": 0.93, "learning_rate": 6.451423386272311e-07, "logits/chosen": -1.081716537475586, "logits/rejected": -1.080958366394043, "logps/chosen": -66.42369842529297, "logps/rejected": -146.11070251464844, "loss": 0.1011, "rewards/accuracies": 1.0, "rewards/chosen": 0.007286667823791504, "rewards/margins": 7.636693000793457, "rewards/rejected": -7.629406929016113, "step": 613 }, { "epoch": 0.93, "learning_rate": 6.438722697590038e-07, "logits/chosen": -1.254135012626648, "logits/rejected": -1.28207266330719, "logps/chosen": -56.42732238769531, "logps/rejected": -133.7398223876953, "loss": 0.1432, "rewards/accuracies": 0.9375, "rewards/chosen": -0.33388689160346985, "rewards/margins": 6.535743236541748, "rewards/rejected": -6.869630336761475, "step": 614 }, { "epoch": 0.93, "learning_rate": 6.426011879753601e-07, "logits/chosen": -1.1966629028320312, "logits/rejected": -1.195890188217163, "logps/chosen": -70.52161407470703, "logps/rejected": -149.12550354003906, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": -0.7128872871398926, "rewards/margins": 6.004786968231201, "rewards/rejected": -6.717674255371094, "step": 615 }, { "epoch": 0.94, "learning_rate": 6.413291022251989e-07, "logits/chosen": -1.1653528213500977, "logits/rejected": -1.158420205116272, "logps/chosen": -74.13883972167969, "logps/rejected": -167.26708984375, "loss": 0.1286, "rewards/accuracies": 1.0, "rewards/chosen": -0.6810168623924255, "rewards/margins": 8.279891967773438, "rewards/rejected": -8.960908889770508, "step": 616 }, { "epoch": 0.94, "learning_rate": 6.400560214644864e-07, "logits/chosen": -0.7961425185203552, "logits/rejected": -0.6948035359382629, "logps/chosen": -31.742719650268555, "logps/rejected": -98.01490783691406, "loss": 0.1302, "rewards/accuracies": 0.9375, "rewards/chosen": 0.42019566893577576, "rewards/margins": 5.272375106811523, "rewards/rejected": -4.852179527282715, "step": 617 }, { "epoch": 0.94, "learning_rate": 6.387819546561953e-07, "logits/chosen": -1.3594671487808228, "logits/rejected": -1.3466832637786865, "logps/chosen": -79.9526596069336, "logps/rejected": -133.43017578125, "loss": 0.1571, "rewards/accuracies": 0.875, "rewards/chosen": -1.4597373008728027, "rewards/margins": 4.369678974151611, "rewards/rejected": -5.829415798187256, "step": 618 }, { "epoch": 0.94, "learning_rate": 6.375069107702392e-07, "logits/chosen": -1.2339191436767578, "logits/rejected": -1.30702543258667, "logps/chosen": -69.71017456054688, "logps/rejected": -168.69784545898438, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -0.4460238814353943, "rewards/margins": 8.786514282226562, "rewards/rejected": -9.232538223266602, "step": 619 }, { "epoch": 0.94, "learning_rate": 6.362308987834115e-07, "logits/chosen": -1.2344510555267334, "logits/rejected": -1.2518203258514404, "logps/chosen": -85.0146484375, "logps/rejected": -191.4388885498047, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": -0.8330772519111633, "rewards/margins": 9.515462875366211, "rewards/rejected": -10.348539352416992, "step": 620 }, { "epoch": 0.94, "learning_rate": 6.349539276793211e-07, "logits/chosen": -1.0936007499694824, "logits/rejected": -1.0148943662643433, "logps/chosen": -62.80587387084961, "logps/rejected": -144.54388427734375, "loss": 0.1469, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3292310833930969, "rewards/margins": 6.688003063201904, "rewards/rejected": -7.017233848571777, "step": 621 }, { "epoch": 0.94, "learning_rate": 6.336760064483295e-07, "logits/chosen": -1.2999571561813354, "logits/rejected": -1.261387586593628, "logps/chosen": -85.21759033203125, "logps/rejected": -180.78350830078125, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": -1.6249150037765503, "rewards/margins": 8.264779090881348, "rewards/rejected": -9.889694213867188, "step": 622 }, { "epoch": 0.95, "learning_rate": 6.323971440874877e-07, "logits/chosen": -1.0580947399139404, "logits/rejected": -1.1377376317977905, "logps/chosen": -76.76827239990234, "logps/rejected": -186.6950225830078, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": -0.8835163116455078, "rewards/margins": 9.402647972106934, "rewards/rejected": -10.286163330078125, "step": 623 }, { "epoch": 0.95, "learning_rate": 6.311173496004723e-07, "logits/chosen": -1.0193018913269043, "logits/rejected": -1.020062804222107, "logps/chosen": -67.74279022216797, "logps/rejected": -119.82437133789062, "loss": 0.1549, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1380265951156616, "rewards/margins": 4.763696670532227, "rewards/rejected": -5.901723384857178, "step": 624 }, { "epoch": 0.95, "learning_rate": 6.298366319975221e-07, "logits/chosen": -0.9675927758216858, "logits/rejected": -0.9189658164978027, "logps/chosen": -66.28350067138672, "logps/rejected": -123.21947479248047, "loss": 0.1642, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3978164196014404, "rewards/margins": 4.934099197387695, "rewards/rejected": -6.331915378570557, "step": 625 }, { "epoch": 0.95, "learning_rate": 6.28555000295376e-07, "logits/chosen": -1.0939494371414185, "logits/rejected": -1.1183608770370483, "logps/chosen": -74.27719116210938, "logps/rejected": -154.91586303710938, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": -0.6623119711875916, "rewards/margins": 7.408330917358398, "rewards/rejected": -8.07064151763916, "step": 626 }, { "epoch": 0.95, "learning_rate": 6.272724635172074e-07, "logits/chosen": -0.9520808458328247, "logits/rejected": -0.8811392784118652, "logps/chosen": -70.15853118896484, "logps/rejected": -183.83602905273438, "loss": 0.1547, "rewards/accuracies": 0.9375, "rewards/chosen": -0.32549893856048584, "rewards/margins": 9.788957595825195, "rewards/rejected": -10.114457130432129, "step": 627 }, { "epoch": 0.95, "learning_rate": 6.259890306925626e-07, "logits/chosen": -0.9270368814468384, "logits/rejected": -0.8135693073272705, "logps/chosen": -63.346412658691406, "logps/rejected": -158.90957641601562, "loss": 0.1442, "rewards/accuracies": 0.875, "rewards/chosen": -1.1709436178207397, "rewards/margins": 7.03597354888916, "rewards/rejected": -8.206917762756348, "step": 628 }, { "epoch": 0.96, "learning_rate": 6.247047108572959e-07, "logits/chosen": -1.070979118347168, "logits/rejected": -0.8744301199913025, "logps/chosen": -69.09797668457031, "logps/rejected": -215.55731201171875, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": -0.5460328459739685, "rewards/margins": 10.441033363342285, "rewards/rejected": -10.987065315246582, "step": 629 }, { "epoch": 0.96, "learning_rate": 6.234195130535068e-07, "logits/chosen": -1.290129542350769, "logits/rejected": -1.2997944355010986, "logps/chosen": -62.63139724731445, "logps/rejected": -130.76480102539062, "loss": 0.0977, "rewards/accuracies": 0.875, "rewards/chosen": -0.8450179100036621, "rewards/margins": 5.818835735321045, "rewards/rejected": -6.663854598999023, "step": 630 }, { "epoch": 0.96, "learning_rate": 6.221334463294759e-07, "logits/chosen": -1.1602667570114136, "logits/rejected": -1.1095179319381714, "logps/chosen": -64.07215881347656, "logps/rejected": -114.46161651611328, "loss": 0.0942, "rewards/accuracies": 1.0, "rewards/chosen": -0.4669833779335022, "rewards/margins": 5.138500690460205, "rewards/rejected": -5.605484485626221, "step": 631 }, { "epoch": 0.96, "learning_rate": 6.208465197396012e-07, "logits/chosen": -1.2838671207427979, "logits/rejected": -1.2515268325805664, "logps/chosen": -93.52930450439453, "logps/rejected": -193.46759033203125, "loss": 0.1292, "rewards/accuracies": 1.0, "rewards/chosen": -1.9162299633026123, "rewards/margins": 7.588293075561523, "rewards/rejected": -9.504523277282715, "step": 632 }, { "epoch": 0.96, "learning_rate": 6.195587423443348e-07, "logits/chosen": -1.1156113147735596, "logits/rejected": -1.0549241304397583, "logps/chosen": -51.68001174926758, "logps/rejected": -130.57142639160156, "loss": 0.0914, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12324819713830948, "rewards/margins": 6.490993976593018, "rewards/rejected": -6.614241600036621, "step": 633 }, { "epoch": 0.96, "learning_rate": 6.182701232101184e-07, "logits/chosen": -0.997816264629364, "logits/rejected": -0.973334550857544, "logps/chosen": -62.83354568481445, "logps/rejected": -147.64317321777344, "loss": 0.1306, "rewards/accuracies": 1.0, "rewards/chosen": -0.7381342053413391, "rewards/margins": 7.281171798706055, "rewards/rejected": -8.019306182861328, "step": 634 }, { "epoch": 0.96, "learning_rate": 6.169806714093203e-07, "logits/chosen": -1.2990257740020752, "logits/rejected": -1.242817997932434, "logps/chosen": -93.56485748291016, "logps/rejected": -203.56536865234375, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125959634780884, "rewards/margins": 8.971162796020508, "rewards/rejected": -10.283758163452148, "step": 635 }, { "epoch": 0.97, "learning_rate": 6.156903960201708e-07, "logits/chosen": -1.0316754579544067, "logits/rejected": -0.9951988458633423, "logps/chosen": -89.90121459960938, "logps/rejected": -209.39590454101562, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -1.4963710308074951, "rewards/margins": 9.438202857971191, "rewards/rejected": -10.93457317352295, "step": 636 }, { "epoch": 0.97, "learning_rate": 6.143993061266985e-07, "logits/chosen": -0.9450584053993225, "logits/rejected": -0.9507660865783691, "logps/chosen": -61.39887619018555, "logps/rejected": -100.94102478027344, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": -1.806435465812683, "rewards/margins": 3.4823813438415527, "rewards/rejected": -5.288816928863525, "step": 637 }, { "epoch": 0.97, "learning_rate": 6.131074108186665e-07, "logits/chosen": -1.2073445320129395, "logits/rejected": -1.1909887790679932, "logps/chosen": -75.20411682128906, "logps/rejected": -137.83969116210938, "loss": 0.224, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8708873987197876, "rewards/margins": 5.583239555358887, "rewards/rejected": -6.454126834869385, "step": 638 }, { "epoch": 0.97, "learning_rate": 6.118147191915087e-07, "logits/chosen": -1.032888412475586, "logits/rejected": -0.9318363070487976, "logps/chosen": -87.4946517944336, "logps/rejected": -209.93983459472656, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": -2.2013015747070312, "rewards/margins": 9.4359712600708, "rewards/rejected": -11.637272834777832, "step": 639 }, { "epoch": 0.97, "learning_rate": 6.105212403462649e-07, "logits/chosen": -0.9409430027008057, "logits/rejected": -0.8462395668029785, "logps/chosen": -74.47772979736328, "logps/rejected": -165.8389129638672, "loss": 0.096, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5092302560806274, "rewards/margins": 7.441671848297119, "rewards/rejected": -8.95090103149414, "step": 640 }, { "epoch": 0.97, "learning_rate": 6.092269833895174e-07, "logits/chosen": -1.1518549919128418, "logits/rejected": -1.1024231910705566, "logps/chosen": -101.31788635253906, "logps/rejected": -186.49029541015625, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -2.271648645401001, "rewards/margins": 7.782963752746582, "rewards/rejected": -10.054612159729004, "step": 641 }, { "epoch": 0.98, "learning_rate": 6.079319574333266e-07, "logits/chosen": -1.2157964706420898, "logits/rejected": -1.2053083181381226, "logps/chosen": -74.29100799560547, "logps/rejected": -162.5016326904297, "loss": 0.1264, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9477126598358154, "rewards/margins": 7.87605619430542, "rewards/rejected": -8.823769569396973, "step": 642 }, { "epoch": 0.98, "learning_rate": 6.06636171595167e-07, "logits/chosen": -1.2841458320617676, "logits/rejected": -1.2092390060424805, "logps/chosen": -56.44172668457031, "logps/rejected": -134.9115447998047, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": -0.044044479727745056, "rewards/margins": 6.794293403625488, "rewards/rejected": -6.8383378982543945, "step": 643 }, { "epoch": 0.98, "learning_rate": 6.053396349978631e-07, "logits/chosen": -1.1039835214614868, "logits/rejected": -1.0356587171554565, "logps/chosen": -76.9747543334961, "logps/rejected": -172.63150024414062, "loss": 0.1098, "rewards/accuracies": 0.9375, "rewards/chosen": -1.146126389503479, "rewards/margins": 7.621996879577637, "rewards/rejected": -8.768123626708984, "step": 644 }, { "epoch": 0.98, "learning_rate": 6.04042356769525e-07, "logits/chosen": -1.264845609664917, "logits/rejected": -1.2502721548080444, "logps/chosen": -85.20533752441406, "logps/rejected": -194.13870239257812, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8115068674087524, "rewards/margins": 9.807979583740234, "rewards/rejected": -10.619485855102539, "step": 645 }, { "epoch": 0.98, "learning_rate": 6.02744346043484e-07, "logits/chosen": -1.2147037982940674, "logits/rejected": -1.1134579181671143, "logps/chosen": -87.7901382446289, "logps/rejected": -193.6415252685547, "loss": 0.1185, "rewards/accuracies": 0.875, "rewards/chosen": -1.5775516033172607, "rewards/margins": 8.363851547241211, "rewards/rejected": -9.941402435302734, "step": 646 }, { "epoch": 0.98, "learning_rate": 6.014456119582284e-07, "logits/chosen": -1.2615370750427246, "logits/rejected": -1.1352978944778442, "logps/chosen": -86.64071655273438, "logps/rejected": -170.6793975830078, "loss": 0.1168, "rewards/accuracies": 0.9375, "rewards/chosen": -1.381377100944519, "rewards/margins": 7.180846214294434, "rewards/rejected": -8.562223434448242, "step": 647 }, { "epoch": 0.98, "learning_rate": 6.001461636573396e-07, "logits/chosen": -1.0515718460083008, "logits/rejected": -0.9838770031929016, "logps/chosen": -59.810577392578125, "logps/rejected": -126.24922180175781, "loss": 0.0817, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5042865872383118, "rewards/margins": 5.515542984008789, "rewards/rejected": -6.019829750061035, "step": 648 }, { "epoch": 0.99, "learning_rate": 5.98846010289427e-07, "logits/chosen": -1.1093281507492065, "logits/rejected": -1.077587604522705, "logps/chosen": -64.88356018066406, "logps/rejected": -154.37062072753906, "loss": 0.0841, "rewards/accuracies": 0.9375, "rewards/chosen": -0.40832287073135376, "rewards/margins": 7.622497081756592, "rewards/rejected": -8.030820846557617, "step": 649 }, { "epoch": 0.99, "learning_rate": 5.975451610080642e-07, "logits/chosen": -0.8989431858062744, "logits/rejected": -0.9193077087402344, "logps/chosen": -54.4132080078125, "logps/rejected": -117.14277648925781, "loss": 0.1351, "rewards/accuracies": 1.0, "rewards/chosen": -0.4165533185005188, "rewards/margins": 5.752285003662109, "rewards/rejected": -6.1688385009765625, "step": 650 }, { "epoch": 0.99, "learning_rate": 5.962436249717239e-07, "logits/chosen": -1.2257202863693237, "logits/rejected": -1.159430980682373, "logps/chosen": -69.6572036743164, "logps/rejected": -169.44932556152344, "loss": 0.098, "rewards/accuracies": 0.9375, "rewards/chosen": 0.28377628326416016, "rewards/margins": 8.203788757324219, "rewards/rejected": -7.920012474060059, "step": 651 }, { "epoch": 0.99, "learning_rate": 5.949414113437141e-07, "logits/chosen": -1.2154548168182373, "logits/rejected": -1.2897298336029053, "logps/chosen": -54.942771911621094, "logps/rejected": -133.26797485351562, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/chosen": -0.29021626710891724, "rewards/margins": 6.945834159851074, "rewards/rejected": -7.236050128936768, "step": 652 }, { "epoch": 0.99, "learning_rate": 5.936385292921135e-07, "logits/chosen": -1.2028741836547852, "logits/rejected": -1.119994044303894, "logps/chosen": -69.02983093261719, "logps/rejected": -143.38055419921875, "loss": 0.1776, "rewards/accuracies": 0.875, "rewards/chosen": -1.5186907052993774, "rewards/margins": 4.823304653167725, "rewards/rejected": -6.3419952392578125, "step": 653 }, { "epoch": 0.99, "learning_rate": 5.923349879897064e-07, "logits/chosen": -1.2728404998779297, "logits/rejected": -1.2099133729934692, "logps/chosen": -61.5665283203125, "logps/rejected": -172.33697509765625, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": -0.6831641793251038, "rewards/margins": 8.591301918029785, "rewards/rejected": -9.274466514587402, "step": 654 }, { "epoch": 1.0, "learning_rate": 5.910307966139186e-07, "logits/chosen": -1.142350196838379, "logits/rejected": -1.0968624353408813, "logps/chosen": -64.24427795410156, "logps/rejected": -144.29763793945312, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -0.833078145980835, "rewards/margins": 6.916269302368164, "rewards/rejected": -7.74934720993042, "step": 655 }, { "epoch": 1.0, "learning_rate": 5.897259643467527e-07, "logits/chosen": -1.0089267492294312, "logits/rejected": -0.9968331456184387, "logps/chosen": -54.45252990722656, "logps/rejected": -102.59598541259766, "loss": 0.1371, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8583024740219116, "rewards/margins": 4.281520843505859, "rewards/rejected": -5.139822959899902, "step": 656 }, { "epoch": 1.0, "learning_rate": 5.884205003747232e-07, "logits/chosen": -1.1683781147003174, "logits/rejected": -1.1888394355773926, "logps/chosen": -62.79545593261719, "logps/rejected": -126.7721176147461, "loss": 0.1603, "rewards/accuracies": 1.0, "rewards/chosen": -0.5220464468002319, "rewards/margins": 6.045437812805176, "rewards/rejected": -6.567483901977539, "step": 657 }, { "epoch": 1.0, "learning_rate": 5.871144138887925e-07, "logits/chosen": -1.076742172241211, "logits/rejected": -1.0606688261032104, "logps/chosen": -65.71561431884766, "logps/rejected": -129.7261962890625, "loss": 0.131, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1118013858795166, "rewards/margins": 6.00579833984375, "rewards/rejected": -7.117599964141846, "step": 658 }, { "epoch": 1.0, "learning_rate": 5.858077140843052e-07, "logits/chosen": -1.158136010169983, "logits/rejected": -1.1475896835327148, "logps/chosen": -60.0682258605957, "logps/rejected": -131.699462890625, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": -0.2798491418361664, "rewards/margins": 6.119170665740967, "rewards/rejected": -6.399019718170166, "step": 659 }, { "epoch": 1.0, "learning_rate": 5.845004101609246e-07, "logits/chosen": -1.4042613506317139, "logits/rejected": -1.4403735399246216, "logps/chosen": -67.27396392822266, "logps/rejected": -124.99378967285156, "loss": 0.0976, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6702808141708374, "rewards/margins": 5.292868614196777, "rewards/rejected": -5.963149070739746, "step": 660 }, { "epoch": 1.0, "learning_rate": 5.831925113225663e-07, "logits/chosen": -1.1096900701522827, "logits/rejected": -1.030463695526123, "logps/chosen": -80.2137680053711, "logps/rejected": -188.515869140625, "loss": 0.0865, "rewards/accuracies": 1.0, "rewards/chosen": -1.5864979028701782, "rewards/margins": 8.952061653137207, "rewards/rejected": -10.53856086730957, "step": 661 }, { "epoch": 1.01, "learning_rate": 5.818840267773349e-07, "logits/chosen": -0.9514877200126648, "logits/rejected": -0.8386536836624146, "logps/chosen": -54.629302978515625, "logps/rejected": -147.75096130371094, "loss": 0.1456, "rewards/accuracies": 0.875, "rewards/chosen": -0.3283122777938843, "rewards/margins": 7.612514972686768, "rewards/rejected": -7.940827369689941, "step": 662 }, { "epoch": 1.01, "learning_rate": 5.805749657374588e-07, "logits/chosen": -1.1909164190292358, "logits/rejected": -1.1824414730072021, "logps/chosen": -63.21338653564453, "logps/rejected": -125.61174011230469, "loss": 0.0928, "rewards/accuracies": 1.0, "rewards/chosen": -0.7514258623123169, "rewards/margins": 5.9479804039001465, "rewards/rejected": -6.699406623840332, "step": 663 }, { "epoch": 1.01, "learning_rate": 5.792653374192245e-07, "logits/chosen": -1.1508904695510864, "logits/rejected": -1.3032901287078857, "logps/chosen": -61.64646911621094, "logps/rejected": -123.74299621582031, "loss": 0.09, "rewards/accuracies": 1.0, "rewards/chosen": -0.6384060382843018, "rewards/margins": 6.525364875793457, "rewards/rejected": -7.163771152496338, "step": 664 }, { "epoch": 1.01, "learning_rate": 5.77955151042913e-07, "logits/chosen": -1.2269688844680786, "logits/rejected": -1.102317452430725, "logps/chosen": -93.61415100097656, "logps/rejected": -197.66168212890625, "loss": 0.1308, "rewards/accuracies": 0.9375, "rewards/chosen": -1.865402102470398, "rewards/margins": 8.771673202514648, "rewards/rejected": -10.63707447052002, "step": 665 }, { "epoch": 1.01, "learning_rate": 5.766444158327337e-07, "logits/chosen": -1.2553646564483643, "logits/rejected": -1.2647689580917358, "logps/chosen": -69.70663452148438, "logps/rejected": -166.05563354492188, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -0.5620317459106445, "rewards/margins": 7.381424903869629, "rewards/rejected": -7.943456649780273, "step": 666 }, { "epoch": 1.01, "learning_rate": 5.753331410167603e-07, "logits/chosen": -1.1728168725967407, "logits/rejected": -1.2360785007476807, "logps/chosen": -57.126678466796875, "logps/rejected": -115.8658218383789, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": -0.10996098071336746, "rewards/margins": 6.259988307952881, "rewards/rejected": -6.369948387145996, "step": 667 }, { "epoch": 1.01, "learning_rate": 5.740213358268658e-07, "logits/chosen": -0.9818707704544067, "logits/rejected": -0.935701310634613, "logps/chosen": -77.70005798339844, "logps/rejected": -182.51943969726562, "loss": 0.1275, "rewards/accuracies": 1.0, "rewards/chosen": -0.9125512838363647, "rewards/margins": 8.70441722869873, "rewards/rejected": -9.616969108581543, "step": 668 }, { "epoch": 1.02, "learning_rate": 5.727090094986565e-07, "logits/chosen": -1.1261427402496338, "logits/rejected": -1.0947426557540894, "logps/chosen": -68.04671478271484, "logps/rejected": -151.35067749023438, "loss": 0.12, "rewards/accuracies": 1.0, "rewards/chosen": -0.8326579928398132, "rewards/margins": 7.375097274780273, "rewards/rejected": -8.207755088806152, "step": 669 }, { "epoch": 1.02, "learning_rate": 5.713961712714081e-07, "logits/chosen": -0.9983550310134888, "logits/rejected": -0.9126099944114685, "logps/chosen": -63.41878128051758, "logps/rejected": -154.33389282226562, "loss": 0.0974, "rewards/accuracies": 0.875, "rewards/chosen": -0.12092608213424683, "rewards/margins": 7.515420913696289, "rewards/rejected": -7.636347770690918, "step": 670 }, { "epoch": 1.02, "learning_rate": 5.700828303880006e-07, "logits/chosen": -1.217376470565796, "logits/rejected": -1.2310144901275635, "logps/chosen": -81.50133514404297, "logps/rejected": -163.76443481445312, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -1.1945643424987793, "rewards/margins": 7.473373889923096, "rewards/rejected": -8.667938232421875, "step": 671 }, { "epoch": 1.02, "learning_rate": 5.687689960948525e-07, "logits/chosen": -1.0253369808197021, "logits/rejected": -1.0140191316604614, "logps/chosen": -84.7760238647461, "logps/rejected": -134.62786865234375, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": -0.9770020246505737, "rewards/margins": 5.535608291625977, "rewards/rejected": -6.51261043548584, "step": 672 }, { "epoch": 1.02, "learning_rate": 5.674546776418559e-07, "logits/chosen": -0.7614624500274658, "logits/rejected": -0.7193310856819153, "logps/chosen": -57.43425369262695, "logps/rejected": -145.88856506347656, "loss": 0.1625, "rewards/accuracies": 0.875, "rewards/chosen": -1.0262550115585327, "rewards/margins": 6.430345058441162, "rewards/rejected": -7.456599712371826, "step": 673 }, { "epoch": 1.02, "learning_rate": 5.661398842823121e-07, "logits/chosen": -1.3408358097076416, "logits/rejected": -1.267204761505127, "logps/chosen": -42.15625, "logps/rejected": -116.25086975097656, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": 0.09829363226890564, "rewards/margins": 6.09843111038208, "rewards/rejected": -6.0001373291015625, "step": 674 }, { "epoch": 1.03, "learning_rate": 5.648246252728657e-07, "logits/chosen": -1.1724097728729248, "logits/rejected": -1.1310337781906128, "logps/chosen": -55.03935623168945, "logps/rejected": -119.53876495361328, "loss": 0.0846, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5357794165611267, "rewards/margins": 6.028209209442139, "rewards/rejected": -6.563988208770752, "step": 675 }, { "epoch": 1.03, "learning_rate": 5.635089098734393e-07, "logits/chosen": -1.2830966711044312, "logits/rejected": -1.3221076726913452, "logps/chosen": -68.17839813232422, "logps/rejected": -150.27639770507812, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": -1.5815032720565796, "rewards/margins": 7.11434268951416, "rewards/rejected": -8.695844650268555, "step": 676 }, { "epoch": 1.03, "learning_rate": 5.621927473471694e-07, "logits/chosen": -0.9430112838745117, "logits/rejected": -0.9096443057060242, "logps/chosen": -58.878753662109375, "logps/rejected": -130.36016845703125, "loss": 0.1278, "rewards/accuracies": 0.875, "rewards/chosen": -0.7708883881568909, "rewards/margins": 5.786856174468994, "rewards/rejected": -6.55774450302124, "step": 677 }, { "epoch": 1.03, "learning_rate": 5.608761469603397e-07, "logits/chosen": -0.8836669325828552, "logits/rejected": -0.8740648031234741, "logps/chosen": -52.28067398071289, "logps/rejected": -135.568115234375, "loss": 0.1107, "rewards/accuracies": 0.875, "rewards/chosen": -0.21213576197624207, "rewards/margins": 7.3774189949035645, "rewards/rejected": -7.589554786682129, "step": 678 }, { "epoch": 1.03, "learning_rate": 5.595591179823169e-07, "logits/chosen": -1.1525969505310059, "logits/rejected": -1.0391813516616821, "logps/chosen": -84.83979797363281, "logps/rejected": -174.16342163085938, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": -1.6824384927749634, "rewards/margins": 6.232317924499512, "rewards/rejected": -7.914756774902344, "step": 679 }, { "epoch": 1.03, "learning_rate": 5.582416696854852e-07, "logits/chosen": -0.9567781686782837, "logits/rejected": -0.9765966534614563, "logps/chosen": -63.5609245300293, "logps/rejected": -121.71739196777344, "loss": 0.1131, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3712667226791382, "rewards/margins": 5.644521713256836, "rewards/rejected": -7.015789031982422, "step": 680 }, { "epoch": 1.03, "learning_rate": 5.569238113451812e-07, "logits/chosen": -1.154358148574829, "logits/rejected": -1.1891201734542847, "logps/chosen": -89.4549560546875, "logps/rejected": -149.6629180908203, "loss": 0.082, "rewards/accuracies": 0.875, "rewards/chosen": -1.4142396450042725, "rewards/margins": 5.829531669616699, "rewards/rejected": -7.243770599365234, "step": 681 }, { "epoch": 1.04, "learning_rate": 5.556055522396278e-07, "logits/chosen": -0.9599528908729553, "logits/rejected": -0.918071985244751, "logps/chosen": -47.804283142089844, "logps/rejected": -113.82530975341797, "loss": 0.0802, "rewards/accuracies": 0.875, "rewards/chosen": -0.1189282089471817, "rewards/margins": 5.937498569488525, "rewards/rejected": -6.056427001953125, "step": 682 }, { "epoch": 1.04, "learning_rate": 5.542869016498698e-07, "logits/chosen": -1.0925543308258057, "logits/rejected": -1.1219251155853271, "logps/chosen": -81.27593231201172, "logps/rejected": -157.37948608398438, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -1.6112722158432007, "rewards/margins": 6.5523152351379395, "rewards/rejected": -8.16358757019043, "step": 683 }, { "epoch": 1.04, "learning_rate": 5.52967868859708e-07, "logits/chosen": -1.1082345247268677, "logits/rejected": -1.1396856307983398, "logps/chosen": -62.943992614746094, "logps/rejected": -148.8440399169922, "loss": 0.1339, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6015743613243103, "rewards/margins": 8.0671968460083, "rewards/rejected": -8.668770790100098, "step": 684 }, { "epoch": 1.04, "learning_rate": 5.516484631556344e-07, "logits/chosen": -1.0701913833618164, "logits/rejected": -1.1307215690612793, "logps/chosen": -72.89879608154297, "logps/rejected": -143.15277099609375, "loss": 0.1164, "rewards/accuracies": 0.875, "rewards/chosen": -0.9773444533348083, "rewards/margins": 6.877935886383057, "rewards/rejected": -7.85528039932251, "step": 685 }, { "epoch": 1.04, "learning_rate": 5.50328693826766e-07, "logits/chosen": -0.9838832020759583, "logits/rejected": -0.962506115436554, "logps/chosen": -73.59190368652344, "logps/rejected": -140.7711181640625, "loss": 0.1997, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6346116065979004, "rewards/margins": 6.033596038818359, "rewards/rejected": -7.66820764541626, "step": 686 }, { "epoch": 1.04, "learning_rate": 5.490085701647804e-07, "logits/chosen": -0.9443256855010986, "logits/rejected": -0.8821437358856201, "logps/chosen": -71.37468719482422, "logps/rejected": -148.31710815429688, "loss": 0.1297, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4407132863998413, "rewards/margins": 6.951409339904785, "rewards/rejected": -8.392123222351074, "step": 687 }, { "epoch": 1.05, "learning_rate": 5.47688101463849e-07, "logits/chosen": -1.0126395225524902, "logits/rejected": -0.9197251200675964, "logps/chosen": -75.32856750488281, "logps/rejected": -186.19622802734375, "loss": 0.1014, "rewards/accuracies": 0.875, "rewards/chosen": -1.0315072536468506, "rewards/margins": 9.135555267333984, "rewards/rejected": -10.16706371307373, "step": 688 }, { "epoch": 1.05, "learning_rate": 5.463672970205733e-07, "logits/chosen": -0.8753904104232788, "logits/rejected": -0.926545262336731, "logps/chosen": -58.62400817871094, "logps/rejected": -125.887451171875, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": -0.2632928490638733, "rewards/margins": 6.818629741668701, "rewards/rejected": -7.08192253112793, "step": 689 }, { "epoch": 1.05, "learning_rate": 5.450461661339182e-07, "logits/chosen": -1.041439175605774, "logits/rejected": -0.9572014808654785, "logps/chosen": -69.90070343017578, "logps/rejected": -182.95445251464844, "loss": 0.0985, "rewards/accuracies": 0.875, "rewards/chosen": -0.5516085028648376, "rewards/margins": 10.155409812927246, "rewards/rejected": -10.70701789855957, "step": 690 }, { "epoch": 1.05, "learning_rate": 5.437247181051465e-07, "logits/chosen": -1.1116018295288086, "logits/rejected": -1.052459955215454, "logps/chosen": -48.75881576538086, "logps/rejected": -117.23747253417969, "loss": 0.1613, "rewards/accuracies": 1.0, "rewards/chosen": -0.09138579666614532, "rewards/margins": 5.856074333190918, "rewards/rejected": -5.947459697723389, "step": 691 }, { "epoch": 1.05, "learning_rate": 5.424029622377546e-07, "logits/chosen": -1.282362937927246, "logits/rejected": -1.273923635482788, "logps/chosen": -51.164772033691406, "logps/rejected": -126.75209045410156, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 0.18534865975379944, "rewards/margins": 7.127676486968994, "rewards/rejected": -6.942327976226807, "step": 692 }, { "epoch": 1.05, "learning_rate": 5.410809078374054e-07, "logits/chosen": -1.0203771591186523, "logits/rejected": -0.940859854221344, "logps/chosen": -76.81683349609375, "logps/rejected": -148.52162170410156, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": -1.5949409008026123, "rewards/margins": 6.496974468231201, "rewards/rejected": -8.091915130615234, "step": 693 }, { "epoch": 1.05, "learning_rate": 5.397585642118642e-07, "logits/chosen": -1.1015177965164185, "logits/rejected": -1.0720669031143188, "logps/chosen": -40.1603889465332, "logps/rejected": -75.624267578125, "loss": 0.0951, "rewards/accuracies": 0.9375, "rewards/chosen": -0.00646597146987915, "rewards/margins": 3.2788314819335938, "rewards/rejected": -3.285297393798828, "step": 694 }, { "epoch": 1.06, "learning_rate": 5.384359406709321e-07, "logits/chosen": -1.0925345420837402, "logits/rejected": -1.019098162651062, "logps/chosen": -58.54066467285156, "logps/rejected": -149.29501342773438, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": 0.05131404101848602, "rewards/margins": 8.232137680053711, "rewards/rejected": -8.180824279785156, "step": 695 }, { "epoch": 1.06, "learning_rate": 5.371130465263812e-07, "logits/chosen": -1.318426489830017, "logits/rejected": -1.385157585144043, "logps/chosen": -71.2918930053711, "logps/rejected": -153.06190490722656, "loss": 0.099, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2728750705718994, "rewards/margins": 7.585086345672607, "rewards/rejected": -8.85796070098877, "step": 696 }, { "epoch": 1.06, "learning_rate": 5.357898910918888e-07, "logits/chosen": -1.0089809894561768, "logits/rejected": -1.0487353801727295, "logps/chosen": -68.19590759277344, "logps/rejected": -142.8970947265625, "loss": 0.1073, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8273400664329529, "rewards/margins": 6.7305779457092285, "rewards/rejected": -7.557918548583984, "step": 697 }, { "epoch": 1.06, "learning_rate": 5.344664836829714e-07, "logits/chosen": -1.3282719850540161, "logits/rejected": -1.225130558013916, "logps/chosen": -77.95903015136719, "logps/rejected": -204.59254455566406, "loss": 0.1005, "rewards/accuracies": 0.9375, "rewards/chosen": -1.112104892730713, "rewards/margins": 10.1814603805542, "rewards/rejected": -11.29356575012207, "step": 698 }, { "epoch": 1.06, "learning_rate": 5.331428336169198e-07, "logits/chosen": -0.9961109757423401, "logits/rejected": -0.9357060194015503, "logps/chosen": -63.64835739135742, "logps/rejected": -147.5065155029297, "loss": 0.072, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9150161743164062, "rewards/margins": 7.08585262298584, "rewards/rejected": -8.000868797302246, "step": 699 }, { "epoch": 1.06, "learning_rate": 5.318189502127331e-07, "logits/chosen": -1.2980554103851318, "logits/rejected": -1.264257550239563, "logps/chosen": -59.385414123535156, "logps/rejected": -149.30679321289062, "loss": 0.136, "rewards/accuracies": 1.0, "rewards/chosen": -0.2459990531206131, "rewards/margins": 7.867907524108887, "rewards/rejected": -8.113906860351562, "step": 700 }, { "epoch": 1.06, "learning_rate": 5.304948427910534e-07, "logits/chosen": -1.3335720300674438, "logits/rejected": -1.3249469995498657, "logps/chosen": -73.70133972167969, "logps/rejected": -159.0903778076172, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -1.3161687850952148, "rewards/margins": 7.501766204833984, "rewards/rejected": -8.8179349899292, "step": 701 }, { "epoch": 1.07, "learning_rate": 5.291705206740996e-07, "logits/chosen": -1.1001014709472656, "logits/rejected": -1.0837841033935547, "logps/chosen": -51.384239196777344, "logps/rejected": -125.36265563964844, "loss": 0.0738, "rewards/accuracies": 1.0, "rewards/chosen": -0.3471025228500366, "rewards/margins": 6.566239356994629, "rewards/rejected": -6.913341045379639, "step": 702 }, { "epoch": 1.07, "learning_rate": 5.278459931856026e-07, "logits/chosen": -1.2043085098266602, "logits/rejected": -1.137099266052246, "logps/chosen": -86.36485290527344, "logps/rejected": -184.11178588867188, "loss": 0.1143, "rewards/accuracies": 0.9375, "rewards/chosen": -2.149524688720703, "rewards/margins": 8.116349220275879, "rewards/rejected": -10.265874862670898, "step": 703 }, { "epoch": 1.07, "learning_rate": 5.265212696507386e-07, "logits/chosen": -1.2772239446640015, "logits/rejected": -1.2306042909622192, "logps/chosen": -76.60639953613281, "logps/rejected": -166.2823486328125, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": -1.8685569763183594, "rewards/margins": 7.428231716156006, "rewards/rejected": -9.296789169311523, "step": 704 }, { "epoch": 1.07, "learning_rate": 5.251963593960646e-07, "logits/chosen": -0.8318597078323364, "logits/rejected": -0.8045398592948914, "logps/chosen": -71.75132751464844, "logps/rejected": -161.3377685546875, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -1.956432580947876, "rewards/margins": 7.814340591430664, "rewards/rejected": -9.770774841308594, "step": 705 }, { "epoch": 1.07, "learning_rate": 5.238712717494517e-07, "logits/chosen": -1.0191956758499146, "logits/rejected": -1.0569789409637451, "logps/chosen": -78.18263244628906, "logps/rejected": -167.52734375, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": -1.746780514717102, "rewards/margins": 7.930570125579834, "rewards/rejected": -9.677350044250488, "step": 706 }, { "epoch": 1.07, "learning_rate": 5.225460160400204e-07, "logits/chosen": -1.0451310873031616, "logits/rejected": -1.1556812524795532, "logps/chosen": -95.20954132080078, "logps/rejected": -165.25765991210938, "loss": 0.1126, "rewards/accuracies": 1.0, "rewards/chosen": -2.155846118927002, "rewards/margins": 7.055589199066162, "rewards/rejected": -9.211434364318848, "step": 707 }, { "epoch": 1.08, "learning_rate": 5.212206015980741e-07, "logits/chosen": -1.0930476188659668, "logits/rejected": -1.013002872467041, "logps/chosen": -83.26821899414062, "logps/rejected": -197.95700073242188, "loss": 0.0779, "rewards/accuracies": 0.875, "rewards/chosen": -2.0534558296203613, "rewards/margins": 9.577096939086914, "rewards/rejected": -11.630553245544434, "step": 708 }, { "epoch": 1.08, "learning_rate": 5.198950377550338e-07, "logits/chosen": -1.0042184591293335, "logits/rejected": -1.0928484201431274, "logps/chosen": -66.0024185180664, "logps/rejected": -122.8520278930664, "loss": 0.1358, "rewards/accuracies": 1.0, "rewards/chosen": -1.2644383907318115, "rewards/margins": 5.515614032745361, "rewards/rejected": -6.780052185058594, "step": 709 }, { "epoch": 1.08, "learning_rate": 5.185693338433723e-07, "logits/chosen": -1.212796688079834, "logits/rejected": -1.2040300369262695, "logps/chosen": -68.95840454101562, "logps/rejected": -158.29884338378906, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": -1.497808814048767, "rewards/margins": 7.9521260261535645, "rewards/rejected": -9.449934005737305, "step": 710 }, { "epoch": 1.08, "learning_rate": 5.172434991965486e-07, "logits/chosen": -1.2450261116027832, "logits/rejected": -1.1200731992721558, "logps/chosen": -77.91986083984375, "logps/rejected": -185.11241149902344, "loss": 0.0812, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8076395988464355, "rewards/margins": 9.032342910766602, "rewards/rejected": -10.839981079101562, "step": 711 }, { "epoch": 1.08, "learning_rate": 5.159175431489423e-07, "logits/chosen": -0.7942067980766296, "logits/rejected": -0.7741928100585938, "logps/chosen": -62.70838165283203, "logps/rejected": -142.3878631591797, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -1.2866111993789673, "rewards/margins": 6.572202682495117, "rewards/rejected": -7.858813285827637, "step": 712 }, { "epoch": 1.08, "learning_rate": 5.145914750357871e-07, "logits/chosen": -1.1640188694000244, "logits/rejected": -1.197671890258789, "logps/chosen": -68.85122680664062, "logps/rejected": -147.9771728515625, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -1.203361988067627, "rewards/margins": 7.032736301422119, "rewards/rejected": -8.236098289489746, "step": 713 }, { "epoch": 1.08, "learning_rate": 5.132653041931066e-07, "logits/chosen": -1.0039440393447876, "logits/rejected": -1.0339112281799316, "logps/chosen": -76.23931121826172, "logps/rejected": -160.29669189453125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -1.1478352546691895, "rewards/margins": 7.5223469734191895, "rewards/rejected": -8.670182228088379, "step": 714 }, { "epoch": 1.09, "learning_rate": 5.119390399576468e-07, "logits/chosen": -0.8470580577850342, "logits/rejected": -0.9359773993492126, "logps/chosen": -57.678245544433594, "logps/rejected": -119.07586669921875, "loss": 0.0875, "rewards/accuracies": 0.9375, "rewards/chosen": -0.49064531922340393, "rewards/margins": 6.0010857582092285, "rewards/rejected": -6.4917311668396, "step": 715 }, { "epoch": 1.09, "learning_rate": 5.106126916668118e-07, "logits/chosen": -1.144890308380127, "logits/rejected": -1.0987181663513184, "logps/chosen": -64.77657318115234, "logps/rejected": -135.87718200683594, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.6729116439819336, "rewards/margins": 6.5454912185668945, "rewards/rejected": -7.2184038162231445, "step": 716 }, { "epoch": 1.09, "learning_rate": 5.09286268658597e-07, "logits/chosen": -1.190121054649353, "logits/rejected": -1.1410903930664062, "logps/chosen": -83.95819091796875, "logps/rejected": -177.8282470703125, "loss": 0.0826, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7526713609695435, "rewards/margins": 7.734089374542236, "rewards/rejected": -9.486761093139648, "step": 717 }, { "epoch": 1.09, "learning_rate": 5.079597802715244e-07, "logits/chosen": -1.1426438093185425, "logits/rejected": -1.1871936321258545, "logps/chosen": -57.17544937133789, "logps/rejected": -133.5987548828125, "loss": 0.0571, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2977384924888611, "rewards/margins": 6.940124988555908, "rewards/rejected": -7.237863540649414, "step": 718 }, { "epoch": 1.09, "learning_rate": 5.066332358445759e-07, "logits/chosen": -1.3283708095550537, "logits/rejected": -1.2297604084014893, "logps/chosen": -73.53825378417969, "logps/rejected": -182.50379943847656, "loss": 0.0742, "rewards/accuracies": 1.0, "rewards/chosen": -0.9024177193641663, "rewards/margins": 8.631746292114258, "rewards/rejected": -9.534162521362305, "step": 719 }, { "epoch": 1.09, "learning_rate": 5.053066447171282e-07, "logits/chosen": -1.1588116884231567, "logits/rejected": -1.0975029468536377, "logps/chosen": -52.616249084472656, "logps/rejected": -128.71990966796875, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 0.11890238523483276, "rewards/margins": 6.472389221191406, "rewards/rejected": -6.35348653793335, "step": 720 }, { "epoch": 1.1, "learning_rate": 5.039800162288861e-07, "logits/chosen": -1.2566020488739014, "logits/rejected": -1.1881121397018433, "logps/chosen": -66.51480102539062, "logps/rejected": -154.63040161132812, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": -0.4931876063346863, "rewards/margins": 7.1658220291137695, "rewards/rejected": -7.6590094566345215, "step": 721 }, { "epoch": 1.1, "learning_rate": 5.026533597198185e-07, "logits/chosen": -1.1822502613067627, "logits/rejected": -1.270106315612793, "logps/chosen": -58.7943115234375, "logps/rejected": -158.77145385742188, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/chosen": -0.7609018683433533, "rewards/margins": 8.432029724121094, "rewards/rejected": -9.19293212890625, "step": 722 }, { "epoch": 1.1, "learning_rate": 5.013266845300907e-07, "logits/chosen": -1.0390766859054565, "logits/rejected": -1.0265178680419922, "logps/chosen": -81.84119415283203, "logps/rejected": -185.984619140625, "loss": 0.1292, "rewards/accuracies": 1.0, "rewards/chosen": -0.834659993648529, "rewards/margins": 9.396257400512695, "rewards/rejected": -10.230918884277344, "step": 723 }, { "epoch": 1.1, "learning_rate": 5e-07, "logits/chosen": -1.0787450075149536, "logits/rejected": -1.0342082977294922, "logps/chosen": -63.2120475769043, "logps/rejected": -176.89993286132812, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -0.23369872570037842, "rewards/margins": 9.546956062316895, "rewards/rejected": -9.780654907226562, "step": 724 }, { "epoch": 1.1, "learning_rate": 4.986733154699093e-07, "logits/chosen": -0.9337677955627441, "logits/rejected": -0.9124805927276611, "logps/chosen": -61.646949768066406, "logps/rejected": -143.59019470214844, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.0404541492462158, "rewards/margins": 6.85941743850708, "rewards/rejected": -7.899870872497559, "step": 725 }, { "epoch": 1.1, "learning_rate": 4.973466402801817e-07, "logits/chosen": -0.9301645755767822, "logits/rejected": -0.9025458097457886, "logps/chosen": -73.25839233398438, "logps/rejected": -163.77919006347656, "loss": 0.1275, "rewards/accuracies": 0.9375, "rewards/chosen": -0.781755805015564, "rewards/margins": 7.804413795471191, "rewards/rejected": -8.586169242858887, "step": 726 }, { "epoch": 1.1, "learning_rate": 4.96019983771114e-07, "logits/chosen": -1.2382862567901611, "logits/rejected": -1.2279412746429443, "logps/chosen": -59.01118850708008, "logps/rejected": -126.78030395507812, "loss": 0.058, "rewards/accuracies": 0.9375, "rewards/chosen": 0.22560197114944458, "rewards/margins": 6.76422643661499, "rewards/rejected": -6.538625240325928, "step": 727 }, { "epoch": 1.11, "learning_rate": 4.946933552828719e-07, "logits/chosen": -1.1101487874984741, "logits/rejected": -1.0449861288070679, "logps/chosen": -67.08702087402344, "logps/rejected": -158.53958129882812, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -0.9119900465011597, "rewards/margins": 7.477686882019043, "rewards/rejected": -8.389676094055176, "step": 728 }, { "epoch": 1.11, "learning_rate": 4.933667641554239e-07, "logits/chosen": -1.3548754453659058, "logits/rejected": -1.3058669567108154, "logps/chosen": -49.29551696777344, "logps/rejected": -130.4243927001953, "loss": 0.0712, "rewards/accuracies": 0.875, "rewards/chosen": -0.08856362104415894, "rewards/margins": 6.403277397155762, "rewards/rejected": -6.491840362548828, "step": 729 }, { "epoch": 1.11, "learning_rate": 4.920402197284755e-07, "logits/chosen": -1.146132469177246, "logits/rejected": -1.08512544631958, "logps/chosen": -42.733612060546875, "logps/rejected": -121.80816650390625, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": 0.32784441113471985, "rewards/margins": 6.611300945281982, "rewards/rejected": -6.283456325531006, "step": 730 }, { "epoch": 1.11, "learning_rate": 4.907137313414029e-07, "logits/chosen": -1.1103661060333252, "logits/rejected": -1.1141341924667358, "logps/chosen": -51.15158462524414, "logps/rejected": -115.07316589355469, "loss": 0.134, "rewards/accuracies": 1.0, "rewards/chosen": 0.09807372093200684, "rewards/margins": 6.130924701690674, "rewards/rejected": -6.032850742340088, "step": 731 }, { "epoch": 1.11, "learning_rate": 4.893873083331882e-07, "logits/chosen": -1.0536171197891235, "logits/rejected": -1.0562716722488403, "logps/chosen": -109.25856018066406, "logps/rejected": -196.98011779785156, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": -1.6777667999267578, "rewards/margins": 8.406275749206543, "rewards/rejected": -10.0840425491333, "step": 732 }, { "epoch": 1.11, "learning_rate": 4.880609600423532e-07, "logits/chosen": -1.1645655632019043, "logits/rejected": -1.2337405681610107, "logps/chosen": -54.793243408203125, "logps/rejected": -112.279541015625, "loss": 0.0856, "rewards/accuracies": 0.875, "rewards/chosen": -0.10907458513975143, "rewards/margins": 5.2775654792785645, "rewards/rejected": -5.386639595031738, "step": 733 }, { "epoch": 1.12, "learning_rate": 4.867346958068934e-07, "logits/chosen": -1.1355609893798828, "logits/rejected": -1.1663737297058105, "logps/chosen": -80.99940490722656, "logps/rejected": -165.85794067382812, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -1.6650447845458984, "rewards/margins": 7.612807273864746, "rewards/rejected": -9.277853012084961, "step": 734 }, { "epoch": 1.12, "learning_rate": 4.854085249642127e-07, "logits/chosen": -1.149308681488037, "logits/rejected": -1.1109539270401, "logps/chosen": -51.61331558227539, "logps/rejected": -123.82764434814453, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": -0.23800213634967804, "rewards/margins": 6.6616621017456055, "rewards/rejected": -6.899663925170898, "step": 735 }, { "epoch": 1.12, "learning_rate": 4.840824568510579e-07, "logits/chosen": -1.017869472503662, "logits/rejected": -1.0294277667999268, "logps/chosen": -53.62151336669922, "logps/rejected": -113.24836730957031, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -0.047505542635917664, "rewards/margins": 5.728215217590332, "rewards/rejected": -5.775720596313477, "step": 736 }, { "epoch": 1.12, "learning_rate": 4.827565008034513e-07, "logits/chosen": -1.199344515800476, "logits/rejected": -1.1634290218353271, "logps/chosen": -58.40774154663086, "logps/rejected": -160.16136169433594, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -0.14727118611335754, "rewards/margins": 8.634309768676758, "rewards/rejected": -8.781580924987793, "step": 737 }, { "epoch": 1.12, "learning_rate": 4.814306661566276e-07, "logits/chosen": -1.1273925304412842, "logits/rejected": -1.0789045095443726, "logps/chosen": -44.35386276245117, "logps/rejected": -138.4711151123047, "loss": 0.0861, "rewards/accuracies": 1.0, "rewards/chosen": 0.6509190797805786, "rewards/margins": 7.575811862945557, "rewards/rejected": -6.924892425537109, "step": 738 }, { "epoch": 1.12, "learning_rate": 4.801049622449661e-07, "logits/chosen": -1.1626410484313965, "logits/rejected": -1.0463327169418335, "logps/chosen": -91.6008529663086, "logps/rejected": -211.8026580810547, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -1.2525895833969116, "rewards/margins": 10.100064277648926, "rewards/rejected": -11.352653503417969, "step": 739 }, { "epoch": 1.12, "learning_rate": 4.787793984019259e-07, "logits/chosen": -0.9621848464012146, "logits/rejected": -0.9286797046661377, "logps/chosen": -56.69253158569336, "logps/rejected": -142.10772705078125, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -0.3490813374519348, "rewards/margins": 7.836462497711182, "rewards/rejected": -8.18554401397705, "step": 740 }, { "epoch": 1.13, "learning_rate": 4.774539839599795e-07, "logits/chosen": -1.0346853733062744, "logits/rejected": -0.9935694336891174, "logps/chosen": -58.07199478149414, "logps/rejected": -158.4248046875, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": -0.49401575326919556, "rewards/margins": 8.622398376464844, "rewards/rejected": -9.116415023803711, "step": 741 }, { "epoch": 1.13, "learning_rate": 4.7612872825054817e-07, "logits/chosen": -1.0496529340744019, "logits/rejected": -0.9647389054298401, "logps/chosen": -62.4256591796875, "logps/rejected": -206.42465209960938, "loss": 0.1518, "rewards/accuracies": 1.0, "rewards/chosen": -0.1937393844127655, "rewards/margins": 12.18332290649414, "rewards/rejected": -12.377062797546387, "step": 742 }, { "epoch": 1.13, "learning_rate": 4.748036406039355e-07, "logits/chosen": -1.1184381246566772, "logits/rejected": -1.0451523065567017, "logps/chosen": -66.47551727294922, "logps/rejected": -167.14010620117188, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": -0.6011109948158264, "rewards/margins": 8.966035842895508, "rewards/rejected": -9.567147254943848, "step": 743 }, { "epoch": 1.13, "learning_rate": 4.7347873034926146e-07, "logits/chosen": -0.9570289254188538, "logits/rejected": -0.9113000631332397, "logps/chosen": -77.79519653320312, "logps/rejected": -189.1246795654297, "loss": 0.167, "rewards/accuracies": 1.0, "rewards/chosen": -0.7696459889411926, "rewards/margins": 9.580473899841309, "rewards/rejected": -10.350118637084961, "step": 744 }, { "epoch": 1.13, "learning_rate": 4.7215400681439743e-07, "logits/chosen": -1.0565179586410522, "logits/rejected": -1.0138272047042847, "logps/chosen": -78.86212158203125, "logps/rejected": -174.55279541015625, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -1.0645883083343506, "rewards/margins": 8.36618423461914, "rewards/rejected": -9.43077278137207, "step": 745 }, { "epoch": 1.13, "learning_rate": 4.708294793259004e-07, "logits/chosen": -1.296499252319336, "logits/rejected": -1.2552895545959473, "logps/chosen": -48.52935028076172, "logps/rejected": -123.3971939086914, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": -0.389154851436615, "rewards/margins": 6.000330448150635, "rewards/rejected": -6.3894853591918945, "step": 746 }, { "epoch": 1.13, "learning_rate": 4.6950515720894655e-07, "logits/chosen": -1.2907040119171143, "logits/rejected": -1.2215681076049805, "logps/chosen": -69.718994140625, "logps/rejected": -184.017333984375, "loss": 0.0883, "rewards/accuracies": 1.0, "rewards/chosen": -0.23798534274101257, "rewards/margins": 10.50714111328125, "rewards/rejected": -10.745126724243164, "step": 747 }, { "epoch": 1.14, "learning_rate": 4.681810497872668e-07, "logits/chosen": -1.1115891933441162, "logits/rejected": -1.0429208278656006, "logps/chosen": -75.26403045654297, "logps/rejected": -183.2494659423828, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": -1.2758240699768066, "rewards/margins": 8.724857330322266, "rewards/rejected": -10.00068187713623, "step": 748 }, { "epoch": 1.14, "learning_rate": 4.6685716638308016e-07, "logits/chosen": -1.1416871547698975, "logits/rejected": -1.0443594455718994, "logps/chosen": -57.0560417175293, "logps/rejected": -134.08177185058594, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": -0.595421314239502, "rewards/margins": 6.1352925300598145, "rewards/rejected": -6.730713844299316, "step": 749 }, { "epoch": 1.14, "learning_rate": 4.6553351631702877e-07, "logits/chosen": -1.1689854860305786, "logits/rejected": -1.208797812461853, "logps/chosen": -77.22966766357422, "logps/rejected": -198.64471435546875, "loss": 0.074, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0628044605255127, "rewards/margins": 11.271867752075195, "rewards/rejected": -12.334672927856445, "step": 750 }, { "epoch": 1.14, "learning_rate": 4.6421010890811124e-07, "logits/chosen": -0.98302161693573, "logits/rejected": -1.1160837411880493, "logps/chosen": -66.82015991210938, "logps/rejected": -113.2428970336914, "loss": 0.0948, "rewards/accuracies": 1.0, "rewards/chosen": -0.4074597954750061, "rewards/margins": 5.847441673278809, "rewards/rejected": -6.25490140914917, "step": 751 }, { "epoch": 1.14, "learning_rate": 4.628869534736187e-07, "logits/chosen": -1.2641061544418335, "logits/rejected": -1.1559064388275146, "logps/chosen": -76.30941772460938, "logps/rejected": -177.64637756347656, "loss": 0.0812, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2699906826019287, "rewards/margins": 8.294047355651855, "rewards/rejected": -9.564037322998047, "step": 752 }, { "epoch": 1.14, "learning_rate": 4.615640593290679e-07, "logits/chosen": -1.1859060525894165, "logits/rejected": -1.150189995765686, "logps/chosen": -60.840484619140625, "logps/rejected": -160.75506591796875, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -0.03157242760062218, "rewards/margins": 8.924668312072754, "rewards/rejected": -8.9562406539917, "step": 753 }, { "epoch": 1.15, "learning_rate": 4.6024143578813585e-07, "logits/chosen": -1.1408933401107788, "logits/rejected": -1.115440011024475, "logps/chosen": -47.346519470214844, "logps/rejected": -107.71475219726562, "loss": 0.1403, "rewards/accuracies": 0.875, "rewards/chosen": 0.033688440918922424, "rewards/margins": 5.437056541442871, "rewards/rejected": -5.40336799621582, "step": 754 }, { "epoch": 1.15, "learning_rate": 4.589190921625945e-07, "logits/chosen": -1.2576284408569336, "logits/rejected": -1.3243356943130493, "logps/chosen": -54.915672302246094, "logps/rejected": -117.61421203613281, "loss": 0.076, "rewards/accuracies": 0.9375, "rewards/chosen": 0.27700039744377136, "rewards/margins": 6.533145427703857, "rewards/rejected": -6.256145000457764, "step": 755 }, { "epoch": 1.15, "learning_rate": 4.5759703776224555e-07, "logits/chosen": -1.008704423904419, "logits/rejected": -1.0297294855117798, "logps/chosen": -71.18299865722656, "logps/rejected": -167.969970703125, "loss": 0.1322, "rewards/accuracies": 1.0, "rewards/chosen": -0.2875728905200958, "rewards/margins": 9.068160057067871, "rewards/rejected": -9.355732917785645, "step": 756 }, { "epoch": 1.15, "learning_rate": 4.562752818948535e-07, "logits/chosen": -1.204525351524353, "logits/rejected": -1.1385844945907593, "logps/chosen": -61.3125, "logps/rejected": -150.655029296875, "loss": 0.0791, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0254082679748535, "rewards/margins": 7.04811954498291, "rewards/rejected": -8.073528289794922, "step": 757 }, { "epoch": 1.15, "learning_rate": 4.549538338660819e-07, "logits/chosen": -1.015628695487976, "logits/rejected": -1.0417346954345703, "logps/chosen": -59.74385070800781, "logps/rejected": -128.64451599121094, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -0.5621964335441589, "rewards/margins": 5.849483013153076, "rewards/rejected": -6.411679744720459, "step": 758 }, { "epoch": 1.15, "learning_rate": 4.536327029794266e-07, "logits/chosen": -1.0389518737792969, "logits/rejected": -0.9769172072410583, "logps/chosen": -52.239601135253906, "logps/rejected": -131.2997589111328, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": -0.1164703369140625, "rewards/margins": 6.758030891418457, "rewards/rejected": -6.874501705169678, "step": 759 }, { "epoch": 1.15, "learning_rate": 4.52311898536151e-07, "logits/chosen": -1.0630335807800293, "logits/rejected": -0.9963873028755188, "logps/chosen": -58.79426574707031, "logps/rejected": -149.952392578125, "loss": 0.0734, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3736463189125061, "rewards/margins": 7.867257595062256, "rewards/rejected": -8.240903854370117, "step": 760 }, { "epoch": 1.16, "learning_rate": 4.5099142983521963e-07, "logits/chosen": -1.0330792665481567, "logits/rejected": -1.0546962022781372, "logps/chosen": -52.70293045043945, "logps/rejected": -146.3146514892578, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.17132572829723358, "rewards/margins": 8.526296615600586, "rewards/rejected": -8.354971885681152, "step": 761 }, { "epoch": 1.16, "learning_rate": 4.4967130617323396e-07, "logits/chosen": -0.9147458672523499, "logits/rejected": -0.6992746591567993, "logps/chosen": -74.70170593261719, "logps/rejected": -243.60031127929688, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -0.19599175453186035, "rewards/margins": 13.283580780029297, "rewards/rejected": -13.479572296142578, "step": 762 }, { "epoch": 1.16, "learning_rate": 4.4835153684436567e-07, "logits/chosen": -1.1678838729858398, "logits/rejected": -1.1080782413482666, "logps/chosen": -64.56056213378906, "logps/rejected": -157.8324432373047, "loss": 0.0514, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7837637066841125, "rewards/margins": 7.835894584655762, "rewards/rejected": -8.619658470153809, "step": 763 }, { "epoch": 1.16, "learning_rate": 4.47032131140292e-07, "logits/chosen": -1.1793973445892334, "logits/rejected": -1.0700643062591553, "logps/chosen": -70.48400115966797, "logps/rejected": -186.6561279296875, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -0.7886835932731628, "rewards/margins": 10.022167205810547, "rewards/rejected": -10.81085205078125, "step": 764 }, { "epoch": 1.16, "learning_rate": 4.4571309835013023e-07, "logits/chosen": -1.312821626663208, "logits/rejected": -1.2918648719787598, "logps/chosen": -52.059349060058594, "logps/rejected": -124.93989562988281, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -0.7141761779785156, "rewards/margins": 5.787374019622803, "rewards/rejected": -6.50154972076416, "step": 765 }, { "epoch": 1.16, "learning_rate": 4.4439444776037217e-07, "logits/chosen": -1.3349573612213135, "logits/rejected": -1.3212847709655762, "logps/chosen": -77.37397766113281, "logps/rejected": -165.1543731689453, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -1.3379329442977905, "rewards/margins": 7.1123127937316895, "rewards/rejected": -8.450244903564453, "step": 766 }, { "epoch": 1.17, "learning_rate": 4.430761886548189e-07, "logits/chosen": -1.183845043182373, "logits/rejected": -1.145720362663269, "logps/chosen": -57.94763946533203, "logps/rejected": -123.90760803222656, "loss": 0.0495, "rewards/accuracies": 0.9375, "rewards/chosen": -0.35080718994140625, "rewards/margins": 6.037484645843506, "rewards/rejected": -6.388291835784912, "step": 767 }, { "epoch": 1.17, "learning_rate": 4.417583303145147e-07, "logits/chosen": -1.2359590530395508, "logits/rejected": -1.1837353706359863, "logps/chosen": -59.06725311279297, "logps/rejected": -127.3287582397461, "loss": 0.0818, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2838151752948761, "rewards/margins": 6.447656631469727, "rewards/rejected": -6.731472015380859, "step": 768 }, { "epoch": 1.17, "learning_rate": 4.4044088201768305e-07, "logits/chosen": -1.1694906949996948, "logits/rejected": -1.2630912065505981, "logps/chosen": -59.07255935668945, "logps/rejected": -146.60903930664062, "loss": 0.0751, "rewards/accuracies": 1.0, "rewards/chosen": 0.7602745890617371, "rewards/margins": 8.458105087280273, "rewards/rejected": -7.6978302001953125, "step": 769 }, { "epoch": 1.17, "learning_rate": 4.391238530396605e-07, "logits/chosen": -1.2331902980804443, "logits/rejected": -1.1801191568374634, "logps/chosen": -63.41059112548828, "logps/rejected": -170.8983154296875, "loss": 0.0899, "rewards/accuracies": 0.9375, "rewards/chosen": 0.19194763898849487, "rewards/margins": 9.201603889465332, "rewards/rejected": -9.009657859802246, "step": 770 }, { "epoch": 1.17, "learning_rate": 4.378072526528307e-07, "logits/chosen": -1.1899259090423584, "logits/rejected": -1.2759406566619873, "logps/chosen": -55.15724563598633, "logps/rejected": -125.78352355957031, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.8366152048110962, "rewards/margins": 8.044254302978516, "rewards/rejected": -7.207639694213867, "step": 771 }, { "epoch": 1.17, "learning_rate": 4.364910901265606e-07, "logits/chosen": -1.1296507120132446, "logits/rejected": -1.1952402591705322, "logps/chosen": -77.33753967285156, "logps/rejected": -154.226806640625, "loss": 0.0335, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2563350200653076, "rewards/margins": 8.133346557617188, "rewards/rejected": -8.389680862426758, "step": 772 }, { "epoch": 1.17, "learning_rate": 4.351753747271345e-07, "logits/chosen": -1.1918877363204956, "logits/rejected": -1.2347781658172607, "logps/chosen": -68.38977813720703, "logps/rejected": -152.2249755859375, "loss": 0.1108, "rewards/accuracies": 1.0, "rewards/chosen": -0.38625311851501465, "rewards/margins": 8.195649147033691, "rewards/rejected": -8.581901550292969, "step": 773 }, { "epoch": 1.18, "learning_rate": 4.3386011571768793e-07, "logits/chosen": -1.0707042217254639, "logits/rejected": -1.1057032346725464, "logps/chosen": -69.99359893798828, "logps/rejected": -173.17208862304688, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": -0.719723641872406, "rewards/margins": 8.93809986114502, "rewards/rejected": -9.657824516296387, "step": 774 }, { "epoch": 1.18, "learning_rate": 4.3254532235814413e-07, "logits/chosen": -1.1420342922210693, "logits/rejected": -1.1160727739334106, "logps/chosen": -69.51921081542969, "logps/rejected": -153.108154296875, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -0.3570714294910431, "rewards/margins": 7.29402494430542, "rewards/rejected": -7.651096343994141, "step": 775 }, { "epoch": 1.18, "learning_rate": 4.312310039051476e-07, "logits/chosen": -1.095452070236206, "logits/rejected": -1.0368789434432983, "logps/chosen": -83.53199768066406, "logps/rejected": -195.30474853515625, "loss": 0.1013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0821952819824219, "rewards/margins": 9.629199981689453, "rewards/rejected": -10.711397171020508, "step": 776 }, { "epoch": 1.18, "learning_rate": 4.2991716961199944e-07, "logits/chosen": -0.8502026796340942, "logits/rejected": -0.7925280928611755, "logps/chosen": -79.38339233398438, "logps/rejected": -211.51736450195312, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -0.30591297149658203, "rewards/margins": 11.01159954071045, "rewards/rejected": -11.317513465881348, "step": 777 }, { "epoch": 1.18, "learning_rate": 4.2860382872859183e-07, "logits/chosen": -1.3151302337646484, "logits/rejected": -1.2342097759246826, "logps/chosen": -58.19136047363281, "logps/rejected": -159.36239624023438, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.2004951536655426, "rewards/margins": 8.885076522827148, "rewards/rejected": -8.684579849243164, "step": 778 }, { "epoch": 1.18, "learning_rate": 4.2729099050134356e-07, "logits/chosen": -1.213114857673645, "logits/rejected": -1.2209107875823975, "logps/chosen": -74.55901336669922, "logps/rejected": -196.17433166503906, "loss": 0.0438, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4468156695365906, "rewards/margins": 9.722015380859375, "rewards/rejected": -10.168830871582031, "step": 779 }, { "epoch": 1.18, "learning_rate": 4.259786641731343e-07, "logits/chosen": -1.1325987577438354, "logits/rejected": -1.2110825777053833, "logps/chosen": -73.87116241455078, "logps/rejected": -161.33755493164062, "loss": 0.0677, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8084357976913452, "rewards/margins": 8.306604385375977, "rewards/rejected": -9.115039825439453, "step": 780 }, { "epoch": 1.19, "learning_rate": 4.246668589832396e-07, "logits/chosen": -1.125104546546936, "logits/rejected": -1.0887819528579712, "logps/chosen": -59.11391067504883, "logps/rejected": -134.1604766845703, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.044446349143981934, "rewards/margins": 6.709604740142822, "rewards/rejected": -6.665159225463867, "step": 781 }, { "epoch": 1.19, "learning_rate": 4.2335558416726627e-07, "logits/chosen": -1.0693738460540771, "logits/rejected": -1.0744346380233765, "logps/chosen": -52.576473236083984, "logps/rejected": -119.21489715576172, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": 0.3390309512615204, "rewards/margins": 6.409571170806885, "rewards/rejected": -6.070540428161621, "step": 782 }, { "epoch": 1.19, "learning_rate": 4.2204484895708714e-07, "logits/chosen": -1.290875792503357, "logits/rejected": -1.282123327255249, "logps/chosen": -56.516658782958984, "logps/rejected": -121.05769348144531, "loss": 0.1121, "rewards/accuracies": 0.875, "rewards/chosen": -0.08843201398849487, "rewards/margins": 6.462881565093994, "rewards/rejected": -6.551313400268555, "step": 783 }, { "epoch": 1.19, "learning_rate": 4.2073466258077556e-07, "logits/chosen": -1.4439988136291504, "logits/rejected": -1.4561644792556763, "logps/chosen": -73.92353820800781, "logps/rejected": -142.3273162841797, "loss": 0.1049, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6018531918525696, "rewards/margins": 6.388706207275391, "rewards/rejected": -6.9905595779418945, "step": 784 }, { "epoch": 1.19, "learning_rate": 4.194250342625413e-07, "logits/chosen": -1.2636879682540894, "logits/rejected": -1.0962374210357666, "logps/chosen": -57.09210205078125, "logps/rejected": -185.80169677734375, "loss": 0.0585, "rewards/accuracies": 0.9375, "rewards/chosen": 0.26259636878967285, "rewards/margins": 10.314652442932129, "rewards/rejected": -10.052056312561035, "step": 785 }, { "epoch": 1.19, "learning_rate": 4.18115973222665e-07, "logits/chosen": -1.1007850170135498, "logits/rejected": -0.9807206392288208, "logps/chosen": -54.816993713378906, "logps/rejected": -158.8098907470703, "loss": 0.1046, "rewards/accuracies": 1.0, "rewards/chosen": -0.10611987113952637, "rewards/margins": 8.275077819824219, "rewards/rejected": -8.381197929382324, "step": 786 }, { "epoch": 1.2, "learning_rate": 4.1680748867743385e-07, "logits/chosen": -1.102643609046936, "logits/rejected": -1.0911848545074463, "logps/chosen": -53.98735809326172, "logps/rejected": -147.85398864746094, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": 0.2515929639339447, "rewards/margins": 8.081626892089844, "rewards/rejected": -7.830034255981445, "step": 787 }, { "epoch": 1.2, "learning_rate": 4.154995898390755e-07, "logits/chosen": -1.157513976097107, "logits/rejected": -1.1232832670211792, "logps/chosen": -61.06407928466797, "logps/rejected": -150.46083068847656, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -0.15333883464336395, "rewards/margins": 7.626152038574219, "rewards/rejected": -7.779490947723389, "step": 788 }, { "epoch": 1.2, "learning_rate": 4.1419228591569466e-07, "logits/chosen": -1.1882693767547607, "logits/rejected": -1.1941783428192139, "logps/chosen": -80.35359191894531, "logps/rejected": -202.92909240722656, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -0.3616008162498474, "rewards/margins": 11.065812110900879, "rewards/rejected": -11.427413940429688, "step": 789 }, { "epoch": 1.2, "learning_rate": 4.1288558611120755e-07, "logits/chosen": -1.2141138315200806, "logits/rejected": -1.2202541828155518, "logps/chosen": -78.9452133178711, "logps/rejected": -175.32675170898438, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": 0.28721117973327637, "rewards/margins": 9.105508804321289, "rewards/rejected": -8.818297386169434, "step": 790 }, { "epoch": 1.2, "learning_rate": 4.115794996252768e-07, "logits/chosen": -1.2565443515777588, "logits/rejected": -1.2638403177261353, "logps/chosen": -42.99260711669922, "logps/rejected": -106.03839874267578, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": 0.6804541349411011, "rewards/margins": 5.897651672363281, "rewards/rejected": -5.217196941375732, "step": 791 }, { "epoch": 1.2, "learning_rate": 4.102740356532473e-07, "logits/chosen": -1.3011318445205688, "logits/rejected": -1.3293147087097168, "logps/chosen": -63.77526092529297, "logps/rejected": -138.43740844726562, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": 0.5142470598220825, "rewards/margins": 7.463236331939697, "rewards/rejected": -6.948988914489746, "step": 792 }, { "epoch": 1.2, "learning_rate": 4.089692033860815e-07, "logits/chosen": -0.9690642356872559, "logits/rejected": -0.9215153455734253, "logps/chosen": -64.09129333496094, "logps/rejected": -166.85317993164062, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -0.1762584149837494, "rewards/margins": 8.921332359313965, "rewards/rejected": -9.097590446472168, "step": 793 }, { "epoch": 1.21, "learning_rate": 4.0766501201029363e-07, "logits/chosen": -1.0228105783462524, "logits/rejected": -0.9508757591247559, "logps/chosen": -81.23611450195312, "logps/rejected": -198.60714721679688, "loss": 0.0862, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1831510066986084, "rewards/margins": 9.553539276123047, "rewards/rejected": -10.736690521240234, "step": 794 }, { "epoch": 1.21, "learning_rate": 4.0636147070788643e-07, "logits/chosen": -1.290502667427063, "logits/rejected": -1.2714859247207642, "logps/chosen": -56.200138092041016, "logps/rejected": -134.22470092773438, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -0.44425803422927856, "rewards/margins": 6.808338165283203, "rewards/rejected": -7.252595901489258, "step": 795 }, { "epoch": 1.21, "learning_rate": 4.0505858865628575e-07, "logits/chosen": -1.2448666095733643, "logits/rejected": -1.2700674533843994, "logps/chosen": -70.80340576171875, "logps/rejected": -155.4434051513672, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -0.6390804052352905, "rewards/margins": 7.447866916656494, "rewards/rejected": -8.086947441101074, "step": 796 }, { "epoch": 1.21, "learning_rate": 4.0375637502827617e-07, "logits/chosen": -1.1842539310455322, "logits/rejected": -1.098061442375183, "logps/chosen": -56.274715423583984, "logps/rejected": -158.17466735839844, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 0.02078975737094879, "rewards/margins": 9.005572319030762, "rewards/rejected": -8.984783172607422, "step": 797 }, { "epoch": 1.21, "learning_rate": 4.0245483899193586e-07, "logits/chosen": -1.2747535705566406, "logits/rejected": -1.1594761610031128, "logps/chosen": -74.98949432373047, "logps/rejected": -149.13185119628906, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": -1.5290836095809937, "rewards/margins": 6.066576957702637, "rewards/rejected": -7.595660209655762, "step": 798 }, { "epoch": 1.21, "learning_rate": 4.011539897105729e-07, "logits/chosen": -1.3800020217895508, "logits/rejected": -1.3897123336791992, "logps/chosen": -66.18260192871094, "logps/rejected": -163.44715881347656, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.5650410652160645, "rewards/margins": 8.63727855682373, "rewards/rejected": -9.202320098876953, "step": 799 }, { "epoch": 1.22, "learning_rate": 3.9985383634266047e-07, "logits/chosen": -1.1964800357818604, "logits/rejected": -1.191982626914978, "logps/chosen": -55.670166015625, "logps/rejected": -126.46448516845703, "loss": 0.0501, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5088732838630676, "rewards/margins": 6.745451927185059, "rewards/rejected": -7.254324436187744, "step": 800 }, { "epoch": 1.22, "learning_rate": 3.985543880417716e-07, "logits/chosen": -1.2433134317398071, "logits/rejected": -1.143684983253479, "logps/chosen": -52.155784606933594, "logps/rejected": -148.7881317138672, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -0.35071849822998047, "rewards/margins": 7.880721569061279, "rewards/rejected": -8.231439590454102, "step": 801 }, { "epoch": 1.22, "learning_rate": 3.9725565395651604e-07, "logits/chosen": -1.1196131706237793, "logits/rejected": -1.0766024589538574, "logps/chosen": -71.76590728759766, "logps/rejected": -156.6300811767578, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -1.136284589767456, "rewards/margins": 7.6406779289245605, "rewards/rejected": -8.776963233947754, "step": 802 }, { "epoch": 1.22, "learning_rate": 3.9595764323047494e-07, "logits/chosen": -1.2914059162139893, "logits/rejected": -1.2229104042053223, "logps/chosen": -91.37772369384766, "logps/rejected": -204.822021484375, "loss": 0.0845, "rewards/accuracies": 0.9375, "rewards/chosen": -1.874307632446289, "rewards/margins": 9.642812728881836, "rewards/rejected": -11.517120361328125, "step": 803 }, { "epoch": 1.22, "learning_rate": 3.94660365002137e-07, "logits/chosen": -1.113565444946289, "logits/rejected": -1.1023532152175903, "logps/chosen": -63.81866455078125, "logps/rejected": -147.45748901367188, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.1928701400756836, "rewards/margins": 8.079753875732422, "rewards/rejected": -8.272624015808105, "step": 804 }, { "epoch": 1.22, "learning_rate": 3.933638284048331e-07, "logits/chosen": -1.329550862312317, "logits/rejected": -1.4320032596588135, "logps/chosen": -67.80931854248047, "logps/rejected": -131.1941375732422, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -0.4901169240474701, "rewards/margins": 6.499993801116943, "rewards/rejected": -6.990111351013184, "step": 805 }, { "epoch": 1.22, "learning_rate": 3.920680425666735e-07, "logits/chosen": -1.1646313667297363, "logits/rejected": -1.1623016595840454, "logps/chosen": -63.63052749633789, "logps/rejected": -144.2456512451172, "loss": 0.0449, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0916926860809326, "rewards/margins": 6.8504252433776855, "rewards/rejected": -7.942118167877197, "step": 806 }, { "epoch": 1.23, "learning_rate": 3.907730166104827e-07, "logits/chosen": -1.2554950714111328, "logits/rejected": -1.1933375597000122, "logps/chosen": -79.49732971191406, "logps/rejected": -189.63839721679688, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.5523549318313599, "rewards/margins": 10.084354400634766, "rewards/rejected": -10.636709213256836, "step": 807 }, { "epoch": 1.23, "learning_rate": 3.894787596537351e-07, "logits/chosen": -1.1525908708572388, "logits/rejected": -1.1488728523254395, "logps/chosen": -59.99504852294922, "logps/rejected": -141.78363037109375, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -0.23121660947799683, "rewards/margins": 7.967068672180176, "rewards/rejected": -8.198285102844238, "step": 808 }, { "epoch": 1.23, "learning_rate": 3.881852808084912e-07, "logits/chosen": -1.095705270767212, "logits/rejected": -0.9978544116020203, "logps/chosen": -59.360530853271484, "logps/rejected": -158.34080505371094, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -1.3611246347427368, "rewards/margins": 7.7559919357299805, "rewards/rejected": -9.117116928100586, "step": 809 }, { "epoch": 1.23, "learning_rate": 3.868925891813335e-07, "logits/chosen": -1.2054696083068848, "logits/rejected": -1.1077433824539185, "logps/chosen": -70.31130981445312, "logps/rejected": -193.58937072753906, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": -1.4210721254348755, "rewards/margins": 9.549147605895996, "rewards/rejected": -10.970220565795898, "step": 810 }, { "epoch": 1.23, "learning_rate": 3.856006938733016e-07, "logits/chosen": -1.473289966583252, "logits/rejected": -1.4685642719268799, "logps/chosen": -50.909339904785156, "logps/rejected": -128.1400909423828, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 0.2504890561103821, "rewards/margins": 7.203729152679443, "rewards/rejected": -6.953239440917969, "step": 811 }, { "epoch": 1.23, "learning_rate": 3.8430960397982926e-07, "logits/chosen": -1.1097276210784912, "logits/rejected": -1.0613540410995483, "logps/chosen": -91.93956756591797, "logps/rejected": -215.58001708984375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.9927880764007568, "rewards/margins": 11.322046279907227, "rewards/rejected": -12.314833641052246, "step": 812 }, { "epoch": 1.24, "learning_rate": 3.830193285906796e-07, "logits/chosen": -1.081833839416504, "logits/rejected": -1.1834239959716797, "logps/chosen": -47.86272048950195, "logps/rejected": -114.7242431640625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.044774383306503296, "rewards/margins": 6.425144672393799, "rewards/rejected": -6.469919204711914, "step": 813 }, { "epoch": 1.24, "learning_rate": 3.817298767898816e-07, "logits/chosen": -1.0170581340789795, "logits/rejected": -0.9848339557647705, "logps/chosen": -74.92778015136719, "logps/rejected": -209.4849853515625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.8389643430709839, "rewards/margins": 11.954238891601562, "rewards/rejected": -12.793205261230469, "step": 814 }, { "epoch": 1.24, "learning_rate": 3.804412576556652e-07, "logits/chosen": -1.427770972251892, "logits/rejected": -1.3738195896148682, "logps/chosen": -62.669166564941406, "logps/rejected": -155.71170043945312, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.2841857075691223, "rewards/margins": 8.41563606262207, "rewards/rejected": -8.699822425842285, "step": 815 }, { "epoch": 1.24, "learning_rate": 3.791534802603987e-07, "logits/chosen": -1.2190669775009155, "logits/rejected": -1.1652584075927734, "logps/chosen": -52.3328971862793, "logps/rejected": -121.18106079101562, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -0.26607978343963623, "rewards/margins": 6.681756019592285, "rewards/rejected": -6.947835445404053, "step": 816 }, { "epoch": 1.24, "learning_rate": 3.778665536705242e-07, "logits/chosen": -1.4151263236999512, "logits/rejected": -1.3526246547698975, "logps/chosen": -58.03881072998047, "logps/rejected": -131.90274047851562, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.752575159072876, "rewards/margins": 6.559599876403809, "rewards/rejected": -7.312175750732422, "step": 817 }, { "epoch": 1.24, "learning_rate": 3.765804869464932e-07, "logits/chosen": -1.149936318397522, "logits/rejected": -1.1367430686950684, "logps/chosen": -75.39720916748047, "logps/rejected": -209.57176208496094, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -1.0461437702178955, "rewards/margins": 11.42244815826416, "rewards/rejected": -12.468591690063477, "step": 818 }, { "epoch": 1.24, "learning_rate": 3.75295289142704e-07, "logits/chosen": -0.8759934902191162, "logits/rejected": -0.82037353515625, "logps/chosen": -57.84587860107422, "logps/rejected": -180.59344482421875, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -0.02280382812023163, "rewards/margins": 10.916155815124512, "rewards/rejected": -10.938959121704102, "step": 819 }, { "epoch": 1.25, "learning_rate": 3.7401096930743746e-07, "logits/chosen": -1.0861873626708984, "logits/rejected": -1.0199942588806152, "logps/chosen": -77.1715316772461, "logps/rejected": -179.63955688476562, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -0.759091854095459, "rewards/margins": 9.110944747924805, "rewards/rejected": -9.870037078857422, "step": 820 }, { "epoch": 1.25, "learning_rate": 3.727275364827926e-07, "logits/chosen": -1.1666381359100342, "logits/rejected": -1.1186622381210327, "logps/chosen": -66.40373229980469, "logps/rejected": -203.10476684570312, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.6987287402153015, "rewards/margins": 11.257447242736816, "rewards/rejected": -11.9561767578125, "step": 821 }, { "epoch": 1.25, "learning_rate": 3.714449997046241e-07, "logits/chosen": -1.1445003747940063, "logits/rejected": -1.2067350149154663, "logps/chosen": -82.43601989746094, "logps/rejected": -173.05908203125, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -1.9472718238830566, "rewards/margins": 9.079296112060547, "rewards/rejected": -11.026567459106445, "step": 822 }, { "epoch": 1.25, "learning_rate": 3.7016336800247775e-07, "logits/chosen": -1.030447244644165, "logits/rejected": -0.9534213542938232, "logps/chosen": -78.53152465820312, "logps/rejected": -185.37857055664062, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.0761345624923706, "rewards/margins": 10.263667106628418, "rewards/rejected": -11.339801788330078, "step": 823 }, { "epoch": 1.25, "learning_rate": 3.6888265039952795e-07, "logits/chosen": -1.092881202697754, "logits/rejected": -0.9708372950553894, "logps/chosen": -63.030845642089844, "logps/rejected": -187.64028930664062, "loss": 0.059, "rewards/accuracies": 0.875, "rewards/chosen": -0.619552493095398, "rewards/margins": 10.110761642456055, "rewards/rejected": -10.730313301086426, "step": 824 }, { "epoch": 1.25, "learning_rate": 3.6760285591251226e-07, "logits/chosen": -0.9170098900794983, "logits/rejected": -0.921739399433136, "logps/chosen": -59.816246032714844, "logps/rejected": -146.72691345214844, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.2829318344593048, "rewards/margins": 8.726126670837402, "rewards/rejected": -9.009058952331543, "step": 825 }, { "epoch": 1.25, "learning_rate": 3.663239935516704e-07, "logits/chosen": -1.335066318511963, "logits/rejected": -1.2408074140548706, "logps/chosen": -78.4832992553711, "logps/rejected": -193.43527221679688, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -1.1176406145095825, "rewards/margins": 8.863214492797852, "rewards/rejected": -9.980854034423828, "step": 826 }, { "epoch": 1.26, "learning_rate": 3.650460723206791e-07, "logits/chosen": -1.0009205341339111, "logits/rejected": -0.9793943762779236, "logps/chosen": -64.65348052978516, "logps/rejected": -153.94056701660156, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -1.0533581972122192, "rewards/margins": 8.320476531982422, "rewards/rejected": -9.373835563659668, "step": 827 }, { "epoch": 1.26, "learning_rate": 3.637691012165886e-07, "logits/chosen": -1.1569173336029053, "logits/rejected": -1.1553266048431396, "logps/chosen": -69.02538299560547, "logps/rejected": -170.2616424560547, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": -0.2905741333961487, "rewards/margins": 10.302106857299805, "rewards/rejected": -10.592680931091309, "step": 828 }, { "epoch": 1.26, "learning_rate": 3.6249308922976086e-07, "logits/chosen": -1.0629199743270874, "logits/rejected": -1.1610177755355835, "logps/chosen": -64.80350494384766, "logps/rejected": -153.3122100830078, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -0.4723438024520874, "rewards/margins": 8.992136001586914, "rewards/rejected": -9.464479446411133, "step": 829 }, { "epoch": 1.26, "learning_rate": 3.6121804534380496e-07, "logits/chosen": -0.9190477132797241, "logits/rejected": -0.8003554940223694, "logps/chosen": -71.7896957397461, "logps/rejected": -185.8133544921875, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.156874656677246, "rewards/margins": 9.655036926269531, "rewards/rejected": -10.811910629272461, "step": 830 }, { "epoch": 1.26, "learning_rate": 3.5994397853551356e-07, "logits/chosen": -1.0027974843978882, "logits/rejected": -0.952623724937439, "logps/chosen": -83.22335815429688, "logps/rejected": -215.75918579101562, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.064950704574585, "rewards/margins": 11.617621421813965, "rewards/rejected": -12.682572364807129, "step": 831 }, { "epoch": 1.26, "learning_rate": 3.586708977748012e-07, "logits/chosen": -1.0339128971099854, "logits/rejected": -1.0624173879623413, "logps/chosen": -60.685386657714844, "logps/rejected": -165.6641082763672, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -0.003927305340766907, "rewards/margins": 9.649250030517578, "rewards/rejected": -9.653176307678223, "step": 832 }, { "epoch": 1.27, "learning_rate": 3.5739881202463975e-07, "logits/chosen": -0.8943543434143066, "logits/rejected": -0.9420751333236694, "logps/chosen": -72.37063598632812, "logps/rejected": -166.33132934570312, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -1.4235678911209106, "rewards/margins": 8.860400199890137, "rewards/rejected": -10.28396987915039, "step": 833 }, { "epoch": 1.27, "learning_rate": 3.561277302409962e-07, "logits/chosen": -1.1255451440811157, "logits/rejected": -1.038440465927124, "logps/chosen": -81.85516357421875, "logps/rejected": -203.66510009765625, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -2.0208683013916016, "rewards/margins": 10.475278854370117, "rewards/rejected": -12.496146202087402, "step": 834 }, { "epoch": 1.27, "learning_rate": 3.548576613727689e-07, "logits/chosen": -1.1847028732299805, "logits/rejected": -1.1377449035644531, "logps/chosen": -71.52493286132812, "logps/rejected": -189.09942626953125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7599396705627441, "rewards/margins": 10.621832847595215, "rewards/rejected": -11.3817720413208, "step": 835 }, { "epoch": 1.27, "learning_rate": 3.535886143617248e-07, "logits/chosen": -1.2053256034851074, "logits/rejected": -1.1074981689453125, "logps/chosen": -64.15154266357422, "logps/rejected": -173.67425537109375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.6199431419372559, "rewards/margins": 9.468947410583496, "rewards/rejected": -10.08889102935791, "step": 836 }, { "epoch": 1.27, "learning_rate": 3.5232059814243713e-07, "logits/chosen": -1.1888346672058105, "logits/rejected": -1.097103476524353, "logps/chosen": -55.30160140991211, "logps/rejected": -211.38125610351562, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 0.3952859938144684, "rewards/margins": 12.751258850097656, "rewards/rejected": -12.355973243713379, "step": 837 }, { "epoch": 1.27, "learning_rate": 3.510536216422213e-07, "logits/chosen": -1.2884656190872192, "logits/rejected": -1.2741146087646484, "logps/chosen": -62.34508514404297, "logps/rejected": -171.33905029296875, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -0.272865891456604, "rewards/margins": 9.452814102172852, "rewards/rejected": -9.725680351257324, "step": 838 }, { "epoch": 1.27, "learning_rate": 3.497876937810732e-07, "logits/chosen": -1.2582658529281616, "logits/rejected": -1.2361618280410767, "logps/chosen": -70.01296997070312, "logps/rejected": -195.9732666015625, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -0.9307251572608948, "rewards/margins": 11.011921882629395, "rewards/rejected": -11.942646026611328, "step": 839 }, { "epoch": 1.28, "learning_rate": 3.485228234716058e-07, "logits/chosen": -1.075059175491333, "logits/rejected": -1.038475513458252, "logps/chosen": -72.45191955566406, "logps/rejected": -187.55517578125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6160510778427124, "rewards/margins": 9.863422393798828, "rewards/rejected": -10.479473114013672, "step": 840 }, { "epoch": 1.28, "learning_rate": 3.472590196189864e-07, "logits/chosen": -1.0447841882705688, "logits/rejected": -1.0268189907073975, "logps/chosen": -75.5906753540039, "logps/rejected": -186.53822326660156, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.042884960770606995, "rewards/margins": 10.343118667602539, "rewards/rejected": -10.386002540588379, "step": 841 }, { "epoch": 1.28, "learning_rate": 3.459962911208738e-07, "logits/chosen": -1.0275884866714478, "logits/rejected": -0.9557711482048035, "logps/chosen": -76.58280181884766, "logps/rejected": -199.26832580566406, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.4326503276824951, "rewards/margins": 11.00889778137207, "rewards/rejected": -12.441548347473145, "step": 842 }, { "epoch": 1.28, "learning_rate": 3.447346468673563e-07, "logits/chosen": -1.1951040029525757, "logits/rejected": -1.0923504829406738, "logps/chosen": -84.25811004638672, "logps/rejected": -200.55804443359375, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -1.4290473461151123, "rewards/margins": 9.567483901977539, "rewards/rejected": -10.99653148651123, "step": 843 }, { "epoch": 1.28, "learning_rate": 3.4347409574088894e-07, "logits/chosen": -0.8634222149848938, "logits/rejected": -0.7933879494667053, "logps/chosen": -41.86826705932617, "logps/rejected": -135.14114379882812, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -0.08744090050458908, "rewards/margins": 8.024269104003906, "rewards/rejected": -8.111709594726562, "step": 844 }, { "epoch": 1.28, "learning_rate": 3.4221464661622977e-07, "logits/chosen": -1.0899560451507568, "logits/rejected": -1.0018041133880615, "logps/chosen": -67.12272644042969, "logps/rejected": -200.95716857910156, "loss": 0.0339, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1569205522537231, "rewards/margins": 11.434322357177734, "rewards/rejected": -12.591243743896484, "step": 845 }, { "epoch": 1.29, "learning_rate": 3.409563083603793e-07, "logits/chosen": -1.3234093189239502, "logits/rejected": -1.3561903238296509, "logps/chosen": -81.11517333984375, "logps/rejected": -161.14039611816406, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -1.16130530834198, "rewards/margins": 7.4840898513793945, "rewards/rejected": -8.645394325256348, "step": 846 }, { "epoch": 1.29, "learning_rate": 3.396990898325166e-07, "logits/chosen": -1.1908986568450928, "logits/rejected": -1.0249378681182861, "logps/chosen": -75.77396392822266, "logps/rejected": -279.65606689453125, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.8964182734489441, "rewards/margins": 15.995891571044922, "rewards/rejected": -16.892311096191406, "step": 847 }, { "epoch": 1.29, "learning_rate": 3.384429998839375e-07, "logits/chosen": -1.1977099180221558, "logits/rejected": -1.2463692426681519, "logps/chosen": -65.84039306640625, "logps/rejected": -143.89614868164062, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -0.6831881999969482, "rewards/margins": 8.220685958862305, "rewards/rejected": -8.903874397277832, "step": 848 }, { "epoch": 1.29, "learning_rate": 3.3718804735799225e-07, "logits/chosen": -1.318107008934021, "logits/rejected": -1.2315919399261475, "logps/chosen": -71.2174072265625, "logps/rejected": -191.1319122314453, "loss": 0.0429, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7054893970489502, "rewards/margins": 10.151110649108887, "rewards/rejected": -10.856599807739258, "step": 849 }, { "epoch": 1.29, "learning_rate": 3.3593424109002335e-07, "logits/chosen": -1.2710603475570679, "logits/rejected": -1.2152063846588135, "logps/chosen": -61.5471076965332, "logps/rejected": -159.23065185546875, "loss": 0.0321, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08005739003419876, "rewards/margins": 8.800928115844727, "rewards/rejected": -8.880986213684082, "step": 850 }, { "epoch": 1.29, "learning_rate": 3.34681589907303e-07, "logits/chosen": -1.0689845085144043, "logits/rejected": -1.0591791868209839, "logps/chosen": -65.96229553222656, "logps/rejected": -159.93722534179688, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.3127668499946594, "rewards/margins": 8.901169776916504, "rewards/rejected": -9.213937759399414, "step": 851 }, { "epoch": 1.29, "learning_rate": 3.334301026289712e-07, "logits/chosen": -1.0889052152633667, "logits/rejected": -1.119452714920044, "logps/chosen": -85.632568359375, "logps/rejected": -188.4246063232422, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -1.2408623695373535, "rewards/margins": 9.83039665222168, "rewards/rejected": -11.071259498596191, "step": 852 }, { "epoch": 1.3, "learning_rate": 3.321797880659737e-07, "logits/chosen": -1.0406701564788818, "logits/rejected": -1.022117257118225, "logps/chosen": -59.495052337646484, "logps/rejected": -135.61944580078125, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -0.4191983640193939, "rewards/margins": 7.46809196472168, "rewards/rejected": -7.887290954589844, "step": 853 }, { "epoch": 1.3, "learning_rate": 3.309306550209999e-07, "logits/chosen": -1.2750275135040283, "logits/rejected": -1.2143597602844238, "logps/chosen": -62.65305709838867, "logps/rejected": -146.83177185058594, "loss": 0.0551, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5164964199066162, "rewards/margins": 7.442717552185059, "rewards/rejected": -7.959214210510254, "step": 854 }, { "epoch": 1.3, "learning_rate": 3.296827122884207e-07, "logits/chosen": -1.1609838008880615, "logits/rejected": -1.0337468385696411, "logps/chosen": -98.77885437011719, "logps/rejected": -226.49143981933594, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -1.982264518737793, "rewards/margins": 11.136384963989258, "rewards/rejected": -13.118648529052734, "step": 855 }, { "epoch": 1.3, "learning_rate": 3.2843596865422684e-07, "logits/chosen": -1.2014923095703125, "logits/rejected": -1.1845405101776123, "logps/chosen": -52.282691955566406, "logps/rejected": -130.2438201904297, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.42395687103271484, "rewards/margins": 7.3297038078308105, "rewards/rejected": -7.753661155700684, "step": 856 }, { "epoch": 1.3, "learning_rate": 3.271904328959672e-07, "logits/chosen": -1.0705763101577759, "logits/rejected": -1.0866658687591553, "logps/chosen": -82.63775634765625, "logps/rejected": -174.16571044921875, "loss": 0.0353, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7364760637283325, "rewards/margins": 8.609589576721191, "rewards/rejected": -10.346065521240234, "step": 857 }, { "epoch": 1.3, "learning_rate": 3.2594611378268614e-07, "logits/chosen": -1.2007231712341309, "logits/rejected": -1.2071821689605713, "logps/chosen": -67.26966094970703, "logps/rejected": -188.78158569335938, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.009529948234558, "rewards/margins": 10.658390045166016, "rewards/rejected": -11.667919158935547, "step": 858 }, { "epoch": 1.3, "learning_rate": 3.2470302007486303e-07, "logits/chosen": -1.2172272205352783, "logits/rejected": -1.1473592519760132, "logps/chosen": -58.447723388671875, "logps/rejected": -184.71939086914062, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -0.5823120474815369, "rewards/margins": 10.512475967407227, "rewards/rejected": -11.094788551330566, "step": 859 }, { "epoch": 1.31, "learning_rate": 3.234611605243496e-07, "logits/chosen": -1.0577294826507568, "logits/rejected": -1.0574690103530884, "logps/chosen": -66.60233306884766, "logps/rejected": -186.69564819335938, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -0.5992780327796936, "rewards/margins": 10.619293212890625, "rewards/rejected": -11.218571662902832, "step": 860 }, { "epoch": 1.31, "learning_rate": 3.222205438743089e-07, "logits/chosen": -0.9650871753692627, "logits/rejected": -0.9335703253746033, "logps/chosen": -94.79300689697266, "logps/rejected": -228.11412048339844, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.3178277015686035, "rewards/margins": 12.129016876220703, "rewards/rejected": -14.446845054626465, "step": 861 }, { "epoch": 1.31, "learning_rate": 3.2098117885915276e-07, "logits/chosen": -1.1251049041748047, "logits/rejected": -1.097298264503479, "logps/chosen": -62.1351318359375, "logps/rejected": -166.1202392578125, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -0.5515062212944031, "rewards/margins": 9.523969650268555, "rewards/rejected": -10.075475692749023, "step": 862 }, { "epoch": 1.31, "learning_rate": 3.1974307420448197e-07, "logits/chosen": -1.0989717245101929, "logits/rejected": -1.1117409467697144, "logps/chosen": -63.93228530883789, "logps/rejected": -137.04348754882812, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.0571870803833008, "rewards/margins": 7.001034736633301, "rewards/rejected": -8.058222770690918, "step": 863 }, { "epoch": 1.31, "learning_rate": 3.185062386270234e-07, "logits/chosen": -1.2424949407577515, "logits/rejected": -1.1999640464782715, "logps/chosen": -70.60078430175781, "logps/rejected": -175.35520935058594, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -0.8002070188522339, "rewards/margins": 9.057512283325195, "rewards/rejected": -9.857719421386719, "step": 864 }, { "epoch": 1.31, "learning_rate": 3.172706808345692e-07, "logits/chosen": -1.1185640096664429, "logits/rejected": -1.1533507108688354, "logps/chosen": -77.15667724609375, "logps/rejected": -178.48419189453125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -0.9563284516334534, "rewards/margins": 10.11512565612793, "rewards/rejected": -11.071453094482422, "step": 865 }, { "epoch": 1.32, "learning_rate": 3.1603640952591536e-07, "logits/chosen": -1.3847885131835938, "logits/rejected": -1.2616667747497559, "logps/chosen": -63.544517517089844, "logps/rejected": -188.02212524414062, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": -1.1738569736480713, "rewards/margins": 10.61096477508545, "rewards/rejected": -11.784821510314941, "step": 866 }, { "epoch": 1.32, "learning_rate": 3.1480343339080094e-07, "logits/chosen": -1.1350643634796143, "logits/rejected": -1.117767333984375, "logps/chosen": -51.981014251708984, "logps/rejected": -133.83802795410156, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.2095465511083603, "rewards/margins": 7.825038909912109, "rewards/rejected": -8.034584999084473, "step": 867 }, { "epoch": 1.32, "learning_rate": 3.135717611098457e-07, "logits/chosen": -1.0594005584716797, "logits/rejected": -1.1397340297698975, "logps/chosen": -70.15467834472656, "logps/rejected": -154.9226531982422, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.378109872341156, "rewards/margins": 9.015453338623047, "rewards/rejected": -9.393564224243164, "step": 868 }, { "epoch": 1.32, "learning_rate": 3.123414013544905e-07, "logits/chosen": -1.168516755104065, "logits/rejected": -1.1186997890472412, "logps/chosen": -72.56488800048828, "logps/rejected": -190.87008666992188, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.64239501953125, "rewards/margins": 10.973026275634766, "rewards/rejected": -11.615421295166016, "step": 869 }, { "epoch": 1.32, "learning_rate": 3.1111236278693525e-07, "logits/chosen": -1.1072605848312378, "logits/rejected": -1.0726549625396729, "logps/chosen": -76.71082305908203, "logps/rejected": -182.40530395507812, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -1.745031476020813, "rewards/margins": 9.732573509216309, "rewards/rejected": -11.477605819702148, "step": 870 }, { "epoch": 1.32, "learning_rate": 3.0988465406007837e-07, "logits/chosen": -1.0790177583694458, "logits/rejected": -0.9793330430984497, "logps/chosen": -68.66360473632812, "logps/rejected": -213.48367309570312, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.07770824432373047, "rewards/margins": 12.449033737182617, "rewards/rejected": -12.526741027832031, "step": 871 }, { "epoch": 1.32, "learning_rate": 3.086582838174551e-07, "logits/chosen": -1.2810640335083008, "logits/rejected": -1.2028204202651978, "logps/chosen": -67.54934692382812, "logps/rejected": -183.29315185546875, "loss": 0.0189, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3115649223327637, "rewards/margins": 10.0007963180542, "rewards/rejected": -11.312360763549805, "step": 872 }, { "epoch": 1.33, "learning_rate": 3.07433260693178e-07, "logits/chosen": -1.0981544256210327, "logits/rejected": -1.101778268814087, "logps/chosen": -69.46454620361328, "logps/rejected": -196.09434509277344, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.0734939575195312, "rewards/margins": 11.262301445007324, "rewards/rejected": -12.335796356201172, "step": 873 }, { "epoch": 1.33, "learning_rate": 3.062095933118752e-07, "logits/chosen": -1.1423189640045166, "logits/rejected": -1.0814754962921143, "logps/chosen": -68.90902709960938, "logps/rejected": -190.8826904296875, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -0.6413151025772095, "rewards/margins": 11.242748260498047, "rewards/rejected": -11.884064674377441, "step": 874 }, { "epoch": 1.33, "learning_rate": 3.0498729028862933e-07, "logits/chosen": -1.263510823249817, "logits/rejected": -1.2098865509033203, "logps/chosen": -98.67642974853516, "logps/rejected": -226.28778076171875, "loss": 0.0203, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4813520908355713, "rewards/margins": 11.984416007995605, "rewards/rejected": -15.46576976776123, "step": 875 }, { "epoch": 1.33, "learning_rate": 3.037663602289181e-07, "logits/chosen": -1.1308614015579224, "logits/rejected": -1.0499013662338257, "logps/chosen": -64.38939666748047, "logps/rejected": -198.14913940429688, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -0.9843230843544006, "rewards/margins": 10.785235404968262, "rewards/rejected": -11.76955795288086, "step": 876 }, { "epoch": 1.33, "learning_rate": 3.025468117285529e-07, "logits/chosen": -1.167764663696289, "logits/rejected": -1.0539222955703735, "logps/chosen": -68.39847564697266, "logps/rejected": -165.08724975585938, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.7871071100234985, "rewards/margins": 7.700448036193848, "rewards/rejected": -9.487555503845215, "step": 877 }, { "epoch": 1.33, "learning_rate": 3.013286533736183e-07, "logits/chosen": -1.1149120330810547, "logits/rejected": -0.9907094836235046, "logps/chosen": -81.09086608886719, "logps/rejected": -223.73294067382812, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -2.606088161468506, "rewards/margins": 11.589728355407715, "rewards/rejected": -14.195816993713379, "step": 878 }, { "epoch": 1.34, "learning_rate": 3.0011189374041145e-07, "logits/chosen": -0.905259370803833, "logits/rejected": -0.8425391316413879, "logps/chosen": -62.18824768066406, "logps/rejected": -181.83108520507812, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -1.1463936567306519, "rewards/margins": 10.523120880126953, "rewards/rejected": -11.669514656066895, "step": 879 }, { "epoch": 1.34, "learning_rate": 2.9889654139538244e-07, "logits/chosen": -1.012556552886963, "logits/rejected": -1.1096618175506592, "logps/chosen": -42.681976318359375, "logps/rejected": -131.0863037109375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 0.7648807764053345, "rewards/margins": 9.021519660949707, "rewards/rejected": -8.25663948059082, "step": 880 }, { "epoch": 1.34, "learning_rate": 2.9768260489507335e-07, "logits/chosen": -1.2592908143997192, "logits/rejected": -1.1268333196640015, "logps/chosen": -73.65986633300781, "logps/rejected": -220.07272338867188, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -1.2277494668960571, "rewards/margins": 11.876537322998047, "rewards/rejected": -13.104288101196289, "step": 881 }, { "epoch": 1.34, "learning_rate": 2.9647009278605803e-07, "logits/chosen": -1.167679786682129, "logits/rejected": -1.0784720182418823, "logps/chosen": -67.72370910644531, "logps/rejected": -196.77841186523438, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.8135742545127869, "rewards/margins": 11.023904800415039, "rewards/rejected": -11.837478637695312, "step": 882 }, { "epoch": 1.34, "learning_rate": 2.9525901360488235e-07, "logits/chosen": -0.9997549057006836, "logits/rejected": -0.9432014226913452, "logps/chosen": -74.05624389648438, "logps/rejected": -158.91029357910156, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -1.751084804534912, "rewards/margins": 8.212854385375977, "rewards/rejected": -9.963939666748047, "step": 883 }, { "epoch": 1.34, "learning_rate": 2.940493758780037e-07, "logits/chosen": -1.043877363204956, "logits/rejected": -0.949622631072998, "logps/chosen": -71.61334228515625, "logps/rejected": -199.0709686279297, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8465366363525391, "rewards/margins": 11.850892066955566, "rewards/rejected": -12.697427749633789, "step": 884 }, { "epoch": 1.34, "learning_rate": 2.9284118812173085e-07, "logits/chosen": -1.1592051982879639, "logits/rejected": -1.1231443881988525, "logps/chosen": -66.36738586425781, "logps/rejected": -169.8032989501953, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.8645541667938232, "rewards/margins": 9.401374816894531, "rewards/rejected": -10.265928268432617, "step": 885 }, { "epoch": 1.35, "learning_rate": 2.916344588421645e-07, "logits/chosen": -1.2972654104232788, "logits/rejected": -1.255252718925476, "logps/chosen": -93.02581787109375, "logps/rejected": -201.748046875, "loss": 0.023, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3491549491882324, "rewards/margins": 9.73039436340332, "rewards/rejected": -12.079549789428711, "step": 886 }, { "epoch": 1.35, "learning_rate": 2.904291965351369e-07, "logits/chosen": -1.4914642572402954, "logits/rejected": -1.396189570426941, "logps/chosen": -53.96504211425781, "logps/rejected": -151.813232421875, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -0.09026004374027252, "rewards/margins": 8.757599830627441, "rewards/rejected": -8.847860336303711, "step": 887 }, { "epoch": 1.35, "learning_rate": 2.8922540968615283e-07, "logits/chosen": -1.1544334888458252, "logits/rejected": -1.1415927410125732, "logps/chosen": -76.50936889648438, "logps/rejected": -199.18942260742188, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": -1.2693109512329102, "rewards/margins": 11.33557415008545, "rewards/rejected": -12.60488510131836, "step": 888 }, { "epoch": 1.35, "learning_rate": 2.880231067703285e-07, "logits/chosen": -1.0120527744293213, "logits/rejected": -0.9444929361343384, "logps/chosen": -56.10468292236328, "logps/rejected": -187.27781677246094, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.14829185605049133, "rewards/margins": 11.569108009338379, "rewards/rejected": -11.717399597167969, "step": 889 }, { "epoch": 1.35, "learning_rate": 2.8682229625233296e-07, "logits/chosen": -1.0918954610824585, "logits/rejected": -1.128036618232727, "logps/chosen": -60.16938018798828, "logps/rejected": -146.3104248046875, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -0.7777913808822632, "rewards/margins": 8.487791061401367, "rewards/rejected": -9.265583038330078, "step": 890 }, { "epoch": 1.35, "learning_rate": 2.856229865863288e-07, "logits/chosen": -1.184349536895752, "logits/rejected": -1.1841895580291748, "logps/chosen": -66.1183853149414, "logps/rejected": -151.86642456054688, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.2216904163360596, "rewards/margins": 8.370936393737793, "rewards/rejected": -9.592626571655273, "step": 891 }, { "epoch": 1.36, "learning_rate": 2.8442518621591084e-07, "logits/chosen": -1.2903201580047607, "logits/rejected": -1.2698955535888672, "logps/chosen": -64.32032012939453, "logps/rejected": -158.66183471679688, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -1.156549096107483, "rewards/margins": 8.878497123718262, "rewards/rejected": -10.035046577453613, "step": 892 }, { "epoch": 1.36, "learning_rate": 2.8322890357404907e-07, "logits/chosen": -1.0020402669906616, "logits/rejected": -0.8951206803321838, "logps/chosen": -68.79203033447266, "logps/rejected": -186.751953125, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.653872549533844, "rewards/margins": 10.672158241271973, "rewards/rejected": -11.326029777526855, "step": 893 }, { "epoch": 1.36, "learning_rate": 2.820341470830273e-07, "logits/chosen": -1.3208019733428955, "logits/rejected": -1.3319730758666992, "logps/chosen": -58.48689651489258, "logps/rejected": -175.47265625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.8185827136039734, "rewards/margins": 10.243091583251953, "rewards/rejected": -11.061674118041992, "step": 894 }, { "epoch": 1.36, "learning_rate": 2.808409251543852e-07, "logits/chosen": -1.2520232200622559, "logits/rejected": -1.1039667129516602, "logps/chosen": -88.31317138671875, "logps/rejected": -239.4126739501953, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.2212610244750977, "rewards/margins": 12.960351943969727, "rewards/rejected": -15.18161392211914, "step": 895 }, { "epoch": 1.36, "learning_rate": 2.7964924618885776e-07, "logits/chosen": -1.0469504594802856, "logits/rejected": -1.0014100074768066, "logps/chosen": -54.08551025390625, "logps/rejected": -160.48800659179688, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -0.3419005870819092, "rewards/margins": 9.732646942138672, "rewards/rejected": -10.07454776763916, "step": 896 }, { "epoch": 1.36, "learning_rate": 2.784591185763182e-07, "logits/chosen": -1.2565423250198364, "logits/rejected": -1.2446802854537964, "logps/chosen": -61.27532958984375, "logps/rejected": -143.79840087890625, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -0.73479825258255, "rewards/margins": 7.8365888595581055, "rewards/rejected": -8.571386337280273, "step": 897 }, { "epoch": 1.36, "learning_rate": 2.772705506957164e-07, "logits/chosen": -1.2247684001922607, "logits/rejected": -1.2073692083358765, "logps/chosen": -89.75728607177734, "logps/rejected": -168.08990478515625, "loss": 0.0496, "rewards/accuracies": 0.9375, "rewards/chosen": -2.443594217300415, "rewards/margins": 7.36766242980957, "rewards/rejected": -9.811256408691406, "step": 898 }, { "epoch": 1.37, "learning_rate": 2.760835509150218e-07, "logits/chosen": -1.235434651374817, "logits/rejected": -1.2565488815307617, "logps/chosen": -97.0019760131836, "logps/rejected": -203.24266052246094, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -2.119580030441284, "rewards/margins": 10.19863510131836, "rewards/rejected": -12.318215370178223, "step": 899 }, { "epoch": 1.37, "learning_rate": 2.748981275911633e-07, "logits/chosen": -1.2976423501968384, "logits/rejected": -1.2595832347869873, "logps/chosen": -89.51211547851562, "logps/rejected": -189.83468627929688, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -2.3441162109375, "rewards/margins": 9.426911354064941, "rewards/rejected": -11.771027565002441, "step": 900 }, { "epoch": 1.37, "learning_rate": 2.737142890699717e-07, "logits/chosen": -1.294965386390686, "logits/rejected": -1.2420074939727783, "logps/chosen": -71.51050567626953, "logps/rejected": -161.4373016357422, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.2380168437957764, "rewards/margins": 8.606657028198242, "rewards/rejected": -9.844674110412598, "step": 901 }, { "epoch": 1.37, "learning_rate": 2.725320436861197e-07, "logits/chosen": -0.9856861233711243, "logits/rejected": -0.8646342158317566, "logps/chosen": -92.23006439208984, "logps/rejected": -200.3787841796875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -3.5142266750335693, "rewards/margins": 8.967628479003906, "rewards/rejected": -12.481855392456055, "step": 902 }, { "epoch": 1.37, "learning_rate": 2.7135139976306344e-07, "logits/chosen": -1.17044198513031, "logits/rejected": -1.1947274208068848, "logps/chosen": -93.87423706054688, "logps/rejected": -209.61061096191406, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -2.7065603733062744, "rewards/margins": 10.6439790725708, "rewards/rejected": -13.350540161132812, "step": 903 }, { "epoch": 1.37, "learning_rate": 2.701723656129851e-07, "logits/chosen": -1.2732253074645996, "logits/rejected": -1.2353371381759644, "logps/chosen": -76.51490783691406, "logps/rejected": -197.60107421875, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -1.4052677154541016, "rewards/margins": 10.876425743103027, "rewards/rejected": -12.281692504882812, "step": 904 }, { "epoch": 1.37, "learning_rate": 2.6899494953673204e-07, "logits/chosen": -1.1939207315444946, "logits/rejected": -1.193476915359497, "logps/chosen": -99.97769165039062, "logps/rejected": -203.38156127929688, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.6601791381835938, "rewards/margins": 10.198871612548828, "rewards/rejected": -12.859050750732422, "step": 905 }, { "epoch": 1.38, "learning_rate": 2.6781915982376124e-07, "logits/chosen": -1.0214887857437134, "logits/rejected": -0.8655680418014526, "logps/chosen": -71.07418823242188, "logps/rejected": -215.4347381591797, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.7634833455085754, "rewards/margins": 12.360746383666992, "rewards/rejected": -13.12423038482666, "step": 906 }, { "epoch": 1.38, "learning_rate": 2.666450047520784e-07, "logits/chosen": -1.0108181238174438, "logits/rejected": -0.9619965553283691, "logps/chosen": -65.72087097167969, "logps/rejected": -172.912109375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.2579199075698853, "rewards/margins": 9.430094718933105, "rewards/rejected": -10.68801498413086, "step": 907 }, { "epoch": 1.38, "learning_rate": 2.6547249258818163e-07, "logits/chosen": -1.0633748769760132, "logits/rejected": -1.0480328798294067, "logps/chosen": -62.42863464355469, "logps/rejected": -164.85560607910156, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.9218290448188782, "rewards/margins": 9.805809020996094, "rewards/rejected": -10.727638244628906, "step": 908 }, { "epoch": 1.38, "learning_rate": 2.6430163158700113e-07, "logits/chosen": -1.2999529838562012, "logits/rejected": -1.3151614665985107, "logps/chosen": -54.91654586791992, "logps/rejected": -136.32664489746094, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.7202343344688416, "rewards/margins": 7.663908004760742, "rewards/rejected": -8.38414192199707, "step": 909 }, { "epoch": 1.38, "learning_rate": 2.631324299918436e-07, "logits/chosen": -1.0010817050933838, "logits/rejected": -0.9361670613288879, "logps/chosen": -75.12582397460938, "logps/rejected": -207.30267333984375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.3898661136627197, "rewards/margins": 11.849888801574707, "rewards/rejected": -13.239754676818848, "step": 910 }, { "epoch": 1.38, "learning_rate": 2.61964896034332e-07, "logits/chosen": -1.0734792947769165, "logits/rejected": -1.0554006099700928, "logps/chosen": -69.49102020263672, "logps/rejected": -178.4953155517578, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.8538084030151367, "rewards/margins": 9.01848030090332, "rewards/rejected": -10.87228775024414, "step": 911 }, { "epoch": 1.39, "learning_rate": 2.6079903793434887e-07, "logits/chosen": -1.1782310009002686, "logits/rejected": -1.2167466878890991, "logps/chosen": -86.17131042480469, "logps/rejected": -211.46607971191406, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.6431995630264282, "rewards/margins": 11.687231063842773, "rewards/rejected": -13.33043098449707, "step": 912 }, { "epoch": 1.39, "learning_rate": 2.596348638999778e-07, "logits/chosen": -1.1233220100402832, "logits/rejected": -1.0774215459823608, "logps/chosen": -61.160343170166016, "logps/rejected": -161.19154357910156, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -1.0652896165847778, "rewards/margins": 9.370325088500977, "rewards/rejected": -10.435614585876465, "step": 913 }, { "epoch": 1.39, "learning_rate": 2.584723821274464e-07, "logits/chosen": -0.9774546027183533, "logits/rejected": -0.8842563033103943, "logps/chosen": -61.087642669677734, "logps/rejected": -179.74330139160156, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.837219774723053, "rewards/margins": 9.888167381286621, "rewards/rejected": -10.725385665893555, "step": 914 }, { "epoch": 1.39, "learning_rate": 2.573116008010676e-07, "logits/chosen": -1.2813230752944946, "logits/rejected": -1.1978058815002441, "logps/chosen": -68.40570068359375, "logps/rejected": -208.50985717773438, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -0.7870779633522034, "rewards/margins": 12.170300483703613, "rewards/rejected": -12.957378387451172, "step": 915 }, { "epoch": 1.39, "learning_rate": 2.561525280931828e-07, "logits/chosen": -1.0995240211486816, "logits/rejected": -1.116148829460144, "logps/chosen": -84.81562042236328, "logps/rejected": -194.66766357421875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.6737269163131714, "rewards/margins": 10.005136489868164, "rewards/rejected": -11.678862571716309, "step": 916 }, { "epoch": 1.39, "learning_rate": 2.5499517216410395e-07, "logits/chosen": -1.1118059158325195, "logits/rejected": -1.1057324409484863, "logps/chosen": -67.32406616210938, "logps/rejected": -156.33616638183594, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.0997236967086792, "rewards/margins": 8.82157039642334, "rewards/rejected": -9.921294212341309, "step": 917 }, { "epoch": 1.39, "learning_rate": 2.5383954116205654e-07, "logits/chosen": -1.3008923530578613, "logits/rejected": -1.3098472356796265, "logps/chosen": -76.5130615234375, "logps/rejected": -188.46707153320312, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.248567819595337, "rewards/margins": 10.24660587310791, "rewards/rejected": -11.495174407958984, "step": 918 }, { "epoch": 1.4, "learning_rate": 2.526856432231216e-07, "logits/chosen": -1.0838638544082642, "logits/rejected": -0.9704316854476929, "logps/chosen": -68.77642059326172, "logps/rejected": -216.4132843017578, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.5356925129890442, "rewards/margins": 12.238455772399902, "rewards/rejected": -12.774148941040039, "step": 919 }, { "epoch": 1.4, "learning_rate": 2.5153348647117856e-07, "logits/chosen": -1.1567587852478027, "logits/rejected": -1.0584115982055664, "logps/chosen": -90.18541717529297, "logps/rejected": -229.05059814453125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -2.25400447845459, "rewards/margins": 12.28674602508545, "rewards/rejected": -14.540752410888672, "step": 920 }, { "epoch": 1.4, "learning_rate": 2.5038307901784904e-07, "logits/chosen": -1.1250687837600708, "logits/rejected": -1.1044715642929077, "logps/chosen": -44.823463439941406, "logps/rejected": -126.68445587158203, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.23479335010051727, "rewards/margins": 7.561722278594971, "rewards/rejected": -7.796515464782715, "step": 921 }, { "epoch": 1.4, "learning_rate": 2.492344289624378e-07, "logits/chosen": -1.1347781419754028, "logits/rejected": -1.0972087383270264, "logps/chosen": -69.84147644042969, "logps/rejected": -156.32574462890625, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.4627699851989746, "rewards/margins": 8.338072776794434, "rewards/rejected": -9.800844192504883, "step": 922 }, { "epoch": 1.4, "learning_rate": 2.4808754439187787e-07, "logits/chosen": -0.8981828689575195, "logits/rejected": -0.7866100668907166, "logps/chosen": -63.81999969482422, "logps/rejected": -186.1649169921875, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.6151156425476074, "rewards/margins": 11.367277145385742, "rewards/rejected": -11.982391357421875, "step": 923 }, { "epoch": 1.4, "learning_rate": 2.469424333806718e-07, "logits/chosen": -1.3061175346374512, "logits/rejected": -1.2760297060012817, "logps/chosen": -64.2739486694336, "logps/rejected": -141.9420623779297, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.9888159036636353, "rewards/margins": 7.480759620666504, "rewards/rejected": -8.469575881958008, "step": 924 }, { "epoch": 1.41, "learning_rate": 2.457991039908366e-07, "logits/chosen": -1.2425321340560913, "logits/rejected": -1.1818774938583374, "logps/chosen": -75.50288391113281, "logps/rejected": -207.9646759033203, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -1.088285207748413, "rewards/margins": 10.636645317077637, "rewards/rejected": -11.724930763244629, "step": 925 }, { "epoch": 1.41, "learning_rate": 2.446575642718445e-07, "logits/chosen": -1.065239429473877, "logits/rejected": -1.007594108581543, "logps/chosen": -81.72013854980469, "logps/rejected": -202.5800018310547, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.3376941680908203, "rewards/margins": 11.226885795593262, "rewards/rejected": -12.564579963684082, "step": 926 }, { "epoch": 1.41, "learning_rate": 2.435178222605694e-07, "logits/chosen": -1.0966297388076782, "logits/rejected": -1.0844006538391113, "logps/chosen": -81.39220428466797, "logps/rejected": -178.13729858398438, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.0578868389129639, "rewards/margins": 9.506040573120117, "rewards/rejected": -10.563926696777344, "step": 927 }, { "epoch": 1.41, "learning_rate": 2.423798859812275e-07, "logits/chosen": -1.332690715789795, "logits/rejected": -1.3125532865524292, "logps/chosen": -69.65312957763672, "logps/rejected": -174.8885040283203, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.8267205953598022, "rewards/margins": 9.282591819763184, "rewards/rejected": -11.109312057495117, "step": 928 }, { "epoch": 1.41, "learning_rate": 2.4124376344532244e-07, "logits/chosen": -1.3996777534484863, "logits/rejected": -1.4283418655395508, "logps/chosen": -86.49020385742188, "logps/rejected": -194.10888671875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.7857542037963867, "rewards/margins": 10.515002250671387, "rewards/rejected": -12.30075740814209, "step": 929 }, { "epoch": 1.41, "learning_rate": 2.4010946265158815e-07, "logits/chosen": -1.1814100742340088, "logits/rejected": -1.0767438411712646, "logps/chosen": -80.02114868164062, "logps/rejected": -203.32337951660156, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.471388578414917, "rewards/margins": 11.117501258850098, "rewards/rejected": -12.588889122009277, "step": 930 }, { "epoch": 1.41, "learning_rate": 2.389769915859334e-07, "logits/chosen": -1.0770986080169678, "logits/rejected": -1.0264323949813843, "logps/chosen": -71.16810607910156, "logps/rejected": -199.77919006347656, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -1.39163076877594, "rewards/margins": 11.245088577270508, "rewards/rejected": -12.63671875, "step": 931 }, { "epoch": 1.42, "learning_rate": 2.378463582213842e-07, "logits/chosen": -1.1755002737045288, "logits/rejected": -1.146294355392456, "logps/chosen": -58.88164520263672, "logps/rejected": -172.73194885253906, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0842905044555664, "rewards/margins": 10.46119213104248, "rewards/rejected": -11.545482635498047, "step": 932 }, { "epoch": 1.42, "learning_rate": 2.3671757051802882e-07, "logits/chosen": -1.2189280986785889, "logits/rejected": -1.1856813430786133, "logps/chosen": -65.66744995117188, "logps/rejected": -178.7213134765625, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.6168245077133179, "rewards/margins": 9.953254699707031, "rewards/rejected": -10.57007884979248, "step": 933 }, { "epoch": 1.42, "learning_rate": 2.3559063642296163e-07, "logits/chosen": -1.2218457460403442, "logits/rejected": -1.1435271501541138, "logps/chosen": -58.35125732421875, "logps/rejected": -175.0428466796875, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.49362438917160034, "rewards/margins": 10.824305534362793, "rewards/rejected": -11.317931175231934, "step": 934 }, { "epoch": 1.42, "learning_rate": 2.3446556387022644e-07, "logits/chosen": -1.4056589603424072, "logits/rejected": -1.2883325815200806, "logps/chosen": -76.65266418457031, "logps/rejected": -184.23593139648438, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.369076728820801, "rewards/margins": 9.001468658447266, "rewards/rejected": -11.370546340942383, "step": 935 }, { "epoch": 1.42, "learning_rate": 2.3334236078076126e-07, "logits/chosen": -1.1952333450317383, "logits/rejected": -1.1824768781661987, "logps/chosen": -107.90875244140625, "logps/rejected": -253.9272003173828, "loss": 0.0241, "rewards/accuracies": 0.9375, "rewards/chosen": -3.239903211593628, "rewards/margins": 12.69080924987793, "rewards/rejected": -15.93071174621582, "step": 936 }, { "epoch": 1.42, "learning_rate": 2.322210350623423e-07, "logits/chosen": -1.2290904521942139, "logits/rejected": -1.187170147895813, "logps/chosen": -59.167877197265625, "logps/rejected": -143.3047637939453, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.4827635288238525, "rewards/margins": 7.0325422286987305, "rewards/rejected": -8.515304565429688, "step": 937 }, { "epoch": 1.42, "learning_rate": 2.3110159460952894e-07, "logits/chosen": -1.0048002004623413, "logits/rejected": -1.0073782205581665, "logps/chosen": -42.752349853515625, "logps/rejected": -119.19047546386719, "loss": 0.0169, "rewards/accuracies": 0.9375, "rewards/chosen": 0.19428841769695282, "rewards/margins": 7.295149326324463, "rewards/rejected": -7.100860595703125, "step": 938 }, { "epoch": 1.43, "learning_rate": 2.2998404730360632e-07, "logits/chosen": -1.1652112007141113, "logits/rejected": -1.1065256595611572, "logps/chosen": -86.21635437011719, "logps/rejected": -182.9861602783203, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -2.4450039863586426, "rewards/margins": 8.922704696655273, "rewards/rejected": -11.367709159851074, "step": 939 }, { "epoch": 1.43, "learning_rate": 2.2886840101253247e-07, "logits/chosen": -1.4147456884384155, "logits/rejected": -1.3730257749557495, "logps/chosen": -61.425506591796875, "logps/rejected": -178.63778686523438, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.104467749595642, "rewards/margins": 10.780162811279297, "rewards/rejected": -11.884631156921387, "step": 940 }, { "epoch": 1.43, "learning_rate": 2.277546635908808e-07, "logits/chosen": -1.211792230606079, "logits/rejected": -1.1947301626205444, "logps/chosen": -63.770347595214844, "logps/rejected": -158.47808837890625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.442031741142273, "rewards/margins": 8.224104881286621, "rewards/rejected": -9.666136741638184, "step": 941 }, { "epoch": 1.43, "learning_rate": 2.2664284287978568e-07, "logits/chosen": -1.1722586154937744, "logits/rejected": -1.0732977390289307, "logps/chosen": -73.99296569824219, "logps/rejected": -199.77081298828125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.9483563303947449, "rewards/margins": 11.3140287399292, "rewards/rejected": -12.262385368347168, "step": 942 }, { "epoch": 1.43, "learning_rate": 2.2553294670688693e-07, "logits/chosen": -1.2367777824401855, "logits/rejected": -1.2036511898040771, "logps/chosen": -68.31490325927734, "logps/rejected": -156.6162872314453, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -1.096004843711853, "rewards/margins": 8.337125778198242, "rewards/rejected": -9.433130264282227, "step": 943 }, { "epoch": 1.43, "learning_rate": 2.2442498288627555e-07, "logits/chosen": -1.0678187608718872, "logits/rejected": -0.9708797931671143, "logps/chosen": -58.706153869628906, "logps/rejected": -194.00180053710938, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.7698225975036621, "rewards/margins": 11.374448776245117, "rewards/rejected": -12.144271850585938, "step": 944 }, { "epoch": 1.44, "learning_rate": 2.2331895921843736e-07, "logits/chosen": -0.8866348266601562, "logits/rejected": -0.8945915102958679, "logps/chosen": -71.54261016845703, "logps/rejected": -213.23483276367188, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.1069694757461548, "rewards/margins": 13.323320388793945, "rewards/rejected": -14.430290222167969, "step": 945 }, { "epoch": 1.44, "learning_rate": 2.2221488349019902e-07, "logits/chosen": -0.9755781292915344, "logits/rejected": -0.8473294377326965, "logps/chosen": -62.1094856262207, "logps/rejected": -177.36032104492188, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.2787407636642456, "rewards/margins": 10.158745765686035, "rewards/rejected": -11.43748664855957, "step": 946 }, { "epoch": 1.44, "learning_rate": 2.2111276347467273e-07, "logits/chosen": -1.450989007949829, "logits/rejected": -1.4887750148773193, "logps/chosen": -49.60207748413086, "logps/rejected": -119.30854797363281, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -0.638967752456665, "rewards/margins": 6.7576751708984375, "rewards/rejected": -7.396642684936523, "step": 947 }, { "epoch": 1.44, "learning_rate": 2.2001260693120232e-07, "logits/chosen": -1.2514420747756958, "logits/rejected": -1.2486284971237183, "logps/chosen": -73.5145263671875, "logps/rejected": -168.6020965576172, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.3822462558746338, "rewards/margins": 9.278196334838867, "rewards/rejected": -10.660442352294922, "step": 948 }, { "epoch": 1.44, "learning_rate": 2.189144216053075e-07, "logits/chosen": -1.361900806427002, "logits/rejected": -1.2939060926437378, "logps/chosen": -66.65200805664062, "logps/rejected": -193.84371948242188, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.5823947787284851, "rewards/margins": 11.211771011352539, "rewards/rejected": -11.794166564941406, "step": 949 }, { "epoch": 1.44, "learning_rate": 2.1781821522862982e-07, "logits/chosen": -1.2740370035171509, "logits/rejected": -1.25923490524292, "logps/chosen": -83.86727905273438, "logps/rejected": -196.86184692382812, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.2171200513839722, "rewards/margins": 10.744917869567871, "rewards/rejected": -11.9620361328125, "step": 950 }, { "epoch": 1.44, "learning_rate": 2.1672399551887882e-07, "logits/chosen": -1.039217472076416, "logits/rejected": -0.9383636116981506, "logps/chosen": -87.74999237060547, "logps/rejected": -198.6109161376953, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.7518144845962524, "rewards/margins": 10.704536437988281, "rewards/rejected": -12.456352233886719, "step": 951 }, { "epoch": 1.45, "learning_rate": 2.1563177017977657e-07, "logits/chosen": -1.0178163051605225, "logits/rejected": -1.0056424140930176, "logps/chosen": -62.35600280761719, "logps/rejected": -155.6488494873047, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.5035408735275269, "rewards/margins": 8.838569641113281, "rewards/rejected": -9.342110633850098, "step": 952 }, { "epoch": 1.45, "learning_rate": 2.1454154690100434e-07, "logits/chosen": -0.967587947845459, "logits/rejected": -0.9174187779426575, "logps/chosen": -77.53561401367188, "logps/rejected": -187.43785095214844, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.443828821182251, "rewards/margins": 10.672246932983398, "rewards/rejected": -12.116077423095703, "step": 953 }, { "epoch": 1.45, "learning_rate": 2.134533333581478e-07, "logits/chosen": -1.0567262172698975, "logits/rejected": -0.9337789416313171, "logps/chosen": -63.441650390625, "logps/rejected": -181.72671508789062, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.6463605165481567, "rewards/margins": 10.389936447143555, "rewards/rejected": -11.036297798156738, "step": 954 }, { "epoch": 1.45, "learning_rate": 2.1236713721264416e-07, "logits/chosen": -1.3626728057861328, "logits/rejected": -1.2640372514724731, "logps/chosen": -73.54210662841797, "logps/rejected": -187.09765625, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.5929640531539917, "rewards/margins": 9.273469924926758, "rewards/rejected": -10.866434097290039, "step": 955 }, { "epoch": 1.45, "learning_rate": 2.112829661117259e-07, "logits/chosen": -1.2080678939819336, "logits/rejected": -1.213911533355713, "logps/chosen": -61.92439270019531, "logps/rejected": -166.87149047851562, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -0.23983648419380188, "rewards/margins": 10.470162391662598, "rewards/rejected": -10.709999084472656, "step": 956 }, { "epoch": 1.45, "learning_rate": 2.1020082768837e-07, "logits/chosen": -1.2203989028930664, "logits/rejected": -0.993392825126648, "logps/chosen": -79.013427734375, "logps/rejected": -252.40550231933594, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.2387796640396118, "rewards/margins": 14.322129249572754, "rewards/rejected": -15.560907363891602, "step": 957 }, { "epoch": 1.46, "learning_rate": 2.0912072956124166e-07, "logits/chosen": -0.9172457456588745, "logits/rejected": -0.803143322467804, "logps/chosen": -72.17263793945312, "logps/rejected": -193.0008544921875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.4307657480239868, "rewards/margins": 10.762880325317383, "rewards/rejected": -12.193644523620605, "step": 958 }, { "epoch": 1.46, "learning_rate": 2.0804267933464192e-07, "logits/chosen": -1.259220004081726, "logits/rejected": -1.3154269456863403, "logps/chosen": -68.33270263671875, "logps/rejected": -174.4493408203125, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": -0.544312059879303, "rewards/margins": 10.16675090789795, "rewards/rejected": -10.711063385009766, "step": 959 }, { "epoch": 1.46, "learning_rate": 2.0696668459845352e-07, "logits/chosen": -1.0741063356399536, "logits/rejected": -1.010119080543518, "logps/chosen": -57.818946838378906, "logps/rejected": -186.72503662109375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5061759948730469, "rewards/margins": 11.49766731262207, "rewards/rejected": -12.003843307495117, "step": 960 }, { "epoch": 1.46, "learning_rate": 2.0589275292808844e-07, "logits/chosen": -1.4062213897705078, "logits/rejected": -1.4807547330856323, "logps/chosen": -71.68291473388672, "logps/rejected": -164.98974609375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.5269719362258911, "rewards/margins": 9.205770492553711, "rewards/rejected": -9.732741355895996, "step": 961 }, { "epoch": 1.46, "learning_rate": 2.048208918844333e-07, "logits/chosen": -1.1317811012268066, "logits/rejected": -1.110825538635254, "logps/chosen": -88.1626205444336, "logps/rejected": -243.67288208007812, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1779937744140625, "rewards/margins": 13.70100212097168, "rewards/rejected": -14.878996849060059, "step": 962 }, { "epoch": 1.46, "learning_rate": 2.0375110901379672e-07, "logits/chosen": -1.1075657606124878, "logits/rejected": -1.0189461708068848, "logps/chosen": -83.45532989501953, "logps/rejected": -226.39210510253906, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.797542929649353, "rewards/margins": 12.032994270324707, "rewards/rejected": -13.830537796020508, "step": 963 }, { "epoch": 1.46, "learning_rate": 2.026834118478567e-07, "logits/chosen": -1.2279272079467773, "logits/rejected": -1.0915871858596802, "logps/chosen": -90.93171691894531, "logps/rejected": -213.2340545654297, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -3.0455241203308105, "rewards/margins": 10.05965805053711, "rewards/rejected": -13.105180740356445, "step": 964 }, { "epoch": 1.47, "learning_rate": 2.0161780790360656e-07, "logits/chosen": -1.2595267295837402, "logits/rejected": -1.1609396934509277, "logps/chosen": -93.37518310546875, "logps/rejected": -227.83084106445312, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -2.054603099822998, "rewards/margins": 12.473885536193848, "rewards/rejected": -14.528489112854004, "step": 965 }, { "epoch": 1.47, "learning_rate": 2.005543046833028e-07, "logits/chosen": -1.173004388809204, "logits/rejected": -1.1194164752960205, "logps/chosen": -77.56388854980469, "logps/rejected": -209.70628356933594, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.4635385274887085, "rewards/margins": 11.74592399597168, "rewards/rejected": -13.209463119506836, "step": 966 }, { "epoch": 1.47, "learning_rate": 1.994929096744118e-07, "logits/chosen": -1.002015471458435, "logits/rejected": -0.8927529454231262, "logps/chosen": -84.05574798583984, "logps/rejected": -219.0233917236328, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -1.806456208229065, "rewards/margins": 11.940408706665039, "rewards/rejected": -13.746864318847656, "step": 967 }, { "epoch": 1.47, "learning_rate": 1.9843363034955795e-07, "logits/chosen": -1.0354502201080322, "logits/rejected": -0.826656699180603, "logps/chosen": -75.4397201538086, "logps/rejected": -226.78677368164062, "loss": 0.0515, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4359992742538452, "rewards/margins": 13.272261619567871, "rewards/rejected": -14.708261489868164, "step": 968 }, { "epoch": 1.47, "learning_rate": 1.9737647416646935e-07, "logits/chosen": -0.9954381585121155, "logits/rejected": -0.926822304725647, "logps/chosen": -69.42559814453125, "logps/rejected": -169.87826538085938, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.4862395524978638, "rewards/margins": 9.249549865722656, "rewards/rejected": -10.735790252685547, "step": 969 }, { "epoch": 1.47, "learning_rate": 1.9632144856792748e-07, "logits/chosen": -1.2075226306915283, "logits/rejected": -1.0959432125091553, "logps/chosen": -83.94832611083984, "logps/rejected": -228.6538543701172, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.8144811391830444, "rewards/margins": 12.6270751953125, "rewards/rejected": -14.441555976867676, "step": 970 }, { "epoch": 1.48, "learning_rate": 1.9526856098171285e-07, "logits/chosen": -1.2695893049240112, "logits/rejected": -1.1151591539382935, "logps/chosen": -65.54235076904297, "logps/rejected": -203.7481689453125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.806349754333496, "rewards/margins": 11.04599380493164, "rewards/rejected": -12.852344512939453, "step": 971 }, { "epoch": 1.48, "learning_rate": 1.9421781882055443e-07, "logits/chosen": -1.1956473588943481, "logits/rejected": -1.1801730394363403, "logps/chosen": -71.11809539794922, "logps/rejected": -146.0307159423828, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.5277948379516602, "rewards/margins": 7.8370041847229, "rewards/rejected": -9.364798545837402, "step": 972 }, { "epoch": 1.48, "learning_rate": 1.9316922948207542e-07, "logits/chosen": -1.0702711343765259, "logits/rejected": -1.00505793094635, "logps/chosen": -76.5950927734375, "logps/rejected": -201.7117462158203, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -1.2814533710479736, "rewards/margins": 11.382299423217773, "rewards/rejected": -12.663751602172852, "step": 973 }, { "epoch": 1.48, "learning_rate": 1.921228003487435e-07, "logits/chosen": -1.1192896366119385, "logits/rejected": -1.006330966949463, "logps/chosen": -82.68030548095703, "logps/rejected": -230.17153930664062, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.709291696548462, "rewards/margins": 12.382524490356445, "rewards/rejected": -14.091817855834961, "step": 974 }, { "epoch": 1.48, "learning_rate": 1.9107853878781693e-07, "logits/chosen": -0.970392644405365, "logits/rejected": -0.9290090203285217, "logps/chosen": -71.13276672363281, "logps/rejected": -173.7963409423828, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -1.4579185247421265, "rewards/margins": 10.079607963562012, "rewards/rejected": -11.537527084350586, "step": 975 }, { "epoch": 1.48, "learning_rate": 1.9003645215129355e-07, "logits/chosen": -0.8659820556640625, "logits/rejected": -0.9184397459030151, "logps/chosen": -51.90268325805664, "logps/rejected": -124.06462097167969, "loss": 0.0266, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5599133372306824, "rewards/margins": 7.936857223510742, "rewards/rejected": -8.496770858764648, "step": 976 }, { "epoch": 1.48, "learning_rate": 1.8899654777585932e-07, "logits/chosen": -1.1589514017105103, "logits/rejected": -1.1574431657791138, "logps/chosen": -79.15308380126953, "logps/rejected": -195.00845336914062, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -2.4611399173736572, "rewards/margins": 10.257494926452637, "rewards/rejected": -12.718634605407715, "step": 977 }, { "epoch": 1.49, "learning_rate": 1.8795883298283583e-07, "logits/chosen": -0.9930282235145569, "logits/rejected": -0.9695369005203247, "logps/chosen": -70.81267547607422, "logps/rejected": -166.0614776611328, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -2.1345221996307373, "rewards/margins": 8.868050575256348, "rewards/rejected": -11.002573013305664, "step": 978 }, { "epoch": 1.49, "learning_rate": 1.8692331507812925e-07, "logits/chosen": -1.2379798889160156, "logits/rejected": -1.1774482727050781, "logps/chosen": -74.19889068603516, "logps/rejected": -196.72525024414062, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.3309439420700073, "rewards/margins": 11.356307029724121, "rewards/rejected": -12.687250137329102, "step": 979 }, { "epoch": 1.49, "learning_rate": 1.858900013521788e-07, "logits/chosen": -1.2760841846466064, "logits/rejected": -1.3011666536331177, "logps/chosen": -78.61241149902344, "logps/rejected": -183.62718200683594, "loss": 0.0153, "rewards/accuracies": 0.9375, "rewards/chosen": -2.298687696456909, "rewards/margins": 9.355890274047852, "rewards/rejected": -11.65457820892334, "step": 980 }, { "epoch": 1.49, "learning_rate": 1.8485889907990576e-07, "logits/chosen": -1.2428977489471436, "logits/rejected": -1.201936960220337, "logps/chosen": -68.67544555664062, "logps/rejected": -179.7180938720703, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.5715904235839844, "rewards/margins": 10.29904556274414, "rewards/rejected": -11.870635986328125, "step": 981 }, { "epoch": 1.49, "learning_rate": 1.8383001552066162e-07, "logits/chosen": -0.9922041893005371, "logits/rejected": -0.9825817346572876, "logps/chosen": -59.647212982177734, "logps/rejected": -136.5089569091797, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.3703937530517578, "rewards/margins": 7.617739677429199, "rewards/rejected": -8.988134384155273, "step": 982 }, { "epoch": 1.49, "learning_rate": 1.828033579181773e-07, "logits/chosen": -1.0681895017623901, "logits/rejected": -0.9357763528823853, "logps/chosen": -62.36621856689453, "logps/rejected": -179.41033935546875, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.1313633918762207, "rewards/margins": 10.333325386047363, "rewards/rejected": -11.464689254760742, "step": 983 }, { "epoch": 1.49, "learning_rate": 1.817789335005121e-07, "logits/chosen": -1.045304536819458, "logits/rejected": -1.0401690006256104, "logps/chosen": -71.23977661132812, "logps/rejected": -191.34725952148438, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.3006216287612915, "rewards/margins": 11.322778701782227, "rewards/rejected": -12.623400688171387, "step": 984 }, { "epoch": 1.5, "learning_rate": 1.807567494800034e-07, "logits/chosen": -1.2853546142578125, "logits/rejected": -1.1250349283218384, "logps/chosen": -85.35986328125, "logps/rejected": -244.93942260742188, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -2.169968605041504, "rewards/margins": 13.561067581176758, "rewards/rejected": -15.731036186218262, "step": 985 }, { "epoch": 1.5, "learning_rate": 1.7973681305321426e-07, "logits/chosen": -1.049625039100647, "logits/rejected": -0.8729156255722046, "logps/chosen": -73.04606628417969, "logps/rejected": -207.68057250976562, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.765155553817749, "rewards/margins": 11.577472686767578, "rewards/rejected": -13.342628479003906, "step": 986 }, { "epoch": 1.5, "learning_rate": 1.7871913140088497e-07, "logits/chosen": -1.1029876470565796, "logits/rejected": -1.0141730308532715, "logps/chosen": -93.17534637451172, "logps/rejected": -228.5102081298828, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -2.936983346939087, "rewards/margins": 11.797006607055664, "rewards/rejected": -14.733988761901855, "step": 987 }, { "epoch": 1.5, "learning_rate": 1.777037116878804e-07, "logits/chosen": -1.1183948516845703, "logits/rejected": -1.0615265369415283, "logps/chosen": -69.53657531738281, "logps/rejected": -185.73117065429688, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -1.2220745086669922, "rewards/margins": 9.990640640258789, "rewards/rejected": -11.212716102600098, "step": 988 }, { "epoch": 1.5, "learning_rate": 1.7669056106314162e-07, "logits/chosen": -1.1812067031860352, "logits/rejected": -1.1323652267456055, "logps/chosen": -89.53622436523438, "logps/rejected": -201.0779571533203, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.0934267044067383, "rewards/margins": 10.49223804473877, "rewards/rejected": -12.585663795471191, "step": 989 }, { "epoch": 1.5, "learning_rate": 1.7567968665963296e-07, "logits/chosen": -1.2452868223190308, "logits/rejected": -1.234442114830017, "logps/chosen": -75.95500183105469, "logps/rejected": -168.5236053466797, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -1.4998321533203125, "rewards/margins": 8.998800277709961, "rewards/rejected": -10.498632431030273, "step": 990 }, { "epoch": 1.51, "learning_rate": 1.7467109559429466e-07, "logits/chosen": -0.9743750691413879, "logits/rejected": -0.8114648461341858, "logps/chosen": -91.20507049560547, "logps/rejected": -236.1385498046875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.929940700531006, "rewards/margins": 12.097036361694336, "rewards/rejected": -15.026976585388184, "step": 991 }, { "epoch": 1.51, "learning_rate": 1.7366479496799074e-07, "logits/chosen": -1.07231605052948, "logits/rejected": -1.0738083124160767, "logps/chosen": -88.92713928222656, "logps/rejected": -203.30935668945312, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -1.9227832555770874, "rewards/margins": 10.71860122680664, "rewards/rejected": -12.641385078430176, "step": 992 }, { "epoch": 1.51, "learning_rate": 1.7266079186545956e-07, "logits/chosen": -1.1185206174850464, "logits/rejected": -1.049841046333313, "logps/chosen": -77.78147888183594, "logps/rejected": -178.53648376464844, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -2.051157236099243, "rewards/margins": 9.125386238098145, "rewards/rejected": -11.176543235778809, "step": 993 }, { "epoch": 1.51, "learning_rate": 1.7165909335526453e-07, "logits/chosen": -0.9872387051582336, "logits/rejected": -0.817636251449585, "logps/chosen": -98.48197937011719, "logps/rejected": -267.2848205566406, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -2.6456620693206787, "rewards/margins": 13.687142372131348, "rewards/rejected": -16.332805633544922, "step": 994 }, { "epoch": 1.51, "learning_rate": 1.7065970648974343e-07, "logits/chosen": -1.0143423080444336, "logits/rejected": -0.9600812792778015, "logps/chosen": -59.890235900878906, "logps/rejected": -154.62332153320312, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -0.262808620929718, "rewards/margins": 8.929404258728027, "rewards/rejected": -9.19221305847168, "step": 995 }, { "epoch": 1.51, "learning_rate": 1.6966263830495935e-07, "logits/chosen": -1.0823190212249756, "logits/rejected": -1.0538568496704102, "logps/chosen": -62.140281677246094, "logps/rejected": -173.5354461669922, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.23087503015995026, "rewards/margins": 10.62917709350586, "rewards/rejected": -10.860052108764648, "step": 996 }, { "epoch": 1.51, "learning_rate": 1.6866789582065078e-07, "logits/chosen": -1.244469404220581, "logits/rejected": -1.1900691986083984, "logps/chosen": -72.99565124511719, "logps/rejected": -180.4878387451172, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.373180627822876, "rewards/margins": 9.928434371948242, "rewards/rejected": -11.301615715026855, "step": 997 }, { "epoch": 1.52, "learning_rate": 1.6767548604018289e-07, "logits/chosen": -1.1198558807373047, "logits/rejected": -1.0448769330978394, "logps/chosen": -90.15465545654297, "logps/rejected": -233.24017333984375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.387511968612671, "rewards/margins": 12.50436019897461, "rewards/rejected": -14.89187240600586, "step": 998 }, { "epoch": 1.52, "learning_rate": 1.6668541595049724e-07, "logits/chosen": -1.2688748836517334, "logits/rejected": -1.2117234468460083, "logps/chosen": -74.76568603515625, "logps/rejected": -207.88917541503906, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.0294783115386963, "rewards/margins": 12.35433578491211, "rewards/rejected": -13.383813858032227, "step": 999 }, { "epoch": 1.52, "learning_rate": 1.6569769252206328e-07, "logits/chosen": -1.2145278453826904, "logits/rejected": -1.1683098077774048, "logps/chosen": -89.501953125, "logps/rejected": -212.30300903320312, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.642409324645996, "rewards/margins": 11.852131843566895, "rewards/rejected": -13.49454116821289, "step": 1000 }, { "epoch": 1.52, "learning_rate": 1.6471232270882883e-07, "logits/chosen": -1.2460477352142334, "logits/rejected": -1.2110188007354736, "logps/chosen": -73.52095031738281, "logps/rejected": -198.0554656982422, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.7613444328308105, "rewards/margins": 11.186271667480469, "rewards/rejected": -12.947614669799805, "step": 1001 }, { "epoch": 1.52, "learning_rate": 1.6372931344817214e-07, "logits/chosen": -1.0738894939422607, "logits/rejected": -0.957828938961029, "logps/chosen": -100.32572174072266, "logps/rejected": -272.6159362792969, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -2.615443706512451, "rewards/margins": 14.619828224182129, "rewards/rejected": -17.235271453857422, "step": 1002 }, { "epoch": 1.52, "learning_rate": 1.6274867166085105e-07, "logits/chosen": -1.0579982995986938, "logits/rejected": -0.90909343957901, "logps/chosen": -82.44677734375, "logps/rejected": -228.25204467773438, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -1.4460911750793457, "rewards/margins": 13.131321907043457, "rewards/rejected": -14.577413558959961, "step": 1003 }, { "epoch": 1.53, "learning_rate": 1.6177040425095663e-07, "logits/chosen": -0.9436768889427185, "logits/rejected": -0.8684688806533813, "logps/chosen": -64.3193130493164, "logps/rejected": -185.3251953125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.9520430564880371, "rewards/margins": 11.001495361328125, "rewards/rejected": -11.95353889465332, "step": 1004 }, { "epoch": 1.53, "learning_rate": 1.6079451810586276e-07, "logits/chosen": -1.2696284055709839, "logits/rejected": -1.1688989400863647, "logps/chosen": -83.22187805175781, "logps/rejected": -239.7696533203125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.726318597793579, "rewards/margins": 13.411822319030762, "rewards/rejected": -15.138140678405762, "step": 1005 }, { "epoch": 1.53, "learning_rate": 1.5982102009617832e-07, "logits/chosen": -1.1807959079742432, "logits/rejected": -1.1397391557693481, "logps/chosen": -65.00617980957031, "logps/rejected": -173.939697265625, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.8103467226028442, "rewards/margins": 10.269601821899414, "rewards/rejected": -11.079949378967285, "step": 1006 }, { "epoch": 1.53, "learning_rate": 1.5884991707569945e-07, "logits/chosen": -1.0643589496612549, "logits/rejected": -1.0183255672454834, "logps/chosen": -80.94669342041016, "logps/rejected": -239.7604522705078, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -1.8289381265640259, "rewards/margins": 13.630366325378418, "rewards/rejected": -15.45930290222168, "step": 1007 }, { "epoch": 1.53, "learning_rate": 1.5788121588135972e-07, "logits/chosen": -1.4639328718185425, "logits/rejected": -1.3777185678482056, "logps/chosen": -74.71759796142578, "logps/rejected": -170.49945068359375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -1.5641496181488037, "rewards/margins": 8.775116920471191, "rewards/rejected": -10.339265823364258, "step": 1008 }, { "epoch": 1.53, "learning_rate": 1.5691492333318402e-07, "logits/chosen": -1.0215314626693726, "logits/rejected": -0.8911815285682678, "logps/chosen": -59.70145034790039, "logps/rejected": -179.14625549316406, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.588193416595459, "rewards/margins": 10.755075454711914, "rewards/rejected": -11.343268394470215, "step": 1009 }, { "epoch": 1.53, "learning_rate": 1.559510462342381e-07, "logits/chosen": -1.0152214765548706, "logits/rejected": -0.9721912741661072, "logps/chosen": -64.24443817138672, "logps/rejected": -162.21929931640625, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -1.1861426830291748, "rewards/margins": 9.191162109375, "rewards/rejected": -10.377304077148438, "step": 1010 }, { "epoch": 1.54, "learning_rate": 1.5498959137058339e-07, "logits/chosen": -1.160400152206421, "logits/rejected": -1.0179263353347778, "logps/chosen": -77.59323120117188, "logps/rejected": -207.6278533935547, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.159334659576416, "rewards/margins": 12.358423233032227, "rewards/rejected": -13.5177583694458, "step": 1011 }, { "epoch": 1.54, "learning_rate": 1.5403056551122694e-07, "logits/chosen": -0.99965500831604, "logits/rejected": -0.8801881074905396, "logps/chosen": -63.581539154052734, "logps/rejected": -166.3328857421875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.1613078117370605, "rewards/margins": 9.462004661560059, "rewards/rejected": -10.623311996459961, "step": 1012 }, { "epoch": 1.54, "learning_rate": 1.530739754080751e-07, "logits/chosen": -1.1453322172164917, "logits/rejected": -1.1141060590744019, "logps/chosen": -69.9078369140625, "logps/rejected": -170.01576232910156, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9004390239715576, "rewards/margins": 9.831196784973145, "rewards/rejected": -10.731636047363281, "step": 1013 }, { "epoch": 1.54, "learning_rate": 1.5211982779588534e-07, "logits/chosen": -1.238998293876648, "logits/rejected": -1.168033242225647, "logps/chosen": -44.39051055908203, "logps/rejected": -132.7353515625, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": 0.0709717869758606, "rewards/margins": 8.28502368927002, "rewards/rejected": -8.214052200317383, "step": 1014 }, { "epoch": 1.54, "learning_rate": 1.5116812939221962e-07, "logits/chosen": -1.2873257398605347, "logits/rejected": -1.162929892539978, "logps/chosen": -84.09979248046875, "logps/rejected": -211.19015502929688, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.2492868900299072, "rewards/margins": 10.819801330566406, "rewards/rejected": -13.069087028503418, "step": 1015 }, { "epoch": 1.54, "learning_rate": 1.5021888689739547e-07, "logits/chosen": -0.9515085816383362, "logits/rejected": -0.8473897576332092, "logps/chosen": -65.8515853881836, "logps/rejected": -222.41104125976562, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.4497348368167877, "rewards/margins": 13.304035186767578, "rewards/rejected": -13.753767013549805, "step": 1016 }, { "epoch": 1.55, "learning_rate": 1.4927210699444103e-07, "logits/chosen": -1.0177326202392578, "logits/rejected": -0.946979820728302, "logps/chosen": -101.52448272705078, "logps/rejected": -212.36209106445312, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.146263360977173, "rewards/margins": 10.454294204711914, "rewards/rejected": -13.600556373596191, "step": 1017 }, { "epoch": 1.55, "learning_rate": 1.4832779634904608e-07, "logits/chosen": -1.0603152513504028, "logits/rejected": -0.9408301711082458, "logps/chosen": -73.39222717285156, "logps/rejected": -208.1505126953125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.2289427518844604, "rewards/margins": 12.148762702941895, "rewards/rejected": -13.377705574035645, "step": 1018 }, { "epoch": 1.55, "learning_rate": 1.4738596160951645e-07, "logits/chosen": -1.0486927032470703, "logits/rejected": -0.9102394580841064, "logps/chosen": -61.55266571044922, "logps/rejected": -188.68402099609375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.9976965188980103, "rewards/margins": 11.183516502380371, "rewards/rejected": -12.18121337890625, "step": 1019 }, { "epoch": 1.55, "learning_rate": 1.4644660940672627e-07, "logits/chosen": -1.242224931716919, "logits/rejected": -1.1961392164230347, "logps/chosen": -61.681758880615234, "logps/rejected": -154.76602172851562, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -1.1015557050704956, "rewards/margins": 8.659936904907227, "rewards/rejected": -9.761491775512695, "step": 1020 }, { "epoch": 1.55, "learning_rate": 1.455097463540717e-07, "logits/chosen": -1.018207311630249, "logits/rejected": -0.7073631286621094, "logps/chosen": -84.21482849121094, "logps/rejected": -262.6379699707031, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.0382328033447266, "rewards/margins": 14.893991470336914, "rewards/rejected": -16.932226181030273, "step": 1021 }, { "epoch": 1.55, "learning_rate": 1.445753790474245e-07, "logits/chosen": -1.041068196296692, "logits/rejected": -0.9955125451087952, "logps/chosen": -67.77099609375, "logps/rejected": -174.4007110595703, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.6224466562271118, "rewards/margins": 9.725898742675781, "rewards/rejected": -11.348344802856445, "step": 1022 }, { "epoch": 1.55, "learning_rate": 1.436435140650852e-07, "logits/chosen": -1.3501616716384888, "logits/rejected": -1.4464925527572632, "logps/chosen": -88.64131927490234, "logps/rejected": -190.63385009765625, "loss": 0.0155, "rewards/accuracies": 0.9375, "rewards/chosen": -2.100098133087158, "rewards/margins": 9.95816421508789, "rewards/rejected": -12.058262825012207, "step": 1023 }, { "epoch": 1.56, "learning_rate": 1.427141579677374e-07, "logits/chosen": -0.9608927369117737, "logits/rejected": -0.9767427444458008, "logps/chosen": -54.372188568115234, "logps/rejected": -143.82839965820312, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.326077938079834, "rewards/margins": 8.830038070678711, "rewards/rejected": -9.156116485595703, "step": 1024 }, { "epoch": 1.56, "learning_rate": 1.417873172984006e-07, "logits/chosen": -0.7643793225288391, "logits/rejected": -0.7025930881500244, "logps/chosen": -54.08414077758789, "logps/rejected": -185.32861328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.43912678956985474, "rewards/margins": 12.152729988098145, "rewards/rejected": -12.591856956481934, "step": 1025 }, { "epoch": 1.56, "learning_rate": 1.408629985823857e-07, "logits/chosen": -1.1674954891204834, "logits/rejected": -1.1374739408493042, "logps/chosen": -76.21036529541016, "logps/rejected": -189.28546142578125, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.2749236822128296, "rewards/margins": 11.543883323669434, "rewards/rejected": -12.818806648254395, "step": 1026 }, { "epoch": 1.56, "learning_rate": 1.3994120832724677e-07, "logits/chosen": -1.1271106004714966, "logits/rejected": -1.1465332508087158, "logps/chosen": -67.489990234375, "logps/rejected": -147.8130645751953, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -1.7015074491500854, "rewards/margins": 7.949741840362549, "rewards/rejected": -9.651248931884766, "step": 1027 }, { "epoch": 1.56, "learning_rate": 1.3902195302273778e-07, "logits/chosen": -1.2397160530090332, "logits/rejected": -1.264036774635315, "logps/chosen": -48.882606506347656, "logps/rejected": -148.37850952148438, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 0.11703987419605255, "rewards/margins": 9.144170761108398, "rewards/rejected": -9.027130126953125, "step": 1028 }, { "epoch": 1.56, "learning_rate": 1.38105239140765e-07, "logits/chosen": -1.2493691444396973, "logits/rejected": -1.210587739944458, "logps/chosen": -59.75130844116211, "logps/rejected": -181.18606567382812, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.3405154347419739, "rewards/margins": 11.08544921875, "rewards/rejected": -11.425966262817383, "step": 1029 }, { "epoch": 1.56, "learning_rate": 1.3719107313534223e-07, "logits/chosen": -0.9990692734718323, "logits/rejected": -0.9542756080627441, "logps/chosen": -70.06559753417969, "logps/rejected": -171.16131591796875, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.5102357864379883, "rewards/margins": 8.921667098999023, "rewards/rejected": -10.431903839111328, "step": 1030 }, { "epoch": 1.57, "learning_rate": 1.362794614425452e-07, "logits/chosen": -1.1448475122451782, "logits/rejected": -1.093543529510498, "logps/chosen": -79.64584350585938, "logps/rejected": -173.28114318847656, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.9164886474609375, "rewards/margins": 8.729981422424316, "rewards/rejected": -10.646470069885254, "step": 1031 }, { "epoch": 1.57, "learning_rate": 1.3537041048046692e-07, "logits/chosen": -1.044460415840149, "logits/rejected": -1.036932349205017, "logps/chosen": -70.70366668701172, "logps/rejected": -182.9157257080078, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.5457505583763123, "rewards/margins": 11.093820571899414, "rewards/rejected": -11.639572143554688, "step": 1032 }, { "epoch": 1.57, "learning_rate": 1.344639266491708e-07, "logits/chosen": -1.442973017692566, "logits/rejected": -1.3494670391082764, "logps/chosen": -73.3709487915039, "logps/rejected": -219.76773071289062, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -1.2042829990386963, "rewards/margins": 13.003188133239746, "rewards/rejected": -14.20747184753418, "step": 1033 }, { "epoch": 1.57, "learning_rate": 1.3356001633064761e-07, "logits/chosen": -1.2116889953613281, "logits/rejected": -1.123871088027954, "logps/chosen": -72.24166107177734, "logps/rejected": -189.9979248046875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.5618987083435059, "rewards/margins": 10.573122024536133, "rewards/rejected": -12.135021209716797, "step": 1034 }, { "epoch": 1.57, "learning_rate": 1.32658685888769e-07, "logits/chosen": -1.1446404457092285, "logits/rejected": -1.1779420375823975, "logps/chosen": -79.43793487548828, "logps/rejected": -170.514404296875, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -2.1790692806243896, "rewards/margins": 9.217103958129883, "rewards/rejected": -11.396173477172852, "step": 1035 }, { "epoch": 1.57, "learning_rate": 1.3175994166924392e-07, "logits/chosen": -1.1650694608688354, "logits/rejected": -1.0598572492599487, "logps/chosen": -65.18163299560547, "logps/rejected": -196.41819763183594, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.2281684875488281, "rewards/margins": 11.982068061828613, "rewards/rejected": -13.210236549377441, "step": 1036 }, { "epoch": 1.58, "learning_rate": 1.3086378999957276e-07, "logits/chosen": -1.1600673198699951, "logits/rejected": -1.049504041671753, "logps/chosen": -97.90890502929688, "logps/rejected": -248.05784606933594, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -2.4427404403686523, "rewards/margins": 13.142251968383789, "rewards/rejected": -15.584993362426758, "step": 1037 }, { "epoch": 1.58, "learning_rate": 1.2997023718900352e-07, "logits/chosen": -0.8932716846466064, "logits/rejected": -0.770018458366394, "logps/chosen": -82.77156066894531, "logps/rejected": -231.54843139648438, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -2.1219401359558105, "rewards/margins": 12.232475280761719, "rewards/rejected": -14.354415893554688, "step": 1038 }, { "epoch": 1.58, "learning_rate": 1.2907928952848773e-07, "logits/chosen": -1.12833833694458, "logits/rejected": -1.050106406211853, "logps/chosen": -55.28139114379883, "logps/rejected": -156.7615966796875, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -0.4876255393028259, "rewards/margins": 9.004829406738281, "rewards/rejected": -9.492454528808594, "step": 1039 }, { "epoch": 1.58, "learning_rate": 1.2819095329063466e-07, "logits/chosen": -0.8341787457466125, "logits/rejected": -0.7984386682510376, "logps/chosen": -90.12042236328125, "logps/rejected": -218.397216796875, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -2.062849283218384, "rewards/margins": 11.312891960144043, "rewards/rejected": -13.375741004943848, "step": 1040 }, { "epoch": 1.58, "learning_rate": 1.2730523472966924e-07, "logits/chosen": -1.3362005949020386, "logits/rejected": -1.1511693000793457, "logps/chosen": -72.99873352050781, "logps/rejected": -211.25875854492188, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -1.2961386442184448, "rewards/margins": 11.985052108764648, "rewards/rejected": -13.281190872192383, "step": 1041 }, { "epoch": 1.58, "learning_rate": 1.2642214008138642e-07, "logits/chosen": -1.3697905540466309, "logits/rejected": -1.3827335834503174, "logps/chosen": -80.32099151611328, "logps/rejected": -196.6895294189453, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -2.268894910812378, "rewards/margins": 10.322794914245605, "rewards/rejected": -12.591690063476562, "step": 1042 }, { "epoch": 1.58, "learning_rate": 1.255416755631078e-07, "logits/chosen": -1.0065165758132935, "logits/rejected": -0.987860918045044, "logps/chosen": -58.82838439941406, "logps/rejected": -180.65725708007812, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.2405383586883545, "rewards/margins": 9.941941261291504, "rewards/rejected": -11.182479858398438, "step": 1043 }, { "epoch": 1.59, "learning_rate": 1.2466384737363779e-07, "logits/chosen": -1.1970065832138062, "logits/rejected": -1.130141258239746, "logps/chosen": -90.82254028320312, "logps/rejected": -208.41156005859375, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -1.5388426780700684, "rewards/margins": 11.361875534057617, "rewards/rejected": -12.900716781616211, "step": 1044 }, { "epoch": 1.59, "learning_rate": 1.2378866169322062e-07, "logits/chosen": -0.7908114790916443, "logits/rejected": -0.6253232955932617, "logps/chosen": -73.36567687988281, "logps/rejected": -178.9694061279297, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.912410855293274, "rewards/margins": 9.460705757141113, "rewards/rejected": -11.373116493225098, "step": 1045 }, { "epoch": 1.59, "learning_rate": 1.2291612468349554e-07, "logits/chosen": -1.3395980596542358, "logits/rejected": -1.2968026399612427, "logps/chosen": -49.36256408691406, "logps/rejected": -129.70005798339844, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.3536704182624817, "rewards/margins": 7.407713413238525, "rewards/rejected": -7.7613844871521, "step": 1046 }, { "epoch": 1.59, "learning_rate": 1.220462424874546e-07, "logits/chosen": -1.2717788219451904, "logits/rejected": -1.1471407413482666, "logps/chosen": -82.47772216796875, "logps/rejected": -215.64578247070312, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.8614273071289062, "rewards/margins": 11.546565055847168, "rewards/rejected": -13.407992362976074, "step": 1047 }, { "epoch": 1.59, "learning_rate": 1.211790212293986e-07, "logits/chosen": -1.333188772201538, "logits/rejected": -1.2694209814071655, "logps/chosen": -83.06002807617188, "logps/rejected": -161.90350341796875, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -1.4736416339874268, "rewards/margins": 8.215384483337402, "rewards/rejected": -9.68902587890625, "step": 1048 }, { "epoch": 1.59, "learning_rate": 1.2031446701489478e-07, "logits/chosen": -1.0990818738937378, "logits/rejected": -0.9182289838790894, "logps/chosen": -101.83406829833984, "logps/rejected": -278.59649658203125, "loss": 0.0199, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2921340465545654, "rewards/margins": 13.88831901550293, "rewards/rejected": -17.180452346801758, "step": 1049 }, { "epoch": 1.6, "learning_rate": 1.194525859307331e-07, "logits/chosen": -1.4561948776245117, "logits/rejected": -1.4749445915222168, "logps/chosen": -70.3445053100586, "logps/rejected": -168.951904296875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.176849126815796, "rewards/margins": 9.659462928771973, "rewards/rejected": -10.836312294006348, "step": 1050 }, { "epoch": 1.6, "learning_rate": 1.1859338404488339e-07, "logits/chosen": -1.0425996780395508, "logits/rejected": -0.8686420321464539, "logps/chosen": -83.05058288574219, "logps/rejected": -254.65374755859375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.5835621356964111, "rewards/margins": 15.090636253356934, "rewards/rejected": -16.6742000579834, "step": 1051 }, { "epoch": 1.6, "learning_rate": 1.1773686740645383e-07, "logits/chosen": -1.0283887386322021, "logits/rejected": -0.8973429799079895, "logps/chosen": -69.80270385742188, "logps/rejected": -225.36180114746094, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.8019357323646545, "rewards/margins": 12.729826927185059, "rewards/rejected": -13.531761169433594, "step": 1052 }, { "epoch": 1.6, "learning_rate": 1.1688304204564614e-07, "logits/chosen": -1.1241620779037476, "logits/rejected": -1.2368371486663818, "logps/chosen": -58.863121032714844, "logps/rejected": -160.85916137695312, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.7072696089744568, "rewards/margins": 9.642600059509277, "rewards/rejected": -10.349868774414062, "step": 1053 }, { "epoch": 1.6, "learning_rate": 1.1603191397371558e-07, "logits/chosen": -1.241661548614502, "logits/rejected": -1.1888474225997925, "logps/chosen": -77.02825927734375, "logps/rejected": -177.23056030273438, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.189224123954773, "rewards/margins": 9.070510864257812, "rewards/rejected": -10.259737014770508, "step": 1054 }, { "epoch": 1.6, "learning_rate": 1.1518348918292675e-07, "logits/chosen": -1.247529149055481, "logits/rejected": -1.127182960510254, "logps/chosen": -75.47073364257812, "logps/rejected": -201.03326416015625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -1.533700942993164, "rewards/margins": 10.706246376037598, "rewards/rejected": -12.239948272705078, "step": 1055 }, { "epoch": 1.6, "learning_rate": 1.143377736465127e-07, "logits/chosen": -1.1370017528533936, "logits/rejected": -1.0267651081085205, "logps/chosen": -104.05082702636719, "logps/rejected": -227.39601135253906, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -3.871164321899414, "rewards/margins": 11.378036499023438, "rewards/rejected": -15.249199867248535, "step": 1056 }, { "epoch": 1.61, "learning_rate": 1.134947733186315e-07, "logits/chosen": -1.0723297595977783, "logits/rejected": -0.9450559616088867, "logps/chosen": -79.4171142578125, "logps/rejected": -208.52902221679688, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.940129280090332, "rewards/margins": 10.709497451782227, "rewards/rejected": -12.649626731872559, "step": 1057 }, { "epoch": 1.61, "learning_rate": 1.1265449413432598e-07, "logits/chosen": -1.2580574750900269, "logits/rejected": -1.2264395952224731, "logps/chosen": -71.53569030761719, "logps/rejected": -161.5540771484375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -1.8972296714782715, "rewards/margins": 8.918797492980957, "rewards/rejected": -10.81602668762207, "step": 1058 }, { "epoch": 1.61, "learning_rate": 1.118169420094806e-07, "logits/chosen": -1.21419095993042, "logits/rejected": -1.1768602132797241, "logps/chosen": -63.22785568237305, "logps/rejected": -188.97410583496094, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.7150611877441406, "rewards/margins": 11.163619995117188, "rewards/rejected": -11.878681182861328, "step": 1059 }, { "epoch": 1.61, "learning_rate": 1.1098212284078035e-07, "logits/chosen": -0.8862547278404236, "logits/rejected": -0.7847481369972229, "logps/chosen": -57.296485900878906, "logps/rejected": -165.74615478515625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.118424654006958, "rewards/margins": 9.22490406036377, "rewards/rejected": -10.343327522277832, "step": 1060 }, { "epoch": 1.61, "learning_rate": 1.1015004250566906e-07, "logits/chosen": -1.0470751523971558, "logits/rejected": -0.8667370676994324, "logps/chosen": -92.5534896850586, "logps/rejected": -254.15220642089844, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.8795552253723145, "rewards/margins": 12.72509479522705, "rewards/rejected": -15.60464859008789, "step": 1061 }, { "epoch": 1.61, "learning_rate": 1.093207068623086e-07, "logits/chosen": -1.079001784324646, "logits/rejected": -0.9033374786376953, "logps/chosen": -91.79341125488281, "logps/rejected": -251.64300537109375, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -2.8336005210876465, "rewards/margins": 13.242752075195312, "rewards/rejected": -16.076351165771484, "step": 1062 }, { "epoch": 1.61, "learning_rate": 1.0849412174953671e-07, "logits/chosen": -0.9910807013511658, "logits/rejected": -0.8828264474868774, "logps/chosen": -101.47459411621094, "logps/rejected": -259.4543762207031, "loss": 0.0363, "rewards/accuracies": 0.9375, "rewards/chosen": -3.174511671066284, "rewards/margins": 12.094611167907715, "rewards/rejected": -15.269123077392578, "step": 1063 }, { "epoch": 1.62, "learning_rate": 1.0767029298682639e-07, "logits/chosen": -1.1676360368728638, "logits/rejected": -1.095585584640503, "logps/chosen": -64.70406341552734, "logps/rejected": -168.17710876464844, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.0001543760299683, "rewards/margins": 9.639240264892578, "rewards/rejected": -10.639395713806152, "step": 1064 }, { "epoch": 1.62, "learning_rate": 1.0684922637424504e-07, "logits/chosen": -1.244273066520691, "logits/rejected": -1.1721899509429932, "logps/chosen": -85.20843505859375, "logps/rejected": -225.7948455810547, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -2.6695289611816406, "rewards/margins": 11.740074157714844, "rewards/rejected": -14.409602165222168, "step": 1065 }, { "epoch": 1.62, "learning_rate": 1.060309276924135e-07, "logits/chosen": -1.1296035051345825, "logits/rejected": -1.0875864028930664, "logps/chosen": -66.95491027832031, "logps/rejected": -172.0052490234375, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -0.7586946487426758, "rewards/margins": 9.80600357055664, "rewards/rejected": -10.564699172973633, "step": 1066 }, { "epoch": 1.62, "learning_rate": 1.0521540270246526e-07, "logits/chosen": -1.3834147453308105, "logits/rejected": -1.2829577922821045, "logps/chosen": -77.07857513427734, "logps/rejected": -214.35385131835938, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": -0.8441226482391357, "rewards/margins": 11.873616218566895, "rewards/rejected": -12.717740058898926, "step": 1067 }, { "epoch": 1.62, "learning_rate": 1.0440265714600571e-07, "logits/chosen": -1.1304280757904053, "logits/rejected": -1.0598454475402832, "logps/chosen": -70.51648712158203, "logps/rejected": -190.3502197265625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.9114686250686646, "rewards/margins": 10.219488143920898, "rewards/rejected": -11.13095760345459, "step": 1068 }, { "epoch": 1.62, "learning_rate": 1.0359269674507271e-07, "logits/chosen": -1.2193621397018433, "logits/rejected": -1.2017300128936768, "logps/chosen": -96.5212173461914, "logps/rejected": -248.94580078125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.753895878791809, "rewards/margins": 13.867959022521973, "rewards/rejected": -15.621854782104492, "step": 1069 }, { "epoch": 1.63, "learning_rate": 1.0278552720209449e-07, "logits/chosen": -1.0258492231369019, "logits/rejected": -0.9258626699447632, "logps/chosen": -64.7344970703125, "logps/rejected": -187.8410186767578, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -1.0400198698043823, "rewards/margins": 11.164020538330078, "rewards/rejected": -12.204039573669434, "step": 1070 }, { "epoch": 1.63, "learning_rate": 1.0198115419985154e-07, "logits/chosen": -1.2533814907073975, "logits/rejected": -1.221890926361084, "logps/chosen": -59.12885284423828, "logps/rejected": -154.86053466796875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.6402745246887207, "rewards/margins": 8.871824264526367, "rewards/rejected": -9.512099266052246, "step": 1071 }, { "epoch": 1.63, "learning_rate": 1.0117958340143506e-07, "logits/chosen": -1.2478764057159424, "logits/rejected": -1.072830080986023, "logps/chosen": -79.64214324951172, "logps/rejected": -229.7861785888672, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.148681879043579, "rewards/margins": 12.207650184631348, "rewards/rejected": -14.356331825256348, "step": 1072 }, { "epoch": 1.63, "learning_rate": 1.0038082045020824e-07, "logits/chosen": -1.1577354669570923, "logits/rejected": -1.1522295475006104, "logps/chosen": -69.2307357788086, "logps/rejected": -181.38600158691406, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -1.3045772314071655, "rewards/margins": 10.574820518493652, "rewards/rejected": -11.879398345947266, "step": 1073 }, { "epoch": 1.63, "learning_rate": 9.958487096976504e-08, "logits/chosen": -1.1740790605545044, "logits/rejected": -1.0017123222351074, "logps/chosen": -49.28252029418945, "logps/rejected": -193.6754913330078, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.09034234285354614, "rewards/margins": 12.283891677856445, "rewards/rejected": -12.374235153198242, "step": 1074 }, { "epoch": 1.63, "learning_rate": 9.87917405638925e-08, "logits/chosen": -1.1445330381393433, "logits/rejected": -1.0902642011642456, "logps/chosen": -71.83496856689453, "logps/rejected": -209.97894287109375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7673781514167786, "rewards/margins": 12.23061466217041, "rewards/rejected": -12.997993469238281, "step": 1075 }, { "epoch": 1.63, "learning_rate": 9.800143481652979e-08, "logits/chosen": -1.0803571939468384, "logits/rejected": -1.0840669870376587, "logps/chosen": -81.00335693359375, "logps/rejected": -198.65615844726562, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.196942687034607, "rewards/margins": 10.90712833404541, "rewards/rejected": -12.104071617126465, "step": 1076 }, { "epoch": 1.64, "learning_rate": 9.721395929172943e-08, "logits/chosen": -0.961615800857544, "logits/rejected": -0.988190770149231, "logps/chosen": -82.76693725585938, "logps/rejected": -191.41744995117188, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.2863515615463257, "rewards/margins": 11.099027633666992, "rewards/rejected": -12.38537883758545, "step": 1077 }, { "epoch": 1.64, "learning_rate": 9.642931953361805e-08, "logits/chosen": -1.0419639348983765, "logits/rejected": -1.0288581848144531, "logps/chosen": -85.21007537841797, "logps/rejected": -221.56063842773438, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.589748740196228, "rewards/margins": 12.904745101928711, "rewards/rejected": -14.49449348449707, "step": 1078 }, { "epoch": 1.64, "learning_rate": 9.564752106635781e-08, "logits/chosen": -0.9632241129875183, "logits/rejected": -0.9397358894348145, "logps/chosen": -65.1313705444336, "logps/rejected": -187.80287170410156, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.9772980213165283, "rewards/margins": 10.745587348937988, "rewards/rejected": -11.722885131835938, "step": 1079 }, { "epoch": 1.64, "learning_rate": 9.48685693941067e-08, "logits/chosen": -1.1339447498321533, "logits/rejected": -1.0799775123596191, "logps/chosen": -83.80926513671875, "logps/rejected": -219.28671264648438, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.5794214010238647, "rewards/margins": 12.531013488769531, "rewards/rejected": -14.110435485839844, "step": 1080 }, { "epoch": 1.64, "learning_rate": 9.409247000098009e-08, "logits/chosen": -1.3046482801437378, "logits/rejected": -1.250767469406128, "logps/chosen": -91.93920135498047, "logps/rejected": -236.53927612304688, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.001004457473755, "rewards/margins": 13.450885772705078, "rewards/rejected": -15.45189094543457, "step": 1081 }, { "epoch": 1.64, "learning_rate": 9.33192283510128e-08, "logits/chosen": -0.9359659552574158, "logits/rejected": -0.8699568510055542, "logps/chosen": -102.46256256103516, "logps/rejected": -237.10525512695312, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -3.088146448135376, "rewards/margins": 12.04832649230957, "rewards/rejected": -15.136472702026367, "step": 1082 }, { "epoch": 1.65, "learning_rate": 9.254884988811951e-08, "logits/chosen": -1.1467124223709106, "logits/rejected": -1.0064427852630615, "logps/chosen": -69.78543853759766, "logps/rejected": -175.60227966308594, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.6399104595184326, "rewards/margins": 10.142022132873535, "rewards/rejected": -10.78193187713623, "step": 1083 }, { "epoch": 1.65, "learning_rate": 9.17813400360572e-08, "logits/chosen": -1.236464500427246, "logits/rejected": -1.1323363780975342, "logps/chosen": -65.01455688476562, "logps/rejected": -166.62130737304688, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.599092960357666, "rewards/margins": 8.680694580078125, "rewards/rejected": -10.279787063598633, "step": 1084 }, { "epoch": 1.65, "learning_rate": 9.101670419838652e-08, "logits/chosen": -1.1149910688400269, "logits/rejected": -0.9636414647102356, "logps/chosen": -90.10865783691406, "logps/rejected": -251.3350067138672, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -2.6027493476867676, "rewards/margins": 13.645480155944824, "rewards/rejected": -16.24822998046875, "step": 1085 }, { "epoch": 1.65, "learning_rate": 9.025494775843456e-08, "logits/chosen": -1.1713343858718872, "logits/rejected": -1.116579294204712, "logps/chosen": -73.0295181274414, "logps/rejected": -194.81912231445312, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.4419115781784058, "rewards/margins": 11.159817695617676, "rewards/rejected": -12.601728439331055, "step": 1086 }, { "epoch": 1.65, "learning_rate": 8.949607607925541e-08, "logits/chosen": -0.9148304462432861, "logits/rejected": -0.8945529460906982, "logps/chosen": -68.20335388183594, "logps/rejected": -201.86953735351562, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.7701525688171387, "rewards/margins": 12.294547080993652, "rewards/rejected": -13.064699172973633, "step": 1087 }, { "epoch": 1.65, "learning_rate": 8.874009450359426e-08, "logits/chosen": -1.4068900346755981, "logits/rejected": -1.3015477657318115, "logps/chosen": -82.90198516845703, "logps/rejected": -216.49276733398438, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.9591162204742432, "rewards/margins": 11.598179817199707, "rewards/rejected": -13.557294845581055, "step": 1088 }, { "epoch": 1.65, "learning_rate": 8.798700835384842e-08, "logits/chosen": -1.3619306087493896, "logits/rejected": -1.400967001914978, "logps/chosen": -77.90013885498047, "logps/rejected": -219.6149139404297, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.714892864227295, "rewards/margins": 13.306224822998047, "rewards/rejected": -15.0211181640625, "step": 1089 }, { "epoch": 1.66, "learning_rate": 8.723682293203033e-08, "logits/chosen": -1.1045377254486084, "logits/rejected": -0.9827411770820618, "logps/chosen": -74.56683349609375, "logps/rejected": -211.0465087890625, "loss": 0.0203, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3548325300216675, "rewards/margins": 10.95444107055664, "rewards/rejected": -12.309274673461914, "step": 1090 }, { "epoch": 1.66, "learning_rate": 8.648954351973015e-08, "logits/chosen": -1.2276021242141724, "logits/rejected": -1.1442418098449707, "logps/chosen": -69.00041961669922, "logps/rejected": -185.90890502929688, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.6087831258773804, "rewards/margins": 11.31124210357666, "rewards/rejected": -11.920024871826172, "step": 1091 }, { "epoch": 1.66, "learning_rate": 8.574517537807896e-08, "logits/chosen": -1.1629654169082642, "logits/rejected": -1.0115498304367065, "logps/chosen": -67.43597412109375, "logps/rejected": -219.76559448242188, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.2888271808624268, "rewards/margins": 12.611084938049316, "rewards/rejected": -13.899911880493164, "step": 1092 }, { "epoch": 1.66, "learning_rate": 8.500372374771103e-08, "logits/chosen": -0.9780749082565308, "logits/rejected": -0.8621728420257568, "logps/chosen": -70.65216064453125, "logps/rejected": -178.79026794433594, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.0733259916305542, "rewards/margins": 10.05894660949707, "rewards/rejected": -11.132272720336914, "step": 1093 }, { "epoch": 1.66, "learning_rate": 8.426519384872732e-08, "logits/chosen": -1.104317307472229, "logits/rejected": -1.0625383853912354, "logps/chosen": -55.13422393798828, "logps/rejected": -145.50135803222656, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.2227928638458252, "rewards/margins": 8.60960865020752, "rewards/rejected": -9.832401275634766, "step": 1094 }, { "epoch": 1.66, "learning_rate": 8.352959088065903e-08, "logits/chosen": -1.2082970142364502, "logits/rejected": -1.2114728689193726, "logps/chosen": -86.84661865234375, "logps/rejected": -216.3503875732422, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -2.226700782775879, "rewards/margins": 11.329172134399414, "rewards/rejected": -13.555872917175293, "step": 1095 }, { "epoch": 1.67, "learning_rate": 8.279692002243028e-08, "logits/chosen": -1.2522339820861816, "logits/rejected": -1.1314581632614136, "logps/chosen": -71.96464538574219, "logps/rejected": -202.87396240234375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.396548867225647, "rewards/margins": 11.046591758728027, "rewards/rejected": -12.443140029907227, "step": 1096 }, { "epoch": 1.67, "learning_rate": 8.206718643232207e-08, "logits/chosen": -1.1508793830871582, "logits/rejected": -1.076686143875122, "logps/chosen": -70.49022674560547, "logps/rejected": -211.82797241210938, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.335092544555664, "rewards/margins": 11.92380428314209, "rewards/rejected": -13.258896827697754, "step": 1097 }, { "epoch": 1.67, "learning_rate": 8.134039524793601e-08, "logits/chosen": -1.2321585416793823, "logits/rejected": -1.1257213354110718, "logps/chosen": -91.0789566040039, "logps/rejected": -230.8169403076172, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.274380922317505, "rewards/margins": 12.497110366821289, "rewards/rejected": -14.771492004394531, "step": 1098 }, { "epoch": 1.67, "learning_rate": 8.061655158615821e-08, "logits/chosen": -1.4044198989868164, "logits/rejected": -1.2616177797317505, "logps/chosen": -73.60513305664062, "logps/rejected": -224.2547607421875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.9663087129592896, "rewards/margins": 12.21403694152832, "rewards/rejected": -14.180344581604004, "step": 1099 }, { "epoch": 1.67, "learning_rate": 7.989566054312286e-08, "logits/chosen": -1.2367472648620605, "logits/rejected": -1.132240891456604, "logps/chosen": -68.86763763427734, "logps/rejected": -181.22474670410156, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.8581984043121338, "rewards/margins": 9.85179615020752, "rewards/rejected": -11.70999526977539, "step": 1100 }, { "epoch": 1.67, "learning_rate": 7.91777271941766e-08, "logits/chosen": -1.0721819400787354, "logits/rejected": -1.093714952468872, "logps/chosen": -72.05963134765625, "logps/rejected": -216.3887176513672, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.9113300442695618, "rewards/margins": 13.480352401733398, "rewards/rejected": -14.391682624816895, "step": 1101 }, { "epoch": 1.67, "learning_rate": 7.846275659384278e-08, "logits/chosen": -1.0708647966384888, "logits/rejected": -1.0220985412597656, "logps/chosen": -73.78739166259766, "logps/rejected": -198.44204711914062, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.0984382629394531, "rewards/margins": 12.221569061279297, "rewards/rejected": -13.320008277893066, "step": 1102 }, { "epoch": 1.68, "learning_rate": 7.775075377578633e-08, "logits/chosen": -1.2051002979278564, "logits/rejected": -1.12346613407135, "logps/chosen": -69.5119857788086, "logps/rejected": -186.70733642578125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.6519767045974731, "rewards/margins": 10.11180591583252, "rewards/rejected": -11.763782501220703, "step": 1103 }, { "epoch": 1.68, "learning_rate": 7.70417237527769e-08, "logits/chosen": -1.1896698474884033, "logits/rejected": -1.0908278226852417, "logps/chosen": -66.09146881103516, "logps/rejected": -185.23272705078125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.2258577346801758, "rewards/margins": 10.83736515045166, "rewards/rejected": -12.063223838806152, "step": 1104 }, { "epoch": 1.68, "learning_rate": 7.63356715166556e-08, "logits/chosen": -1.2933841943740845, "logits/rejected": -1.330116629600525, "logps/chosen": -58.654380798339844, "logps/rejected": -125.71812438964844, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.5604220628738403, "rewards/margins": 6.564835548400879, "rewards/rejected": -7.12525749206543, "step": 1105 }, { "epoch": 1.68, "learning_rate": 7.563260203829808e-08, "logits/chosen": -0.9199784994125366, "logits/rejected": -0.8492215275764465, "logps/chosen": -66.62592315673828, "logps/rejected": -181.0642852783203, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.576340913772583, "rewards/margins": 10.062987327575684, "rewards/rejected": -11.639326095581055, "step": 1106 }, { "epoch": 1.68, "learning_rate": 7.49325202675805e-08, "logits/chosen": -1.0409053564071655, "logits/rejected": -0.9511809945106506, "logps/chosen": -58.40936279296875, "logps/rejected": -156.097412109375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.83051997423172, "rewards/margins": 8.35158634185791, "rewards/rejected": -9.182106018066406, "step": 1107 }, { "epoch": 1.68, "learning_rate": 7.423543113334435e-08, "logits/chosen": -1.1387490034103394, "logits/rejected": -1.1770416498184204, "logps/chosen": -70.22821807861328, "logps/rejected": -193.7679443359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4090297818183899, "rewards/margins": 11.673224449157715, "rewards/rejected": -12.082253456115723, "step": 1108 }, { "epoch": 1.68, "learning_rate": 7.3541339543362e-08, "logits/chosen": -1.0563454627990723, "logits/rejected": -1.0912545919418335, "logps/chosen": -85.34751892089844, "logps/rejected": -231.6593017578125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.5773392915725708, "rewards/margins": 13.65406608581543, "rewards/rejected": -15.231405258178711, "step": 1109 }, { "epoch": 1.69, "learning_rate": 7.285025038430171e-08, "logits/chosen": -0.9881210327148438, "logits/rejected": -0.99253249168396, "logps/chosen": -40.646854400634766, "logps/rejected": -143.0417938232422, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": 0.40884098410606384, "rewards/margins": 9.594618797302246, "rewards/rejected": -9.185776710510254, "step": 1110 }, { "epoch": 1.69, "learning_rate": 7.21621685216936e-08, "logits/chosen": -1.2693352699279785, "logits/rejected": -1.2725985050201416, "logps/chosen": -100.3951187133789, "logps/rejected": -227.0180206298828, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.245497226715088, "rewards/margins": 12.094511032104492, "rewards/rejected": -14.340007781982422, "step": 1111 }, { "epoch": 1.69, "learning_rate": 7.147709879989539e-08, "logits/chosen": -1.149064064025879, "logits/rejected": -1.0589556694030762, "logps/chosen": -77.8072509765625, "logps/rejected": -206.65069580078125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.5425877571105957, "rewards/margins": 11.325196266174316, "rewards/rejected": -12.86778450012207, "step": 1112 }, { "epoch": 1.69, "learning_rate": 7.079504604205805e-08, "logits/chosen": -1.0684857368469238, "logits/rejected": -0.9011760950088501, "logps/chosen": -70.0149154663086, "logps/rejected": -208.98086547851562, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -1.238356590270996, "rewards/margins": 11.44414234161377, "rewards/rejected": -12.68249797821045, "step": 1113 }, { "epoch": 1.69, "learning_rate": 7.011601505009196e-08, "logits/chosen": -1.1593823432922363, "logits/rejected": -1.0746428966522217, "logps/chosen": -71.18487548828125, "logps/rejected": -203.63807678222656, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.0665754079818726, "rewards/margins": 11.836189270019531, "rewards/rejected": -12.902764320373535, "step": 1114 }, { "epoch": 1.69, "learning_rate": 6.944001060463311e-08, "logits/chosen": -0.9904294610023499, "logits/rejected": -0.9577285647392273, "logps/chosen": -60.490360260009766, "logps/rejected": -159.77786254882812, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.0130398273468018, "rewards/margins": 8.982420921325684, "rewards/rejected": -9.995460510253906, "step": 1115 }, { "epoch": 1.7, "learning_rate": 6.876703746500984e-08, "logits/chosen": -1.1600037813186646, "logits/rejected": -1.0456393957138062, "logps/chosen": -50.8457145690918, "logps/rejected": -146.4720916748047, "loss": 0.0239, "rewards/accuracies": 0.875, "rewards/chosen": -0.8990418910980225, "rewards/margins": 8.573019981384277, "rewards/rejected": -9.472062110900879, "step": 1116 }, { "epoch": 1.7, "learning_rate": 6.809710036920818e-08, "logits/chosen": -1.0500625371932983, "logits/rejected": -1.011877179145813, "logps/chosen": -75.9246597290039, "logps/rejected": -194.84063720703125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.75282883644104, "rewards/margins": 11.360499382019043, "rewards/rejected": -12.11332893371582, "step": 1117 }, { "epoch": 1.7, "learning_rate": 6.743020403383997e-08, "logits/chosen": -1.1439069509506226, "logits/rejected": -1.0360243320465088, "logps/chosen": -84.5653076171875, "logps/rejected": -237.63262939453125, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.4115619659423828, "rewards/margins": 13.395352363586426, "rewards/rejected": -14.806913375854492, "step": 1118 }, { "epoch": 1.7, "learning_rate": 6.676635315410855e-08, "logits/chosen": -1.220572829246521, "logits/rejected": -1.3106917142868042, "logps/chosen": -59.45014953613281, "logps/rejected": -133.43246459960938, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -0.6084131002426147, "rewards/margins": 7.392858505249023, "rewards/rejected": -8.00127124786377, "step": 1119 }, { "epoch": 1.7, "learning_rate": 6.610555240377652e-08, "logits/chosen": -1.1922484636306763, "logits/rejected": -1.024374008178711, "logps/chosen": -75.49510955810547, "logps/rejected": -239.79537963867188, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.1918355226516724, "rewards/margins": 13.71989631652832, "rewards/rejected": -14.911730766296387, "step": 1120 }, { "epoch": 1.7, "learning_rate": 6.544780643513159e-08, "logits/chosen": -1.0872753858566284, "logits/rejected": -0.9683758020401001, "logps/chosen": -75.62218475341797, "logps/rejected": -247.2213592529297, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1390029191970825, "rewards/margins": 15.164216995239258, "rewards/rejected": -16.303220748901367, "step": 1121 }, { "epoch": 1.7, "learning_rate": 6.479311987895558e-08, "logits/chosen": -1.113074779510498, "logits/rejected": -1.0629303455352783, "logps/chosen": -73.87572479248047, "logps/rejected": -216.52003479003906, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.9578240513801575, "rewards/margins": 12.23649787902832, "rewards/rejected": -13.19432258605957, "step": 1122 }, { "epoch": 1.71, "learning_rate": 6.414149734449037e-08, "logits/chosen": -1.2681244611740112, "logits/rejected": -1.3629964590072632, "logps/chosen": -83.48455810546875, "logps/rejected": -167.93890380859375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.735581874847412, "rewards/margins": 9.227690696716309, "rewards/rejected": -10.963273048400879, "step": 1123 }, { "epoch": 1.71, "learning_rate": 6.349294341940592e-08, "logits/chosen": -1.1680818796157837, "logits/rejected": -1.0755884647369385, "logps/chosen": -76.60441589355469, "logps/rejected": -218.47650146484375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.6303492784500122, "rewards/margins": 11.845648765563965, "rewards/rejected": -13.475997924804688, "step": 1124 }, { "epoch": 1.71, "learning_rate": 6.284746266976832e-08, "logits/chosen": -1.0293267965316772, "logits/rejected": -1.013458251953125, "logps/chosen": -74.43067169189453, "logps/rejected": -183.85137939453125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.1002659797668457, "rewards/margins": 9.507320404052734, "rewards/rejected": -11.607585906982422, "step": 1125 }, { "epoch": 1.71, "learning_rate": 6.220505964000716e-08, "logits/chosen": -1.0351016521453857, "logits/rejected": -0.8751451373100281, "logps/chosen": -87.07731628417969, "logps/rejected": -228.90122985839844, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.9597899913787842, "rewards/margins": 12.763002395629883, "rewards/rejected": -14.722792625427246, "step": 1126 }, { "epoch": 1.71, "learning_rate": 6.156573885288375e-08, "logits/chosen": -1.4418346881866455, "logits/rejected": -1.2614383697509766, "logps/chosen": -57.170623779296875, "logps/rejected": -194.32440185546875, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.6441372632980347, "rewards/margins": 10.866323471069336, "rewards/rejected": -11.510459899902344, "step": 1127 }, { "epoch": 1.71, "learning_rate": 6.092950480945897e-08, "logits/chosen": -1.3853294849395752, "logits/rejected": -1.2317214012145996, "logps/chosen": -58.2214469909668, "logps/rejected": -191.49949645996094, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.45231327414512634, "rewards/margins": 11.297001838684082, "rewards/rejected": -11.749314308166504, "step": 1128 }, { "epoch": 1.72, "learning_rate": 6.029636198906246e-08, "logits/chosen": -1.1470444202423096, "logits/rejected": -1.064419150352478, "logps/chosen": -82.69210815429688, "logps/rejected": -216.30880737304688, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.6808559894561768, "rewards/margins": 12.330316543579102, "rewards/rejected": -14.011174201965332, "step": 1129 }, { "epoch": 1.72, "learning_rate": 5.966631484925993e-08, "logits/chosen": -1.0834418535232544, "logits/rejected": -1.071006178855896, "logps/chosen": -87.85391998291016, "logps/rejected": -242.10052490234375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.9494872093200684, "rewards/margins": 14.41417121887207, "rewards/rejected": -16.363658905029297, "step": 1130 }, { "epoch": 1.72, "learning_rate": 5.9039367825822526e-08, "logits/chosen": -1.2992407083511353, "logits/rejected": -1.2516363859176636, "logps/chosen": -83.01346588134766, "logps/rejected": -220.72406005859375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.455176830291748, "rewards/margins": 12.305998802185059, "rewards/rejected": -13.761176109313965, "step": 1131 }, { "epoch": 1.72, "learning_rate": 5.8415525332695334e-08, "logits/chosen": -1.2400323152542114, "logits/rejected": -1.147408366203308, "logps/chosen": -70.5896224975586, "logps/rejected": -168.30020141601562, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.8818989992141724, "rewards/margins": 7.773576736450195, "rewards/rejected": -9.655476570129395, "step": 1132 }, { "epoch": 1.72, "learning_rate": 5.7794791761966664e-08, "logits/chosen": -1.2419061660766602, "logits/rejected": -1.086823582649231, "logps/chosen": -75.44318389892578, "logps/rejected": -233.98483276367188, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.3124146461486816, "rewards/margins": 13.510801315307617, "rewards/rejected": -14.823214530944824, "step": 1133 }, { "epoch": 1.72, "learning_rate": 5.717717148383616e-08, "logits/chosen": -0.999576210975647, "logits/rejected": -0.9369627833366394, "logps/chosen": -82.40902709960938, "logps/rejected": -209.4818572998047, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -3.0064735412597656, "rewards/margins": 10.635801315307617, "rewards/rejected": -13.642273902893066, "step": 1134 }, { "epoch": 1.72, "learning_rate": 5.6562668846585504e-08, "logits/chosen": -1.1111935377120972, "logits/rejected": -1.075188159942627, "logps/chosen": -64.33235931396484, "logps/rejected": -160.79803466796875, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.1467089653015137, "rewards/margins": 9.329978942871094, "rewards/rejected": -10.476689338684082, "step": 1135 }, { "epoch": 1.73, "learning_rate": 5.595128817654637e-08, "logits/chosen": -1.1287126541137695, "logits/rejected": -1.0575056076049805, "logps/chosen": -65.08252716064453, "logps/rejected": -173.18484497070312, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.643226981163025, "rewards/margins": 9.848438262939453, "rewards/rejected": -11.491665840148926, "step": 1136 }, { "epoch": 1.73, "learning_rate": 5.53430337780712e-08, "logits/chosen": -1.230764389038086, "logits/rejected": -1.082259178161621, "logps/chosen": -75.74681854248047, "logps/rejected": -228.3786163330078, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.030249834060669, "rewards/margins": 13.466056823730469, "rewards/rejected": -14.496305465698242, "step": 1137 }, { "epoch": 1.73, "learning_rate": 5.473790993350152e-08, "logits/chosen": -1.206829309463501, "logits/rejected": -1.0737767219543457, "logps/chosen": -64.42081451416016, "logps/rejected": -179.85597229003906, "loss": 0.014, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1375069618225098, "rewards/margins": 10.590378761291504, "rewards/rejected": -11.727885246276855, "step": 1138 }, { "epoch": 1.73, "learning_rate": 5.413592090313929e-08, "logits/chosen": -1.1952977180480957, "logits/rejected": -1.1582542657852173, "logps/chosen": -74.84733581542969, "logps/rejected": -187.56007385253906, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.404332160949707, "rewards/margins": 10.64260482788086, "rewards/rejected": -12.04693603515625, "step": 1139 }, { "epoch": 1.73, "learning_rate": 5.353707092521581e-08, "logits/chosen": -1.2151238918304443, "logits/rejected": -1.201400637626648, "logps/chosen": -78.33900451660156, "logps/rejected": -170.19369506835938, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.1412068605422974, "rewards/margins": 9.702839851379395, "rewards/rejected": -10.844047546386719, "step": 1140 }, { "epoch": 1.73, "learning_rate": 5.294136421586226e-08, "logits/chosen": -1.2093555927276611, "logits/rejected": -1.1871004104614258, "logps/chosen": -73.68685913085938, "logps/rejected": -179.29037475585938, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.404720425605774, "rewards/margins": 9.682507514953613, "rewards/rejected": -11.087226867675781, "step": 1141 }, { "epoch": 1.73, "learning_rate": 5.234880496908028e-08, "logits/chosen": -0.8909991979598999, "logits/rejected": -0.8302620649337769, "logps/chosen": -81.54849243164062, "logps/rejected": -219.3596649169922, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.5307843685150146, "rewards/margins": 12.892635345458984, "rewards/rejected": -14.423418045043945, "step": 1142 }, { "epoch": 1.74, "learning_rate": 5.175939735671186e-08, "logits/chosen": -0.9654414057731628, "logits/rejected": -0.8610618114471436, "logps/chosen": -61.41965866088867, "logps/rejected": -190.81536865234375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.285893201828003, "rewards/margins": 11.655067443847656, "rewards/rejected": -12.940960884094238, "step": 1143 }, { "epoch": 1.74, "learning_rate": 5.1173145528410513e-08, "logits/chosen": -1.44536554813385, "logits/rejected": -1.3629040718078613, "logps/chosen": -68.13104248046875, "logps/rejected": -177.6975555419922, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.9723241329193115, "rewards/margins": 10.045774459838867, "rewards/rejected": -11.018098831176758, "step": 1144 }, { "epoch": 1.74, "learning_rate": 5.059005361161156e-08, "logits/chosen": -1.289172649383545, "logits/rejected": -1.2248356342315674, "logps/chosen": -86.52054595947266, "logps/rejected": -204.8133087158203, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.5036221742630005, "rewards/margins": 10.382984161376953, "rewards/rejected": -11.886605262756348, "step": 1145 }, { "epoch": 1.74, "learning_rate": 5.001012571150382e-08, "logits/chosen": -1.1975736618041992, "logits/rejected": -1.1259053945541382, "logps/chosen": -63.912940979003906, "logps/rejected": -180.1395263671875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.7342360019683838, "rewards/margins": 10.14150619506836, "rewards/rejected": -11.875743865966797, "step": 1146 }, { "epoch": 1.74, "learning_rate": 4.943336591099978e-08, "logits/chosen": -1.1472722291946411, "logits/rejected": -1.1231780052185059, "logps/chosen": -49.86153793334961, "logps/rejected": -123.01583099365234, "loss": 0.0163, "rewards/accuracies": 0.9375, "rewards/chosen": -0.19975323975086212, "rewards/margins": 7.285927772521973, "rewards/rejected": -7.485680103302002, "step": 1147 }, { "epoch": 1.74, "learning_rate": 4.885977827070747e-08, "logits/chosen": -1.2718511819839478, "logits/rejected": -1.2030029296875, "logps/chosen": -71.72444915771484, "logps/rejected": -181.01756286621094, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0611457824707031, "rewards/margins": 9.881611824035645, "rewards/rejected": -10.942757606506348, "step": 1148 }, { "epoch": 1.75, "learning_rate": 4.828936682890161e-08, "logits/chosen": -1.3181617259979248, "logits/rejected": -1.2724326848983765, "logps/chosen": -92.13249206542969, "logps/rejected": -233.87696838378906, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.3504269123077393, "rewards/margins": 12.301064491271973, "rewards/rejected": -14.651491165161133, "step": 1149 }, { "epoch": 1.75, "learning_rate": 4.772213560149568e-08, "logits/chosen": -1.194939136505127, "logits/rejected": -1.1765731573104858, "logps/chosen": -91.87400817871094, "logps/rejected": -207.25135803222656, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.496976852416992, "rewards/margins": 10.776562690734863, "rewards/rejected": -13.273541450500488, "step": 1150 }, { "epoch": 1.75, "learning_rate": 4.715808858201254e-08, "logits/chosen": -1.073747992515564, "logits/rejected": -0.9684121608734131, "logps/chosen": -70.1568603515625, "logps/rejected": -192.5471954345703, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.241762399673462, "rewards/margins": 11.266307830810547, "rewards/rejected": -12.50806999206543, "step": 1151 }, { "epoch": 1.75, "learning_rate": 4.6597229741557666e-08, "logits/chosen": -1.3962565660476685, "logits/rejected": -1.2379064559936523, "logps/chosen": -95.88238525390625, "logps/rejected": -254.2093505859375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.0294032096862793, "rewards/margins": 13.60114860534668, "rewards/rejected": -15.630552291870117, "step": 1152 }, { "epoch": 1.75, "learning_rate": 4.603956302879025e-08, "logits/chosen": -1.119269847869873, "logits/rejected": -1.151871681213379, "logps/chosen": -55.78212356567383, "logps/rejected": -155.22653198242188, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.0021774768829346, "rewards/margins": 9.199004173278809, "rewards/rejected": -10.201181411743164, "step": 1153 }, { "epoch": 1.75, "learning_rate": 4.548509236989562e-08, "logits/chosen": -1.0740209817886353, "logits/rejected": -1.0154675245285034, "logps/chosen": -56.02338409423828, "logps/rejected": -152.66915893554688, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.5767076015472412, "rewards/margins": 9.372485160827637, "rewards/rejected": -9.949193000793457, "step": 1154 }, { "epoch": 1.75, "learning_rate": 4.4933821668557914e-08, "logits/chosen": -1.001431941986084, "logits/rejected": -0.9505981206893921, "logps/chosen": -68.77754974365234, "logps/rejected": -164.1874542236328, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.1711878776550293, "rewards/margins": 8.695853233337402, "rewards/rejected": -9.867039680480957, "step": 1155 }, { "epoch": 1.76, "learning_rate": 4.4385754805932095e-08, "logits/chosen": -1.2005764245986938, "logits/rejected": -1.1242300271987915, "logps/chosen": -76.149658203125, "logps/rejected": -185.9651336669922, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.9307671785354614, "rewards/margins": 9.996943473815918, "rewards/rejected": -11.927711486816406, "step": 1156 }, { "epoch": 1.76, "learning_rate": 4.384089564061727e-08, "logits/chosen": -1.205551028251648, "logits/rejected": -1.0542964935302734, "logps/chosen": -87.11665344238281, "logps/rejected": -245.56309509277344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1137125492095947, "rewards/margins": 13.458393096923828, "rewards/rejected": -15.57210636138916, "step": 1157 }, { "epoch": 1.76, "learning_rate": 4.3299248008628495e-08, "logits/chosen": -1.346709132194519, "logits/rejected": -1.3536221981048584, "logps/chosen": -71.81742858886719, "logps/rejected": -193.46884155273438, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.651132345199585, "rewards/margins": 11.359602928161621, "rewards/rejected": -13.010734558105469, "step": 1158 }, { "epoch": 1.76, "learning_rate": 4.276081572337109e-08, "logits/chosen": -1.2646945714950562, "logits/rejected": -1.179612398147583, "logps/chosen": -61.9714469909668, "logps/rejected": -169.14645385742188, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.9661446809768677, "rewards/margins": 9.379840850830078, "rewards/rejected": -10.345985412597656, "step": 1159 }, { "epoch": 1.76, "learning_rate": 4.222560257561275e-08, "logits/chosen": -0.9648147225379944, "logits/rejected": -1.0111454725265503, "logps/chosen": -108.44926452636719, "logps/rejected": -273.5411071777344, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.9361164569854736, "rewards/margins": 14.968153953552246, "rewards/rejected": -17.90427017211914, "step": 1160 }, { "epoch": 1.76, "learning_rate": 4.169361233345725e-08, "logits/chosen": -0.99238121509552, "logits/rejected": -0.9608970880508423, "logps/chosen": -41.922630310058594, "logps/rejected": -153.88870239257812, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.029361620545387268, "rewards/margins": 10.260735511779785, "rewards/rejected": -10.231374740600586, "step": 1161 }, { "epoch": 1.77, "learning_rate": 4.116484874231785e-08, "logits/chosen": -1.1681238412857056, "logits/rejected": -1.0290806293487549, "logps/chosen": -112.41152954101562, "logps/rejected": -255.91331481933594, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -4.348160743713379, "rewards/margins": 12.28393268585205, "rewards/rejected": -16.63209342956543, "step": 1162 }, { "epoch": 1.77, "learning_rate": 4.0639315524891304e-08, "logits/chosen": -0.9522886276245117, "logits/rejected": -0.9439799189567566, "logps/chosen": -68.50711822509766, "logps/rejected": -158.6794891357422, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.7130775451660156, "rewards/margins": 9.161654472351074, "rewards/rejected": -9.874731063842773, "step": 1163 }, { "epoch": 1.77, "learning_rate": 4.0117016381130634e-08, "logits/chosen": -1.0935475826263428, "logits/rejected": -0.9440351128578186, "logps/chosen": -112.45979309082031, "logps/rejected": -252.8640594482422, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -4.46082878112793, "rewards/margins": 12.47606372833252, "rewards/rejected": -16.936891555786133, "step": 1164 }, { "epoch": 1.77, "learning_rate": 3.959795498822055e-08, "logits/chosen": -1.3229010105133057, "logits/rejected": -1.2653900384902954, "logps/chosen": -63.48313522338867, "logps/rejected": -165.02943420410156, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.9819265604019165, "rewards/margins": 8.870457649230957, "rewards/rejected": -9.852384567260742, "step": 1165 }, { "epoch": 1.77, "learning_rate": 3.9082135000550074e-08, "logits/chosen": -1.1480696201324463, "logits/rejected": -1.058894395828247, "logps/chosen": -76.19339752197266, "logps/rejected": -244.94894409179688, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.2183040380477905, "rewards/margins": 14.837517738342285, "rewards/rejected": -16.05582046508789, "step": 1166 }, { "epoch": 1.77, "learning_rate": 3.856956004968803e-08, "logits/chosen": -1.290866494178772, "logits/rejected": -1.3397711515426636, "logps/chosen": -90.19656372070312, "logps/rejected": -180.97918701171875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -3.1676547527313232, "rewards/margins": 8.439589500427246, "rewards/rejected": -11.607244491577148, "step": 1167 }, { "epoch": 1.77, "learning_rate": 3.806023374435663e-08, "logits/chosen": -1.1712409257888794, "logits/rejected": -1.1349647045135498, "logps/chosen": -90.69806671142578, "logps/rejected": -242.47714233398438, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2193498611450195, "rewards/margins": 13.698222160339355, "rewards/rejected": -15.917571067810059, "step": 1168 }, { "epoch": 1.78, "learning_rate": 3.7554159670406535e-08, "logits/chosen": -1.2209597826004028, "logits/rejected": -1.041914701461792, "logps/chosen": -82.41712951660156, "logps/rejected": -221.45584106445312, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.1218905448913574, "rewards/margins": 12.437333106994629, "rewards/rejected": -14.559225082397461, "step": 1169 }, { "epoch": 1.78, "learning_rate": 3.705134139079136e-08, "logits/chosen": -1.0915639400482178, "logits/rejected": -0.9363446235656738, "logps/chosen": -62.273983001708984, "logps/rejected": -199.3795623779297, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.6483842730522156, "rewards/margins": 12.303706169128418, "rewards/rejected": -12.9520902633667, "step": 1170 }, { "epoch": 1.78, "learning_rate": 3.655178244554269e-08, "logits/chosen": -1.1844284534454346, "logits/rejected": -1.2165160179138184, "logps/chosen": -66.18008422851562, "logps/rejected": -159.49417114257812, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -1.4386379718780518, "rewards/margins": 9.355382919311523, "rewards/rejected": -10.794020652770996, "step": 1171 }, { "epoch": 1.78, "learning_rate": 3.6055486351745324e-08, "logits/chosen": -1.2054702043533325, "logits/rejected": -1.1438997983932495, "logps/chosen": -80.12940216064453, "logps/rejected": -199.4840545654297, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.3853845596313477, "rewards/margins": 11.196819305419922, "rewards/rejected": -12.58220386505127, "step": 1172 }, { "epoch": 1.78, "learning_rate": 3.55624566035122e-08, "logits/chosen": -1.2264925241470337, "logits/rejected": -1.1079483032226562, "logps/chosen": -86.93685150146484, "logps/rejected": -231.08782958984375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.1986989974975586, "rewards/margins": 12.410072326660156, "rewards/rejected": -14.608769416809082, "step": 1173 }, { "epoch": 1.78, "learning_rate": 3.507269667196005e-08, "logits/chosen": -1.0240446329116821, "logits/rejected": -0.9470053315162659, "logps/chosen": -52.67414093017578, "logps/rejected": -150.68316650390625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 0.0014339275658130646, "rewards/margins": 9.25440788269043, "rewards/rejected": -9.252973556518555, "step": 1174 }, { "epoch": 1.79, "learning_rate": 3.458621000518458e-08, "logits/chosen": -1.119564175605774, "logits/rejected": -1.0477330684661865, "logps/chosen": -73.17056274414062, "logps/rejected": -179.02903747558594, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.6565520763397217, "rewards/margins": 9.676838874816895, "rewards/rejected": -11.333391189575195, "step": 1175 }, { "epoch": 1.79, "learning_rate": 3.4103000028236906e-08, "logits/chosen": -1.2833119630813599, "logits/rejected": -1.1382286548614502, "logps/chosen": -79.15277862548828, "logps/rejected": -245.55087280273438, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.2422773838043213, "rewards/margins": 13.866373062133789, "rewards/rejected": -15.108651161193848, "step": 1176 }, { "epoch": 1.79, "learning_rate": 3.362307014309868e-08, "logits/chosen": -1.1669962406158447, "logits/rejected": -1.1485800743103027, "logps/chosen": -77.07050323486328, "logps/rejected": -199.91168212890625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.5542712211608887, "rewards/margins": 11.39111328125, "rewards/rejected": -12.945383071899414, "step": 1177 }, { "epoch": 1.79, "learning_rate": 3.31464237286585e-08, "logits/chosen": -0.9818435907363892, "logits/rejected": -0.9378212690353394, "logps/chosen": -79.21688079833984, "logps/rejected": -173.23800659179688, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.5746930837631226, "rewards/margins": 9.1552095413208, "rewards/rejected": -10.729902267456055, "step": 1178 }, { "epoch": 1.79, "learning_rate": 3.26730641406881e-08, "logits/chosen": -1.07041335105896, "logits/rejected": -1.0159380435943604, "logps/chosen": -66.49639129638672, "logps/rejected": -174.3088836669922, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.8144354820251465, "rewards/margins": 9.70686149597168, "rewards/rejected": -10.521297454833984, "step": 1179 }, { "epoch": 1.79, "learning_rate": 3.220299471181898e-08, "logits/chosen": -1.0575075149536133, "logits/rejected": -0.9967933893203735, "logps/chosen": -54.594669342041016, "logps/rejected": -144.82998657226562, "loss": 0.035, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07825981825590134, "rewards/margins": 8.32921314239502, "rewards/rejected": -8.407472610473633, "step": 1180 }, { "epoch": 1.79, "learning_rate": 3.173621875151811e-08, "logits/chosen": -1.2280124425888062, "logits/rejected": -1.182820439338684, "logps/chosen": -94.21824645996094, "logps/rejected": -223.11990356445312, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.3519139289855957, "rewards/margins": 12.459220886230469, "rewards/rejected": -14.811135292053223, "step": 1181 }, { "epoch": 1.8, "learning_rate": 3.127273954606574e-08, "logits/chosen": -1.0089362859725952, "logits/rejected": -0.96539306640625, "logps/chosen": -80.09423065185547, "logps/rejected": -173.8900909423828, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.111353874206543, "rewards/margins": 8.772262573242188, "rewards/rejected": -10.88361644744873, "step": 1182 }, { "epoch": 1.8, "learning_rate": 3.0812560358531356e-08, "logits/chosen": -1.041933536529541, "logits/rejected": -1.019261360168457, "logps/chosen": -63.75693130493164, "logps/rejected": -154.80682373046875, "loss": 0.0314, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1399602890014648, "rewards/margins": 8.418811798095703, "rewards/rejected": -9.558772087097168, "step": 1183 }, { "epoch": 1.8, "learning_rate": 3.035568442875136e-08, "logits/chosen": -1.0853257179260254, "logits/rejected": -0.9606943130493164, "logps/chosen": -90.47114562988281, "logps/rejected": -224.79037475585938, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.9242072105407715, "rewards/margins": 11.84377384185791, "rewards/rejected": -13.76798152923584, "step": 1184 }, { "epoch": 1.8, "learning_rate": 2.990211497330569e-08, "logits/chosen": -1.3671926259994507, "logits/rejected": -1.3777002096176147, "logps/chosen": -90.4779281616211, "logps/rejected": -193.55160522460938, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -1.6756185293197632, "rewards/margins": 10.614287376403809, "rewards/rejected": -12.28990650177002, "step": 1185 }, { "epoch": 1.8, "learning_rate": 2.9451855185495532e-08, "logits/chosen": -0.9833270311355591, "logits/rejected": -0.930718719959259, "logps/chosen": -58.677024841308594, "logps/rejected": -166.68373107910156, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.30341848731040955, "rewards/margins": 10.041521072387695, "rewards/rejected": -10.344938278198242, "step": 1186 }, { "epoch": 1.8, "learning_rate": 2.9004908235320924e-08, "logits/chosen": -1.1783056259155273, "logits/rejected": -1.1107795238494873, "logps/chosen": -63.97560501098633, "logps/rejected": -172.8702850341797, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8006558418273926, "rewards/margins": 9.48755168914795, "rewards/rejected": -10.2882080078125, "step": 1187 }, { "epoch": 1.8, "learning_rate": 2.8561277269457895e-08, "logits/chosen": -0.9910872578620911, "logits/rejected": -0.9650610685348511, "logps/chosen": -75.24369812011719, "logps/rejected": -183.6258544921875, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -2.5646166801452637, "rewards/margins": 9.786337852478027, "rewards/rejected": -12.350954055786133, "step": 1188 }, { "epoch": 1.81, "learning_rate": 2.8120965411237152e-08, "logits/chosen": -0.8922029137611389, "logits/rejected": -0.8234670162200928, "logps/chosen": -55.13329315185547, "logps/rejected": -152.74169921875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9872815608978271, "rewards/margins": 9.220758438110352, "rewards/rejected": -10.208039283752441, "step": 1189 }, { "epoch": 1.81, "learning_rate": 2.768397576062126e-08, "logits/chosen": -1.1451364755630493, "logits/rejected": -0.9374922513961792, "logps/chosen": -97.60246276855469, "logps/rejected": -283.8183288574219, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.52813982963562, "rewards/margins": 14.909027099609375, "rewards/rejected": -17.437166213989258, "step": 1190 }, { "epoch": 1.81, "learning_rate": 2.7250311394183378e-08, "logits/chosen": -1.03672194480896, "logits/rejected": -0.9891068339347839, "logps/chosen": -84.0450210571289, "logps/rejected": -193.334716796875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.645227313041687, "rewards/margins": 10.940584182739258, "rewards/rejected": -12.585811614990234, "step": 1191 }, { "epoch": 1.81, "learning_rate": 2.6819975365085236e-08, "logits/chosen": -1.0003464221954346, "logits/rejected": -0.9259211421012878, "logps/chosen": -62.66392135620117, "logps/rejected": -171.12786865234375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.6360028386116028, "rewards/margins": 10.064746856689453, "rewards/rejected": -10.700749397277832, "step": 1192 }, { "epoch": 1.81, "learning_rate": 2.6392970703056018e-08, "logits/chosen": -1.2499942779541016, "logits/rejected": -1.0891692638397217, "logps/chosen": -59.001625061035156, "logps/rejected": -193.19635009765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.1801376342773438, "rewards/margins": 10.908477783203125, "rewards/rejected": -12.088614463806152, "step": 1193 }, { "epoch": 1.81, "learning_rate": 2.596930041437062e-08, "logits/chosen": -1.2095203399658203, "logits/rejected": -1.0741009712219238, "logps/chosen": -54.60664749145508, "logps/rejected": -171.01107788085938, "loss": 0.0148, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6850531101226807, "rewards/margins": 10.063095092773438, "rewards/rejected": -10.748147964477539, "step": 1194 }, { "epoch": 1.82, "learning_rate": 2.554896748182883e-08, "logits/chosen": -1.0899300575256348, "logits/rejected": -1.0272502899169922, "logps/chosen": -49.843116760253906, "logps/rejected": -144.82833862304688, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.2501094341278076, "rewards/margins": 9.574674606323242, "rewards/rejected": -9.324565887451172, "step": 1195 }, { "epoch": 1.82, "learning_rate": 2.513197486473406e-08, "logits/chosen": -1.2003647089004517, "logits/rejected": -1.0822029113769531, "logps/chosen": -77.27375793457031, "logps/rejected": -232.38385009765625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.034325122833252, "rewards/margins": 13.187532424926758, "rewards/rejected": -15.221856117248535, "step": 1196 }, { "epoch": 1.82, "learning_rate": 2.471832549887276e-08, "logits/chosen": -1.4846813678741455, "logits/rejected": -1.3707112073898315, "logps/chosen": -84.6400375366211, "logps/rejected": -253.81854248046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6491315364837646, "rewards/margins": 15.211004257202148, "rewards/rejected": -16.860137939453125, "step": 1197 }, { "epoch": 1.82, "learning_rate": 2.430802229649348e-08, "logits/chosen": -1.04225754737854, "logits/rejected": -1.037789225578308, "logps/chosen": -79.18376159667969, "logps/rejected": -171.23422241210938, "loss": 0.0199, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4485316276550293, "rewards/margins": 8.454278945922852, "rewards/rejected": -10.902810096740723, "step": 1198 }, { "epoch": 1.82, "learning_rate": 2.390106814628662e-08, "logits/chosen": -0.9821594953536987, "logits/rejected": -0.9214787483215332, "logps/chosen": -75.97613525390625, "logps/rejected": -203.15115356445312, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.9587711095809937, "rewards/margins": 11.724018096923828, "rewards/rejected": -12.68278980255127, "step": 1199 }, { "epoch": 1.82, "learning_rate": 2.3497465913364046e-08, "logits/chosen": -1.104460597038269, "logits/rejected": -1.038111686706543, "logps/chosen": -89.49078369140625, "logps/rejected": -228.0952911376953, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.722094178199768, "rewards/margins": 12.232990264892578, "rewards/rejected": -13.955083847045898, "step": 1200 }, { "epoch": 1.82, "learning_rate": 2.309721843923851e-08, "logits/chosen": -1.0601420402526855, "logits/rejected": -0.9911051392555237, "logps/chosen": -74.96670532226562, "logps/rejected": -190.51840209960938, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.3472012281417847, "rewards/margins": 11.10965347290039, "rewards/rejected": -12.456853866577148, "step": 1201 }, { "epoch": 1.83, "learning_rate": 2.2700328541804426e-08, "logits/chosen": -1.3039923906326294, "logits/rejected": -1.2911025285720825, "logps/chosen": -108.84371185302734, "logps/rejected": -235.702392578125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.061046838760376, "rewards/margins": 12.482583999633789, "rewards/rejected": -14.543630599975586, "step": 1202 }, { "epoch": 1.83, "learning_rate": 2.2306799015317292e-08, "logits/chosen": -1.1615723371505737, "logits/rejected": -0.991241455078125, "logps/chosen": -82.74966430664062, "logps/rejected": -252.6170196533203, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.834315299987793, "rewards/margins": 15.210193634033203, "rewards/rejected": -17.04450798034668, "step": 1203 }, { "epoch": 1.83, "learning_rate": 2.1916632630374577e-08, "logits/chosen": -1.174147129058838, "logits/rejected": -1.1671512126922607, "logps/chosen": -55.36254119873047, "logps/rejected": -171.36447143554688, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8767067193984985, "rewards/margins": 10.042999267578125, "rewards/rejected": -10.919706344604492, "step": 1204 }, { "epoch": 1.83, "learning_rate": 2.1529832133895588e-08, "logits/chosen": -1.0668604373931885, "logits/rejected": -0.967597246170044, "logps/chosen": -100.05206298828125, "logps/rejected": -270.90191650390625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -2.6888511180877686, "rewards/margins": 14.828164100646973, "rewards/rejected": -17.51701545715332, "step": 1205 }, { "epoch": 1.83, "learning_rate": 2.1146400249102802e-08, "logits/chosen": -1.397310733795166, "logits/rejected": -1.3512035608291626, "logps/chosen": -67.1437759399414, "logps/rejected": -159.02467346191406, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -1.1888231039047241, "rewards/margins": 9.070355415344238, "rewards/rejected": -10.25917911529541, "step": 1206 }, { "epoch": 1.83, "learning_rate": 2.0766339675502397e-08, "logits/chosen": -1.0767494440078735, "logits/rejected": -0.965934693813324, "logps/chosen": -101.39511108398438, "logps/rejected": -261.9713439941406, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.6425881385803223, "rewards/margins": 13.892492294311523, "rewards/rejected": -17.535079956054688, "step": 1207 }, { "epoch": 1.84, "learning_rate": 2.0389653088865033e-08, "logits/chosen": -1.4239978790283203, "logits/rejected": -1.3504027128219604, "logps/chosen": -71.2942123413086, "logps/rejected": -186.11898803710938, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.407034158706665, "rewards/margins": 10.300168991088867, "rewards/rejected": -11.707202911376953, "step": 1208 }, { "epoch": 1.84, "learning_rate": 2.001634314120726e-08, "logits/chosen": -1.4119137525558472, "logits/rejected": -1.307218074798584, "logps/chosen": -72.17942810058594, "logps/rejected": -205.65863037109375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2082171440124512, "rewards/margins": 12.206661224365234, "rewards/rejected": -13.414878845214844, "step": 1209 }, { "epoch": 1.84, "learning_rate": 1.964641246077303e-08, "logits/chosen": -0.993287980556488, "logits/rejected": -1.031794786453247, "logps/chosen": -78.95590209960938, "logps/rejected": -179.0350341796875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.017829179763794, "rewards/margins": 9.196547508239746, "rewards/rejected": -11.214376449584961, "step": 1210 }, { "epoch": 1.84, "learning_rate": 1.9279863652014838e-08, "logits/chosen": -1.060457468032837, "logits/rejected": -1.096437692642212, "logps/chosen": -122.54818725585938, "logps/rejected": -243.62075805664062, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -3.8781747817993164, "rewards/margins": 12.498019218444824, "rewards/rejected": -16.37619400024414, "step": 1211 }, { "epoch": 1.84, "learning_rate": 1.8916699295575323e-08, "logits/chosen": -1.059748649597168, "logits/rejected": -1.059941053390503, "logps/chosen": -54.84103775024414, "logps/rejected": -156.09188842773438, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.2279458045959473, "rewards/margins": 8.983017921447754, "rewards/rejected": -10.21096420288086, "step": 1212 }, { "epoch": 1.84, "learning_rate": 1.8556921948269577e-08, "logits/chosen": -1.0731769800186157, "logits/rejected": -0.9469415545463562, "logps/chosen": -90.43501281738281, "logps/rejected": -218.74835205078125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.4426980018615723, "rewards/margins": 11.901433944702148, "rewards/rejected": -14.344131469726562, "step": 1213 }, { "epoch": 1.84, "learning_rate": 1.820053414306677e-08, "logits/chosen": -0.8140149116516113, "logits/rejected": -0.7933439016342163, "logps/chosen": -87.42171478271484, "logps/rejected": -178.2332305908203, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.8189470767974854, "rewards/margins": 8.89945125579834, "rewards/rejected": -11.718399047851562, "step": 1214 }, { "epoch": 1.85, "learning_rate": 1.7847538389072435e-08, "logits/chosen": -1.2803846597671509, "logits/rejected": -1.211902379989624, "logps/chosen": -79.93923950195312, "logps/rejected": -209.45614624023438, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.6933627128601074, "rewards/margins": 11.596197128295898, "rewards/rejected": -13.289560317993164, "step": 1215 }, { "epoch": 1.85, "learning_rate": 1.7497937171510545e-08, "logits/chosen": -1.1593124866485596, "logits/rejected": -1.0734546184539795, "logps/chosen": -98.91121673583984, "logps/rejected": -225.10154724121094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.9870173931121826, "rewards/margins": 11.160821914672852, "rewards/rejected": -14.147839546203613, "step": 1216 }, { "epoch": 1.85, "learning_rate": 1.715173295170669e-08, "logits/chosen": -1.214421272277832, "logits/rejected": -1.1262717247009277, "logps/chosen": -64.54765319824219, "logps/rejected": -186.80062866210938, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.9676066040992737, "rewards/margins": 11.020545959472656, "rewards/rejected": -11.988153457641602, "step": 1217 }, { "epoch": 1.85, "learning_rate": 1.6808928167069803e-08, "logits/chosen": -1.2606310844421387, "logits/rejected": -1.1958374977111816, "logps/chosen": -61.314327239990234, "logps/rejected": -165.88563537597656, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.5954817533493042, "rewards/margins": 10.101585388183594, "rewards/rejected": -10.697067260742188, "step": 1218 }, { "epoch": 1.85, "learning_rate": 1.6469525231075977e-08, "logits/chosen": -1.272934913635254, "logits/rejected": -1.4015471935272217, "logps/chosen": -94.45033264160156, "logps/rejected": -221.44044494628906, "loss": 0.0118, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8754397630691528, "rewards/margins": 12.196402549743652, "rewards/rejected": -14.071842193603516, "step": 1219 }, { "epoch": 1.85, "learning_rate": 1.6133526533250563e-08, "logits/chosen": -1.267065405845642, "logits/rejected": -1.24986732006073, "logps/chosen": -62.023494720458984, "logps/rejected": -174.93882751464844, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.1595714092254639, "rewards/margins": 10.430471420288086, "rewards/rejected": -11.590043067932129, "step": 1220 }, { "epoch": 1.85, "learning_rate": 1.58009344391522e-08, "logits/chosen": -1.1788753271102905, "logits/rejected": -1.096854567527771, "logps/chosen": -79.68408203125, "logps/rejected": -203.6868133544922, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2954856157302856, "rewards/margins": 11.55772590637207, "rewards/rejected": -12.85321044921875, "step": 1221 }, { "epoch": 1.86, "learning_rate": 1.5471751290355385e-08, "logits/chosen": -1.1234623193740845, "logits/rejected": -1.024211049079895, "logps/chosen": -49.120849609375, "logps/rejected": -163.56591796875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.4070178270339966, "rewards/margins": 10.147906303405762, "rewards/rejected": -10.554924011230469, "step": 1222 }, { "epoch": 1.86, "learning_rate": 1.5145979404434517e-08, "logits/chosen": -1.3019061088562012, "logits/rejected": -1.3134589195251465, "logps/chosen": -94.47854614257812, "logps/rejected": -204.00048828125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.767600417137146, "rewards/margins": 11.218263626098633, "rewards/rejected": -12.985862731933594, "step": 1223 }, { "epoch": 1.86, "learning_rate": 1.4823621074947501e-08, "logits/chosen": -1.1317193508148193, "logits/rejected": -1.2033860683441162, "logps/chosen": -64.93047332763672, "logps/rejected": -133.66629028320312, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1771470308303833, "rewards/margins": 7.702424049377441, "rewards/rejected": -8.879571914672852, "step": 1224 }, { "epoch": 1.86, "learning_rate": 1.4504678571419237e-08, "logits/chosen": -1.3028910160064697, "logits/rejected": -1.1780298948287964, "logps/chosen": -78.07183837890625, "logps/rejected": -232.07427978515625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.245678424835205, "rewards/margins": 13.306718826293945, "rewards/rejected": -14.552396774291992, "step": 1225 }, { "epoch": 1.86, "learning_rate": 1.4189154139326143e-08, "logits/chosen": -1.081373691558838, "logits/rejected": -1.0935312509536743, "logps/chosen": -79.95915222167969, "logps/rejected": -178.8350067138672, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.7669938802719116, "rewards/margins": 9.89219856262207, "rewards/rejected": -11.659192085266113, "step": 1226 }, { "epoch": 1.86, "learning_rate": 1.3877050000080104e-08, "logits/chosen": -1.1050331592559814, "logits/rejected": -1.1011483669281006, "logps/chosen": -65.14444732666016, "logps/rejected": -165.34768676757812, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -1.5117905139923096, "rewards/margins": 9.55897331237793, "rewards/rejected": -11.070762634277344, "step": 1227 }, { "epoch": 1.87, "learning_rate": 1.3568368351012716e-08, "logits/chosen": -1.2710386514663696, "logits/rejected": -1.2004417181015015, "logps/chosen": -72.86195373535156, "logps/rejected": -189.37420654296875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.4070632457733154, "rewards/margins": 9.87858772277832, "rewards/rejected": -11.285650253295898, "step": 1228 }, { "epoch": 1.87, "learning_rate": 1.3263111365360014e-08, "logits/chosen": -1.1257786750793457, "logits/rejected": -0.9959360361099243, "logps/chosen": -74.35279846191406, "logps/rejected": -210.28366088867188, "loss": 0.0473, "rewards/accuracies": 0.9375, "rewards/chosen": -1.981217384338379, "rewards/margins": 11.774518966674805, "rewards/rejected": -13.7557373046875, "step": 1229 }, { "epoch": 1.87, "learning_rate": 1.2961281192247097e-08, "logits/chosen": -1.075447678565979, "logits/rejected": -1.0008114576339722, "logps/chosen": -87.16339111328125, "logps/rejected": -248.26565551757812, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.7712650299072266, "rewards/margins": 14.019671440124512, "rewards/rejected": -15.790936470031738, "step": 1230 }, { "epoch": 1.87, "learning_rate": 1.2662879956673089e-08, "logits/chosen": -1.1833305358886719, "logits/rejected": -1.167616605758667, "logps/chosen": -70.53716278076172, "logps/rejected": -180.77481079101562, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.8839368224143982, "rewards/margins": 10.646862030029297, "rewards/rejected": -11.530799865722656, "step": 1231 }, { "epoch": 1.87, "learning_rate": 1.236790975949592e-08, "logits/chosen": -1.132072925567627, "logits/rejected": -1.056206226348877, "logps/chosen": -59.7764778137207, "logps/rejected": -148.61737060546875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.8665260672569275, "rewards/margins": 7.744040489196777, "rewards/rejected": -8.610567092895508, "step": 1232 }, { "epoch": 1.87, "learning_rate": 1.2076372677417734e-08, "logits/chosen": -1.2615550756454468, "logits/rejected": -1.288482904434204, "logps/chosen": -63.36968994140625, "logps/rejected": -149.8604278564453, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.965242862701416, "rewards/margins": 8.132116317749023, "rewards/rejected": -10.097360610961914, "step": 1233 }, { "epoch": 1.87, "learning_rate": 1.1788270762970565e-08, "logits/chosen": -1.0883687734603882, "logits/rejected": -1.0756895542144775, "logps/chosen": -84.82898712158203, "logps/rejected": -194.68531799316406, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.6581790447235107, "rewards/margins": 10.328180313110352, "rewards/rejected": -12.986358642578125, "step": 1234 }, { "epoch": 1.88, "learning_rate": 1.1503606044500957e-08, "logits/chosen": -1.0876470804214478, "logits/rejected": -1.0359283685684204, "logps/chosen": -93.50402069091797, "logps/rejected": -222.7373809814453, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.1938717365264893, "rewards/margins": 12.126307487487793, "rewards/rejected": -14.320178031921387, "step": 1235 }, { "epoch": 1.88, "learning_rate": 1.1222380526156927e-08, "logits/chosen": -1.1725844144821167, "logits/rejected": -1.0043030977249146, "logps/chosen": -95.12078094482422, "logps/rejected": -236.8339385986328, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.277890205383301, "rewards/margins": 11.719527244567871, "rewards/rejected": -14.997417449951172, "step": 1236 }, { "epoch": 1.88, "learning_rate": 1.0944596187872745e-08, "logits/chosen": -1.194044589996338, "logits/rejected": -1.20572030544281, "logps/chosen": -68.72064208984375, "logps/rejected": -159.9511260986328, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.520249843597412, "rewards/margins": 8.89706802368164, "rewards/rejected": -10.417318344116211, "step": 1237 }, { "epoch": 1.88, "learning_rate": 1.0670254985355731e-08, "logits/chosen": -1.1252317428588867, "logits/rejected": -0.9940766096115112, "logps/chosen": -79.25250244140625, "logps/rejected": -223.510498046875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.9784678220748901, "rewards/margins": 12.678990364074707, "rewards/rejected": -14.65745735168457, "step": 1238 }, { "epoch": 1.88, "learning_rate": 1.0399358850072038e-08, "logits/chosen": -1.0572007894515991, "logits/rejected": -0.9553701281547546, "logps/chosen": -84.38623809814453, "logps/rejected": -227.75350952148438, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -2.6427552700042725, "rewards/margins": 12.430475234985352, "rewards/rejected": -15.073230743408203, "step": 1239 }, { "epoch": 1.88, "learning_rate": 1.0131909689233442e-08, "logits/chosen": -1.1975586414337158, "logits/rejected": -1.1328387260437012, "logps/chosen": -76.48176574707031, "logps/rejected": -194.64166259765625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.5488111972808838, "rewards/margins": 10.741307258605957, "rewards/rejected": -12.290119171142578, "step": 1240 }, { "epoch": 1.89, "learning_rate": 9.86790938578358e-09, "logits/chosen": -1.2658780813217163, "logits/rejected": -1.2129406929016113, "logps/chosen": -70.70858001708984, "logps/rejected": -150.29287719726562, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.334958553314209, "rewards/margins": 7.42769718170166, "rewards/rejected": -9.762656211853027, "step": 1241 }, { "epoch": 1.89, "learning_rate": 9.607359798384784e-09, "logits/chosen": -1.0291016101837158, "logits/rejected": -0.9019633531570435, "logps/chosen": -71.52269744873047, "logps/rejected": -220.78086853027344, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.1930785179138184, "rewards/margins": 12.374126434326172, "rewards/rejected": -13.567205429077148, "step": 1242 }, { "epoch": 1.89, "learning_rate": 9.35026276140516e-09, "logits/chosen": -1.429419755935669, "logits/rejected": -1.4611481428146362, "logps/chosen": -75.60625457763672, "logps/rejected": -172.8060760498047, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9017477035522461, "rewards/margins": 9.8096284866333, "rewards/rejected": -10.711376190185547, "step": 1243 }, { "epoch": 1.89, "learning_rate": 9.096620084905471e-09, "logits/chosen": -1.2283917665481567, "logits/rejected": -1.1737825870513916, "logps/chosen": -65.57634735107422, "logps/rejected": -171.305419921875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.3293113708496094, "rewards/margins": 9.985025405883789, "rewards/rejected": -11.314336776733398, "step": 1244 }, { "epoch": 1.89, "learning_rate": 8.846433554626443e-09, "logits/chosen": -1.1852015256881714, "logits/rejected": -1.154024600982666, "logps/chosen": -94.73068237304688, "logps/rejected": -223.88473510742188, "loss": 0.0217, "rewards/accuracies": 0.9375, "rewards/chosen": -3.0769922733306885, "rewards/margins": 12.202123641967773, "rewards/rejected": -15.279114723205566, "step": 1245 }, { "epoch": 1.89, "learning_rate": 8.59970493197626e-09, "logits/chosen": -1.3885767459869385, "logits/rejected": -1.3077125549316406, "logps/chosen": -95.191162109375, "logps/rejected": -229.9364013671875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.3996024131774902, "rewards/margins": 12.123512268066406, "rewards/rejected": -13.523114204406738, "step": 1246 }, { "epoch": 1.89, "learning_rate": 8.356435954018193e-09, "logits/chosen": -1.3719979524612427, "logits/rejected": -1.3080120086669922, "logps/chosen": -69.03075408935547, "logps/rejected": -218.9082489013672, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.759949803352356, "rewards/margins": 13.311388969421387, "rewards/rejected": -14.071337699890137, "step": 1247 }, { "epoch": 1.9, "learning_rate": 8.11662833345822e-09, "logits/chosen": -0.9629684090614319, "logits/rejected": -0.8659667372703552, "logps/chosen": -73.18289184570312, "logps/rejected": -200.80935668945312, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -1.4222288131713867, "rewards/margins": 10.841693878173828, "rewards/rejected": -12.263922691345215, "step": 1248 }, { "epoch": 1.9, "learning_rate": 7.880283758633033e-09, "logits/chosen": -1.3673051595687866, "logits/rejected": -1.3786245584487915, "logps/chosen": -103.65737915039062, "logps/rejected": -298.3373718261719, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.8675320148468018, "rewards/margins": 17.153118133544922, "rewards/rejected": -19.020648956298828, "step": 1249 }, { "epoch": 1.9, "learning_rate": 7.647403893498106e-09, "logits/chosen": -1.2449382543563843, "logits/rejected": -1.1799533367156982, "logps/chosen": -80.3458023071289, "logps/rejected": -179.2467803955078, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.0060768127441406, "rewards/margins": 9.305712699890137, "rewards/rejected": -12.311788558959961, "step": 1250 }, { "epoch": 1.9, "learning_rate": 7.417990377616312e-09, "logits/chosen": -1.165990948677063, "logits/rejected": -1.119705080986023, "logps/chosen": -95.58226013183594, "logps/rejected": -220.37850952148438, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -2.774306535720825, "rewards/margins": 11.19355583190918, "rewards/rejected": -13.967863082885742, "step": 1251 }, { "epoch": 1.9, "learning_rate": 7.1920448261457715e-09, "logits/chosen": -1.0052763223648071, "logits/rejected": -0.9632813334465027, "logps/chosen": -73.35610961914062, "logps/rejected": -190.4112091064453, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.2970337867736816, "rewards/margins": 10.754767417907715, "rewards/rejected": -12.051801681518555, "step": 1252 }, { "epoch": 1.9, "learning_rate": 6.9695688298290754e-09, "logits/chosen": -1.1739317178726196, "logits/rejected": -1.069629430770874, "logps/chosen": -55.94324493408203, "logps/rejected": -171.08547973632812, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.967921793460846, "rewards/margins": 10.362225532531738, "rewards/rejected": -11.330146789550781, "step": 1253 }, { "epoch": 1.91, "learning_rate": 6.750563954981636e-09, "logits/chosen": -1.1875897645950317, "logits/rejected": -1.1473288536071777, "logps/chosen": -61.80609130859375, "logps/rejected": -234.46588134765625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.11906401813030243, "rewards/margins": 15.78310489654541, "rewards/rejected": -15.902167320251465, "step": 1254 }, { "epoch": 1.91, "learning_rate": 6.535031743480968e-09, "logits/chosen": -0.9607847332954407, "logits/rejected": -0.8286802172660828, "logps/chosen": -81.98706817626953, "logps/rejected": -215.73744201660156, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.6667954921722412, "rewards/margins": 11.247720718383789, "rewards/rejected": -12.91451644897461, "step": 1255 }, { "epoch": 1.91, "learning_rate": 6.322973712755697e-09, "logits/chosen": -1.1424051523208618, "logits/rejected": -1.085515022277832, "logps/chosen": -55.21360778808594, "logps/rejected": -161.61468505859375, "loss": 0.0143, "rewards/accuracies": 0.9375, "rewards/chosen": -0.26040124893188477, "rewards/margins": 10.282100677490234, "rewards/rejected": -10.542502403259277, "step": 1256 }, { "epoch": 1.91, "learning_rate": 6.1143913557749615e-09, "logits/chosen": -1.2211065292358398, "logits/rejected": -1.0838934183120728, "logps/chosen": -73.61480712890625, "logps/rejected": -193.85784912109375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.3676726818084717, "rewards/margins": 10.925106048583984, "rewards/rejected": -12.292778968811035, "step": 1257 }, { "epoch": 1.91, "learning_rate": 5.9092861410376945e-09, "logits/chosen": -1.2819068431854248, "logits/rejected": -1.2782381772994995, "logps/chosen": -71.67346954345703, "logps/rejected": -198.6710662841797, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.2299200296401978, "rewards/margins": 11.510444641113281, "rewards/rejected": -12.740364074707031, "step": 1258 }, { "epoch": 1.91, "learning_rate": 5.7076595125624684e-09, "logits/chosen": -1.0552005767822266, "logits/rejected": -1.138450026512146, "logps/chosen": -73.14871215820312, "logps/rejected": -144.5664825439453, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.1182124614715576, "rewards/margins": 7.121225357055664, "rewards/rejected": -9.239437103271484, "step": 1259 }, { "epoch": 1.91, "learning_rate": 5.509512889877332e-09, "logits/chosen": -1.1288034915924072, "logits/rejected": -1.0820059776306152, "logps/chosen": -87.96611022949219, "logps/rejected": -205.0126953125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.589266300201416, "rewards/margins": 10.964679718017578, "rewards/rejected": -12.553946495056152, "step": 1260 }, { "epoch": 1.92, "learning_rate": 5.314847668009881e-09, "logits/chosen": -1.153718113899231, "logits/rejected": -1.0785784721374512, "logps/chosen": -76.9947280883789, "logps/rejected": -193.70933532714844, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.0610992908477783, "rewards/margins": 10.301348686218262, "rewards/rejected": -12.362447738647461, "step": 1261 }, { "epoch": 1.92, "learning_rate": 5.123665217477036e-09, "logits/chosen": -1.086852788925171, "logits/rejected": -0.9465436935424805, "logps/chosen": -58.998016357421875, "logps/rejected": -206.93099975585938, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.247649073600769, "rewards/margins": 12.062419891357422, "rewards/rejected": -13.310070037841797, "step": 1262 }, { "epoch": 1.92, "learning_rate": 4.935966884275944e-09, "logits/chosen": -0.9989705681800842, "logits/rejected": -1.0006415843963623, "logps/chosen": -40.644771575927734, "logps/rejected": -139.12960815429688, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.12693235278129578, "rewards/margins": 9.171871185302734, "rewards/rejected": -9.29880428314209, "step": 1263 }, { "epoch": 1.92, "learning_rate": 4.751753989874152e-09, "logits/chosen": -1.135905146598816, "logits/rejected": -1.105804681777954, "logps/chosen": -83.68693542480469, "logps/rejected": -197.93209838867188, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -1.8465352058410645, "rewards/margins": 10.878917694091797, "rewards/rejected": -12.725454330444336, "step": 1264 }, { "epoch": 1.92, "learning_rate": 4.571027831200336e-09, "logits/chosen": -1.1909990310668945, "logits/rejected": -1.2340352535247803, "logps/chosen": -69.18940734863281, "logps/rejected": -165.2108917236328, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -1.2262462377548218, "rewards/margins": 9.280619621276855, "rewards/rejected": -10.506864547729492, "step": 1265 }, { "epoch": 1.92, "learning_rate": 4.393789680635307e-09, "logits/chosen": -1.2873908281326294, "logits/rejected": -1.2333879470825195, "logps/chosen": -108.88398742675781, "logps/rejected": -268.0072326660156, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -3.34629225730896, "rewards/margins": 13.214006423950195, "rewards/rejected": -16.560298919677734, "step": 1266 }, { "epoch": 1.92, "learning_rate": 4.2200407860029655e-09, "logits/chosen": -1.0964261293411255, "logits/rejected": -0.9515805244445801, "logps/chosen": -83.07080078125, "logps/rejected": -202.83042907714844, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.1373143196105957, "rewards/margins": 10.945977210998535, "rewards/rejected": -13.083292007446289, "step": 1267 }, { "epoch": 1.93, "learning_rate": 4.049782370561583e-09, "logits/chosen": -1.051609992980957, "logits/rejected": -0.8953971266746521, "logps/chosen": -67.84369659423828, "logps/rejected": -204.5872039794922, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.2447519302368164, "rewards/margins": 11.80958366394043, "rewards/rejected": -13.054337501525879, "step": 1268 }, { "epoch": 1.93, "learning_rate": 3.8830156329949235e-09, "logits/chosen": -0.9912279844284058, "logits/rejected": -0.9690757393836975, "logps/chosen": -71.34811401367188, "logps/rejected": -150.02572631835938, "loss": 0.0321, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4374864101409912, "rewards/margins": 8.00107192993164, "rewards/rejected": -9.438558578491211, "step": 1269 }, { "epoch": 1.93, "learning_rate": 3.719741747404248e-09, "logits/chosen": -1.1927863359451294, "logits/rejected": -1.2303924560546875, "logps/chosen": -59.63166809082031, "logps/rejected": -160.16390991210938, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.2459349632263184, "rewards/margins": 9.462393760681152, "rewards/rejected": -10.708330154418945, "step": 1270 }, { "epoch": 1.93, "learning_rate": 3.5599618632997097e-09, "logits/chosen": -1.103395938873291, "logits/rejected": -1.0654138326644897, "logps/chosen": -76.19361114501953, "logps/rejected": -192.7483367919922, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0147182941436768, "rewards/margins": 11.459994316101074, "rewards/rejected": -12.474712371826172, "step": 1271 }, { "epoch": 1.93, "learning_rate": 3.403677105592306e-09, "logits/chosen": -1.1428433656692505, "logits/rejected": -1.1347157955169678, "logps/chosen": -62.77545166015625, "logps/rejected": -167.58773803710938, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.135177731513977, "rewards/margins": 10.042524337768555, "rewards/rejected": -11.177702903747559, "step": 1272 }, { "epoch": 1.93, "learning_rate": 3.2508885745861637e-09, "logits/chosen": -1.2463092803955078, "logits/rejected": -1.1767163276672363, "logps/chosen": -87.07160949707031, "logps/rejected": -209.8955078125, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -2.458374500274658, "rewards/margins": 10.535346984863281, "rewards/rejected": -12.993721961975098, "step": 1273 }, { "epoch": 1.94, "learning_rate": 3.1015973459704857e-09, "logits/chosen": -1.2347352504730225, "logits/rejected": -1.1266793012619019, "logps/chosen": -109.47550964355469, "logps/rejected": -266.93524169921875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -3.782963752746582, "rewards/margins": 13.678717613220215, "rewards/rejected": -17.461681365966797, "step": 1274 }, { "epoch": 1.94, "learning_rate": 2.9558044708123397e-09, "logits/chosen": -0.8758256435394287, "logits/rejected": -0.75980544090271, "logps/chosen": -43.183082580566406, "logps/rejected": -126.92056274414062, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.26970601081848145, "rewards/margins": 7.577888488769531, "rewards/rejected": -7.847594738006592, "step": 1275 }, { "epoch": 1.94, "learning_rate": 2.813510975548772e-09, "logits/chosen": -1.201333999633789, "logits/rejected": -1.1146811246871948, "logps/chosen": -92.7200698852539, "logps/rejected": -198.17112731933594, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -2.748230457305908, "rewards/margins": 9.353466033935547, "rewards/rejected": -12.101696968078613, "step": 1276 }, { "epoch": 1.94, "learning_rate": 2.6747178619800913e-09, "logits/chosen": -1.3739142417907715, "logits/rejected": -1.3986132144927979, "logps/chosen": -59.81341552734375, "logps/rejected": -179.90093994140625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.376999855041504, "rewards/margins": 10.636183738708496, "rewards/rejected": -12.01318359375, "step": 1277 }, { "epoch": 1.94, "learning_rate": 2.539426107262599e-09, "logits/chosen": -1.2071001529693604, "logits/rejected": -1.1412181854248047, "logps/chosen": -110.31078338623047, "logps/rejected": -285.4696044921875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.4827301502227783, "rewards/margins": 16.43672752380371, "rewards/rejected": -18.919456481933594, "step": 1278 }, { "epoch": 1.94, "learning_rate": 2.407636663901591e-09, "logits/chosen": -1.0972704887390137, "logits/rejected": -0.9204725623130798, "logps/chosen": -72.80489349365234, "logps/rejected": -211.97796630859375, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.093588948249817, "rewards/margins": 12.024868965148926, "rewards/rejected": -13.11845874786377, "step": 1279 }, { "epoch": 1.94, "learning_rate": 2.2793504597446998e-09, "logits/chosen": -1.2425183057785034, "logits/rejected": -1.156579613685608, "logps/chosen": -91.27490234375, "logps/rejected": -224.80088806152344, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.0254929065704346, "rewards/margins": 12.998912811279297, "rewards/rejected": -15.024404525756836, "step": 1280 }, { "epoch": 1.95, "learning_rate": 2.15456839797562e-09, "logits/chosen": -1.0614230632781982, "logits/rejected": -0.9386066794395447, "logps/chosen": -96.80696105957031, "logps/rejected": -256.17938232421875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -2.9327540397644043, "rewards/margins": 13.764790534973145, "rewards/rejected": -16.69754409790039, "step": 1281 }, { "epoch": 1.95, "learning_rate": 2.0332913571074474e-09, "logits/chosen": -1.1535450220108032, "logits/rejected": -1.2190803289413452, "logps/chosen": -85.00349426269531, "logps/rejected": -179.6030731201172, "loss": 0.0466, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5289652347564697, "rewards/margins": 9.103918075561523, "rewards/rejected": -11.63288402557373, "step": 1282 }, { "epoch": 1.95, "learning_rate": 1.9155201909765717e-09, "logits/chosen": -0.8580641150474548, "logits/rejected": -0.7709234952926636, "logps/chosen": -69.90385437011719, "logps/rejected": -167.78280639648438, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.7532331943511963, "rewards/margins": 9.278640747070312, "rewards/rejected": -11.031872749328613, "step": 1283 }, { "epoch": 1.95, "learning_rate": 1.8012557287367391e-09, "logits/chosen": -1.0224920511245728, "logits/rejected": -1.008731484413147, "logps/chosen": -79.84169006347656, "logps/rejected": -211.30001831054688, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5303840637207031, "rewards/margins": 12.468156814575195, "rewards/rejected": -13.998542785644531, "step": 1284 }, { "epoch": 1.95, "learning_rate": 1.6904987748532217e-09, "logits/chosen": -1.0827257633209229, "logits/rejected": -0.9155064225196838, "logps/chosen": -97.63224029541016, "logps/rejected": -269.2901306152344, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -2.41310453414917, "rewards/margins": 15.162589073181152, "rewards/rejected": -17.575693130493164, "step": 1285 }, { "epoch": 1.95, "learning_rate": 1.5832501090968786e-09, "logits/chosen": -0.9661201238632202, "logits/rejected": -0.7906744480133057, "logps/chosen": -66.0601806640625, "logps/rejected": -214.29466247558594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5611439943313599, "rewards/margins": 12.344537734985352, "rewards/rejected": -13.905682563781738, "step": 1286 }, { "epoch": 1.96, "learning_rate": 1.479510486539215e-09, "logits/chosen": -0.9654014110565186, "logits/rejected": -0.713676929473877, "logps/chosen": -72.038818359375, "logps/rejected": -260.5837097167969, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.1215585470199585, "rewards/margins": 14.717130661010742, "rewards/rejected": -15.838688850402832, "step": 1287 }, { "epoch": 1.96, "learning_rate": 1.3792806375464427e-09, "logits/chosen": -1.288902759552002, "logits/rejected": -1.244452953338623, "logps/chosen": -76.58100891113281, "logps/rejected": -184.2479248046875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.9578903913497925, "rewards/margins": 9.693171501159668, "rewards/rejected": -11.65106201171875, "step": 1288 }, { "epoch": 1.96, "learning_rate": 1.2825612677748732e-09, "logits/chosen": -1.3379647731781006, "logits/rejected": -1.2519395351409912, "logps/chosen": -67.4783706665039, "logps/rejected": -164.44052124023438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.609692394733429, "rewards/margins": 9.941685676574707, "rewards/rejected": -10.55137825012207, "step": 1289 }, { "epoch": 1.96, "learning_rate": 1.1893530581654766e-09, "logits/chosen": -1.2620232105255127, "logits/rejected": -1.195061445236206, "logps/chosen": -78.19368743896484, "logps/rejected": -195.7747344970703, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.8783726692199707, "rewards/margins": 9.804338455200195, "rewards/rejected": -11.682710647583008, "step": 1290 }, { "epoch": 1.96, "learning_rate": 1.0996566649395522e-09, "logits/chosen": -1.1842918395996094, "logits/rejected": -1.0768086910247803, "logps/chosen": -81.30087280273438, "logps/rejected": -228.88381958007812, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.9906009435653687, "rewards/margins": 12.947613716125488, "rewards/rejected": -14.938216209411621, "step": 1291 }, { "epoch": 1.96, "learning_rate": 1.013472719593733e-09, "logits/chosen": -1.0303940773010254, "logits/rejected": -0.9588882923126221, "logps/chosen": -61.77070617675781, "logps/rejected": -165.2044677734375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1046134233474731, "rewards/margins": 9.36551570892334, "rewards/rejected": -10.470128059387207, "step": 1292 }, { "epoch": 1.96, "learning_rate": 9.308018288957109e-10, "logits/chosen": -1.2706711292266846, "logits/rejected": -1.1618214845657349, "logps/chosen": -98.66361999511719, "logps/rejected": -245.0448760986328, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -2.179671287536621, "rewards/margins": 12.922852516174316, "rewards/rejected": -15.102523803710938, "step": 1293 }, { "epoch": 1.97, "learning_rate": 8.516445748800172e-10, "logits/chosen": -1.0659732818603516, "logits/rejected": -0.9968633651733398, "logps/chosen": -99.51051330566406, "logps/rejected": -270.474853515625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -2.3661701679229736, "rewards/margins": 14.43785285949707, "rewards/rejected": -16.80402374267578, "step": 1294 }, { "epoch": 1.97, "learning_rate": 7.760015148436938e-10, "logits/chosen": -1.1200745105743408, "logits/rejected": -1.0644164085388184, "logps/chosen": -74.07913208007812, "logps/rejected": -169.44464111328125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.399211883544922, "rewards/margins": 8.491737365722656, "rewards/rejected": -10.890949249267578, "step": 1295 }, { "epoch": 1.97, "learning_rate": 7.038731813426291e-10, "logits/chosen": -0.9871138334274292, "logits/rejected": -0.9330585598945618, "logps/chosen": -66.69938659667969, "logps/rejected": -158.9303436279297, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -1.036125659942627, "rewards/margins": 8.828483581542969, "rewards/rejected": -9.864608764648438, "step": 1296 }, { "epoch": 1.97, "learning_rate": 6.352600821876719e-10, "logits/chosen": -1.2522269487380981, "logits/rejected": -1.1094790697097778, "logps/chosen": -101.69255065917969, "logps/rejected": -275.8647766113281, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -2.7666094303131104, "rewards/margins": 15.001011848449707, "rewards/rejected": -17.767621994018555, "step": 1297 }, { "epoch": 1.97, "learning_rate": 5.701627004411347e-10, "logits/chosen": -0.8893463611602783, "logits/rejected": -0.7074088454246521, "logps/chosen": -83.16688537597656, "logps/rejected": -230.15130615234375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.377328872680664, "rewards/margins": 12.509635925292969, "rewards/rejected": -14.886964797973633, "step": 1298 }, { "epoch": 1.97, "learning_rate": 5.08581494413296e-10, "logits/chosen": -1.224255084991455, "logits/rejected": -1.1592600345611572, "logps/chosen": -103.52490234375, "logps/rejected": -236.6667022705078, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.5443618297576904, "rewards/margins": 12.799535751342773, "rewards/rejected": -15.34389591217041, "step": 1299 }, { "epoch": 1.97, "learning_rate": 4.5051689765929213e-10, "logits/chosen": -1.092574119567871, "logits/rejected": -0.9000959396362305, "logps/chosen": -79.19598388671875, "logps/rejected": -217.1548309326172, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -1.9727332592010498, "rewards/margins": 12.164961814880371, "rewards/rejected": -14.1376953125, "step": 1300 }, { "epoch": 1.98, "learning_rate": 3.959693189757862e-10, "logits/chosen": -1.3492411375045776, "logits/rejected": -1.288551926612854, "logps/chosen": -71.91275024414062, "logps/rejected": -187.95205688476562, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.8926293849945068, "rewards/margins": 11.140167236328125, "rewards/rejected": -12.032794952392578, "step": 1301 }, { "epoch": 1.98, "learning_rate": 3.4493914239858146e-10, "logits/chosen": -1.2096384763717651, "logits/rejected": -1.1264015436172485, "logps/chosen": -84.34400939941406, "logps/rejected": -215.83203125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -2.3935375213623047, "rewards/margins": 11.227790832519531, "rewards/rejected": -13.621328353881836, "step": 1302 }, { "epoch": 1.98, "learning_rate": 2.974267271994013e-10, "logits/chosen": -1.2268598079681396, "logits/rejected": -1.1861000061035156, "logps/chosen": -81.8856430053711, "logps/rejected": -210.980712890625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.220350980758667, "rewards/margins": 12.413765907287598, "rewards/rejected": -13.63411808013916, "step": 1303 }, { "epoch": 1.98, "learning_rate": 2.5343240788378016e-10, "logits/chosen": -1.2381658554077148, "logits/rejected": -1.0283713340759277, "logps/chosen": -114.27998352050781, "logps/rejected": -293.5376892089844, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -3.651200771331787, "rewards/margins": 15.330041885375977, "rewards/rejected": -18.98124122619629, "step": 1304 }, { "epoch": 1.98, "learning_rate": 2.1295649418839877e-10, "logits/chosen": -1.2112208604812622, "logits/rejected": -1.0418407917022705, "logps/chosen": -97.28136444091797, "logps/rejected": -230.7408447265625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.5646955966949463, "rewards/margins": 12.14634895324707, "rewards/rejected": -14.711043357849121, "step": 1305 }, { "epoch": 1.98, "learning_rate": 1.7599927107908585e-10, "logits/chosen": -1.0789711475372314, "logits/rejected": -0.9465877413749695, "logps/chosen": -71.02899932861328, "logps/rejected": -193.57167053222656, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.4242923259735107, "rewards/margins": 10.871113777160645, "rewards/rejected": -12.295406341552734, "step": 1306 }, { "epoch": 1.99, "learning_rate": 1.4256099874865312e-10, "logits/chosen": -1.2479493618011475, "logits/rejected": -1.1449650526046753, "logps/chosen": -92.4945297241211, "logps/rejected": -238.48211669921875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.360962390899658, "rewards/margins": 12.930143356323242, "rewards/rejected": -15.291106224060059, "step": 1307 }, { "epoch": 1.99, "learning_rate": 1.1264191261528555e-10, "logits/chosen": -0.8584355115890503, "logits/rejected": -0.8667644262313843, "logps/chosen": -54.118743896484375, "logps/rejected": -140.81704711914062, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.6013064384460449, "rewards/margins": 8.267008781433105, "rewards/rejected": -8.868314743041992, "step": 1308 }, { "epoch": 1.99, "learning_rate": 8.62422233207094e-11, "logits/chosen": -1.1232349872589111, "logits/rejected": -0.9467052221298218, "logps/chosen": -80.04724884033203, "logps/rejected": -226.53021240234375, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -1.100845217704773, "rewards/margins": 12.923338890075684, "rewards/rejected": -14.024184226989746, "step": 1309 }, { "epoch": 1.99, "learning_rate": 6.336211672880453e-11, "logits/chosen": -1.2444984912872314, "logits/rejected": -1.2211215496063232, "logps/chosen": -63.22529983520508, "logps/rejected": -168.42591857910156, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.8757107257843018, "rewards/margins": 9.665054321289062, "rewards/rejected": -10.540765762329102, "step": 1310 }, { "epoch": 1.99, "learning_rate": 4.4001753924105547e-11, "logits/chosen": -1.2597283124923706, "logits/rejected": -1.2255922555923462, "logps/chosen": -90.53179931640625, "logps/rejected": -219.01829528808594, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -3.364285707473755, "rewards/margins": 10.397770881652832, "rewards/rejected": -13.762057304382324, "step": 1311 }, { "epoch": 1.99, "learning_rate": 2.8161271211024628e-11, "logits/chosen": -1.2949028015136719, "logits/rejected": -1.2165987491607666, "logps/chosen": -69.34394836425781, "logps/rejected": -204.77670288085938, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.8771134614944458, "rewards/margins": 11.10797119140625, "rewards/rejected": -12.985086441040039, "step": 1312 }, { "epoch": 1.99, "learning_rate": 1.5840780112519325e-11, "logits/chosen": -1.2168735265731812, "logits/rejected": -1.1569671630859375, "logps/chosen": -73.16233825683594, "logps/rejected": -197.2447052001953, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.596511960029602, "rewards/margins": 11.405481338500977, "rewards/rejected": -13.001994132995605, "step": 1313 }, { "epoch": 2.0, "learning_rate": 7.0403673695373925e-12, "logits/chosen": -1.0676594972610474, "logits/rejected": -1.0000176429748535, "logps/chosen": -55.793033599853516, "logps/rejected": -137.31324768066406, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.1835592985153198, "rewards/margins": 7.81547212600708, "rewards/rejected": -8.999032020568848, "step": 1314 }, { "epoch": 2.0, "learning_rate": 1.7600949402951647e-12, "logits/chosen": -1.2632269859313965, "logits/rejected": -1.2024056911468506, "logps/chosen": -77.23323059082031, "logps/rejected": -208.28042602539062, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.5779087543487549, "rewards/margins": 12.250576972961426, "rewards/rejected": -13.828487396240234, "step": 1315 }, { "epoch": 2.0, "learning_rate": 0.0, "logits/chosen": -0.9433375000953674, "logits/rejected": -0.918114960193634, "logps/chosen": -55.77727508544922, "logps/rejected": -143.7599334716797, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.816506028175354, "rewards/margins": 8.368966102600098, "rewards/rejected": -9.185473442077637, "step": 1316 }, { "epoch": 2.0, "step": 1316, "total_flos": 0.0, "train_loss": 0.154733782048524, "train_runtime": 54790.8843, "train_samples_per_second": 3.076, "train_steps_per_second": 0.024 } ], "logging_steps": 1.0, "max_steps": 1316, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }