{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998527896363903, "eval_steps": 100, "global_step": 5094, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.803921568627451e-09, "logits/chosen": -2.973508358001709, "logits/rejected": -3.0340657234191895, "logps/chosen": -228.45870971679688, "logps/rejected": -221.87188720703125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.803921568627452e-08, "logits/chosen": -2.862595558166504, "logits/rejected": -2.9409985542297363, "logps/chosen": -276.31146240234375, "logps/rejected": -225.0379180908203, "loss": 0.6933, "rewards/accuracies": 0.5555555820465088, "rewards/chosen": 0.00011433070176281035, "rewards/margins": 0.0006582156638614833, "rewards/rejected": -0.0005438849912025034, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9607843137254904e-07, "logits/chosen": -2.9764370918273926, "logits/rejected": -2.8498854637145996, "logps/chosen": -256.53411865234375, "logps/rejected": -240.7257080078125, "loss": 0.6926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0033233885187655687, "rewards/margins": 0.001273788744583726, "rewards/rejected": 0.0020496000070124865, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.9411764705882356e-07, "logits/chosen": -3.0605838298797607, "logits/rejected": -3.031383991241455, "logps/chosen": -228.6864013671875, "logps/rejected": -264.53155517578125, "loss": 0.6921, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.004886351525783539, "rewards/margins": 0.0012393262004479766, "rewards/rejected": 0.0036470252089202404, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.921568627450981e-07, "logits/chosen": -2.8909895420074463, "logits/rejected": -2.947922468185425, "logps/chosen": -255.6454315185547, "logps/rejected": -217.5404815673828, "loss": 0.6919, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.014554759487509727, "rewards/margins": 0.0036192506086081266, "rewards/rejected": 0.010935508646070957, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.901960784313725e-07, "logits/chosen": -2.9534642696380615, "logits/rejected": -2.8623435497283936, "logps/chosen": -310.3286437988281, "logps/rejected": -262.513427734375, "loss": 0.6909, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.023258253931999207, "rewards/margins": 0.004231014288961887, "rewards/rejected": 0.019027236849069595, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "logits/chosen": -2.904386043548584, "logits/rejected": -2.8669381141662598, "logps/chosen": -252.37451171875, "logps/rejected": -206.24642944335938, "loss": 0.6893, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.02650384232401848, "rewards/margins": 0.010579499416053295, "rewards/rejected": 0.015924345701932907, "step": 60 }, { "epoch": 0.01, "learning_rate": 6.862745098039217e-07, "logits/chosen": -2.994832754135132, "logits/rejected": -2.9528610706329346, "logps/chosen": -273.45733642578125, "logps/rejected": -279.2757873535156, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.028729554265737534, "rewards/margins": 0.003322303295135498, "rewards/rejected": 0.025407250970602036, "step": 70 }, { "epoch": 0.02, "learning_rate": 7.843137254901962e-07, "logits/chosen": -2.755485773086548, "logits/rejected": -2.800650119781494, "logps/chosen": -242.27700805664062, "logps/rejected": -236.52713012695312, "loss": 0.6917, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.027540069073438644, "rewards/margins": 0.0055449483916163445, "rewards/rejected": 0.021995123475790024, "step": 80 }, { "epoch": 0.02, "learning_rate": 8.823529411764707e-07, "logits/chosen": -2.7677536010742188, "logits/rejected": -2.863986015319824, "logps/chosen": -220.959716796875, "logps/rejected": -232.0678253173828, "loss": 0.6881, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.030912240967154503, "rewards/margins": 0.008048823103308678, "rewards/rejected": 0.022863419726490974, "step": 90 }, { "epoch": 0.02, "learning_rate": 9.80392156862745e-07, "logits/chosen": -2.8327622413635254, "logits/rejected": -2.894792318344116, "logps/chosen": -234.09976196289062, "logps/rejected": -229.82955932617188, "loss": 0.6884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.031112805008888245, "rewards/margins": 0.009142523631453514, "rewards/rejected": 0.02197028137743473, "step": 100 }, { "epoch": 0.02, "eval_logits/chosen": -2.336580991744995, "eval_logits/rejected": -2.3475990295410156, "eval_logps/chosen": -259.33624267578125, "eval_logps/rejected": -230.2778778076172, "eval_loss": 0.6868417859077454, "eval_rewards/accuracies": 0.6145833134651184, "eval_rewards/chosen": 0.039044398814439774, "eval_rewards/margins": 0.010639672167599201, "eval_rewards/rejected": 0.028404729440808296, "eval_runtime": 477.4451, "eval_samples_per_second": 4.189, "eval_steps_per_second": 0.176, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.0784313725490197e-06, "logits/chosen": -2.8984100818634033, "logits/rejected": -2.901028633117676, "logps/chosen": -224.906494140625, "logps/rejected": -201.27830505371094, "loss": 0.6873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03233223035931587, "rewards/margins": 0.009733730927109718, "rewards/rejected": 0.022598499432206154, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "logits/chosen": -2.9261550903320312, "logits/rejected": -2.8849735260009766, "logps/chosen": -229.10400390625, "logps/rejected": -204.9363555908203, "loss": 0.6821, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03767221421003342, "rewards/margins": 0.0093983830884099, "rewards/rejected": 0.02827383577823639, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.2745098039215686e-06, "logits/chosen": -2.832059383392334, "logits/rejected": -2.76809024810791, "logps/chosen": -241.0215301513672, "logps/rejected": -262.1598205566406, "loss": 0.6838, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.04861343652009964, "rewards/margins": 0.0215291790664196, "rewards/rejected": 0.02708425745368004, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.3725490196078434e-06, "logits/chosen": -2.9472458362579346, "logits/rejected": -2.9169039726257324, "logps/chosen": -270.29833984375, "logps/rejected": -233.29098510742188, "loss": 0.6818, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.054968465119600296, "rewards/margins": 0.03214170038700104, "rewards/rejected": 0.022826772183179855, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.4705882352941177e-06, "logits/chosen": -2.8875279426574707, "logits/rejected": -2.8734564781188965, "logps/chosen": -274.0886535644531, "logps/rejected": -231.80947875976562, "loss": 0.6811, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.05963331460952759, "rewards/margins": 0.05601944774389267, "rewards/rejected": 0.003613865002989769, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.5686274509803923e-06, "logits/chosen": -2.930586338043213, "logits/rejected": -2.765791654586792, "logps/chosen": -236.7968292236328, "logps/rejected": -213.68270874023438, "loss": 0.674, "rewards/accuracies": 0.625, "rewards/chosen": 0.041575588285923004, "rewards/margins": 0.03660685569047928, "rewards/rejected": 0.004968726541846991, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-06, "logits/chosen": -2.907088041305542, "logits/rejected": -2.8094048500061035, "logps/chosen": -249.4150390625, "logps/rejected": -232.6487274169922, "loss": 0.6793, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.041034214198589325, "rewards/margins": 0.0442991778254509, "rewards/rejected": -0.003264959901571274, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "logits/chosen": -2.873412847518921, "logits/rejected": -2.9365124702453613, "logps/chosen": -230.6173858642578, "logps/rejected": -237.4728546142578, "loss": 0.6789, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.018897254019975662, "rewards/margins": 0.024724815040826797, "rewards/rejected": -0.00582756195217371, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.8627450980392158e-06, "logits/chosen": -2.8295395374298096, "logits/rejected": -2.749323844909668, "logps/chosen": -257.933837890625, "logps/rejected": -229.88223266601562, "loss": 0.6653, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03761621564626694, "rewards/margins": 0.06116952374577522, "rewards/rejected": -0.023553304374217987, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.96078431372549e-06, "logits/chosen": -2.7251996994018555, "logits/rejected": -2.806915760040283, "logps/chosen": -205.859619140625, "logps/rejected": -223.43838500976562, "loss": 0.6654, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.014988506212830544, "rewards/margins": 0.04508345201611519, "rewards/rejected": -0.030094945803284645, "step": 200 }, { "epoch": 0.04, "eval_logits/chosen": -2.258488178253174, "eval_logits/rejected": -2.263549327850342, "eval_logps/chosen": -259.9051818847656, "eval_logps/rejected": -235.06222534179688, "eval_loss": 0.6657090783119202, "eval_rewards/accuracies": 0.6398809552192688, "eval_rewards/chosen": 0.03335539624094963, "eval_rewards/margins": 0.052794355899095535, "eval_rewards/rejected": -0.019438959658145905, "eval_runtime": 472.5343, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.178, "step": 200 }, { "epoch": 0.04, "learning_rate": 2.058823529411765e-06, "logits/chosen": -2.9805192947387695, "logits/rejected": -2.8744161128997803, "logps/chosen": -288.2845764160156, "logps/rejected": -254.07949829101562, "loss": 0.6676, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.005574061069637537, "rewards/margins": 0.061119239777326584, "rewards/rejected": -0.05554518848657608, "step": 210 }, { "epoch": 0.04, "learning_rate": 2.1568627450980393e-06, "logits/chosen": -2.911360025405884, "logits/rejected": -2.8783349990844727, "logps/chosen": -235.96401977539062, "logps/rejected": -217.7150421142578, "loss": 0.6562, "rewards/accuracies": 0.625, "rewards/chosen": -0.04486488178372383, "rewards/margins": 0.06540326774120331, "rewards/rejected": -0.11026813834905624, "step": 220 }, { "epoch": 0.05, "learning_rate": 2.254901960784314e-06, "logits/chosen": -2.9542288780212402, "logits/rejected": -2.878969192504883, "logps/chosen": -260.88934326171875, "logps/rejected": -296.89776611328125, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": -0.05443768948316574, "rewards/margins": 0.04301251843571663, "rewards/rejected": -0.09745021164417267, "step": 230 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "logits/chosen": -2.817126512527466, "logits/rejected": -2.8219687938690186, "logps/chosen": -273.76239013671875, "logps/rejected": -246.7056121826172, "loss": 0.6503, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.019011136144399643, "rewards/margins": 0.09237994253635406, "rewards/rejected": -0.111391082406044, "step": 240 }, { "epoch": 0.05, "learning_rate": 2.450980392156863e-06, "logits/chosen": -2.8546347618103027, "logits/rejected": -2.9686641693115234, "logps/chosen": -275.5454406738281, "logps/rejected": -434.2581481933594, "loss": 0.6438, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03764430433511734, "rewards/margins": 0.1496858149766922, "rewards/rejected": -0.18733009696006775, "step": 250 }, { "epoch": 0.05, "learning_rate": 2.549019607843137e-06, "logits/chosen": -2.9442734718322754, "logits/rejected": -2.8972764015197754, "logps/chosen": -357.8099060058594, "logps/rejected": -298.7733459472656, "loss": 0.6497, "rewards/accuracies": 0.75, "rewards/chosen": -0.22853727638721466, "rewards/margins": 0.1591685563325882, "rewards/rejected": -0.38770583271980286, "step": 260 }, { "epoch": 0.05, "learning_rate": 2.647058823529412e-06, "logits/chosen": -2.94228196144104, "logits/rejected": -2.8907318115234375, "logps/chosen": -284.27130126953125, "logps/rejected": -285.43927001953125, "loss": 0.6602, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2485658824443817, "rewards/margins": 0.1097467914223671, "rewards/rejected": -0.3583126664161682, "step": 270 }, { "epoch": 0.05, "learning_rate": 2.7450980392156867e-06, "logits/chosen": -2.926578998565674, "logits/rejected": -2.8990461826324463, "logps/chosen": -319.3984680175781, "logps/rejected": -296.30694580078125, "loss": 0.69, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.27285149693489075, "rewards/margins": 0.02198920026421547, "rewards/rejected": -0.2948406934738159, "step": 280 }, { "epoch": 0.06, "learning_rate": 2.843137254901961e-06, "logits/chosen": -2.916912078857422, "logits/rejected": -2.9760031700134277, "logps/chosen": -317.46405029296875, "logps/rejected": -306.2522277832031, "loss": 0.6347, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.08358258008956909, "rewards/margins": 0.14197704195976257, "rewards/rejected": -0.22555962204933167, "step": 290 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.7295899391174316, "logits/rejected": -2.7394189834594727, "logps/chosen": -269.99737548828125, "logps/rejected": -244.5452880859375, "loss": 0.6346, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11713893711566925, "rewards/margins": 0.10082261264324188, "rewards/rejected": -0.21796154975891113, "step": 300 }, { "epoch": 0.06, "eval_logits/chosen": -2.2216763496398926, "eval_logits/rejected": -2.2106869220733643, "eval_logps/chosen": -288.878662109375, "eval_logps/rejected": -270.03985595703125, "eval_loss": 0.6431363821029663, "eval_rewards/accuracies": 0.6532738208770752, "eval_rewards/chosen": -0.2563799023628235, "eval_rewards/margins": 0.11283508688211441, "eval_rewards/rejected": -0.3692150413990021, "eval_runtime": 474.5307, "eval_samples_per_second": 4.215, "eval_steps_per_second": 0.177, "step": 300 }, { "epoch": 0.06, "learning_rate": 3.03921568627451e-06, "logits/chosen": -2.82896089553833, "logits/rejected": -2.8039159774780273, "logps/chosen": -269.8193359375, "logps/rejected": -215.48959350585938, "loss": 0.6101, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24714116752147675, "rewards/margins": 0.13896790146827698, "rewards/rejected": -0.38610905408859253, "step": 310 }, { "epoch": 0.06, "learning_rate": 3.1372549019607846e-06, "logits/chosen": -2.8394603729248047, "logits/rejected": -2.9360859394073486, "logps/chosen": -293.5857849121094, "logps/rejected": -340.95806884765625, "loss": 0.6425, "rewards/accuracies": 0.625, "rewards/chosen": -0.12740099430084229, "rewards/margins": 0.14778587222099304, "rewards/rejected": -0.2751868665218353, "step": 320 }, { "epoch": 0.06, "learning_rate": 3.2352941176470594e-06, "logits/chosen": -2.8742594718933105, "logits/rejected": -2.81569504737854, "logps/chosen": -277.4770202636719, "logps/rejected": -286.01336669921875, "loss": 0.6182, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.13143302500247955, "rewards/margins": 0.09543346613645554, "rewards/rejected": -0.2268664538860321, "step": 330 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.8065128326416016, "logits/rejected": -2.8967134952545166, "logps/chosen": -331.92132568359375, "logps/rejected": -347.8213806152344, "loss": 0.5963, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2281709909439087, "rewards/margins": 0.25101691484451294, "rewards/rejected": -0.47918787598609924, "step": 340 }, { "epoch": 0.07, "learning_rate": 3.431372549019608e-06, "logits/chosen": -2.7618370056152344, "logits/rejected": -2.784069538116455, "logps/chosen": -300.3914794921875, "logps/rejected": -342.68963623046875, "loss": 0.6123, "rewards/accuracies": 0.625, "rewards/chosen": -0.3338148295879364, "rewards/margins": 0.19719263911247253, "rewards/rejected": -0.5310074687004089, "step": 350 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "logits/chosen": -2.8726532459259033, "logits/rejected": -2.837023973464966, "logps/chosen": -314.0732421875, "logps/rejected": -322.673583984375, "loss": 0.6246, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22630402445793152, "rewards/margins": 0.29907548427581787, "rewards/rejected": -0.5253795385360718, "step": 360 }, { "epoch": 0.07, "learning_rate": 3.6274509803921573e-06, "logits/chosen": -2.6191344261169434, "logits/rejected": -2.686654567718506, "logps/chosen": -249.3698272705078, "logps/rejected": -269.16033935546875, "loss": 0.6096, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1989968717098236, "rewards/margins": 0.3198801875114441, "rewards/rejected": -0.5188770890235901, "step": 370 }, { "epoch": 0.07, "learning_rate": 3.7254901960784316e-06, "logits/chosen": -2.65582013130188, "logits/rejected": -2.6156890392303467, "logps/chosen": -316.6374206542969, "logps/rejected": -299.0272216796875, "loss": 0.5863, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3338475227355957, "rewards/margins": 0.2835896611213684, "rewards/rejected": -0.6174371838569641, "step": 380 }, { "epoch": 0.08, "learning_rate": 3.8235294117647055e-06, "logits/chosen": -2.82440447807312, "logits/rejected": -2.8799052238464355, "logps/chosen": -353.44732666015625, "logps/rejected": -368.5728454589844, "loss": 0.5932, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.632414698600769, "rewards/margins": 0.2788391709327698, "rewards/rejected": -0.9112539291381836, "step": 390 }, { "epoch": 0.08, "learning_rate": 3.92156862745098e-06, "logits/chosen": -2.7008981704711914, "logits/rejected": -2.69873309135437, "logps/chosen": -269.3892517089844, "logps/rejected": -307.84246826171875, "loss": 0.5888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38206809759140015, "rewards/margins": 0.2594672739505768, "rewards/rejected": -0.6415354013442993, "step": 400 }, { "epoch": 0.08, "eval_logits/chosen": -1.9904673099517822, "eval_logits/rejected": -1.9579482078552246, "eval_logps/chosen": -305.1883850097656, "eval_logps/rejected": -296.24200439453125, "eval_loss": 0.6161777973175049, "eval_rewards/accuracies": 0.6517857313156128, "eval_rewards/chosen": -0.4194769263267517, "eval_rewards/margins": 0.21175935864448547, "eval_rewards/rejected": -0.6312363147735596, "eval_runtime": 472.4636, "eval_samples_per_second": 4.233, "eval_steps_per_second": 0.178, "step": 400 }, { "epoch": 0.08, "learning_rate": 4.019607843137255e-06, "logits/chosen": -2.676201581954956, "logits/rejected": -2.6336846351623535, "logps/chosen": -273.431396484375, "logps/rejected": -287.57550048828125, "loss": 0.6385, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3741230368614197, "rewards/margins": 0.1745728999376297, "rewards/rejected": -0.5486959218978882, "step": 410 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "logits/chosen": -2.614751100540161, "logits/rejected": -2.624110221862793, "logps/chosen": -290.9742431640625, "logps/rejected": -326.0084533691406, "loss": 0.5882, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3570622503757477, "rewards/margins": 0.3694344162940979, "rewards/rejected": -0.7264967560768127, "step": 420 }, { "epoch": 0.08, "learning_rate": 4.215686274509805e-06, "logits/chosen": -2.639557361602783, "logits/rejected": -2.620819568634033, "logps/chosen": -268.70245361328125, "logps/rejected": -312.71990966796875, "loss": 0.6095, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3011035919189453, "rewards/margins": 0.4379960894584656, "rewards/rejected": -0.7390996813774109, "step": 430 }, { "epoch": 0.09, "learning_rate": 4.313725490196079e-06, "logits/chosen": -2.6626319885253906, "logits/rejected": -2.6842870712280273, "logps/chosen": -302.835205078125, "logps/rejected": -280.4335021972656, "loss": 0.6034, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.45034360885620117, "rewards/margins": 0.2182246446609497, "rewards/rejected": -0.6685682535171509, "step": 440 }, { "epoch": 0.09, "learning_rate": 4.411764705882353e-06, "logits/chosen": -2.625410556793213, "logits/rejected": -2.690067768096924, "logps/chosen": -291.3028869628906, "logps/rejected": -334.9422912597656, "loss": 0.5729, "rewards/accuracies": 0.75, "rewards/chosen": -0.5663882493972778, "rewards/margins": 0.3346121907234192, "rewards/rejected": -0.9010004997253418, "step": 450 }, { "epoch": 0.09, "learning_rate": 4.509803921568628e-06, "logits/chosen": -2.628746509552002, "logits/rejected": -2.707404851913452, "logps/chosen": -299.4609375, "logps/rejected": -319.9123229980469, "loss": 0.5846, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6919635534286499, "rewards/margins": 0.19277557730674744, "rewards/rejected": -0.884739100933075, "step": 460 }, { "epoch": 0.09, "learning_rate": 4.607843137254902e-06, "logits/chosen": -2.645448684692383, "logits/rejected": -2.391211748123169, "logps/chosen": -413.65765380859375, "logps/rejected": -357.4613952636719, "loss": 0.6402, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6778701543807983, "rewards/margins": 0.4130594730377197, "rewards/rejected": -1.090929627418518, "step": 470 }, { "epoch": 0.09, "learning_rate": 4.705882352941177e-06, "logits/chosen": -2.6332736015319824, "logits/rejected": -2.4814000129699707, "logps/chosen": -250.36038208007812, "logps/rejected": -264.98663330078125, "loss": 0.5656, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4399610161781311, "rewards/margins": 0.33872875571250916, "rewards/rejected": -0.7786898016929626, "step": 480 }, { "epoch": 0.1, "learning_rate": 4.803921568627452e-06, "logits/chosen": -2.584453821182251, "logits/rejected": -2.591069221496582, "logps/chosen": -395.53118896484375, "logps/rejected": -409.98126220703125, "loss": 0.5925, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5409294366836548, "rewards/margins": 0.5661200881004333, "rewards/rejected": -1.107049584388733, "step": 490 }, { "epoch": 0.1, "learning_rate": 4.901960784313726e-06, "logits/chosen": -2.58937931060791, "logits/rejected": -2.5619587898254395, "logps/chosen": -391.55841064453125, "logps/rejected": -380.4855041503906, "loss": 0.5806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9605814218521118, "rewards/margins": 0.352530300617218, "rewards/rejected": -1.3131117820739746, "step": 500 }, { "epoch": 0.1, "eval_logits/chosen": -0.5252847075462341, "eval_logits/rejected": -0.4989687204360962, "eval_logps/chosen": -394.94683837890625, "eval_logps/rejected": -398.19195556640625, "eval_loss": 0.5915763974189758, "eval_rewards/accuracies": 0.663690447807312, "eval_rewards/chosen": -1.3170610666275024, "eval_rewards/margins": 0.33367499709129333, "eval_rewards/rejected": -1.6507360935211182, "eval_runtime": 466.1502, "eval_samples_per_second": 4.29, "eval_steps_per_second": 0.18, "step": 500 }, { "epoch": 0.1, "learning_rate": 5e-06, "logits/chosen": -2.3698482513427734, "logits/rejected": -2.3730268478393555, "logps/chosen": -325.0603942871094, "logps/rejected": -333.634033203125, "loss": 0.6341, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3440351486206055, "rewards/margins": 0.16630719602108002, "rewards/rejected": -1.5103422403335571, "step": 510 }, { "epoch": 0.1, "learning_rate": 4.999941289086112e-06, "logits/chosen": -2.434908628463745, "logits/rejected": -2.4711365699768066, "logps/chosen": -319.7598571777344, "logps/rejected": -411.1217346191406, "loss": 0.6063, "rewards/accuracies": 0.625, "rewards/chosen": -0.8812028169631958, "rewards/margins": 0.4554789662361145, "rewards/rejected": -1.336681842803955, "step": 520 }, { "epoch": 0.1, "learning_rate": 4.999765159102025e-06, "logits/chosen": -2.4896693229675293, "logits/rejected": -2.3514962196350098, "logps/chosen": -338.57122802734375, "logps/rejected": -392.3268737792969, "loss": 0.6066, "rewards/accuracies": 0.75, "rewards/chosen": -0.4506074786186218, "rewards/margins": 0.2730453908443451, "rewards/rejected": -0.7236528992652893, "step": 530 }, { "epoch": 0.11, "learning_rate": 4.999471618320339e-06, "logits/chosen": -2.5596091747283936, "logits/rejected": -2.569035291671753, "logps/chosen": -285.6130065917969, "logps/rejected": -337.4367370605469, "loss": 0.5838, "rewards/accuracies": 0.75, "rewards/chosen": -0.35268205404281616, "rewards/margins": 0.43236231803894043, "rewards/rejected": -0.7850444912910461, "step": 540 }, { "epoch": 0.11, "learning_rate": 4.999060680528294e-06, "logits/chosen": -2.639185667037964, "logits/rejected": -2.4026732444763184, "logps/chosen": -354.9365234375, "logps/rejected": -350.83575439453125, "loss": 0.5779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7679035067558289, "rewards/margins": 0.30885177850723267, "rewards/rejected": -1.0767552852630615, "step": 550 }, { "epoch": 0.11, "learning_rate": 4.998532365027117e-06, "logits/chosen": -2.2880897521972656, "logits/rejected": -2.203477621078491, "logps/chosen": -402.0857849121094, "logps/rejected": -435.74090576171875, "loss": 0.5588, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.303902268409729, "rewards/margins": 0.5334252715110779, "rewards/rejected": -1.8373275995254517, "step": 560 }, { "epoch": 0.11, "learning_rate": 4.997886696631115e-06, "logits/chosen": -2.271165132522583, "logits/rejected": -2.3455963134765625, "logps/chosen": -351.71917724609375, "logps/rejected": -414.587646484375, "loss": 0.5306, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.247244119644165, "rewards/margins": 0.45136338472366333, "rewards/rejected": -1.6986074447631836, "step": 570 }, { "epoch": 0.11, "learning_rate": 4.997123705666514e-06, "logits/chosen": -2.1570160388946533, "logits/rejected": -2.1810998916625977, "logps/chosen": -408.73883056640625, "logps/rejected": -413.47174072265625, "loss": 0.6342, "rewards/accuracies": 0.625, "rewards/chosen": -1.6730220317840576, "rewards/margins": 0.255155473947525, "rewards/rejected": -1.928177833557129, "step": 580 }, { "epoch": 0.12, "learning_rate": 4.996243427970032e-06, "logits/chosen": -2.1544055938720703, "logits/rejected": -1.9909731149673462, "logps/chosen": -351.2218933105469, "logps/rejected": -395.60662841796875, "loss": 0.6315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1272488832473755, "rewards/margins": 0.46109557151794434, "rewards/rejected": -1.5883444547653198, "step": 590 }, { "epoch": 0.12, "learning_rate": 4.995245904887195e-06, "logits/chosen": -2.532106876373291, "logits/rejected": -2.246403932571411, "logps/chosen": -447.5849609375, "logps/rejected": -391.7081298828125, "loss": 0.6219, "rewards/accuracies": 0.625, "rewards/chosen": -1.1101863384246826, "rewards/margins": 0.3275432586669922, "rewards/rejected": -1.4377295970916748, "step": 600 }, { "epoch": 0.12, "eval_logits/chosen": -0.03613191470503807, "eval_logits/rejected": 0.03838645666837692, "eval_logps/chosen": -376.6808166503906, "eval_logps/rejected": -383.747802734375, "eval_loss": 0.5753170847892761, "eval_rewards/accuracies": 0.6502976417541504, "eval_rewards/chosen": -1.1344010829925537, "eval_rewards/margins": 0.37189337611198425, "eval_rewards/rejected": -1.5062944889068604, "eval_runtime": 466.8014, "eval_samples_per_second": 4.284, "eval_steps_per_second": 0.18, "step": 600 }, { "epoch": 0.12, "learning_rate": 4.994131183270396e-06, "logits/chosen": -2.3710973262786865, "logits/rejected": -2.3201870918273926, "logps/chosen": -421.33709716796875, "logps/rejected": -424.4642639160156, "loss": 0.5701, "rewards/accuracies": 0.75, "rewards/chosen": -1.1958863735198975, "rewards/margins": 0.5684593915939331, "rewards/rejected": -1.7643455266952515, "step": 610 }, { "epoch": 0.12, "learning_rate": 4.992899315476696e-06, "logits/chosen": -2.2475314140319824, "logits/rejected": -2.1929378509521484, "logps/chosen": -438.3310546875, "logps/rejected": -469.04241943359375, "loss": 0.5701, "rewards/accuracies": 0.75, "rewards/chosen": -1.3635677099227905, "rewards/margins": 0.46668586134910583, "rewards/rejected": -1.8302538394927979, "step": 620 }, { "epoch": 0.12, "learning_rate": 4.99155035936536e-06, "logits/chosen": -2.449263095855713, "logits/rejected": -2.265713930130005, "logps/chosen": -329.15960693359375, "logps/rejected": -357.1077880859375, "loss": 0.5563, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0894893407821655, "rewards/margins": 0.3795085847377777, "rewards/rejected": -1.4689979553222656, "step": 630 }, { "epoch": 0.13, "learning_rate": 4.990084378295148e-06, "logits/chosen": -2.448363780975342, "logits/rejected": -2.4051525592803955, "logps/chosen": -341.86956787109375, "logps/rejected": -338.0180969238281, "loss": 0.5833, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8054157495498657, "rewards/margins": 0.5508281588554382, "rewards/rejected": -1.3562438488006592, "step": 640 }, { "epoch": 0.13, "learning_rate": 4.988501441121328e-06, "logits/chosen": -2.3474011421203613, "logits/rejected": -2.023271322250366, "logps/chosen": -382.7298889160156, "logps/rejected": -369.0820007324219, "loss": 0.6044, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0760247707366943, "rewards/margins": 0.42178216576576233, "rewards/rejected": -1.4978069067001343, "step": 650 }, { "epoch": 0.13, "learning_rate": 4.986801622192453e-06, "logits/chosen": -2.241762638092041, "logits/rejected": -1.996416449546814, "logps/chosen": -416.619384765625, "logps/rejected": -449.72052001953125, "loss": 0.5281, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3046057224273682, "rewards/margins": 0.7088319659233093, "rewards/rejected": -2.0134377479553223, "step": 660 }, { "epoch": 0.13, "learning_rate": 4.984985001346859e-06, "logits/chosen": -2.369713544845581, "logits/rejected": -2.3672068119049072, "logps/chosen": -391.0481262207031, "logps/rejected": -429.03759765625, "loss": 0.5965, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.2604517936706543, "rewards/margins": 0.23743471503257751, "rewards/rejected": -1.4978865385055542, "step": 670 }, { "epoch": 0.13, "learning_rate": 4.9830516639089226e-06, "logits/chosen": -2.217136859893799, "logits/rejected": -2.181342124938965, "logps/chosen": -373.64794921875, "logps/rejected": -409.3558654785156, "loss": 0.5203, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8603825569152832, "rewards/margins": 0.7127417325973511, "rewards/rejected": -1.5731241703033447, "step": 680 }, { "epoch": 0.14, "learning_rate": 4.981001700685051e-06, "logits/chosen": -2.3387327194213867, "logits/rejected": -2.1862881183624268, "logps/chosen": -413.24298095703125, "logps/rejected": -404.2867126464844, "loss": 0.5476, "rewards/accuracies": 0.75, "rewards/chosen": -0.955763041973114, "rewards/margins": 0.55617755651474, "rewards/rejected": -1.5119404792785645, "step": 690 }, { "epoch": 0.14, "learning_rate": 4.978835207959414e-06, "logits/chosen": -2.3319435119628906, "logits/rejected": -2.234999418258667, "logps/chosen": -312.42431640625, "logps/rejected": -341.87005615234375, "loss": 0.5586, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9548459053039551, "rewards/margins": 0.35879063606262207, "rewards/rejected": -1.3136365413665771, "step": 700 }, { "epoch": 0.14, "eval_logits/chosen": 0.24727466702461243, "eval_logits/rejected": 0.30730071663856506, "eval_logps/chosen": -342.16094970703125, "eval_logps/rejected": -351.8956604003906, "eval_loss": 0.5732717514038086, "eval_rewards/accuracies": 0.6666666865348816, "eval_rewards/chosen": -0.7892022132873535, "eval_rewards/margins": 0.3985709249973297, "eval_rewards/rejected": -1.1877731084823608, "eval_runtime": 465.6349, "eval_samples_per_second": 4.295, "eval_steps_per_second": 0.18, "step": 700 }, { "epoch": 0.14, "learning_rate": 4.976552287489427e-06, "logits/chosen": -2.3588404655456543, "logits/rejected": -2.3238019943237305, "logps/chosen": -366.0771179199219, "logps/rejected": -426.2799377441406, "loss": 0.4964, "rewards/accuracies": 0.875, "rewards/chosen": -0.7471452951431274, "rewards/margins": 0.7910875678062439, "rewards/rejected": -1.5382329225540161, "step": 710 }, { "epoch": 0.14, "learning_rate": 4.9741530465009665e-06, "logits/chosen": -2.0894782543182373, "logits/rejected": -2.11095929145813, "logps/chosen": -360.30963134765625, "logps/rejected": -429.9984436035156, "loss": 0.5533, "rewards/accuracies": 0.75, "rewards/chosen": -1.0334492921829224, "rewards/margins": 0.8144800066947937, "rewards/rejected": -1.8479293584823608, "step": 720 }, { "epoch": 0.14, "learning_rate": 4.9716375976833395e-06, "logits/chosen": -2.13502836227417, "logits/rejected": -2.0187134742736816, "logps/chosen": -387.08819580078125, "logps/rejected": -384.268798828125, "loss": 0.5997, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.14211905002594, "rewards/margins": 0.3813985288143158, "rewards/rejected": -1.5235174894332886, "step": 730 }, { "epoch": 0.15, "learning_rate": 4.969006059183984e-06, "logits/chosen": -2.2879302501678467, "logits/rejected": -2.1529295444488525, "logps/chosen": -307.6477966308594, "logps/rejected": -326.3321838378906, "loss": 0.5228, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8938900232315063, "rewards/margins": 0.3973291218280792, "rewards/rejected": -1.2912189960479736, "step": 740 }, { "epoch": 0.15, "learning_rate": 4.966258554602924e-06, "logits/chosen": -2.2723472118377686, "logits/rejected": -2.1424341201782227, "logps/chosen": -370.5997619628906, "logps/rejected": -356.42242431640625, "loss": 0.6127, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.07357919216156, "rewards/margins": 0.4219037890434265, "rewards/rejected": -1.4954830408096313, "step": 750 }, { "epoch": 0.15, "learning_rate": 4.963395212986964e-06, "logits/chosen": -2.4523584842681885, "logits/rejected": -2.3470985889434814, "logps/chosen": -400.797607421875, "logps/rejected": -381.6387939453125, "loss": 0.5702, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7941485643386841, "rewards/margins": 0.6219018697738647, "rewards/rejected": -1.4160504341125488, "step": 760 }, { "epoch": 0.15, "learning_rate": 4.960416168823626e-06, "logits/chosen": -2.389075756072998, "logits/rejected": -2.132652759552002, "logps/chosen": -314.0247802734375, "logps/rejected": -338.5572509765625, "loss": 0.5706, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6960195302963257, "rewards/margins": 0.6828244924545288, "rewards/rejected": -1.3788437843322754, "step": 770 }, { "epoch": 0.15, "learning_rate": 4.957321562034833e-06, "logits/chosen": -2.3235738277435303, "logits/rejected": -2.2064554691314697, "logps/chosen": -361.6338195800781, "logps/rejected": -368.1969299316406, "loss": 0.536, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8635967373847961, "rewards/margins": 0.5974106788635254, "rewards/rejected": -1.4610074758529663, "step": 780 }, { "epoch": 0.16, "learning_rate": 4.954111537970342e-06, "logits/chosen": -2.2395503520965576, "logits/rejected": -2.0655577182769775, "logps/chosen": -382.55023193359375, "logps/rejected": -423.2119140625, "loss": 0.6418, "rewards/accuracies": 0.625, "rewards/chosen": -1.3395254611968994, "rewards/margins": 0.3794713616371155, "rewards/rejected": -1.7189967632293701, "step": 790 }, { "epoch": 0.16, "learning_rate": 4.950786247400908e-06, "logits/chosen": -2.2357563972473145, "logits/rejected": -2.2734086513519287, "logps/chosen": -346.0960693359375, "logps/rejected": -380.20074462890625, "loss": 0.6123, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.31546151638031, "rewards/margins": 0.2703830301761627, "rewards/rejected": -1.58584463596344, "step": 800 }, { "epoch": 0.16, "eval_logits/chosen": 1.0326988697052002, "eval_logits/rejected": 1.0808866024017334, "eval_logps/chosen": -390.5542297363281, "eval_logps/rejected": -403.5396728515625, "eval_loss": 0.5578105449676514, "eval_rewards/accuracies": 0.6651785969734192, "eval_rewards/chosen": -1.2731355428695679, "eval_rewards/margins": 0.43107810616493225, "eval_rewards/rejected": -1.7042136192321777, "eval_runtime": 471.0801, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.178, "step": 800 }, { "epoch": 0.16, "learning_rate": 4.94734584651121e-06, "logits/chosen": -2.2077815532684326, "logits/rejected": -1.9737892150878906, "logps/chosen": -449.19549560546875, "logps/rejected": -509.70611572265625, "loss": 0.4952, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3227405548095703, "rewards/margins": 0.795570969581604, "rewards/rejected": -2.1183114051818848, "step": 810 }, { "epoch": 0.16, "learning_rate": 4.943790496892513e-06, "logits/chosen": -2.080096960067749, "logits/rejected": -1.8696342706680298, "logps/chosen": -482.72174072265625, "logps/rejected": -451.3267517089844, "loss": 0.5183, "rewards/accuracies": 0.75, "rewards/chosen": -1.5850732326507568, "rewards/margins": 0.7216095328330994, "rewards/rejected": -2.306682586669922, "step": 820 }, { "epoch": 0.16, "learning_rate": 4.940120365535076e-06, "logits/chosen": -2.4171366691589355, "logits/rejected": -2.279550075531006, "logps/chosen": -424.75775146484375, "logps/rejected": -457.83026123046875, "loss": 0.5598, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3365710973739624, "rewards/margins": 0.7002229690551758, "rewards/rejected": -2.0367941856384277, "step": 830 }, { "epoch": 0.16, "learning_rate": 4.936335624820313e-06, "logits/chosen": -2.015204429626465, "logits/rejected": -2.105721950531006, "logps/chosen": -332.3876953125, "logps/rejected": -497.28656005859375, "loss": 0.4966, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1477575302124023, "rewards/margins": 1.0808641910552979, "rewards/rejected": -2.2286219596862793, "step": 840 }, { "epoch": 0.17, "learning_rate": 4.932436452512693e-06, "logits/chosen": -2.215301275253296, "logits/rejected": -2.0272057056427, "logps/chosen": -424.7681579589844, "logps/rejected": -471.20709228515625, "loss": 0.5339, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3136613368988037, "rewards/margins": 0.7162275314331055, "rewards/rejected": -2.029888868331909, "step": 850 }, { "epoch": 0.17, "learning_rate": 4.9284230317513906e-06, "logits/chosen": -2.259880304336548, "logits/rejected": -2.076293706893921, "logps/chosen": -361.982177734375, "logps/rejected": -447.18798828125, "loss": 0.5459, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.242569088935852, "rewards/margins": 0.8317493200302124, "rewards/rejected": -2.0743184089660645, "step": 860 }, { "epoch": 0.17, "learning_rate": 4.924295551041688e-06, "logits/chosen": -2.0979232788085938, "logits/rejected": -2.2426350116729736, "logps/chosen": -359.0535888671875, "logps/rejected": -432.423095703125, "loss": 0.5804, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3003675937652588, "rewards/margins": 0.2097325325012207, "rewards/rejected": -1.51010000705719, "step": 870 }, { "epoch": 0.17, "learning_rate": 4.920054204246116e-06, "logits/chosen": -1.8829069137573242, "logits/rejected": -1.774928331375122, "logps/chosen": -295.80743408203125, "logps/rejected": -424.815673828125, "loss": 0.5379, "rewards/accuracies": 0.75, "rewards/chosen": -1.0513074398040771, "rewards/margins": 0.9006274938583374, "rewards/rejected": -1.9519351720809937, "step": 880 }, { "epoch": 0.17, "learning_rate": 4.915699190575349e-06, "logits/chosen": -2.2078824043273926, "logits/rejected": -1.8246761560440063, "logps/chosen": -414.6767578125, "logps/rejected": -450.5083923339844, "loss": 0.5016, "rewards/accuracies": 0.875, "rewards/chosen": -1.1404173374176025, "rewards/margins": 0.949181854724884, "rewards/rejected": -2.089599132537842, "step": 890 }, { "epoch": 0.18, "learning_rate": 4.911230714578858e-06, "logits/chosen": -2.33544659614563, "logits/rejected": -2.159890651702881, "logps/chosen": -484.42315673828125, "logps/rejected": -484.75543212890625, "loss": 0.555, "rewards/accuracies": 0.625, "rewards/chosen": -1.2610318660736084, "rewards/margins": 0.7130531072616577, "rewards/rejected": -1.9740852117538452, "step": 900 }, { "epoch": 0.18, "eval_logits/chosen": 1.3993161916732788, "eval_logits/rejected": 1.4158341884613037, "eval_logps/chosen": -382.6491394042969, "eval_logps/rejected": -413.9874572753906, "eval_loss": 0.5461392998695374, "eval_rewards/accuracies": 0.6770833134651184, "eval_rewards/chosen": -1.1940844058990479, "eval_rewards/margins": 0.6146063208580017, "eval_rewards/rejected": -1.8086905479431152, "eval_runtime": 464.4097, "eval_samples_per_second": 4.307, "eval_steps_per_second": 0.181, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.9066489861352875e-06, "logits/chosen": -2.130706310272217, "logits/rejected": -2.1322884559631348, "logps/chosen": -396.38226318359375, "logps/rejected": -399.5631103515625, "loss": 0.578, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1745949983596802, "rewards/margins": 0.6246682405471802, "rewards/rejected": -1.79926335811615, "step": 910 }, { "epoch": 0.18, "learning_rate": 4.90195422044261e-06, "logits/chosen": -2.1645100116729736, "logits/rejected": -2.146820545196533, "logps/chosen": -313.99578857421875, "logps/rejected": -400.88330078125, "loss": 0.4906, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.011773705482483, "rewards/margins": 0.7602158188819885, "rewards/rejected": -1.7719894647598267, "step": 920 }, { "epoch": 0.18, "learning_rate": 4.897146638008012e-06, "logits/chosen": -2.0529541969299316, "logits/rejected": -2.055722713470459, "logps/chosen": -401.8716125488281, "logps/rejected": -484.28863525390625, "loss": 0.4835, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.197890043258667, "rewards/margins": 0.9039093852043152, "rewards/rejected": -2.101799488067627, "step": 930 }, { "epoch": 0.18, "learning_rate": 4.89222646463754e-06, "logits/chosen": -1.9853671789169312, "logits/rejected": -1.9570581912994385, "logps/chosen": -390.3425598144531, "logps/rejected": -452.4058532714844, "loss": 0.5724, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5371025800704956, "rewards/margins": 0.6626251339912415, "rewards/rejected": -2.199727773666382, "step": 940 }, { "epoch": 0.19, "learning_rate": 4.8871939314254965e-06, "logits/chosen": -2.0246262550354004, "logits/rejected": -1.9982950687408447, "logps/chosen": -401.33062744140625, "logps/rejected": -527.936767578125, "loss": 0.4559, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.588289499282837, "rewards/margins": 1.0147461891174316, "rewards/rejected": -2.6030356884002686, "step": 950 }, { "epoch": 0.19, "learning_rate": 4.882049274743578e-06, "logits/chosen": -2.163276195526123, "logits/rejected": -1.980111837387085, "logps/chosen": -394.1162109375, "logps/rejected": -423.0694885253906, "loss": 0.5912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2555248737335205, "rewards/margins": 0.48558419942855835, "rewards/rejected": -1.7411092519760132, "step": 960 }, { "epoch": 0.19, "learning_rate": 4.876792736229782e-06, "logits/chosen": -2.1688385009765625, "logits/rejected": -1.9922654628753662, "logps/chosen": -377.38580322265625, "logps/rejected": -441.0692443847656, "loss": 0.5064, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8046746253967285, "rewards/margins": 0.9893644452095032, "rewards/rejected": -1.7940391302108765, "step": 970 }, { "epoch": 0.19, "learning_rate": 4.8714245627770515e-06, "logits/chosen": -2.1530489921569824, "logits/rejected": -2.2191214561462402, "logps/chosen": -370.20111083984375, "logps/rejected": -379.66998291015625, "loss": 0.604, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9098675847053528, "rewards/margins": 0.4256000518798828, "rewards/rejected": -1.3354675769805908, "step": 980 }, { "epoch": 0.19, "learning_rate": 4.865945006521684e-06, "logits/chosen": -2.262906789779663, "logits/rejected": -2.044600009918213, "logps/chosen": -346.9558410644531, "logps/rejected": -393.48828125, "loss": 0.5445, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7731464505195618, "rewards/margins": 0.82574862241745, "rewards/rejected": -1.5988948345184326, "step": 990 }, { "epoch": 0.2, "learning_rate": 4.860354324831482e-06, "logits/chosen": -2.0484061241149902, "logits/rejected": -1.9440243244171143, "logps/chosen": -324.58270263671875, "logps/rejected": -407.22796630859375, "loss": 0.4905, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.867997944355011, "rewards/margins": 0.8739334344863892, "rewards/rejected": -1.7419313192367554, "step": 1000 }, { "epoch": 0.2, "eval_logits/chosen": 0.7731575965881348, "eval_logits/rejected": 0.8211134076118469, "eval_logps/chosen": -387.9333801269531, "eval_logps/rejected": -428.39447021484375, "eval_loss": 0.5462982058525085, "eval_rewards/accuracies": 0.6889880895614624, "eval_rewards/chosen": -1.2469266653060913, "eval_rewards/margins": 0.7058340907096863, "eval_rewards/rejected": -1.952761173248291, "eval_runtime": 475.5201, "eval_samples_per_second": 4.206, "eval_steps_per_second": 0.177, "step": 1000 }, { "epoch": 0.2, "learning_rate": 4.854652780293672e-06, "logits/chosen": -1.7674760818481445, "logits/rejected": -1.483852744102478, "logps/chosen": -344.6761779785156, "logps/rejected": -485.04400634765625, "loss": 0.4088, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1883604526519775, "rewards/margins": 1.2560036182403564, "rewards/rejected": -2.444364309310913, "step": 1010 }, { "epoch": 0.2, "learning_rate": 4.848840640702565e-06, "logits/chosen": -1.7526578903198242, "logits/rejected": -1.849961280822754, "logps/chosen": -413.9766540527344, "logps/rejected": -477.40582275390625, "loss": 0.6059, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.663875937461853, "rewards/margins": 0.5298476815223694, "rewards/rejected": -2.193723440170288, "step": 1020 }, { "epoch": 0.2, "learning_rate": 4.842918179046982e-06, "logits/chosen": -1.7181034088134766, "logits/rejected": -1.4771192073822021, "logps/chosen": -380.33673095703125, "logps/rejected": -443.24139404296875, "loss": 0.5238, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.0469777584075928, "rewards/margins": 1.1572082042694092, "rewards/rejected": -2.204185962677002, "step": 1030 }, { "epoch": 0.2, "learning_rate": 4.836885673497435e-06, "logits/chosen": -1.8101253509521484, "logits/rejected": -1.8072385787963867, "logps/chosen": -405.07415771484375, "logps/rejected": -431.8306579589844, "loss": 0.5149, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1933597326278687, "rewards/margins": 0.6611484885215759, "rewards/rejected": -1.8545081615447998, "step": 1040 }, { "epoch": 0.21, "learning_rate": 4.830743407393052e-06, "logits/chosen": -1.8477604389190674, "logits/rejected": -1.9941295385360718, "logps/chosen": -387.93194580078125, "logps/rejected": -495.73565673828125, "loss": 0.5424, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1492812633514404, "rewards/margins": 0.8768144845962524, "rewards/rejected": -2.0260956287384033, "step": 1050 }, { "epoch": 0.21, "learning_rate": 4.824491669228279e-06, "logits/chosen": -2.023118257522583, "logits/rejected": -1.845958948135376, "logps/chosen": -384.5801696777344, "logps/rejected": -453.61260986328125, "loss": 0.5636, "rewards/accuracies": 0.75, "rewards/chosen": -0.9846547842025757, "rewards/margins": 0.5864224433898926, "rewards/rejected": -1.5710772275924683, "step": 1060 }, { "epoch": 0.21, "learning_rate": 4.818130752639326e-06, "logits/chosen": -1.8387682437896729, "logits/rejected": -1.9775362014770508, "logps/chosen": -323.5730895996094, "logps/rejected": -408.73388671875, "loss": 0.5621, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1858361959457397, "rewards/margins": 0.44976431131362915, "rewards/rejected": -1.6356006860733032, "step": 1070 }, { "epoch": 0.21, "learning_rate": 4.811660956390372e-06, "logits/chosen": -2.032257556915283, "logits/rejected": -1.8938792943954468, "logps/chosen": -384.43597412109375, "logps/rejected": -397.3373107910156, "loss": 0.6228, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0033721923828125, "rewards/margins": 0.4804312288761139, "rewards/rejected": -1.483803391456604, "step": 1080 }, { "epoch": 0.21, "learning_rate": 4.8050825843595395e-06, "logits/chosen": -2.114607572555542, "logits/rejected": -2.0265519618988037, "logps/chosen": -308.5718688964844, "logps/rejected": -327.57550048828125, "loss": 0.5083, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.671250581741333, "rewards/margins": 0.6738994717597961, "rewards/rejected": -1.3451499938964844, "step": 1090 }, { "epoch": 0.22, "learning_rate": 4.798395945524615e-06, "logits/chosen": -2.040956735610962, "logits/rejected": -1.9049040079116821, "logps/chosen": -340.9862365722656, "logps/rejected": -406.28216552734375, "loss": 0.5214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0534942150115967, "rewards/margins": 0.7783910036087036, "rewards/rejected": -1.8318853378295898, "step": 1100 }, { "epoch": 0.22, "eval_logits/chosen": 1.416340708732605, "eval_logits/rejected": 1.3945404291152954, "eval_logps/chosen": -391.100830078125, "eval_logps/rejected": -423.03472900390625, "eval_loss": 0.5355843305587769, "eval_rewards/accuracies": 0.6979166865348816, "eval_rewards/chosen": -1.2786011695861816, "eval_rewards/margins": 0.6205627918243408, "eval_rewards/rejected": -1.8991637229919434, "eval_runtime": 474.4879, "eval_samples_per_second": 4.215, "eval_steps_per_second": 0.177, "step": 1100 }, { "epoch": 0.22, "learning_rate": 4.791601353948537e-06, "logits/chosen": -2.277050495147705, "logits/rejected": -1.9915672540664673, "logps/chosen": -436.3802795410156, "logps/rejected": -471.61761474609375, "loss": 0.5151, "rewards/accuracies": 0.75, "rewards/chosen": -1.1520277261734009, "rewards/margins": 0.8716039657592773, "rewards/rejected": -2.0236315727233887, "step": 1110 }, { "epoch": 0.22, "learning_rate": 4.784699128764654e-06, "logits/chosen": -2.116421699523926, "logits/rejected": -2.0355710983276367, "logps/chosen": -420.1725158691406, "logps/rejected": -476.16168212890625, "loss": 0.5504, "rewards/accuracies": 0.75, "rewards/chosen": -1.3343623876571655, "rewards/margins": 0.5101630687713623, "rewards/rejected": -1.8445253372192383, "step": 1120 }, { "epoch": 0.22, "learning_rate": 4.777689594161724e-06, "logits/chosen": -2.114661931991577, "logits/rejected": -2.0693721771240234, "logps/chosen": -463.98077392578125, "logps/rejected": -556.939453125, "loss": 0.5139, "rewards/accuracies": 0.75, "rewards/chosen": -1.175616979598999, "rewards/margins": 0.8436921238899231, "rewards/rejected": -2.0193092823028564, "step": 1130 }, { "epoch": 0.22, "learning_rate": 4.770573079368691e-06, "logits/chosen": -2.093860149383545, "logits/rejected": -1.8888766765594482, "logps/chosen": -430.1224060058594, "logps/rejected": -518.7135009765625, "loss": 0.5418, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3075679540634155, "rewards/margins": 0.8580087423324585, "rewards/rejected": -2.165576696395874, "step": 1140 }, { "epoch": 0.23, "learning_rate": 4.763349918639228e-06, "logits/chosen": -2.113792657852173, "logits/rejected": -1.906060814857483, "logps/chosen": -448.8399353027344, "logps/rejected": -523.9088134765625, "loss": 0.5246, "rewards/accuracies": 0.75, "rewards/chosen": -1.4472620487213135, "rewards/margins": 0.853011429309845, "rewards/rejected": -2.3002734184265137, "step": 1150 }, { "epoch": 0.23, "learning_rate": 4.756020451236025e-06, "logits/chosen": -2.009995222091675, "logits/rejected": -1.607735276222229, "logps/chosen": -415.8470153808594, "logps/rejected": -441.7735290527344, "loss": 0.5287, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5157968997955322, "rewards/margins": 0.8386972546577454, "rewards/rejected": -2.354494094848633, "step": 1160 }, { "epoch": 0.23, "learning_rate": 4.748585021414869e-06, "logits/chosen": -1.892051100730896, "logits/rejected": -1.7459224462509155, "logps/chosen": -378.3355407714844, "logps/rejected": -477.277587890625, "loss": 0.5629, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5294681787490845, "rewards/margins": 0.621840238571167, "rewards/rejected": -2.151308536529541, "step": 1170 }, { "epoch": 0.23, "learning_rate": 4.741043978408463e-06, "logits/chosen": -1.8768208026885986, "logits/rejected": -1.7806380987167358, "logps/chosen": -328.5000305175781, "logps/rejected": -318.7469177246094, "loss": 0.5865, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.0944186449050903, "rewards/margins": 0.3815223276615143, "rewards/rejected": -1.4759409427642822, "step": 1180 }, { "epoch": 0.23, "learning_rate": 4.733397676410027e-06, "logits/chosen": -2.0375049114227295, "logits/rejected": -1.5592041015625, "logps/chosen": -374.0691223144531, "logps/rejected": -392.29302978515625, "loss": 0.5295, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3160943984985352, "rewards/margins": 0.6633793711662292, "rewards/rejected": -1.9794738292694092, "step": 1190 }, { "epoch": 0.24, "learning_rate": 4.725646474556666e-06, "logits/chosen": -2.212542772293091, "logits/rejected": -1.9363590478897095, "logps/chosen": -367.44024658203125, "logps/rejected": -388.07452392578125, "loss": 0.4988, "rewards/accuracies": 0.75, "rewards/chosen": -1.1194865703582764, "rewards/margins": 0.8652739524841309, "rewards/rejected": -1.9847608804702759, "step": 1200 }, { "epoch": 0.24, "eval_logits/chosen": 0.9227811098098755, "eval_logits/rejected": 1.0273418426513672, "eval_logps/chosen": -385.0260925292969, "eval_logps/rejected": -426.0503234863281, "eval_loss": 0.530683159828186, "eval_rewards/accuracies": 0.6979166865348816, "eval_rewards/chosen": -1.2178539037704468, "eval_rewards/margins": 0.7114654183387756, "eval_rewards/rejected": -1.9293192625045776, "eval_runtime": 471.1353, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.178, "step": 1200 }, { "epoch": 0.24, "learning_rate": 4.717790736912493e-06, "logits/chosen": -1.9181499481201172, "logits/rejected": -1.7514560222625732, "logps/chosen": -359.93341064453125, "logps/rejected": -441.38916015625, "loss": 0.4777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.209984540939331, "rewards/margins": 1.0826075077056885, "rewards/rejected": -2.2925918102264404, "step": 1210 }, { "epoch": 0.24, "learning_rate": 4.709830832451538e-06, "logits/chosen": -1.9338840246200562, "logits/rejected": -1.9117820262908936, "logps/chosen": -480.5819396972656, "logps/rejected": -532.8638916015625, "loss": 0.5032, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4112778902053833, "rewards/margins": 0.9129534959793091, "rewards/rejected": -2.3242313861846924, "step": 1220 }, { "epoch": 0.24, "learning_rate": 4.701767135040415e-06, "logits/chosen": -1.8094873428344727, "logits/rejected": -1.5869756937026978, "logps/chosen": -513.3035888671875, "logps/rejected": -593.2393798828125, "loss": 0.5695, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2972371578216553, "rewards/margins": 0.9080797433853149, "rewards/rejected": -3.2053170204162598, "step": 1230 }, { "epoch": 0.24, "learning_rate": 4.693600023420758e-06, "logits/chosen": -1.4798624515533447, "logits/rejected": -1.369323968887329, "logps/chosen": -486.832275390625, "logps/rejected": -542.3109130859375, "loss": 0.5068, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3903822898864746, "rewards/margins": 0.7768516540527344, "rewards/rejected": -3.167233943939209, "step": 1240 }, { "epoch": 0.25, "learning_rate": 4.685329881191436e-06, "logits/chosen": -1.6226552724838257, "logits/rejected": -1.513686180114746, "logps/chosen": -399.8258056640625, "logps/rejected": -447.3807678222656, "loss": 0.5162, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9063441753387451, "rewards/margins": 0.6103194355964661, "rewards/rejected": -2.5166637897491455, "step": 1250 }, { "epoch": 0.25, "learning_rate": 4.676957096790536e-06, "logits/chosen": -1.4838886260986328, "logits/rejected": -1.0853869915008545, "logps/chosen": -432.89801025390625, "logps/rejected": -512.7747802734375, "loss": 0.5367, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6043317317962646, "rewards/margins": 1.2039530277252197, "rewards/rejected": -2.8082849979400635, "step": 1260 }, { "epoch": 0.25, "learning_rate": 4.668482063477118e-06, "logits/chosen": -1.923230528831482, "logits/rejected": -1.7049680948257446, "logps/chosen": -423.11859130859375, "logps/rejected": -456.056884765625, "loss": 0.5425, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4797232151031494, "rewards/margins": 0.6203845739364624, "rewards/rejected": -2.1001076698303223, "step": 1270 }, { "epoch": 0.25, "learning_rate": 4.659905179312743e-06, "logits/chosen": -1.4056613445281982, "logits/rejected": -1.4088342189788818, "logps/chosen": -336.7528381347656, "logps/rejected": -490.25946044921875, "loss": 0.5005, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4386407136917114, "rewards/margins": 1.2846283912658691, "rewards/rejected": -2.723268985748291, "step": 1280 }, { "epoch": 0.25, "learning_rate": 4.651226847142774e-06, "logits/chosen": -1.3955827951431274, "logits/rejected": -1.3704919815063477, "logps/chosen": -420.107177734375, "logps/rejected": -611.5444946289062, "loss": 0.5074, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8919422626495361, "rewards/margins": 1.1938669681549072, "rewards/rejected": -3.0858092308044434, "step": 1290 }, { "epoch": 0.26, "learning_rate": 4.642447474577466e-06, "logits/chosen": -1.661499261856079, "logits/rejected": -1.3011276721954346, "logps/chosen": -438.72747802734375, "logps/rejected": -499.0279235839844, "loss": 0.5324, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.768543004989624, "rewards/margins": 0.987531840801239, "rewards/rejected": -2.756074905395508, "step": 1300 }, { "epoch": 0.26, "eval_logits/chosen": 0.591680645942688, "eval_logits/rejected": 0.9343855977058411, "eval_logps/chosen": -408.3595275878906, "eval_logps/rejected": -450.9059753417969, "eval_loss": 0.5320248007774353, "eval_rewards/accuracies": 0.7023809552192688, "eval_rewards/chosen": -1.451188087463379, "eval_rewards/margins": 0.7266876697540283, "eval_rewards/rejected": -2.1778759956359863, "eval_runtime": 469.1631, "eval_samples_per_second": 4.263, "eval_steps_per_second": 0.179, "step": 1300 }, { "epoch": 0.26, "learning_rate": 4.6335674739728055e-06, "logits/chosen": -1.9182145595550537, "logits/rejected": -1.6547549962997437, "logps/chosen": -414.7605895996094, "logps/rejected": -529.4044799804688, "loss": 0.5226, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5170066356658936, "rewards/margins": 1.003164529800415, "rewards/rejected": -2.5201709270477295, "step": 1310 }, { "epoch": 0.26, "learning_rate": 4.6245872624111535e-06, "logits/chosen": -1.8730331659317017, "logits/rejected": -1.5008450746536255, "logps/chosen": -348.73748779296875, "logps/rejected": -431.8541564941406, "loss": 0.5453, "rewards/accuracies": 0.75, "rewards/chosen": -1.0170891284942627, "rewards/margins": 0.7871019244194031, "rewards/rejected": -1.804190993309021, "step": 1320 }, { "epoch": 0.26, "learning_rate": 4.6155072616816515e-06, "logits/chosen": -1.6007864475250244, "logits/rejected": -1.7298269271850586, "logps/chosen": -354.00042724609375, "logps/rejected": -439.82025146484375, "loss": 0.5378, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1786034107208252, "rewards/margins": 0.829128623008728, "rewards/rejected": -2.0077319145202637, "step": 1330 }, { "epoch": 0.26, "learning_rate": 4.606327898260413e-06, "logits/chosen": -1.7862968444824219, "logits/rejected": -1.6234447956085205, "logps/chosen": -358.39483642578125, "logps/rejected": -438.0057678222656, "loss": 0.5152, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.1921870708465576, "rewards/margins": 1.013359785079956, "rewards/rejected": -2.2055468559265137, "step": 1340 }, { "epoch": 0.26, "learning_rate": 4.597049603290491e-06, "logits/chosen": -1.755611777305603, "logits/rejected": -1.4681942462921143, "logps/chosen": -490.5035095214844, "logps/rejected": -585.6339111328125, "loss": 0.4694, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7060209512710571, "rewards/margins": 1.0946344137191772, "rewards/rejected": -2.8006553649902344, "step": 1350 }, { "epoch": 0.27, "learning_rate": 4.587672812561626e-06, "logits/chosen": -1.5236080884933472, "logits/rejected": -1.5720051527023315, "logps/chosen": -467.0594787597656, "logps/rejected": -558.3744506835938, "loss": 0.5319, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.960383653640747, "rewards/margins": 0.865696907043457, "rewards/rejected": -2.826080322265625, "step": 1360 }, { "epoch": 0.27, "learning_rate": 4.578197966489782e-06, "logits/chosen": -1.61127507686615, "logits/rejected": -1.5698819160461426, "logps/chosen": -402.5616149902344, "logps/rejected": -409.2900390625, "loss": 0.5398, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4020943641662598, "rewards/margins": 0.8227679133415222, "rewards/rejected": -2.2248623371124268, "step": 1370 }, { "epoch": 0.27, "learning_rate": 4.5686255100964535e-06, "logits/chosen": -1.72307550907135, "logits/rejected": -1.580277442932129, "logps/chosen": -336.98004150390625, "logps/rejected": -388.698486328125, "loss": 0.5668, "rewards/accuracies": 0.75, "rewards/chosen": -1.2258248329162598, "rewards/margins": 0.6155509948730469, "rewards/rejected": -1.8413759469985962, "step": 1380 }, { "epoch": 0.27, "learning_rate": 4.558955892987774e-06, "logits/chosen": -1.59917414188385, "logits/rejected": -1.4982532262802124, "logps/chosen": -340.16107177734375, "logps/rejected": -405.5443420410156, "loss": 0.4999, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.325260877609253, "rewards/margins": 0.938653826713562, "rewards/rejected": -2.2639145851135254, "step": 1390 }, { "epoch": 0.27, "learning_rate": 4.549189569333387e-06, "logits/chosen": -1.7914392948150635, "logits/rejected": -1.4878028631210327, "logps/chosen": -396.49798583984375, "logps/rejected": -399.7130432128906, "loss": 0.5286, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.035138726234436, "rewards/margins": 0.9291532635688782, "rewards/rejected": -1.9642921686172485, "step": 1400 }, { "epoch": 0.27, "eval_logits/chosen": 0.8243531584739685, "eval_logits/rejected": 1.1978684663772583, "eval_logps/chosen": -401.0144958496094, "eval_logps/rejected": -447.237060546875, "eval_loss": 0.5193303227424622, "eval_rewards/accuracies": 0.7038690447807312, "eval_rewards/chosen": -1.3777379989624023, "eval_rewards/margins": 0.7634496092796326, "eval_rewards/rejected": -2.1411876678466797, "eval_runtime": 469.87, "eval_samples_per_second": 4.256, "eval_steps_per_second": 0.179, "step": 1400 }, { "epoch": 0.28, "learning_rate": 4.539326997845124e-06, "logits/chosen": -1.8032891750335693, "logits/rejected": -1.5840117931365967, "logps/chosen": -414.5916442871094, "logps/rejected": -584.2218627929688, "loss": 0.4502, "rewards/accuracies": 0.75, "rewards/chosen": -1.7965669631958008, "rewards/margins": 1.4432768821716309, "rewards/rejected": -3.2398440837860107, "step": 1410 }, { "epoch": 0.28, "learning_rate": 4.529368641755453e-06, "logits/chosen": -1.791632890701294, "logits/rejected": -1.1489166021347046, "logps/chosen": -516.554443359375, "logps/rejected": -596.7518920898438, "loss": 0.5538, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.201895236968994, "rewards/margins": 1.2632453441619873, "rewards/rejected": -3.4651405811309814, "step": 1420 }, { "epoch": 0.28, "learning_rate": 4.519314968795722e-06, "logits/chosen": -1.3863189220428467, "logits/rejected": -1.6445220708847046, "logps/chosen": -467.303466796875, "logps/rejected": -603.4534301757812, "loss": 0.6027, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.094290018081665, "rewards/margins": 0.9386318325996399, "rewards/rejected": -3.03292179107666, "step": 1430 }, { "epoch": 0.28, "learning_rate": 4.509166451174194e-06, "logits/chosen": -1.7560551166534424, "logits/rejected": -1.5599005222320557, "logps/chosen": -516.143310546875, "logps/rejected": -626.3974609375, "loss": 0.4954, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.342982053756714, "rewards/margins": 1.2926456928253174, "rewards/rejected": -3.6356277465820312, "step": 1440 }, { "epoch": 0.28, "learning_rate": 4.498923565553866e-06, "logits/chosen": -1.6948440074920654, "logits/rejected": -1.644818663597107, "logps/chosen": -414.959716796875, "logps/rejected": -533.6055908203125, "loss": 0.5304, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.96126389503479, "rewards/margins": 0.956006646156311, "rewards/rejected": -2.9172706604003906, "step": 1450 }, { "epoch": 0.29, "learning_rate": 4.488586793030075e-06, "logits/chosen": -1.698326826095581, "logits/rejected": -1.4736413955688477, "logps/chosen": -370.10833740234375, "logps/rejected": -512.392333984375, "loss": 0.4624, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6409461498260498, "rewards/margins": 1.054914116859436, "rewards/rejected": -2.6958603858947754, "step": 1460 }, { "epoch": 0.29, "learning_rate": 4.478156619107912e-06, "logits/chosen": -1.7624685764312744, "logits/rejected": -1.3393685817718506, "logps/chosen": -386.123046875, "logps/rejected": -550.3563232421875, "loss": 0.4852, "rewards/accuracies": 0.75, "rewards/chosen": -1.5561819076538086, "rewards/margins": 1.3703248500823975, "rewards/rejected": -2.926506519317627, "step": 1470 }, { "epoch": 0.29, "learning_rate": 4.4676335336794125e-06, "logits/chosen": -2.015625238418579, "logits/rejected": -1.9299089908599854, "logps/chosen": -498.6893615722656, "logps/rejected": -575.2039184570312, "loss": 0.615, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8964248895645142, "rewards/margins": 0.6595728397369385, "rewards/rejected": -2.555997848510742, "step": 1480 }, { "epoch": 0.29, "learning_rate": 4.457018031000544e-06, "logits/chosen": -1.7110872268676758, "logits/rejected": -1.4807096719741821, "logps/chosen": -418.84381103515625, "logps/rejected": -466.87628173828125, "loss": 0.4524, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6023814678192139, "rewards/margins": 0.7802639007568359, "rewards/rejected": -2.38264536857605, "step": 1490 }, { "epoch": 0.29, "learning_rate": 4.446310609668001e-06, "logits/chosen": -1.6997228860855103, "logits/rejected": -1.5844206809997559, "logps/chosen": -328.55035400390625, "logps/rejected": -412.58380126953125, "loss": 0.6095, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.467379093170166, "rewards/margins": 0.6344095468521118, "rewards/rejected": -2.1017885208129883, "step": 1500 }, { "epoch": 0.29, "eval_logits/chosen": -0.023842444643378258, "eval_logits/rejected": 0.35978996753692627, "eval_logps/chosen": -380.54217529296875, "eval_logps/rejected": -421.9496765136719, "eval_loss": 0.5205935835838318, "eval_rewards/accuracies": 0.7008928656578064, "eval_rewards/chosen": -1.1730148792266846, "eval_rewards/margins": 0.7152983546257019, "eval_rewards/rejected": -1.8883132934570312, "eval_runtime": 472.7173, "eval_samples_per_second": 4.231, "eval_steps_per_second": 0.178, "step": 1500 }, { "epoch": 0.3, "learning_rate": 4.435511772595773e-06, "logits/chosen": -1.9284782409667969, "logits/rejected": -1.6309083700180054, "logps/chosen": -413.587158203125, "logps/rejected": -419.9436950683594, "loss": 0.5097, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2852447032928467, "rewards/margins": 0.7189093232154846, "rewards/rejected": -2.0041542053222656, "step": 1510 }, { "epoch": 0.3, "learning_rate": 4.424622026991536e-06, "logits/chosen": -1.9414260387420654, "logits/rejected": -1.683161973953247, "logps/chosen": -374.2628173828125, "logps/rejected": -393.2518615722656, "loss": 0.5011, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1332727670669556, "rewards/margins": 0.7838243246078491, "rewards/rejected": -1.9170968532562256, "step": 1520 }, { "epoch": 0.3, "learning_rate": 4.413641884332825e-06, "logits/chosen": -1.6223335266113281, "logits/rejected": -1.4534587860107422, "logps/chosen": -372.34564208984375, "logps/rejected": -485.110107421875, "loss": 0.4454, "rewards/accuracies": 0.75, "rewards/chosen": -1.1431056261062622, "rewards/margins": 1.2465258836746216, "rewards/rejected": -2.389631748199463, "step": 1530 }, { "epoch": 0.3, "learning_rate": 4.402571860343006e-06, "logits/chosen": -1.517801284790039, "logits/rejected": -1.4638478755950928, "logps/chosen": -393.1158447265625, "logps/rejected": -419.7225036621094, "loss": 0.5361, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1784864664077759, "rewards/margins": 0.9864957928657532, "rewards/rejected": -2.164982318878174, "step": 1540 }, { "epoch": 0.3, "learning_rate": 4.39141247496706e-06, "logits/chosen": -1.835381269454956, "logits/rejected": -1.4641658067703247, "logps/chosen": -422.1026916503906, "logps/rejected": -509.72357177734375, "loss": 0.5483, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.7700493335723877, "rewards/margins": 0.6548973321914673, "rewards/rejected": -2.4249467849731445, "step": 1550 }, { "epoch": 0.31, "learning_rate": 4.3801642523471585e-06, "logits/chosen": -1.53899347782135, "logits/rejected": -1.6525852680206299, "logps/chosen": -452.6863708496094, "logps/rejected": -562.87109375, "loss": 0.481, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9491571187973022, "rewards/margins": 1.0457651615142822, "rewards/rejected": -2.994922161102295, "step": 1560 }, { "epoch": 0.31, "learning_rate": 4.368827720798044e-06, "logits/chosen": -1.6177898645401, "logits/rejected": -1.3273764848709106, "logps/chosen": -503.0707092285156, "logps/rejected": -591.0535888671875, "loss": 0.5171, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.2399299144744873, "rewards/margins": 1.0294549465179443, "rewards/rejected": -3.2693848609924316, "step": 1570 }, { "epoch": 0.31, "learning_rate": 4.35740341278222e-06, "logits/chosen": -1.937767744064331, "logits/rejected": -1.4204972982406616, "logps/chosen": -516.6253662109375, "logps/rejected": -531.3732299804688, "loss": 0.4762, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.137434244155884, "rewards/margins": 1.3851698637008667, "rewards/rejected": -3.522603988647461, "step": 1580 }, { "epoch": 0.31, "learning_rate": 4.345891864884937e-06, "logits/chosen": -1.4484498500823975, "logits/rejected": -1.2074967622756958, "logps/chosen": -376.8973388671875, "logps/rejected": -465.946044921875, "loss": 0.4636, "rewards/accuracies": 0.875, "rewards/chosen": -1.6752738952636719, "rewards/margins": 1.1741887331008911, "rewards/rejected": -2.8494625091552734, "step": 1590 }, { "epoch": 0.31, "learning_rate": 4.334293617788992e-06, "logits/chosen": -1.7261905670166016, "logits/rejected": -1.6486454010009766, "logps/chosen": -483.2078552246094, "logps/rejected": -598.8380126953125, "loss": 0.5627, "rewards/accuracies": 0.75, "rewards/chosen": -2.0185751914978027, "rewards/margins": 1.0393562316894531, "rewards/rejected": -3.057931423187256, "step": 1600 }, { "epoch": 0.31, "eval_logits/chosen": 0.41467130184173584, "eval_logits/rejected": 0.7394571304321289, "eval_logps/chosen": -451.3461608886719, "eval_logps/rejected": -510.4462890625, "eval_loss": 0.5224563479423523, "eval_rewards/accuracies": 0.6934523582458496, "eval_rewards/chosen": -1.8810548782348633, "eval_rewards/margins": 0.892224907875061, "eval_rewards/rejected": -2.773279905319214, "eval_runtime": 468.6159, "eval_samples_per_second": 4.268, "eval_steps_per_second": 0.179, "step": 1600 }, { "epoch": 0.32, "learning_rate": 4.322609216249336e-06, "logits/chosen": -1.4203035831451416, "logits/rejected": -1.3660808801651, "logps/chosen": -459.4410095214844, "logps/rejected": -574.9068603515625, "loss": 0.5023, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7188522815704346, "rewards/margins": 1.460274577140808, "rewards/rejected": -3.179126739501953, "step": 1610 }, { "epoch": 0.32, "learning_rate": 4.310839209067482e-06, "logits/chosen": -1.4383463859558105, "logits/rejected": -1.3699233531951904, "logps/chosen": -462.7704162597656, "logps/rejected": -559.3999633789062, "loss": 0.5211, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2110602855682373, "rewards/margins": 0.9121319055557251, "rewards/rejected": -3.123192310333252, "step": 1620 }, { "epoch": 0.32, "learning_rate": 4.298984149065732e-06, "logits/chosen": -1.6822255849838257, "logits/rejected": -1.4348416328430176, "logps/chosen": -506.04083251953125, "logps/rejected": -583.9845581054688, "loss": 0.5117, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2623586654663086, "rewards/margins": 0.8611604571342468, "rewards/rejected": -3.1235194206237793, "step": 1630 }, { "epoch": 0.32, "learning_rate": 4.2870445930612135e-06, "logits/chosen": -1.5166553258895874, "logits/rejected": -1.214342713356018, "logps/chosen": -446.37664794921875, "logps/rejected": -529.3927612304688, "loss": 0.3845, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0724778175354004, "rewards/margins": 1.1939537525177002, "rewards/rejected": -3.2664313316345215, "step": 1640 }, { "epoch": 0.32, "learning_rate": 4.2750211018397204e-06, "logits/chosen": -1.3522977828979492, "logits/rejected": -1.2287527322769165, "logps/chosen": -498.2793884277344, "logps/rejected": -644.9459228515625, "loss": 0.4669, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.349169969558716, "rewards/margins": 1.3009414672851562, "rewards/rejected": -3.650111675262451, "step": 1650 }, { "epoch": 0.33, "learning_rate": 4.262914240129379e-06, "logits/chosen": -1.5904858112335205, "logits/rejected": -1.40458083152771, "logps/chosen": -495.546630859375, "logps/rejected": -591.2596435546875, "loss": 0.4949, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.0124459266662598, "rewards/margins": 1.3423213958740234, "rewards/rejected": -3.354767322540283, "step": 1660 }, { "epoch": 0.33, "learning_rate": 4.2507245765741215e-06, "logits/chosen": -1.656916618347168, "logits/rejected": -1.5722547769546509, "logps/chosen": -529.6829833984375, "logps/rejected": -617.2437744140625, "loss": 0.4921, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2331345081329346, "rewards/margins": 1.2079585790634155, "rewards/rejected": -3.4410929679870605, "step": 1670 }, { "epoch": 0.33, "learning_rate": 4.238452683706979e-06, "logits/chosen": -1.775490403175354, "logits/rejected": -1.6295232772827148, "logps/chosen": -435.9142150878906, "logps/rejected": -568.1779174804688, "loss": 0.5471, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8802047967910767, "rewards/margins": 1.2522530555725098, "rewards/rejected": -3.132458209991455, "step": 1680 }, { "epoch": 0.33, "learning_rate": 4.226099137923186e-06, "logits/chosen": -1.973921537399292, "logits/rejected": -1.6960344314575195, "logps/chosen": -485.93902587890625, "logps/rejected": -519.7362060546875, "loss": 0.5535, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8466163873672485, "rewards/margins": 0.6419102549552917, "rewards/rejected": -2.4885265827178955, "step": 1690 }, { "epoch": 0.33, "learning_rate": 4.213664519453115e-06, "logits/chosen": -1.9954742193222046, "logits/rejected": -2.0354678630828857, "logps/chosen": -381.51885986328125, "logps/rejected": -468.69818115234375, "loss": 0.5222, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.402010440826416, "rewards/margins": 0.8014278411865234, "rewards/rejected": -2.2034382820129395, "step": 1700 }, { "epoch": 0.33, "eval_logits/chosen": -0.384397953748703, "eval_logits/rejected": -0.06426750123500824, "eval_logps/chosen": -382.0738830566406, "eval_logps/rejected": -417.88531494140625, "eval_loss": 0.5209601521492004, "eval_rewards/accuracies": 0.7142857313156128, "eval_rewards/chosen": -1.188332200050354, "eval_rewards/margins": 0.6593378186225891, "eval_rewards/rejected": -1.847670078277588, "eval_runtime": 458.9623, "eval_samples_per_second": 4.358, "eval_steps_per_second": 0.183, "step": 1700 }, { "epoch": 0.34, "learning_rate": 4.201149412335015e-06, "logits/chosen": -2.1409153938293457, "logits/rejected": -1.844313621520996, "logps/chosen": -423.0439453125, "logps/rejected": -440.78570556640625, "loss": 0.4777, "rewards/accuracies": 0.75, "rewards/chosen": -1.185486912727356, "rewards/margins": 0.8028791546821594, "rewards/rejected": -1.988365888595581, "step": 1710 }, { "epoch": 0.34, "learning_rate": 4.188554404387588e-06, "logits/chosen": -1.8138986825942993, "logits/rejected": -1.6512314081192017, "logps/chosen": -327.36737060546875, "logps/rejected": -398.70184326171875, "loss": 0.5029, "rewards/accuracies": 0.875, "rewards/chosen": -1.018953800201416, "rewards/margins": 0.9757841229438782, "rewards/rejected": -1.994738221168518, "step": 1720 }, { "epoch": 0.34, "learning_rate": 4.175880087182376e-06, "logits/chosen": -1.8932291269302368, "logits/rejected": -1.6494147777557373, "logps/chosen": -279.64373779296875, "logps/rejected": -345.70465087890625, "loss": 0.5683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9910870790481567, "rewards/margins": 0.7539299726486206, "rewards/rejected": -1.7450170516967773, "step": 1730 }, { "epoch": 0.34, "learning_rate": 4.163127056015975e-06, "logits/chosen": -2.0070395469665527, "logits/rejected": -1.9813343286514282, "logps/chosen": -383.7837829589844, "logps/rejected": -468.212646484375, "loss": 0.561, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9646474123001099, "rewards/margins": 1.0487464666366577, "rewards/rejected": -2.0133938789367676, "step": 1740 }, { "epoch": 0.34, "learning_rate": 4.1502959098820774e-06, "logits/chosen": -1.8485866785049438, "logits/rejected": -1.7759544849395752, "logps/chosen": -352.3072814941406, "logps/rejected": -425.56915283203125, "loss": 0.4764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.904167652130127, "rewards/margins": 1.056064486503601, "rewards/rejected": -1.960232138633728, "step": 1750 }, { "epoch": 0.35, "learning_rate": 4.137387251443335e-06, "logits/chosen": -1.5388623476028442, "logits/rejected": -1.4535671472549438, "logps/chosen": -339.34197998046875, "logps/rejected": -419.2979431152344, "loss": 0.531, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3059513568878174, "rewards/margins": 0.8590526580810547, "rewards/rejected": -2.165003776550293, "step": 1760 }, { "epoch": 0.35, "learning_rate": 4.124401687003057e-06, "logits/chosen": -1.820469856262207, "logits/rejected": -1.6758521795272827, "logps/chosen": -412.8130798339844, "logps/rejected": -451.430419921875, "loss": 0.5253, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3753070831298828, "rewards/margins": 0.783947229385376, "rewards/rejected": -2.159254550933838, "step": 1770 }, { "epoch": 0.35, "learning_rate": 4.111339826476725e-06, "logits/chosen": -1.655381441116333, "logits/rejected": -1.4131075143814087, "logps/chosen": -486.42401123046875, "logps/rejected": -557.5438842773438, "loss": 0.5744, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.7467525005340576, "rewards/margins": 0.8533447980880737, "rewards/rejected": -2.600097179412842, "step": 1780 }, { "epoch": 0.35, "learning_rate": 4.098202283363356e-06, "logits/chosen": -1.9893325567245483, "logits/rejected": -1.7300984859466553, "logps/chosen": -499.3247985839844, "logps/rejected": -464.3089294433594, "loss": 0.5828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4985158443450928, "rewards/margins": 0.48281264305114746, "rewards/rejected": -1.9813286066055298, "step": 1790 }, { "epoch": 0.35, "learning_rate": 4.084989674716679e-06, "logits/chosen": -1.932824730873108, "logits/rejected": -1.5683988332748413, "logps/chosen": -370.2239685058594, "logps/rejected": -427.7416076660156, "loss": 0.5163, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3531526327133179, "rewards/margins": 0.8490608334541321, "rewards/rejected": -2.2022132873535156, "step": 1800 }, { "epoch": 0.35, "eval_logits/chosen": 0.9604912996292114, "eval_logits/rejected": 1.2999762296676636, "eval_logps/chosen": -381.0427551269531, "eval_logps/rejected": -430.9522399902344, "eval_loss": 0.5219135880470276, "eval_rewards/accuracies": 0.7247023582458496, "eval_rewards/chosen": -1.1780204772949219, "eval_rewards/margins": 0.8003180623054504, "eval_rewards/rejected": -1.9783387184143066, "eval_runtime": 466.9785, "eval_samples_per_second": 4.283, "eval_steps_per_second": 0.18, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.071702621116158e-06, "logits/chosen": -1.9070345163345337, "logits/rejected": -1.732208490371704, "logps/chosen": -396.03912353515625, "logps/rejected": -419.9725646972656, "loss": 0.6269, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.3162269592285156, "rewards/margins": 0.49873286485671997, "rewards/rejected": -1.8149597644805908, "step": 1810 }, { "epoch": 0.36, "learning_rate": 4.05834174663784e-06, "logits/chosen": -1.7320168018341064, "logits/rejected": -1.5021142959594727, "logps/chosen": -367.4055480957031, "logps/rejected": -415.22900390625, "loss": 0.5183, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.2845791578292847, "rewards/margins": 0.863175094127655, "rewards/rejected": -2.147754430770874, "step": 1820 }, { "epoch": 0.36, "learning_rate": 4.044907678825045e-06, "logits/chosen": -1.7646843194961548, "logits/rejected": -1.7249200344085693, "logps/chosen": -432.54925537109375, "logps/rejected": -497.01348876953125, "loss": 0.5647, "rewards/accuracies": 0.75, "rewards/chosen": -1.4014708995819092, "rewards/margins": 0.8739999532699585, "rewards/rejected": -2.2754709720611572, "step": 1830 }, { "epoch": 0.36, "learning_rate": 4.031401048658892e-06, "logits/chosen": -2.0431199073791504, "logits/rejected": -1.9735429286956787, "logps/chosen": -399.82501220703125, "logps/rejected": -501.8202209472656, "loss": 0.4795, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.3438560962677002, "rewards/margins": 1.3123345375061035, "rewards/rejected": -2.656190872192383, "step": 1840 }, { "epoch": 0.36, "learning_rate": 4.017822490528664e-06, "logits/chosen": -1.529136061668396, "logits/rejected": -1.4915677309036255, "logps/chosen": -423.474365234375, "logps/rejected": -527.9143676757812, "loss": 0.4903, "rewards/accuracies": 0.75, "rewards/chosen": -1.7233842611312866, "rewards/margins": 0.9296349287033081, "rewards/rejected": -2.653019428253174, "step": 1850 }, { "epoch": 0.37, "learning_rate": 4.004172642202002e-06, "logits/chosen": -1.5431911945343018, "logits/rejected": -1.4479010105133057, "logps/chosen": -502.7958984375, "logps/rejected": -612.9650268554688, "loss": 0.54, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8878120183944702, "rewards/margins": 1.3048069477081299, "rewards/rejected": -3.1926190853118896, "step": 1860 }, { "epoch": 0.37, "learning_rate": 3.990452144794966e-06, "logits/chosen": -1.468390941619873, "logits/rejected": -1.2413341999053955, "logps/chosen": -449.906982421875, "logps/rejected": -506.55914306640625, "loss": 0.5596, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0285403728485107, "rewards/margins": 0.9599047899246216, "rewards/rejected": -2.9884450435638428, "step": 1870 }, { "epoch": 0.37, "learning_rate": 3.976661642741908e-06, "logits/chosen": -1.447554111480713, "logits/rejected": -1.3410959243774414, "logps/chosen": -457.1023864746094, "logps/rejected": -557.6286010742188, "loss": 0.4901, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9820743799209595, "rewards/margins": 0.9573952555656433, "rewards/rejected": -2.939469575881958, "step": 1880 }, { "epoch": 0.37, "learning_rate": 3.96280178376521e-06, "logits/chosen": -1.8144747018814087, "logits/rejected": -1.8249022960662842, "logps/chosen": -422.5291442871094, "logps/rejected": -498.2322692871094, "loss": 0.5081, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5139400959014893, "rewards/margins": 0.7199869751930237, "rewards/rejected": -2.2339272499084473, "step": 1890 }, { "epoch": 0.37, "learning_rate": 3.948873218844863e-06, "logits/chosen": -1.4795005321502686, "logits/rejected": -1.4016082286834717, "logps/chosen": -480.5079040527344, "logps/rejected": -600.5020751953125, "loss": 0.511, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.804473638534546, "rewards/margins": 1.339569330215454, "rewards/rejected": -3.144043207168579, "step": 1900 }, { "epoch": 0.37, "eval_logits/chosen": 0.9550392627716064, "eval_logits/rejected": 1.305156946182251, "eval_logps/chosen": -448.5622253417969, "eval_logps/rejected": -507.06622314453125, "eval_loss": 0.5213829278945923, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -1.8532155752182007, "eval_rewards/margins": 0.8862631916999817, "eval_rewards/rejected": -2.739478588104248, "eval_runtime": 466.4383, "eval_samples_per_second": 4.288, "eval_steps_per_second": 0.18, "step": 1900 }, { "epoch": 0.37, "learning_rate": 3.934876602187886e-06, "logits/chosen": -1.4697811603546143, "logits/rejected": -1.375469446182251, "logps/chosen": -453.0664978027344, "logps/rejected": -478.17547607421875, "loss": 0.5752, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7024272680282593, "rewards/margins": 0.9104480743408203, "rewards/rejected": -2.612875461578369, "step": 1910 }, { "epoch": 0.38, "learning_rate": 3.920812591197604e-06, "logits/chosen": -1.5655367374420166, "logits/rejected": -1.5533018112182617, "logps/chosen": -464.52154541015625, "logps/rejected": -574.5903930664062, "loss": 0.5062, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1701035499572754, "rewards/margins": 0.9189977645874023, "rewards/rejected": -3.0891010761260986, "step": 1920 }, { "epoch": 0.38, "learning_rate": 3.906681846442768e-06, "logits/chosen": -1.3456695079803467, "logits/rejected": -1.4312808513641357, "logps/chosen": -495.2659606933594, "logps/rejected": -612.2733154296875, "loss": 0.4389, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.6250083446502686, "rewards/margins": 1.3098533153533936, "rewards/rejected": -3.934861421585083, "step": 1930 }, { "epoch": 0.38, "learning_rate": 3.892485031626527e-06, "logits/chosen": -1.3476498126983643, "logits/rejected": -1.0824012756347656, "logps/chosen": -602.011474609375, "logps/rejected": -693.4718017578125, "loss": 0.4966, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.9910778999328613, "rewards/margins": 1.296937346458435, "rewards/rejected": -4.2880144119262695, "step": 1940 }, { "epoch": 0.38, "learning_rate": 3.8782228135552615e-06, "logits/chosen": -1.182939052581787, "logits/rejected": -1.1389211416244507, "logps/chosen": -505.2765197753906, "logps/rejected": -689.6798095703125, "loss": 0.5595, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8137130737304688, "rewards/margins": 1.3322536945343018, "rewards/rejected": -4.145966529846191, "step": 1950 }, { "epoch": 0.38, "learning_rate": 3.863895862107255e-06, "logits/chosen": -1.6203094720840454, "logits/rejected": -1.1558005809783936, "logps/chosen": -561.7947387695312, "logps/rejected": -610.0185546875, "loss": 0.4369, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4451165199279785, "rewards/margins": 1.4709765911102295, "rewards/rejected": -3.91609263420105, "step": 1960 }, { "epoch": 0.39, "learning_rate": 3.849504850201238e-06, "logits/chosen": -1.3694090843200684, "logits/rejected": -1.1556737422943115, "logps/chosen": -539.959228515625, "logps/rejected": -617.424072265625, "loss": 0.4763, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.7902231216430664, "rewards/margins": 0.9595049023628235, "rewards/rejected": -3.749727964401245, "step": 1970 }, { "epoch": 0.39, "learning_rate": 3.835050453764779e-06, "logits/chosen": -1.4200727939605713, "logits/rejected": -1.1518549919128418, "logps/chosen": -501.21063232421875, "logps/rejected": -642.7298583984375, "loss": 0.582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5669314861297607, "rewards/margins": 1.3281501531600952, "rewards/rejected": -3.8950817584991455, "step": 1980 }, { "epoch": 0.39, "learning_rate": 3.820533351702538e-06, "logits/chosen": -1.4720011949539185, "logits/rejected": -1.189164161682129, "logps/chosen": -465.8736877441406, "logps/rejected": -565.3887329101562, "loss": 0.5744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.1700820922851562, "rewards/margins": 1.2808657884597778, "rewards/rejected": -3.4509479999542236, "step": 1990 }, { "epoch": 0.39, "learning_rate": 3.80595422586438e-06, "logits/chosen": -1.685773491859436, "logits/rejected": -1.3915890455245972, "logps/chosen": -437.60894775390625, "logps/rejected": -498.50390625, "loss": 0.484, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.956373929977417, "rewards/margins": 0.9637616872787476, "rewards/rejected": -2.920135736465454, "step": 2000 }, { "epoch": 0.39, "eval_logits/chosen": 1.3132091760635376, "eval_logits/rejected": 1.6338907480239868, "eval_logps/chosen": -441.24273681640625, "eval_logps/rejected": -494.93695068359375, "eval_loss": 0.5161046981811523, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -1.7800202369689941, "eval_rewards/margins": 0.8381654024124146, "eval_rewards/rejected": -2.6181857585906982, "eval_runtime": 461.8655, "eval_samples_per_second": 4.33, "eval_steps_per_second": 0.182, "step": 2000 }, { "epoch": 0.39, "learning_rate": 3.791313761013343e-06, "logits/chosen": -1.5194200277328491, "logits/rejected": -1.4752867221832275, "logps/chosen": -414.8877868652344, "logps/rejected": -529.7425537109375, "loss": 0.392, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.467829942703247, "rewards/margins": 1.1718862056732178, "rewards/rejected": -2.639716386795044, "step": 2010 }, { "epoch": 0.4, "learning_rate": 3.7766126447934857e-06, "logits/chosen": -1.9778658151626587, "logits/rejected": -1.9723155498504639, "logps/chosen": -564.9736938476562, "logps/rejected": -619.1234130859375, "loss": 0.4866, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.397613525390625, "rewards/margins": 0.49791449308395386, "rewards/rejected": -2.8955278396606445, "step": 2020 }, { "epoch": 0.4, "learning_rate": 3.761851567697583e-06, "logits/chosen": -1.4296815395355225, "logits/rejected": -1.3542410135269165, "logps/chosen": -450.0517578125, "logps/rejected": -552.33935546875, "loss": 0.5166, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.0276377201080322, "rewards/margins": 1.0687463283538818, "rewards/rejected": -3.096384048461914, "step": 2030 }, { "epoch": 0.4, "learning_rate": 3.7470312230346955e-06, "logits/chosen": -1.493628978729248, "logits/rejected": -1.6857808828353882, "logps/chosen": -440.001708984375, "logps/rejected": -583.5567626953125, "loss": 0.5048, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.050156354904175, "rewards/margins": 1.209169626235962, "rewards/rejected": -3.2593257427215576, "step": 2040 }, { "epoch": 0.4, "learning_rate": 3.7321523068976068e-06, "logits/chosen": -1.536556363105774, "logits/rejected": -1.583799123764038, "logps/chosen": -450.343505859375, "logps/rejected": -600.0687255859375, "loss": 0.4018, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8308045864105225, "rewards/margins": 1.2747979164123535, "rewards/rejected": -3.105602741241455, "step": 2050 }, { "epoch": 0.4, "learning_rate": 3.717215518130127e-06, "logits/chosen": -1.6683518886566162, "logits/rejected": -1.5575300455093384, "logps/chosen": -513.941650390625, "logps/rejected": -543.3946533203125, "loss": 0.5745, "rewards/accuracies": 0.625, "rewards/chosen": -2.285006046295166, "rewards/margins": 0.6656274199485779, "rewards/rejected": -2.9506335258483887, "step": 2060 }, { "epoch": 0.41, "learning_rate": 3.702221558294274e-06, "logits/chosen": -1.1688969135284424, "logits/rejected": -0.8642789125442505, "logps/chosen": -405.437255859375, "logps/rejected": -475.583740234375, "loss": 0.5127, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.129958152770996, "rewards/margins": 1.0819867849349976, "rewards/rejected": -3.211945056915283, "step": 2070 }, { "epoch": 0.41, "learning_rate": 3.687171131637314e-06, "logits/chosen": -1.8774950504302979, "logits/rejected": -1.746014952659607, "logps/chosen": -484.9676818847656, "logps/rejected": -528.9475708007812, "loss": 0.4464, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9119985103607178, "rewards/margins": 0.8228870630264282, "rewards/rejected": -2.7348859310150146, "step": 2080 }, { "epoch": 0.41, "learning_rate": 3.6720649450586885e-06, "logits/chosen": -1.770233154296875, "logits/rejected": -1.5432703495025635, "logps/chosen": -523.8516845703125, "logps/rejected": -624.4163818359375, "loss": 0.462, "rewards/accuracies": 0.875, "rewards/chosen": -2.295456886291504, "rewards/margins": 1.4275439977645874, "rewards/rejected": -3.7230007648468018, "step": 2090 }, { "epoch": 0.41, "learning_rate": 3.6569037080768153e-06, "logits/chosen": -1.7754666805267334, "logits/rejected": -1.4459218978881836, "logps/chosen": -549.3494873046875, "logps/rejected": -605.2619018554688, "loss": 0.4863, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.489126205444336, "rewards/margins": 1.4342973232269287, "rewards/rejected": -3.9234230518341064, "step": 2100 }, { "epoch": 0.41, "eval_logits/chosen": 2.0461196899414062, "eval_logits/rejected": 2.342752456665039, "eval_logps/chosen": -541.5034790039062, "eval_logps/rejected": -617.3856811523438, "eval_loss": 0.5182604789733887, "eval_rewards/accuracies": 0.7157738208770752, "eval_rewards/chosen": -2.782627820968628, "eval_rewards/margins": 1.0600451231002808, "eval_rewards/rejected": -3.8426730632781982, "eval_runtime": 471.2755, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.178, "step": 2100 }, { "epoch": 0.41, "learning_rate": 3.641688132795757e-06, "logits/chosen": -1.4198346138000488, "logits/rejected": -1.179573655128479, "logps/chosen": -549.3745727539062, "logps/rejected": -682.312744140625, "loss": 0.4812, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.051333427429199, "rewards/margins": 1.439754843711853, "rewards/rejected": -4.491088390350342, "step": 2110 }, { "epoch": 0.42, "learning_rate": 3.6264189338717766e-06, "logits/chosen": -1.679993987083435, "logits/rejected": -1.485656499862671, "logps/chosen": -512.0293579101562, "logps/rejected": -534.0633544921875, "loss": 0.5092, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.6621744632720947, "rewards/margins": 0.41850295662879944, "rewards/rejected": -3.0806775093078613, "step": 2120 }, { "epoch": 0.42, "learning_rate": 3.611096828479773e-06, "logits/chosen": -1.690553069114685, "logits/rejected": -1.6504080295562744, "logps/chosen": -460.89923095703125, "logps/rejected": -589.2086791992188, "loss": 0.503, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.381263256072998, "rewards/margins": 1.3044815063476562, "rewards/rejected": -3.6857447624206543, "step": 2130 }, { "epoch": 0.42, "learning_rate": 3.595722536279595e-06, "logits/chosen": -1.7249847650527954, "logits/rejected": -1.7725975513458252, "logps/chosen": -478.90478515625, "logps/rejected": -575.4903564453125, "loss": 0.5849, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.228055953979492, "rewards/margins": 1.0864596366882324, "rewards/rejected": -3.3145155906677246, "step": 2140 }, { "epoch": 0.42, "learning_rate": 3.5802967793822386e-06, "logits/chosen": -1.5533924102783203, "logits/rejected": -1.4917786121368408, "logps/chosen": -496.40606689453125, "logps/rejected": -567.6104736328125, "loss": 0.454, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6233153343200684, "rewards/margins": 1.0902141332626343, "rewards/rejected": -3.7135300636291504, "step": 2150 }, { "epoch": 0.42, "learning_rate": 3.5648202823159317e-06, "logits/chosen": -1.7521919012069702, "logits/rejected": -1.4811947345733643, "logps/chosen": -518.8137817382812, "logps/rejected": -560.3478393554688, "loss": 0.4623, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.16514253616333, "rewards/margins": 0.9891520738601685, "rewards/rejected": -3.154294490814209, "step": 2160 }, { "epoch": 0.43, "learning_rate": 3.549293771992104e-06, "logits/chosen": -1.439638376235962, "logits/rejected": -1.3014863729476929, "logps/chosen": -445.81585693359375, "logps/rejected": -580.5036010742188, "loss": 0.5646, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.1650853157043457, "rewards/margins": 0.892579197883606, "rewards/rejected": -3.057664394378662, "step": 2170 }, { "epoch": 0.43, "learning_rate": 3.5337179776712427e-06, "logits/chosen": -1.9127401113510132, "logits/rejected": -1.8155453205108643, "logps/chosen": -468.47650146484375, "logps/rejected": -502.29949951171875, "loss": 0.5507, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.7308883666992188, "rewards/margins": 0.747651219367981, "rewards/rejected": -2.478539228439331, "step": 2180 }, { "epoch": 0.43, "learning_rate": 3.5180936309286444e-06, "logits/chosen": -1.9631290435791016, "logits/rejected": -1.778114676475525, "logps/chosen": -451.0347595214844, "logps/rejected": -454.935546875, "loss": 0.4834, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4917702674865723, "rewards/margins": 0.7677947878837585, "rewards/rejected": -2.2595651149749756, "step": 2190 }, { "epoch": 0.43, "learning_rate": 3.5024214656200497e-06, "logits/chosen": -1.657544493675232, "logits/rejected": -1.5524613857269287, "logps/chosen": -398.22076416015625, "logps/rejected": -510.81866455078125, "loss": 0.5233, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5267362594604492, "rewards/margins": 1.0521165132522583, "rewards/rejected": -2.578852653503418, "step": 2200 }, { "epoch": 0.43, "eval_logits/chosen": 0.5627753734588623, "eval_logits/rejected": 0.9790993928909302, "eval_logps/chosen": -440.25799560546875, "eval_logps/rejected": -494.96429443359375, "eval_loss": 0.5114842653274536, "eval_rewards/accuracies": 0.7172619104385376, "eval_rewards/chosen": -1.7701728343963623, "eval_rewards/margins": 0.8482868671417236, "eval_rewards/rejected": -2.618459701538086, "eval_runtime": 471.2173, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.178, "step": 2200 }, { "epoch": 0.43, "learning_rate": 3.4867022178471764e-06, "logits/chosen": -1.8048145771026611, "logits/rejected": -1.6141141653060913, "logps/chosen": -382.29986572265625, "logps/rejected": -459.41729736328125, "loss": 0.5518, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8070600032806396, "rewards/margins": 1.0074267387390137, "rewards/rejected": -2.8144869804382324, "step": 2210 }, { "epoch": 0.44, "learning_rate": 3.4709366259231468e-06, "logits/chosen": -1.8121106624603271, "logits/rejected": -1.7930854558944702, "logps/chosen": -457.7802734375, "logps/rejected": -583.01708984375, "loss": 0.5169, "rewards/accuracies": 0.75, "rewards/chosen": -1.9206135272979736, "rewards/margins": 0.919543445110321, "rewards/rejected": -2.8401570320129395, "step": 2220 }, { "epoch": 0.44, "learning_rate": 3.455125430337809e-06, "logits/chosen": -1.637721300125122, "logits/rejected": -1.5897992849349976, "logps/chosen": -431.33209228515625, "logps/rejected": -502.7018127441406, "loss": 0.6024, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1560745239257812, "rewards/margins": 0.6406152844429016, "rewards/rejected": -2.796689987182617, "step": 2230 }, { "epoch": 0.44, "learning_rate": 3.439269373722957e-06, "logits/chosen": -1.791049599647522, "logits/rejected": -1.5320322513580322, "logps/chosen": -456.51739501953125, "logps/rejected": -544.6226196289062, "loss": 0.5001, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.033076763153076, "rewards/margins": 0.6280455589294434, "rewards/rejected": -2.6611223220825195, "step": 2240 }, { "epoch": 0.44, "learning_rate": 3.4233692008174497e-06, "logits/chosen": -1.7971560955047607, "logits/rejected": -1.3178821802139282, "logps/chosen": -524.2515869140625, "logps/rejected": -556.8560791015625, "loss": 0.5692, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.5150840282440186, "rewards/margins": 0.8231359720230103, "rewards/rejected": -3.3382201194763184, "step": 2250 }, { "epoch": 0.44, "learning_rate": 3.4074256584322336e-06, "logits/chosen": -1.49863600730896, "logits/rejected": -1.141994595527649, "logps/chosen": -595.3875122070312, "logps/rejected": -646.2301025390625, "loss": 0.55, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.773069381713867, "rewards/margins": 0.7329319715499878, "rewards/rejected": -3.5060012340545654, "step": 2260 }, { "epoch": 0.45, "learning_rate": 3.3914394954152635e-06, "logits/chosen": -1.5777288675308228, "logits/rejected": -1.4726307392120361, "logps/chosen": -527.77099609375, "logps/rejected": -569.65625, "loss": 0.5508, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.4085521697998047, "rewards/margins": 0.8079524040222168, "rewards/rejected": -3.2165043354034424, "step": 2270 }, { "epoch": 0.45, "learning_rate": 3.375411462616332e-06, "logits/chosen": -1.5236730575561523, "logits/rejected": -1.6202980279922485, "logps/chosen": -460.6907653808594, "logps/rejected": -555.6349487304688, "loss": 0.5557, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1377615928649902, "rewards/margins": 0.86217200756073, "rewards/rejected": -2.9999334812164307, "step": 2280 }, { "epoch": 0.45, "learning_rate": 3.3593423128518017e-06, "logits/chosen": -1.9396699666976929, "logits/rejected": -1.71761155128479, "logps/chosen": -395.2947082519531, "logps/rejected": -500.8861389160156, "loss": 0.4944, "rewards/accuracies": 0.75, "rewards/chosen": -1.609452486038208, "rewards/margins": 1.191273808479309, "rewards/rejected": -2.8007264137268066, "step": 2290 }, { "epoch": 0.45, "learning_rate": 3.343232800869247e-06, "logits/chosen": -1.7285282611846924, "logits/rejected": -1.6788495779037476, "logps/chosen": -368.57952880859375, "logps/rejected": -448.85833740234375, "loss": 0.5343, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4113600254058838, "rewards/margins": 1.0198975801467896, "rewards/rejected": -2.431257724761963, "step": 2300 }, { "epoch": 0.45, "eval_logits/chosen": 0.5468556880950928, "eval_logits/rejected": 1.0255200862884521, "eval_logps/chosen": -406.37005615234375, "eval_logps/rejected": -455.22125244140625, "eval_loss": 0.507876455783844, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -1.4312934875488281, "eval_rewards/margins": 0.7897354960441589, "eval_rewards/rejected": -2.2210288047790527, "eval_runtime": 469.5669, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.179, "step": 2300 }, { "epoch": 0.45, "learning_rate": 3.3270836833120047e-06, "logits/chosen": -1.9568431377410889, "logits/rejected": -1.5243427753448486, "logps/chosen": -324.5955810546875, "logps/rejected": -381.22576904296875, "loss": 0.5557, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.318694829940796, "rewards/margins": 0.8176606893539429, "rewards/rejected": -2.1363556385040283, "step": 2310 }, { "epoch": 0.46, "learning_rate": 3.310895718683635e-06, "logits/chosen": -2.0503430366516113, "logits/rejected": -1.8878549337387085, "logps/chosen": -412.74017333984375, "logps/rejected": -501.390625, "loss": 0.5432, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5697447061538696, "rewards/margins": 0.8855142593383789, "rewards/rejected": -2.455258846282959, "step": 2320 }, { "epoch": 0.46, "learning_rate": 3.2946696673122953e-06, "logits/chosen": -1.760880708694458, "logits/rejected": -1.8217281103134155, "logps/chosen": -435.4695739746094, "logps/rejected": -527.7000732421875, "loss": 0.4251, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.736303687095642, "rewards/margins": 1.1922600269317627, "rewards/rejected": -2.9285635948181152, "step": 2330 }, { "epoch": 0.46, "learning_rate": 3.27840629131503e-06, "logits/chosen": -1.9441564083099365, "logits/rejected": -1.6510957479476929, "logps/chosen": -512.7154541015625, "logps/rejected": -557.56005859375, "loss": 0.5342, "rewards/accuracies": 0.875, "rewards/chosen": -1.9222028255462646, "rewards/margins": 1.151485562324524, "rewards/rejected": -3.073688268661499, "step": 2340 }, { "epoch": 0.46, "learning_rate": 3.2621063545619734e-06, "logits/chosen": -1.6034061908721924, "logits/rejected": -1.4517968893051147, "logps/chosen": -548.5872192382812, "logps/rejected": -585.1336669921875, "loss": 0.5077, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.5587801933288574, "rewards/margins": 0.6855789422988892, "rewards/rejected": -3.244358777999878, "step": 2350 }, { "epoch": 0.46, "learning_rate": 3.2457706226404715e-06, "logits/chosen": -1.538956880569458, "logits/rejected": -1.595827579498291, "logps/chosen": -428.7093811035156, "logps/rejected": -501.70111083984375, "loss": 0.5667, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.207473039627075, "rewards/margins": 0.5854024887084961, "rewards/rejected": -2.792875051498413, "step": 2360 }, { "epoch": 0.47, "learning_rate": 3.2293998628191246e-06, "logits/chosen": -1.7881847620010376, "logits/rejected": -1.5611097812652588, "logps/chosen": -468.3409118652344, "logps/rejected": -515.4591064453125, "loss": 0.5295, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.007239818572998, "rewards/margins": 0.803979754447937, "rewards/rejected": -2.8112196922302246, "step": 2370 }, { "epoch": 0.47, "learning_rate": 3.2129948440117487e-06, "logits/chosen": -1.548412561416626, "logits/rejected": -1.2934997081756592, "logps/chosen": -426.29937744140625, "logps/rejected": -491.78662109375, "loss": 0.4824, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7749338150024414, "rewards/margins": 0.7233313918113708, "rewards/rejected": -2.498265027999878, "step": 2380 }, { "epoch": 0.47, "learning_rate": 3.196556336741261e-06, "logits/chosen": -1.6830675601959229, "logits/rejected": -1.5950696468353271, "logps/chosen": -488.90960693359375, "logps/rejected": -620.7691650390625, "loss": 0.4662, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2164549827575684, "rewards/margins": 1.2778276205062866, "rewards/rejected": -3.4942824840545654, "step": 2390 }, { "epoch": 0.47, "learning_rate": 3.1800851131034904e-06, "logits/chosen": -1.626853346824646, "logits/rejected": -1.2772800922393799, "logps/chosen": -600.2614135742188, "logps/rejected": -646.8006591796875, "loss": 0.5251, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8196444511413574, "rewards/margins": 1.0009969472885132, "rewards/rejected": -3.820641279220581, "step": 2400 }, { "epoch": 0.47, "eval_logits/chosen": 1.5133353471755981, "eval_logits/rejected": 2.1152803897857666, "eval_logps/chosen": -534.41259765625, "eval_logps/rejected": -613.07080078125, "eval_loss": 0.5087560415267944, "eval_rewards/accuracies": 0.7172619104385376, "eval_rewards/chosen": -2.711719512939453, "eval_rewards/margins": 1.0878052711486816, "eval_rewards/rejected": -3.7995245456695557, "eval_runtime": 461.5152, "eval_samples_per_second": 4.334, "eval_steps_per_second": 0.182, "step": 2400 }, { "epoch": 0.47, "learning_rate": 3.1635819467309094e-06, "logits/chosen": -1.764386773109436, "logits/rejected": -1.4338537454605103, "logps/chosen": -613.195556640625, "logps/rejected": -632.0240478515625, "loss": 0.4615, "rewards/accuracies": 0.75, "rewards/chosen": -2.8152177333831787, "rewards/margins": 1.2248064279556274, "rewards/rejected": -4.040024280548096, "step": 2410 }, { "epoch": 0.47, "learning_rate": 3.147047612756302e-06, "logits/chosen": -1.6126506328582764, "logits/rejected": -1.3426209688186646, "logps/chosen": -522.5135498046875, "logps/rejected": -672.3236083984375, "loss": 0.4471, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.4577982425689697, "rewards/margins": 1.5147340297698975, "rewards/rejected": -3.9725327491760254, "step": 2420 }, { "epoch": 0.48, "learning_rate": 3.1304828877763567e-06, "logits/chosen": -1.835339903831482, "logits/rejected": -1.46001398563385, "logps/chosen": -562.58642578125, "logps/rejected": -616.4159545898438, "loss": 0.4956, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.563161849975586, "rewards/margins": 1.3803926706314087, "rewards/rejected": -3.943554639816284, "step": 2430 }, { "epoch": 0.48, "learning_rate": 3.1138885498151843e-06, "logits/chosen": -1.6502008438110352, "logits/rejected": -1.3979710340499878, "logps/chosen": -537.5780029296875, "logps/rejected": -613.78857421875, "loss": 0.5772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5154900550842285, "rewards/margins": 1.337890386581421, "rewards/rejected": -3.8533802032470703, "step": 2440 }, { "epoch": 0.48, "learning_rate": 3.0972653782877836e-06, "logits/chosen": -1.7864410877227783, "logits/rejected": -1.657470941543579, "logps/chosen": -552.2943115234375, "logps/rejected": -602.1121826171875, "loss": 0.556, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7135872840881348, "rewards/margins": 0.8077222108840942, "rewards/rejected": -3.5213096141815186, "step": 2450 }, { "epoch": 0.48, "learning_rate": 3.0806141539634294e-06, "logits/chosen": -1.750838041305542, "logits/rejected": -1.445452094078064, "logps/chosen": -595.9046630859375, "logps/rejected": -634.5581665039062, "loss": 0.4801, "rewards/accuracies": 0.75, "rewards/chosen": -2.9126906394958496, "rewards/margins": 0.7003231048583984, "rewards/rejected": -3.613013505935669, "step": 2460 }, { "epoch": 0.48, "learning_rate": 3.063935658928998e-06, "logits/chosen": -1.584479808807373, "logits/rejected": -1.4214916229248047, "logps/chosen": -502.6748962402344, "logps/rejected": -579.9150390625, "loss": 0.5253, "rewards/accuracies": 0.75, "rewards/chosen": -2.5569519996643066, "rewards/margins": 1.0509430170059204, "rewards/rejected": -3.6078953742980957, "step": 2470 }, { "epoch": 0.49, "learning_rate": 3.0472306765522393e-06, "logits/chosen": -1.4984403848648071, "logits/rejected": -1.5491224527359009, "logps/chosen": -531.972900390625, "logps/rejected": -635.1728515625, "loss": 0.4886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.8651509284973145, "rewards/margins": 1.0753536224365234, "rewards/rejected": -3.940504550933838, "step": 2480 }, { "epoch": 0.49, "learning_rate": 3.0304999914449774e-06, "logits/chosen": -1.3658727407455444, "logits/rejected": -1.5433199405670166, "logps/chosen": -462.2303771972656, "logps/rejected": -598.8155517578125, "loss": 0.5015, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.40022349357605, "rewards/margins": 0.9658218622207642, "rewards/rejected": -3.3660457134246826, "step": 2490 }, { "epoch": 0.49, "learning_rate": 3.0137443894262634e-06, "logits/chosen": -1.8412716388702393, "logits/rejected": -1.5159828662872314, "logps/chosen": -576.944580078125, "logps/rejected": -619.55810546875, "loss": 0.5104, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.637674570083618, "rewards/margins": 1.1486151218414307, "rewards/rejected": -3.786289691925049, "step": 2500 }, { "epoch": 0.49, "eval_logits/chosen": 1.7461397647857666, "eval_logits/rejected": 2.2888615131378174, "eval_logps/chosen": -562.9376831054688, "eval_logps/rejected": -633.336181640625, "eval_loss": 0.5005843639373779, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -2.9969699382781982, "eval_rewards/margins": 1.0052084922790527, "eval_rewards/rejected": -4.002178192138672, "eval_runtime": 466.0167, "eval_samples_per_second": 4.292, "eval_steps_per_second": 0.18, "step": 2500 }, { "epoch": 0.49, "learning_rate": 2.9969646574854632e-06, "logits/chosen": -1.2289443016052246, "logits/rejected": -1.1734968423843384, "logps/chosen": -571.4437866210938, "logps/rejected": -637.2028198242188, "loss": 0.4959, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.083714008331299, "rewards/margins": 1.2261943817138672, "rewards/rejected": -4.309908390045166, "step": 2510 }, { "epoch": 0.49, "learning_rate": 2.980161583745294e-06, "logits/chosen": -1.57248055934906, "logits/rejected": -1.629582405090332, "logps/chosen": -613.6909790039062, "logps/rejected": -710.1002807617188, "loss": 0.5703, "rewards/accuracies": 0.75, "rewards/chosen": -3.2303519248962402, "rewards/margins": 0.9834693670272827, "rewards/rejected": -4.213820934295654, "step": 2520 }, { "epoch": 0.5, "learning_rate": 2.9633359574248077e-06, "logits/chosen": -1.5341389179229736, "logits/rejected": -1.6534173488616943, "logps/chosen": -466.84539794921875, "logps/rejected": -630.6109619140625, "loss": 0.5298, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.717761754989624, "rewards/margins": 0.9395642280578613, "rewards/rejected": -3.6573257446289062, "step": 2530 }, { "epoch": 0.5, "learning_rate": 2.946488568802324e-06, "logits/chosen": -1.7299247980117798, "logits/rejected": -1.6176536083221436, "logps/chosen": -538.8867797851562, "logps/rejected": -684.6195068359375, "loss": 0.5598, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.5870797634124756, "rewards/margins": 1.0093539953231812, "rewards/rejected": -3.5964341163635254, "step": 2540 }, { "epoch": 0.5, "learning_rate": 2.929620209178307e-06, "logits/chosen": -1.6182016134262085, "logits/rejected": -1.2783520221710205, "logps/chosen": -498.810546875, "logps/rejected": -527.8185424804688, "loss": 0.4818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.53918719291687, "rewards/margins": 0.693315863609314, "rewards/rejected": -3.2325031757354736, "step": 2550 }, { "epoch": 0.5, "learning_rate": 2.912731670838207e-06, "logits/chosen": -1.6861143112182617, "logits/rejected": -1.5500279664993286, "logps/chosen": -578.6981201171875, "logps/rejected": -656.0988159179688, "loss": 0.4895, "rewards/accuracies": 0.75, "rewards/chosen": -3.022346019744873, "rewards/margins": 0.8760727047920227, "rewards/rejected": -3.898418426513672, "step": 2560 }, { "epoch": 0.5, "learning_rate": 2.8958237470152374e-06, "logits/chosen": -1.2931665182113647, "logits/rejected": -1.2862474918365479, "logps/chosen": -509.349365234375, "logps/rejected": -610.8724365234375, "loss": 0.5523, "rewards/accuracies": 0.75, "rewards/chosen": -2.792919874191284, "rewards/margins": 1.0914205312728882, "rewards/rejected": -3.884340763092041, "step": 2570 }, { "epoch": 0.51, "learning_rate": 2.8788972318531272e-06, "logits/chosen": -1.5627813339233398, "logits/rejected": -1.2355066537857056, "logps/chosen": -600.7859497070312, "logps/rejected": -690.1842041015625, "loss": 0.4223, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.2663826942443848, "rewards/margins": 1.125218152999878, "rewards/rejected": -4.391600608825684, "step": 2580 }, { "epoch": 0.51, "learning_rate": 2.861952920368816e-06, "logits/chosen": -1.438971757888794, "logits/rejected": -1.2704181671142578, "logps/chosen": -601.17724609375, "logps/rejected": -654.62890625, "loss": 0.4558, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.0297670364379883, "rewards/margins": 1.1394041776657104, "rewards/rejected": -4.169171333312988, "step": 2590 }, { "epoch": 0.51, "learning_rate": 2.844991608415113e-06, "logits/chosen": -1.4737740755081177, "logits/rejected": -1.1703132390975952, "logps/chosen": -623.9537353515625, "logps/rejected": -725.4031372070312, "loss": 0.429, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3853325843811035, "rewards/margins": 1.5982491970062256, "rewards/rejected": -4.98358154296875, "step": 2600 }, { "epoch": 0.51, "eval_logits/chosen": 3.2826781272888184, "eval_logits/rejected": 3.6630711555480957, "eval_logps/chosen": -626.0599975585938, "eval_logps/rejected": -713.4385986328125, "eval_loss": 0.5238474011421204, "eval_rewards/accuracies": 0.7142857313156128, "eval_rewards/chosen": -3.6281931400299072, "eval_rewards/margins": 1.175009846687317, "eval_rewards/rejected": -4.8032026290893555, "eval_runtime": 470.5782, "eval_samples_per_second": 4.25, "eval_steps_per_second": 0.179, "step": 2600 }, { "epoch": 0.51, "learning_rate": 2.828014092643319e-06, "logits/chosen": -1.471954345703125, "logits/rejected": -1.1723235845565796, "logps/chosen": -665.859619140625, "logps/rejected": -789.55859375, "loss": 0.6042, "rewards/accuracies": 0.75, "rewards/chosen": -3.7893319129943848, "rewards/margins": 1.5368101596832275, "rewards/rejected": -5.326141834259033, "step": 2610 }, { "epoch": 0.51, "learning_rate": 2.8110211704658073e-06, "logits/chosen": -1.1027779579162598, "logits/rejected": -1.2894227504730225, "logps/chosen": -625.0130004882812, "logps/rejected": -680.197021484375, "loss": 0.5153, "rewards/accuracies": 0.75, "rewards/chosen": -3.4413421154022217, "rewards/margins": 1.2280861139297485, "rewards/rejected": -4.669427871704102, "step": 2620 }, { "epoch": 0.52, "learning_rate": 2.7940136400185697e-06, "logits/chosen": -1.7961403131484985, "logits/rejected": -1.3579736948013306, "logps/chosen": -618.7360229492188, "logps/rejected": -626.2679443359375, "loss": 0.5125, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7992968559265137, "rewards/margins": 1.030220627784729, "rewards/rejected": -3.829517364501953, "step": 2630 }, { "epoch": 0.52, "learning_rate": 2.776992300123732e-06, "logits/chosen": -1.4044650793075562, "logits/rejected": -1.0620936155319214, "logps/chosen": -550.3770751953125, "logps/rejected": -686.0360107421875, "loss": 0.453, "rewards/accuracies": 0.75, "rewards/chosen": -2.9266915321350098, "rewards/margins": 1.4235899448394775, "rewards/rejected": -4.350281715393066, "step": 2640 }, { "epoch": 0.52, "learning_rate": 2.7599579502520295e-06, "logits/chosen": -1.5171329975128174, "logits/rejected": -1.2877377271652222, "logps/chosen": -535.6190185546875, "logps/rejected": -594.6309814453125, "loss": 0.5086, "rewards/accuracies": 0.75, "rewards/chosen": -2.649843692779541, "rewards/margins": 0.9460121989250183, "rewards/rejected": -3.595855712890625, "step": 2650 }, { "epoch": 0.52, "learning_rate": 2.742911390485262e-06, "logits/chosen": -1.7375767230987549, "logits/rejected": -1.5891015529632568, "logps/chosen": -522.1452026367188, "logps/rejected": -644.7877197265625, "loss": 0.5278, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.4621710777282715, "rewards/margins": 1.264937400817871, "rewards/rejected": -3.7271084785461426, "step": 2660 }, { "epoch": 0.52, "learning_rate": 2.7258534214787108e-06, "logits/chosen": -1.1750578880310059, "logits/rejected": -1.2237205505371094, "logps/chosen": -444.18402099609375, "logps/rejected": -584.5846557617188, "loss": 0.4825, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.4036126136779785, "rewards/margins": 1.157496452331543, "rewards/rejected": -3.5611088275909424, "step": 2670 }, { "epoch": 0.53, "learning_rate": 2.7087848444235354e-06, "logits/chosen": -1.661940574645996, "logits/rejected": -1.6658750772476196, "logps/chosen": -430.6551818847656, "logps/rejected": -487.06341552734375, "loss": 0.4763, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0942399501800537, "rewards/margins": 0.7797794342041016, "rewards/rejected": -2.8740196228027344, "step": 2680 }, { "epoch": 0.53, "learning_rate": 2.6917064610091425e-06, "logits/chosen": -1.58583664894104, "logits/rejected": -1.0581375360488892, "logps/chosen": -522.0615234375, "logps/rejected": -650.8190307617188, "loss": 0.4368, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.498887538909912, "rewards/margins": 1.6336562633514404, "rewards/rejected": -4.132543563842773, "step": 2690 }, { "epoch": 0.53, "learning_rate": 2.674619073385531e-06, "logits/chosen": -1.503377079963684, "logits/rejected": -1.3497763872146606, "logps/chosen": -531.3106079101562, "logps/rejected": -552.6693115234375, "loss": 0.4255, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2223851680755615, "rewards/margins": 1.1123954057693481, "rewards/rejected": -3.33478045463562, "step": 2700 }, { "epoch": 0.53, "eval_logits/chosen": 1.687303066253662, "eval_logits/rejected": 2.191955089569092, "eval_logps/chosen": -512.7009887695312, "eval_logps/rejected": -583.7889404296875, "eval_loss": 0.49927037954330444, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -2.494602680206299, "eval_rewards/margins": 1.0121026039123535, "eval_rewards/rejected": -3.5067055225372314, "eval_runtime": 467.7621, "eval_samples_per_second": 4.276, "eval_steps_per_second": 0.18, "step": 2700 }, { "epoch": 0.53, "learning_rate": 2.6575234841256137e-06, "logits/chosen": -1.3108127117156982, "logits/rejected": -1.0921776294708252, "logps/chosen": -561.8636474609375, "logps/rejected": -687.85888671875, "loss": 0.5514, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.914976119995117, "rewards/margins": 1.1241728067398071, "rewards/rejected": -4.039148807525635, "step": 2710 }, { "epoch": 0.53, "learning_rate": 2.640420496187528e-06, "logits/chosen": -1.6478326320648193, "logits/rejected": -1.1835671663284302, "logps/chosen": -631.3289794921875, "logps/rejected": -626.5182495117188, "loss": 0.5263, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.000012159347534, "rewards/margins": 0.643200159072876, "rewards/rejected": -3.6432127952575684, "step": 2720 }, { "epoch": 0.54, "learning_rate": 2.6233109128769134e-06, "logits/chosen": -1.5925486087799072, "logits/rejected": -1.4888114929199219, "logps/chosen": -582.1151123046875, "logps/rejected": -646.9674072265625, "loss": 0.4845, "rewards/accuracies": 0.625, "rewards/chosen": -3.2123794555664062, "rewards/margins": 0.7758103013038635, "rewards/rejected": -3.988189697265625, "step": 2730 }, { "epoch": 0.54, "learning_rate": 2.6061955378091896e-06, "logits/chosen": -1.295643925666809, "logits/rejected": -1.2166447639465332, "logps/chosen": -478.65655517578125, "logps/rejected": -537.48486328125, "loss": 0.4722, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.5640273094177246, "rewards/margins": 1.0961014032363892, "rewards/rejected": -3.660128355026245, "step": 2740 }, { "epoch": 0.54, "learning_rate": 2.5890751748718055e-06, "logits/chosen": -0.9939683675765991, "logits/rejected": -1.0293607711791992, "logps/chosen": -543.2905883789062, "logps/rejected": -722.3085327148438, "loss": 0.4421, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.28556752204895, "rewards/margins": 1.4245612621307373, "rewards/rejected": -4.7101287841796875, "step": 2750 }, { "epoch": 0.54, "learning_rate": 2.5719506281864838e-06, "logits/chosen": -1.103952169418335, "logits/rejected": -0.954735279083252, "logps/chosen": -631.2025756835938, "logps/rejected": -684.2635498046875, "loss": 0.6389, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -4.1589555740356445, "rewards/margins": 0.8297792673110962, "rewards/rejected": -4.988734722137451, "step": 2760 }, { "epoch": 0.54, "learning_rate": 2.5548227020714532e-06, "logits/chosen": -0.9842194318771362, "logits/rejected": -0.25584936141967773, "logps/chosen": -638.1835327148438, "logps/rejected": -731.91162109375, "loss": 0.5314, "rewards/accuracies": 0.625, "rewards/chosen": -4.321416854858398, "rewards/margins": 1.1731635332107544, "rewards/rejected": -5.494580268859863, "step": 2770 }, { "epoch": 0.55, "learning_rate": 2.537692201003671e-06, "logits/chosen": -1.3658416271209717, "logits/rejected": -0.6274362802505493, "logps/chosen": -674.0128173828125, "logps/rejected": -775.1448974609375, "loss": 0.4476, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.8319332599639893, "rewards/margins": 1.8677552938461304, "rewards/rejected": -5.699688911437988, "step": 2780 }, { "epoch": 0.55, "learning_rate": 2.520559929581034e-06, "logits/chosen": -0.9689489603042603, "logits/rejected": -1.237882137298584, "logps/chosen": -640.4005126953125, "logps/rejected": -691.1866455078125, "loss": 0.5249, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.775344133377075, "rewards/margins": 0.899651825428009, "rewards/rejected": -4.674995422363281, "step": 2790 }, { "epoch": 0.55, "learning_rate": 2.503426692484594e-06, "logits/chosen": -1.564483642578125, "logits/rejected": -1.5032641887664795, "logps/chosen": -535.062744140625, "logps/rejected": -703.3568725585938, "loss": 0.4733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.2556099891662598, "rewards/margins": 1.3399698734283447, "rewards/rejected": -4.595579624176025, "step": 2800 }, { "epoch": 0.55, "eval_logits/chosen": 2.2111027240753174, "eval_logits/rejected": 2.679600715637207, "eval_logps/chosen": -584.3987426757812, "eval_logps/rejected": -661.117431640625, "eval_loss": 0.49895724654197693, "eval_rewards/accuracies": 0.7202380895614624, "eval_rewards/chosen": -3.211580514907837, "eval_rewards/margins": 1.068410873413086, "eval_rewards/rejected": -4.279991149902344, "eval_runtime": 474.1567, "eval_samples_per_second": 4.218, "eval_steps_per_second": 0.177, "step": 2800 }, { "epoch": 0.55, "learning_rate": 2.486293294440755e-06, "logits/chosen": -1.2708990573883057, "logits/rejected": -1.044060468673706, "logps/chosen": -603.8212890625, "logps/rejected": -696.2957763671875, "loss": 0.4856, "rewards/accuracies": 0.75, "rewards/chosen": -3.46049427986145, "rewards/margins": 1.102307677268982, "rewards/rejected": -4.562802314758301, "step": 2810 }, { "epoch": 0.55, "learning_rate": 2.4691605401834843e-06, "logits/chosen": -1.3584177494049072, "logits/rejected": -1.0103107690811157, "logps/chosen": -636.3997802734375, "logps/rejected": -636.8760375976562, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": -3.3813788890838623, "rewards/margins": 0.9568415880203247, "rewards/rejected": -4.338220596313477, "step": 2820 }, { "epoch": 0.56, "learning_rate": 2.4520292344165093e-06, "logits/chosen": -1.0483958721160889, "logits/rejected": -0.6678327918052673, "logps/chosen": -531.8612060546875, "logps/rejected": -628.7724609375, "loss": 0.4416, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.131394863128662, "rewards/margins": 1.3460534811019897, "rewards/rejected": -4.477448463439941, "step": 2830 }, { "epoch": 0.56, "learning_rate": 2.434900181775524e-06, "logits/chosen": -1.2988386154174805, "logits/rejected": -1.310418725013733, "logps/chosen": -623.8057861328125, "logps/rejected": -765.7599487304688, "loss": 0.6217, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3501858711242676, "rewards/margins": 1.2648541927337646, "rewards/rejected": -4.615039825439453, "step": 2840 }, { "epoch": 0.56, "learning_rate": 2.4177741867903966e-06, "logits/chosen": -1.3026063442230225, "logits/rejected": -1.5730775594711304, "logps/chosen": -574.4251098632812, "logps/rejected": -738.9527587890625, "loss": 0.4522, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1266930103302, "rewards/margins": 1.183801531791687, "rewards/rejected": -4.310494422912598, "step": 2850 }, { "epoch": 0.56, "learning_rate": 2.40065205384738e-06, "logits/chosen": -1.2829475402832031, "logits/rejected": -0.7965912818908691, "logps/chosen": -579.4163208007812, "logps/rejected": -643.0167846679688, "loss": 0.4394, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.2237465381622314, "rewards/margins": 1.3293319940567017, "rewards/rejected": -4.553078651428223, "step": 2860 }, { "epoch": 0.56, "learning_rate": 2.3835345871513334e-06, "logits/chosen": -1.3497217893600464, "logits/rejected": -1.2243283987045288, "logps/chosen": -546.4749145507812, "logps/rejected": -704.9342041015625, "loss": 0.4332, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.956820011138916, "rewards/margins": 1.5760740041732788, "rewards/rejected": -4.532893657684326, "step": 2870 }, { "epoch": 0.57, "learning_rate": 2.3664225906879452e-06, "logits/chosen": -1.3499679565429688, "logits/rejected": -1.1703636646270752, "logps/chosen": -603.4747314453125, "logps/rejected": -666.8853759765625, "loss": 0.4698, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.120850086212158, "rewards/margins": 1.1407297849655151, "rewards/rejected": -4.261579990386963, "step": 2880 }, { "epoch": 0.57, "learning_rate": 2.3493168681859782e-06, "logits/chosen": -1.677674651145935, "logits/rejected": -1.3583574295043945, "logps/chosen": -669.3980102539062, "logps/rejected": -768.7742919921875, "loss": 0.5308, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.5255637168884277, "rewards/margins": 1.2949529886245728, "rewards/rejected": -4.820516586303711, "step": 2890 }, { "epoch": 0.57, "learning_rate": 2.3322182230795127e-06, "logits/chosen": -1.104689359664917, "logits/rejected": -0.9737561345100403, "logps/chosen": -493.5654296875, "logps/rejected": -645.1444091796875, "loss": 0.5394, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.879586696624756, "rewards/margins": 1.7333507537841797, "rewards/rejected": -4.612936973571777, "step": 2900 }, { "epoch": 0.57, "eval_logits/chosen": 1.2350561618804932, "eval_logits/rejected": 1.7757776975631714, "eval_logps/chosen": -554.5653076171875, "eval_logps/rejected": -625.8765869140625, "eval_loss": 0.5039872527122498, "eval_rewards/accuracies": 0.7157738208770752, "eval_rewards/chosen": -2.9132461547851562, "eval_rewards/margins": 1.014336347579956, "eval_rewards/rejected": -3.927582263946533, "eval_runtime": 471.9119, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.178, "step": 2900 }, { "epoch": 0.57, "learning_rate": 2.315127458470212e-06, "logits/chosen": -1.6969263553619385, "logits/rejected": -1.1446783542633057, "logps/chosen": -537.927734375, "logps/rejected": -597.5512084960938, "loss": 0.4702, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.5726828575134277, "rewards/margins": 1.4696344137191772, "rewards/rejected": -4.0423173904418945, "step": 2910 }, { "epoch": 0.57, "learning_rate": 2.298045377089604e-06, "logits/chosen": -1.631068468093872, "logits/rejected": -1.5715925693511963, "logps/chosen": -588.5338134765625, "logps/rejected": -680.6761474609375, "loss": 0.5311, "rewards/accuracies": 0.75, "rewards/chosen": -2.739121913909912, "rewards/margins": 1.2334686517715454, "rewards/rejected": -3.972590684890747, "step": 2920 }, { "epoch": 0.58, "learning_rate": 2.2809727812613767e-06, "logits/chosen": -1.3248388767242432, "logits/rejected": -1.031965970993042, "logps/chosen": -521.406005859375, "logps/rejected": -598.8995361328125, "loss": 0.5472, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.8669657707214355, "rewards/margins": 1.0183131694793701, "rewards/rejected": -3.8852791786193848, "step": 2930 }, { "epoch": 0.58, "learning_rate": 2.2639104728636915e-06, "logits/chosen": -1.6601577997207642, "logits/rejected": -1.537408471107483, "logps/chosen": -529.3999633789062, "logps/rejected": -581.3070678710938, "loss": 0.5026, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.78721284866333, "rewards/margins": 0.9390803575515747, "rewards/rejected": -3.7262930870056152, "step": 2940 }, { "epoch": 0.58, "learning_rate": 2.246859253291524e-06, "logits/chosen": -1.3986241817474365, "logits/rejected": -1.3699901103973389, "logps/chosen": -501.624267578125, "logps/rejected": -596.663818359375, "loss": 0.4539, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.6009819507598877, "rewards/margins": 1.3324124813079834, "rewards/rejected": -3.933394193649292, "step": 2950 }, { "epoch": 0.58, "learning_rate": 2.2298199234190236e-06, "logits/chosen": -1.3165680170059204, "logits/rejected": -1.3338545560836792, "logps/chosen": -550.9847412109375, "logps/rejected": -629.339111328125, "loss": 0.5604, "rewards/accuracies": 0.625, "rewards/chosen": -3.0295987129211426, "rewards/margins": 0.9783576726913452, "rewards/rejected": -4.007956504821777, "step": 2960 }, { "epoch": 0.58, "learning_rate": 2.21279328356189e-06, "logits/chosen": -1.407828688621521, "logits/rejected": -1.4380414485931396, "logps/chosen": -561.3121337890625, "logps/rejected": -591.61572265625, "loss": 0.509, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8780205249786377, "rewards/margins": 0.8007872700691223, "rewards/rejected": -3.6788082122802734, "step": 2970 }, { "epoch": 0.58, "learning_rate": 2.195780133439794e-06, "logits/chosen": -1.6875905990600586, "logits/rejected": -1.2853671312332153, "logps/chosen": -455.3389587402344, "logps/rejected": -507.90771484375, "loss": 0.469, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5273537635803223, "rewards/margins": 1.1238353252410889, "rewards/rejected": -3.6511893272399902, "step": 2980 }, { "epoch": 0.59, "learning_rate": 2.1787812721388093e-06, "logits/chosen": -1.6108009815216064, "logits/rejected": -1.2731918096542358, "logps/chosen": -545.8873291015625, "logps/rejected": -659.3258666992188, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -2.7631006240844727, "rewards/margins": 1.0572516918182373, "rewards/rejected": -3.820352077484131, "step": 2990 }, { "epoch": 0.59, "learning_rate": 2.1617974980738814e-06, "logits/chosen": -1.4267470836639404, "logits/rejected": -1.2199398279190063, "logps/chosen": -505.8406677246094, "logps/rejected": -642.9313354492188, "loss": 0.5128, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.7473788261413574, "rewards/margins": 1.0760217905044556, "rewards/rejected": -3.8234009742736816, "step": 3000 }, { "epoch": 0.59, "eval_logits/chosen": 1.666269063949585, "eval_logits/rejected": 2.128396987915039, "eval_logps/chosen": -522.9818115234375, "eval_logps/rejected": -590.36376953125, "eval_loss": 0.5060966610908508, "eval_rewards/accuracies": 0.7172619104385376, "eval_rewards/chosen": -2.5974111557006836, "eval_rewards/margins": 0.975042462348938, "eval_rewards/rejected": -3.572453737258911, "eval_runtime": 471.1115, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.178, "step": 3000 }, { "epoch": 0.59, "learning_rate": 2.1448296089513273e-06, "logits/chosen": -1.5748159885406494, "logits/rejected": -1.455926537513733, "logps/chosen": -551.4447021484375, "logps/rejected": -678.2323608398438, "loss": 0.5886, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.5907955169677734, "rewards/margins": 0.9479290246963501, "rewards/rejected": -3.538724422454834, "step": 3010 }, { "epoch": 0.59, "learning_rate": 2.1278784017313688e-06, "logits/chosen": -1.8420231342315674, "logits/rejected": -1.4978781938552856, "logps/chosen": -536.6190185546875, "logps/rejected": -554.1761474609375, "loss": 0.488, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.728389263153076, "rewards/margins": 0.605846107006073, "rewards/rejected": -3.334235429763794, "step": 3020 }, { "epoch": 0.59, "learning_rate": 2.1109446725907003e-06, "logits/chosen": -1.6853519678115845, "logits/rejected": -1.6203676462173462, "logps/chosen": -522.2265014648438, "logps/rejected": -587.4464111328125, "loss": 0.5519, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.5095019340515137, "rewards/margins": 0.8812414407730103, "rewards/rejected": -3.3907437324523926, "step": 3030 }, { "epoch": 0.6, "learning_rate": 2.0940292168850913e-06, "logits/chosen": -1.5753730535507202, "logits/rejected": -1.2198309898376465, "logps/chosen": -500.56121826171875, "logps/rejected": -598.4393310546875, "loss": 0.5011, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4829251766204834, "rewards/margins": 1.1780023574829102, "rewards/rejected": -3.6609275341033936, "step": 3040 }, { "epoch": 0.6, "learning_rate": 2.0771328291120336e-06, "logits/chosen": -1.6674007177352905, "logits/rejected": -1.412819743156433, "logps/chosen": -471.34857177734375, "logps/rejected": -570.1939697265625, "loss": 0.4816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2399003505706787, "rewards/margins": 0.8762845993041992, "rewards/rejected": -3.116184949874878, "step": 3050 }, { "epoch": 0.6, "learning_rate": 2.060256302873421e-06, "logits/chosen": -1.6781727075576782, "logits/rejected": -1.4560127258300781, "logps/chosen": -543.1824951171875, "logps/rejected": -595.8843383789062, "loss": 0.4934, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.42879056930542, "rewards/margins": 0.9694629907608032, "rewards/rejected": -3.3982536792755127, "step": 3060 }, { "epoch": 0.6, "learning_rate": 2.043400430838276e-06, "logits/chosen": -1.662009835243225, "logits/rejected": -1.1407781839370728, "logps/chosen": -496.57666015625, "logps/rejected": -568.4881591796875, "loss": 0.563, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.3798727989196777, "rewards/margins": 1.0104573965072632, "rewards/rejected": -3.3903305530548096, "step": 3070 }, { "epoch": 0.6, "learning_rate": 2.02656600470552e-06, "logits/chosen": -1.5956240892410278, "logits/rejected": -1.5949556827545166, "logps/chosen": -416.9606018066406, "logps/rejected": -524.3873291015625, "loss": 0.5409, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.056086540222168, "rewards/margins": 0.749538004398346, "rewards/rejected": -2.80562424659729, "step": 3080 }, { "epoch": 0.61, "learning_rate": 2.0097538151667885e-06, "logits/chosen": -1.6122442483901978, "logits/rejected": -1.130645990371704, "logps/chosen": -440.63494873046875, "logps/rejected": -500.908203125, "loss": 0.4872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.053252696990967, "rewards/margins": 1.218098759651184, "rewards/rejected": -3.2713515758514404, "step": 3090 }, { "epoch": 0.61, "learning_rate": 1.99296465186929e-06, "logits/chosen": -1.49817955493927, "logits/rejected": -1.393099308013916, "logps/chosen": -449.64447021484375, "logps/rejected": -532.80712890625, "loss": 0.5215, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.427433490753174, "rewards/margins": 0.646077036857605, "rewards/rejected": -3.0735104084014893, "step": 3100 }, { "epoch": 0.61, "eval_logits/chosen": 0.8593930006027222, "eval_logits/rejected": 1.4432045221328735, "eval_logps/chosen": -489.5559997558594, "eval_logps/rejected": -551.8787231445312, "eval_loss": 0.4959636628627777, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": -2.263152599334717, "eval_rewards/margins": 0.9244504570960999, "eval_rewards/rejected": -3.187603235244751, "eval_runtime": 469.4229, "eval_samples_per_second": 4.261, "eval_steps_per_second": 0.179, "step": 3100 }, { "epoch": 0.61, "learning_rate": 1.9761993033787206e-06, "logits/chosen": -1.436286211013794, "logits/rejected": -1.667724847793579, "logps/chosen": -481.9756774902344, "logps/rejected": -577.6883544921875, "loss": 0.4815, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.447153091430664, "rewards/margins": 0.920573353767395, "rewards/rejected": -3.3677260875701904, "step": 3110 }, { "epoch": 0.61, "learning_rate": 1.959458557142228e-06, "logits/chosen": -1.811018705368042, "logits/rejected": -1.5022584199905396, "logps/chosen": -477.35906982421875, "logps/rejected": -560.0203247070312, "loss": 0.4815, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4165022373199463, "rewards/margins": 1.0257890224456787, "rewards/rejected": -3.442291259765625, "step": 3120 }, { "epoch": 0.61, "learning_rate": 1.942743199451418e-06, "logits/chosen": -1.4638209342956543, "logits/rejected": -1.3204270601272583, "logps/chosen": -535.1734008789062, "logps/rejected": -584.9055786132812, "loss": 0.5191, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.5391428470611572, "rewards/margins": 0.8269332051277161, "rewards/rejected": -3.3660759925842285, "step": 3130 }, { "epoch": 0.62, "learning_rate": 1.9260540154054317e-06, "logits/chosen": -1.6943273544311523, "logits/rejected": -1.4195598363876343, "logps/chosen": -619.236572265625, "logps/rejected": -740.2224731445312, "loss": 0.4769, "rewards/accuracies": 0.875, "rewards/chosen": -2.336930274963379, "rewards/margins": 1.6488691568374634, "rewards/rejected": -3.9857993125915527, "step": 3140 }, { "epoch": 0.62, "learning_rate": 1.909391788874069e-06, "logits/chosen": -1.7227598428726196, "logits/rejected": -1.6687244176864624, "logps/chosen": -519.3486328125, "logps/rejected": -590.3075561523438, "loss": 0.497, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.510953187942505, "rewards/margins": 0.8232176899909973, "rewards/rejected": -3.3341705799102783, "step": 3150 }, { "epoch": 0.62, "learning_rate": 1.8927573024609666e-06, "logits/chosen": -1.7152988910675049, "logits/rejected": -1.376106858253479, "logps/chosen": -565.9855346679688, "logps/rejected": -634.7908935546875, "loss": 0.4533, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.826343536376953, "rewards/margins": 1.0604288578033447, "rewards/rejected": -3.8867721557617188, "step": 3160 }, { "epoch": 0.62, "learning_rate": 1.8761513374668434e-06, "logits/chosen": -1.7361100912094116, "logits/rejected": -1.1922258138656616, "logps/chosen": -569.1446533203125, "logps/rejected": -591.5360107421875, "loss": 0.4621, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.7468013763427734, "rewards/margins": 0.9878908395767212, "rewards/rejected": -3.734692096710205, "step": 3170 }, { "epoch": 0.62, "learning_rate": 1.8595746738528045e-06, "logits/chosen": -1.5199767351150513, "logits/rejected": -1.3576875925064087, "logps/chosen": -579.2857055664062, "logps/rejected": -665.6951904296875, "loss": 0.473, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.663944959640503, "rewards/margins": 1.08833909034729, "rewards/rejected": -3.752284288406372, "step": 3180 }, { "epoch": 0.63, "learning_rate": 1.8430280902037061e-06, "logits/chosen": -1.3695231676101685, "logits/rejected": -1.2491718530654907, "logps/chosen": -510.26409912109375, "logps/rejected": -649.3592529296875, "loss": 0.4412, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6402225494384766, "rewards/margins": 1.2626936435699463, "rewards/rejected": -3.902916669845581, "step": 3190 }, { "epoch": 0.63, "learning_rate": 1.826512363691586e-06, "logits/chosen": -0.9787014722824097, "logits/rejected": -1.1823240518569946, "logps/chosen": -531.9524536132812, "logps/rejected": -697.2850341796875, "loss": 0.5023, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7318313121795654, "rewards/margins": 1.3929023742675781, "rewards/rejected": -4.124733924865723, "step": 3200 }, { "epoch": 0.63, "eval_logits/chosen": 1.2951397895812988, "eval_logits/rejected": 1.9057204723358154, "eval_logps/chosen": -549.5391845703125, "eval_logps/rejected": -629.523681640625, "eval_loss": 0.49993908405303955, "eval_rewards/accuracies": 0.7127976417541504, "eval_rewards/chosen": -2.8629848957061768, "eval_rewards/margins": 1.101068377494812, "eval_rewards/rejected": -3.964053153991699, "eval_runtime": 469.417, "eval_samples_per_second": 4.261, "eval_steps_per_second": 0.179, "step": 3200 }, { "epoch": 0.63, "learning_rate": 1.8100282700391616e-06, "logits/chosen": -1.81111741065979, "logits/rejected": -1.0964924097061157, "logps/chosen": -580.1653442382812, "logps/rejected": -659.6453247070312, "loss": 0.3998, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.867839813232422, "rewards/margins": 1.4525978565216064, "rewards/rejected": -4.320437908172607, "step": 3210 }, { "epoch": 0.63, "learning_rate": 1.7935765834833966e-06, "logits/chosen": -1.3006770610809326, "logits/rejected": -1.370444655418396, "logps/chosen": -544.4671630859375, "logps/rejected": -650.7494506835938, "loss": 0.4532, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.8837952613830566, "rewards/margins": 1.2802571058273315, "rewards/rejected": -4.164052486419678, "step": 3220 }, { "epoch": 0.63, "learning_rate": 1.7771580767391314e-06, "logits/chosen": -1.5054481029510498, "logits/rejected": -1.3543479442596436, "logps/chosen": -568.9913330078125, "logps/rejected": -634.9281005859375, "loss": 0.4947, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.2038333415985107, "rewards/margins": 0.8554115295410156, "rewards/rejected": -4.0592451095581055, "step": 3230 }, { "epoch": 0.64, "learning_rate": 1.7607735209627953e-06, "logits/chosen": -1.2933645248413086, "logits/rejected": -0.849597156047821, "logps/chosen": -549.0391235351562, "logps/rejected": -660.4158935546875, "loss": 0.5686, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.0153212547302246, "rewards/margins": 1.2810765504837036, "rewards/rejected": -4.2963972091674805, "step": 3240 }, { "epoch": 0.64, "learning_rate": 1.7444236857161837e-06, "logits/chosen": -1.6714146137237549, "logits/rejected": -1.5842548608779907, "logps/chosen": -662.2576293945312, "logps/rejected": -656.3047485351562, "loss": 0.4996, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.8321645259857178, "rewards/margins": 0.9911035299301147, "rewards/rejected": -3.823268175125122, "step": 3250 }, { "epoch": 0.64, "learning_rate": 1.7281093389303105e-06, "logits/chosen": -1.5343830585479736, "logits/rejected": -1.2284438610076904, "logps/chosen": -543.3328857421875, "logps/rejected": -612.6687622070312, "loss": 0.4262, "rewards/accuracies": 0.75, "rewards/chosen": -2.54597806930542, "rewards/margins": 1.2225624322891235, "rewards/rejected": -3.768540620803833, "step": 3260 }, { "epoch": 0.64, "learning_rate": 1.7118312468693437e-06, "logits/chosen": -1.4476630687713623, "logits/rejected": -1.4361945390701294, "logps/chosen": -556.40673828125, "logps/rejected": -581.2120361328125, "loss": 0.5645, "rewards/accuracies": 0.75, "rewards/chosen": -2.6290464401245117, "rewards/margins": 0.753653883934021, "rewards/rejected": -3.3827004432678223, "step": 3270 }, { "epoch": 0.64, "learning_rate": 1.6955901740946136e-06, "logits/chosen": -1.3370723724365234, "logits/rejected": -1.1809570789337158, "logps/chosen": -486.3965759277344, "logps/rejected": -560.33740234375, "loss": 0.4332, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7506370544433594, "rewards/margins": 0.7360703349113464, "rewards/rejected": -3.4867072105407715, "step": 3280 }, { "epoch": 0.65, "learning_rate": 1.6793868834286985e-06, "logits/chosen": -1.5083470344543457, "logits/rejected": -1.303236722946167, "logps/chosen": -488.62933349609375, "logps/rejected": -601.1016845703125, "loss": 0.5114, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.4180212020874023, "rewards/margins": 1.666049599647522, "rewards/rejected": -4.084070682525635, "step": 3290 }, { "epoch": 0.65, "learning_rate": 1.663222135919601e-06, "logits/chosen": -1.4300639629364014, "logits/rejected": -1.349562168121338, "logps/chosen": -599.2969970703125, "logps/rejected": -618.0079956054688, "loss": 0.5042, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.0840084552764893, "rewards/margins": 0.6745599508285522, "rewards/rejected": -3.758568525314331, "step": 3300 }, { "epoch": 0.65, "eval_logits/chosen": 1.4333837032318115, "eval_logits/rejected": 1.9776458740234375, "eval_logps/chosen": -547.7244873046875, "eval_logps/rejected": -621.050048828125, "eval_loss": 0.4904102087020874, "eval_rewards/accuracies": 0.730654776096344, "eval_rewards/chosen": -2.8448381423950195, "eval_rewards/margins": 1.0344792604446411, "eval_rewards/rejected": -3.879317045211792, "eval_runtime": 468.097, "eval_samples_per_second": 4.273, "eval_steps_per_second": 0.179, "step": 3300 }, { "epoch": 0.65, "learning_rate": 1.6470966908050012e-06, "logits/chosen": -1.3912785053253174, "logits/rejected": -1.166416883468628, "logps/chosen": -551.7232666015625, "logps/rejected": -542.6461181640625, "loss": 0.4594, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.709670066833496, "rewards/margins": 1.1980925798416138, "rewards/rejected": -3.9077625274658203, "step": 3310 }, { "epoch": 0.65, "learning_rate": 1.6310113054765947e-06, "logits/chosen": -1.1876062154769897, "logits/rejected": -1.081268310546875, "logps/chosen": -555.0560913085938, "logps/rejected": -684.9098510742188, "loss": 0.4173, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.7197089195251465, "rewards/margins": 1.6264817714691162, "rewards/rejected": -4.346190929412842, "step": 3320 }, { "epoch": 0.65, "learning_rate": 1.6149667354445192e-06, "logits/chosen": -1.2864583730697632, "logits/rejected": -1.1489123106002808, "logps/chosen": -517.0961303710938, "logps/rejected": -634.1941528320312, "loss": 0.4567, "rewards/accuracies": 0.75, "rewards/chosen": -2.976073741912842, "rewards/margins": 1.2558077573776245, "rewards/rejected": -4.231881141662598, "step": 3330 }, { "epoch": 0.66, "learning_rate": 1.5989637343018705e-06, "logits/chosen": -1.4901988506317139, "logits/rejected": -1.3862135410308838, "logps/chosen": -543.125, "logps/rejected": -639.5211181640625, "loss": 0.5195, "rewards/accuracies": 0.75, "rewards/chosen": -2.8310227394104004, "rewards/margins": 0.9975448846817017, "rewards/rejected": -3.8285675048828125, "step": 3340 }, { "epoch": 0.66, "learning_rate": 1.5830030536893066e-06, "logits/chosen": -1.5147039890289307, "logits/rejected": -1.5230892896652222, "logps/chosen": -561.2169189453125, "logps/rejected": -679.5860595703125, "loss": 0.5184, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9714980125427246, "rewards/margins": 1.08120596408844, "rewards/rejected": -4.052703857421875, "step": 3350 }, { "epoch": 0.66, "learning_rate": 1.5670854432597433e-06, "logits/chosen": -1.0411088466644287, "logits/rejected": -1.4086737632751465, "logps/chosen": -509.64947509765625, "logps/rejected": -623.0701904296875, "loss": 0.5161, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.043079376220703, "rewards/margins": 0.5725584030151367, "rewards/rejected": -3.615638017654419, "step": 3360 }, { "epoch": 0.66, "learning_rate": 1.551211650643144e-06, "logits/chosen": -1.7322018146514893, "logits/rejected": -1.328801155090332, "logps/chosen": -521.364990234375, "logps/rejected": -571.3067626953125, "loss": 0.5073, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -3.0033740997314453, "rewards/margins": 0.6566835641860962, "rewards/rejected": -3.660057544708252, "step": 3370 }, { "epoch": 0.66, "learning_rate": 1.5353824214114075e-06, "logits/chosen": -1.5441231727600098, "logits/rejected": -1.23960280418396, "logps/chosen": -581.4852294921875, "logps/rejected": -667.9786376953125, "loss": 0.488, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.996382474899292, "rewards/margins": 1.474982500076294, "rewards/rejected": -4.471364974975586, "step": 3380 }, { "epoch": 0.67, "learning_rate": 1.5195984990433437e-06, "logits/chosen": -1.4372183084487915, "logits/rejected": -1.19015371799469, "logps/chosen": -541.4854125976562, "logps/rejected": -616.9935913085938, "loss": 0.4854, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.807796001434326, "rewards/margins": 1.1931097507476807, "rewards/rejected": -4.000905990600586, "step": 3390 }, { "epoch": 0.67, "learning_rate": 1.5038606248897586e-06, "logits/chosen": -1.525448203086853, "logits/rejected": -1.3807704448699951, "logps/chosen": -604.7877807617188, "logps/rejected": -673.3497924804688, "loss": 0.498, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8307852745056152, "rewards/margins": 1.112122893333435, "rewards/rejected": -3.942908525466919, "step": 3400 }, { "epoch": 0.67, "eval_logits/chosen": 0.960801362991333, "eval_logits/rejected": 1.4781014919281006, "eval_logps/chosen": -547.4754028320312, "eval_logps/rejected": -614.0842895507812, "eval_loss": 0.48787009716033936, "eval_rewards/accuracies": 0.7321428656578064, "eval_rewards/chosen": -2.842346668243408, "eval_rewards/margins": 0.9673130512237549, "eval_rewards/rejected": -3.809659719467163, "eval_runtime": 465.7039, "eval_samples_per_second": 4.295, "eval_steps_per_second": 0.18, "step": 3400 }, { "epoch": 0.67, "learning_rate": 1.4881695381386324e-06, "logits/chosen": -1.2873122692108154, "logits/rejected": -0.9784881472587585, "logps/chosen": -531.7821044921875, "logps/rejected": -645.1023559570312, "loss": 0.4981, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.941249132156372, "rewards/margins": 1.3310232162475586, "rewards/rejected": -4.272271633148193, "step": 3410 }, { "epoch": 0.67, "learning_rate": 1.4725259757803983e-06, "logits/chosen": -1.3161420822143555, "logits/rejected": -1.2805860042572021, "logps/chosen": -540.3150634765625, "logps/rejected": -688.1172485351562, "loss": 0.4693, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.891829013824463, "rewards/margins": 1.5289371013641357, "rewards/rejected": -4.420766830444336, "step": 3420 }, { "epoch": 0.67, "learning_rate": 1.4569306725733313e-06, "logits/chosen": -1.1628152132034302, "logits/rejected": -0.943515419960022, "logps/chosen": -589.7276000976562, "logps/rejected": -602.8516845703125, "loss": 0.5659, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.100240468978882, "rewards/margins": 0.6115292310714722, "rewards/rejected": -3.7117698192596436, "step": 3430 }, { "epoch": 0.68, "learning_rate": 1.4413843610090342e-06, "logits/chosen": -1.3137940168380737, "logits/rejected": -1.1455672979354858, "logps/chosen": -529.7459716796875, "logps/rejected": -633.06787109375, "loss": 0.5939, "rewards/accuracies": 0.5, "rewards/chosen": -2.8938117027282715, "rewards/margins": 0.768166184425354, "rewards/rejected": -3.661977767944336, "step": 3440 }, { "epoch": 0.68, "learning_rate": 1.4258877712780333e-06, "logits/chosen": -1.950718641281128, "logits/rejected": -1.7199163436889648, "logps/chosen": -601.2168579101562, "logps/rejected": -665.1631469726562, "loss": 0.4123, "rewards/accuracies": 0.75, "rewards/chosen": -2.4955344200134277, "rewards/margins": 1.3623603582382202, "rewards/rejected": -3.8578948974609375, "step": 3450 }, { "epoch": 0.68, "learning_rate": 1.410441631235487e-06, "logits/chosen": -1.2827733755111694, "logits/rejected": -1.3301194906234741, "logps/chosen": -592.0504760742188, "logps/rejected": -645.4057006835938, "loss": 0.491, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.829989194869995, "rewards/margins": 1.2216827869415283, "rewards/rejected": -4.051672458648682, "step": 3460 }, { "epoch": 0.68, "learning_rate": 1.3950466663669915e-06, "logits/chosen": -1.3424937725067139, "logits/rejected": -1.0797219276428223, "logps/chosen": -557.3580322265625, "logps/rejected": -691.0580444335938, "loss": 0.5238, "rewards/accuracies": 0.75, "rewards/chosen": -2.78297758102417, "rewards/margins": 1.1057649850845337, "rewards/rejected": -3.888741970062256, "step": 3470 }, { "epoch": 0.68, "learning_rate": 1.3797035997545144e-06, "logits/chosen": -1.8202447891235352, "logits/rejected": -1.4908593893051147, "logps/chosen": -573.6468505859375, "logps/rejected": -617.0345458984375, "loss": 0.4676, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.885648488998413, "rewards/margins": 1.005799651145935, "rewards/rejected": -3.8914477825164795, "step": 3480 }, { "epoch": 0.69, "learning_rate": 1.3644131520424241e-06, "logits/chosen": -1.4213025569915771, "logits/rejected": -1.0844852924346924, "logps/chosen": -557.9754638671875, "logps/rejected": -613.9381713867188, "loss": 0.5547, "rewards/accuracies": 0.75, "rewards/chosen": -2.9787003993988037, "rewards/margins": 1.2177660465240479, "rewards/rejected": -4.196466445922852, "step": 3490 }, { "epoch": 0.69, "learning_rate": 1.3491760414036478e-06, "logits/chosen": -1.622127890586853, "logits/rejected": -1.628751516342163, "logps/chosen": -499.74346923828125, "logps/rejected": -582.7059326171875, "loss": 0.4987, "rewards/accuracies": 0.625, "rewards/chosen": -2.6528587341308594, "rewards/margins": 0.9285345077514648, "rewards/rejected": -3.5813934803009033, "step": 3500 }, { "epoch": 0.69, "eval_logits/chosen": 0.8556860685348511, "eval_logits/rejected": 1.38192617893219, "eval_logps/chosen": -532.4976806640625, "eval_logps/rejected": -604.8372192382812, "eval_loss": 0.4902108907699585, "eval_rewards/accuracies": 0.730654776096344, "eval_rewards/chosen": -2.6925694942474365, "eval_rewards/margins": 1.024619698524475, "eval_rewards/rejected": -3.717189311981201, "eval_runtime": 468.3987, "eval_samples_per_second": 4.27, "eval_steps_per_second": 0.179, "step": 3500 }, { "epoch": 0.69, "learning_rate": 1.3339929835059393e-06, "logits/chosen": -1.4586498737335205, "logits/rejected": -1.3849519491195679, "logps/chosen": -517.4207153320312, "logps/rejected": -552.0170288085938, "loss": 0.5863, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6804182529449463, "rewards/margins": 0.6542779207229614, "rewards/rejected": -3.334695816040039, "step": 3510 }, { "epoch": 0.69, "learning_rate": 1.3188646914782616e-06, "logits/chosen": -1.5962340831756592, "logits/rejected": -1.6393506526947021, "logps/chosen": -480.1661682128906, "logps/rejected": -566.5606689453125, "loss": 0.4092, "rewards/accuracies": 0.75, "rewards/chosen": -2.3056910037994385, "rewards/margins": 1.0150161981582642, "rewards/rejected": -3.320706844329834, "step": 3520 }, { "epoch": 0.69, "learning_rate": 1.3037918758772944e-06, "logits/chosen": -1.2742021083831787, "logits/rejected": -1.164294958114624, "logps/chosen": -438.3392639160156, "logps/rejected": -590.9483032226562, "loss": 0.4378, "rewards/accuracies": 0.875, "rewards/chosen": -2.328174114227295, "rewards/margins": 1.5971230268478394, "rewards/rejected": -3.925297260284424, "step": 3530 }, { "epoch": 0.69, "learning_rate": 1.288775244654062e-06, "logits/chosen": -1.6658971309661865, "logits/rejected": -1.5197378396987915, "logps/chosen": -534.6138916015625, "logps/rejected": -623.6290283203125, "loss": 0.4726, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.696544647216797, "rewards/margins": 1.2441034317016602, "rewards/rejected": -3.940647840499878, "step": 3540 }, { "epoch": 0.7, "learning_rate": 1.2738155031206772e-06, "logits/chosen": -1.400800108909607, "logits/rejected": -1.4668363332748413, "logps/chosen": -527.6932373046875, "logps/rejected": -605.9529418945312, "loss": 0.537, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.713974952697754, "rewards/margins": 0.906460165977478, "rewards/rejected": -3.6204352378845215, "step": 3550 }, { "epoch": 0.7, "learning_rate": 1.2589133539172193e-06, "logits/chosen": -1.618035912513733, "logits/rejected": -1.2135871648788452, "logps/chosen": -610.2980346679688, "logps/rejected": -685.7574462890625, "loss": 0.5337, "rewards/accuracies": 0.625, "rewards/chosen": -3.3299853801727295, "rewards/margins": 0.8561728596687317, "rewards/rejected": -4.186158180236816, "step": 3560 }, { "epoch": 0.7, "learning_rate": 1.2440694969787262e-06, "logits/chosen": -1.460566759109497, "logits/rejected": -1.4396517276763916, "logps/chosen": -508.64617919921875, "logps/rejected": -639.2283935546875, "loss": 0.4811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.7212297916412354, "rewards/margins": 1.1402428150177002, "rewards/rejected": -3.8614726066589355, "step": 3570 }, { "epoch": 0.7, "learning_rate": 1.2292846295023222e-06, "logits/chosen": -0.9604961276054382, "logits/rejected": -0.9926923513412476, "logps/chosen": -546.4825439453125, "logps/rejected": -696.2088623046875, "loss": 0.504, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.0244810581207275, "rewards/margins": 1.4495601654052734, "rewards/rejected": -4.474040508270264, "step": 3580 }, { "epoch": 0.7, "learning_rate": 1.2145594459144745e-06, "logits/chosen": -1.2483197450637817, "logits/rejected": -1.1393179893493652, "logps/chosen": -451.2967224121094, "logps/rejected": -571.0396728515625, "loss": 0.4789, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4650654792785645, "rewards/margins": 1.3195736408233643, "rewards/rejected": -3.7846388816833496, "step": 3590 }, { "epoch": 0.71, "learning_rate": 1.19989463783837e-06, "logits/chosen": -1.7845804691314697, "logits/rejected": -1.4175323247909546, "logps/chosen": -554.6177368164062, "logps/rejected": -556.827880859375, "loss": 0.5824, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5787031650543213, "rewards/margins": 0.7001349925994873, "rewards/rejected": -3.2788383960723877, "step": 3600 }, { "epoch": 0.71, "eval_logits/chosen": 0.5335975289344788, "eval_logits/rejected": 1.10366690158844, "eval_logps/chosen": -519.9661254882812, "eval_logps/rejected": -592.4444580078125, "eval_loss": 0.49079516530036926, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": -2.567253589630127, "eval_rewards/margins": 1.0260074138641357, "eval_rewards/rejected": -3.5932610034942627, "eval_runtime": 470.105, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.179, "step": 3600 }, { "epoch": 0.71, "learning_rate": 1.1852908940614354e-06, "logits/chosen": -1.411493182182312, "logits/rejected": -1.0653778314590454, "logps/chosen": -560.4616088867188, "logps/rejected": -618.6390380859375, "loss": 0.5021, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.9032797813415527, "rewards/margins": 1.0411841869354248, "rewards/rejected": -3.9444642066955566, "step": 3610 }, { "epoch": 0.71, "learning_rate": 1.1707489005029877e-06, "logits/chosen": -1.6911699771881104, "logits/rejected": -1.442684531211853, "logps/chosen": -532.0489501953125, "logps/rejected": -603.7266235351562, "loss": 0.4439, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.5354647636413574, "rewards/margins": 0.9961854815483093, "rewards/rejected": -3.5316505432128906, "step": 3620 }, { "epoch": 0.71, "learning_rate": 1.1562693401820094e-06, "logits/chosen": -1.2883455753326416, "logits/rejected": -1.1316763162612915, "logps/chosen": -515.0657958984375, "logps/rejected": -637.5360107421875, "loss": 0.5508, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.779428720474243, "rewards/margins": 1.0291640758514404, "rewards/rejected": -3.8085930347442627, "step": 3630 }, { "epoch": 0.71, "learning_rate": 1.1418528931850781e-06, "logits/chosen": -1.702050805091858, "logits/rejected": -1.3528387546539307, "logps/chosen": -614.9039916992188, "logps/rejected": -675.7921142578125, "loss": 0.4977, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.8123397827148438, "rewards/margins": 1.1464719772338867, "rewards/rejected": -3.9588119983673096, "step": 3640 }, { "epoch": 0.72, "learning_rate": 1.1275002366344156e-06, "logits/chosen": -1.5493879318237305, "logits/rejected": -1.2522351741790771, "logps/chosen": -499.3773498535156, "logps/rejected": -630.6110229492188, "loss": 0.3831, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.6639785766601562, "rewards/margins": 1.3068674802780151, "rewards/rejected": -3.970846176147461, "step": 3650 }, { "epoch": 0.72, "learning_rate": 1.113212044656087e-06, "logits/chosen": -1.2208716869354248, "logits/rejected": -0.8246925473213196, "logps/chosen": -522.56689453125, "logps/rejected": -544.2653198242188, "loss": 0.5248, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8272600173950195, "rewards/margins": 0.7849901914596558, "rewards/rejected": -3.6122500896453857, "step": 3660 }, { "epoch": 0.72, "learning_rate": 1.0989889883483415e-06, "logits/chosen": -1.6956714391708374, "logits/rejected": -1.2789404392242432, "logps/chosen": -505.72564697265625, "logps/rejected": -618.1361083984375, "loss": 0.4234, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.5855579376220703, "rewards/margins": 1.574141263961792, "rewards/rejected": -4.159698963165283, "step": 3670 }, { "epoch": 0.72, "learning_rate": 1.0848317357500854e-06, "logits/chosen": -1.5158052444458008, "logits/rejected": -1.1376941204071045, "logps/chosen": -522.5565185546875, "logps/rejected": -601.7191162109375, "loss": 0.5567, "rewards/accuracies": 0.75, "rewards/chosen": -2.8235135078430176, "rewards/margins": 1.2751871347427368, "rewards/rejected": -4.098701000213623, "step": 3680 }, { "epoch": 0.72, "learning_rate": 1.070740951809508e-06, "logits/chosen": -1.6038143634796143, "logits/rejected": -1.3182604312896729, "logps/chosen": -525.1019897460938, "logps/rejected": -647.8762817382812, "loss": 0.4621, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.785275936126709, "rewards/margins": 1.263718605041504, "rewards/rejected": -4.048994064331055, "step": 3690 }, { "epoch": 0.73, "learning_rate": 1.0567172983528534e-06, "logits/chosen": -1.3500463962554932, "logits/rejected": -1.1551401615142822, "logps/chosen": -501.09039306640625, "logps/rejected": -653.4024658203125, "loss": 0.425, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.015165328979492, "rewards/margins": 1.1308244466781616, "rewards/rejected": -4.145989894866943, "step": 3700 }, { "epoch": 0.73, "eval_logits/chosen": 0.7257053256034851, "eval_logits/rejected": 1.2902551889419556, "eval_logps/chosen": -539.9019775390625, "eval_logps/rejected": -615.5826416015625, "eval_loss": 0.49059832096099854, "eval_rewards/accuracies": 0.730654776096344, "eval_rewards/chosen": -2.7666125297546387, "eval_rewards/margins": 1.0580310821533203, "eval_rewards/rejected": -3.82464337348938, "eval_runtime": 464.5737, "eval_samples_per_second": 4.305, "eval_steps_per_second": 0.181, "step": 3700 }, { "epoch": 0.73, "learning_rate": 1.0427614340533293e-06, "logits/chosen": -1.522006630897522, "logits/rejected": -1.1353822946548462, "logps/chosen": -548.3036499023438, "logps/rejected": -561.1580810546875, "loss": 0.4571, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8132216930389404, "rewards/margins": 1.0134377479553223, "rewards/rejected": -3.8266589641571045, "step": 3710 }, { "epoch": 0.73, "learning_rate": 1.0288740144001722e-06, "logits/chosen": -0.8532799482345581, "logits/rejected": -1.0594323873519897, "logps/chosen": -472.4169921875, "logps/rejected": -640.5179443359375, "loss": 0.454, "rewards/accuracies": 0.75, "rewards/chosen": -2.8810791969299316, "rewards/margins": 1.3560783863067627, "rewards/rejected": -4.237157344818115, "step": 3720 }, { "epoch": 0.73, "learning_rate": 1.0150556916678634e-06, "logits/chosen": -1.344792366027832, "logits/rejected": -1.2084754705429077, "logps/chosen": -511.25238037109375, "logps/rejected": -632.6343383789062, "loss": 0.385, "rewards/accuracies": 0.75, "rewards/chosen": -2.9333691596984863, "rewards/margins": 1.295652985572815, "rewards/rejected": -4.229022026062012, "step": 3730 }, { "epoch": 0.73, "learning_rate": 1.0013071148854861e-06, "logits/chosen": -1.4463289976119995, "logits/rejected": -1.3817778825759888, "logps/chosen": -558.93603515625, "logps/rejected": -659.0678100585938, "loss": 0.5267, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1432852745056152, "rewards/margins": 1.226470708847046, "rewards/rejected": -4.36975622177124, "step": 3740 }, { "epoch": 0.74, "learning_rate": 9.876289298062478e-07, "logits/chosen": -1.5644800662994385, "logits/rejected": -1.373430609703064, "logps/chosen": -536.2015380859375, "logps/rejected": -613.3848876953125, "loss": 0.4981, "rewards/accuracies": 0.625, "rewards/chosen": -3.0300514698028564, "rewards/margins": 0.7052406668663025, "rewards/rejected": -3.735292434692383, "step": 3750 }, { "epoch": 0.74, "learning_rate": 9.740217788771453e-07, "logits/chosen": -1.371816635131836, "logits/rejected": -1.263338565826416, "logps/chosen": -574.6948852539062, "logps/rejected": -703.0294799804688, "loss": 0.5132, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.1160013675689697, "rewards/margins": 1.5144450664520264, "rewards/rejected": -4.630446434020996, "step": 3760 }, { "epoch": 0.74, "learning_rate": 9.604863012087904e-07, "logits/chosen": -1.4769089221954346, "logits/rejected": -1.448307752609253, "logps/chosen": -574.9603271484375, "logps/rejected": -687.00341796875, "loss": 0.4775, "rewards/accuracies": 0.75, "rewards/chosen": -2.8347721099853516, "rewards/margins": 1.0564663410186768, "rewards/rejected": -3.8912384510040283, "step": 3770 }, { "epoch": 0.74, "learning_rate": 9.470231325453958e-07, "logits/chosen": -1.3595274686813354, "logits/rejected": -1.3797645568847656, "logps/chosen": -597.6848754882812, "logps/rejected": -693.9461059570312, "loss": 0.4352, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.229882001876831, "rewards/margins": 1.3679691553115845, "rewards/rejected": -4.597851276397705, "step": 3780 }, { "epoch": 0.74, "learning_rate": 9.336329052349089e-07, "logits/chosen": -1.5314759016036987, "logits/rejected": -1.400506854057312, "logps/chosen": -585.5628051757812, "logps/rejected": -757.356689453125, "loss": 0.43, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.8826019763946533, "rewards/margins": 1.7076170444488525, "rewards/rejected": -4.590218544006348, "step": 3790 }, { "epoch": 0.75, "learning_rate": 9.203162481993175e-07, "logits/chosen": -1.3001272678375244, "logits/rejected": -0.6133405566215515, "logps/chosen": -554.6573486328125, "logps/rejected": -680.6116943359375, "loss": 0.4756, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -2.721282482147217, "rewards/margins": 1.9358084201812744, "rewards/rejected": -4.65709114074707, "step": 3800 }, { "epoch": 0.75, "eval_logits/chosen": 0.9387251734733582, "eval_logits/rejected": 1.50146484375, "eval_logps/chosen": -550.5606689453125, "eval_logps/rejected": -629.0961303710938, "eval_loss": 0.4916338324546814, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": -2.873199939727783, "eval_rewards/margins": 1.0865780115127563, "eval_rewards/rejected": -3.959777355194092, "eval_runtime": 473.6033, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.177, "step": 3800 }, { "epoch": 0.75, "learning_rate": 9.070737869051044e-07, "logits/chosen": -1.2892731428146362, "logits/rejected": -1.412314772605896, "logps/chosen": -556.5120239257812, "logps/rejected": -668.5825805664062, "loss": 0.54, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.2580204010009766, "rewards/margins": 0.9198251962661743, "rewards/rejected": -4.1778459548950195, "step": 3810 }, { "epoch": 0.75, "learning_rate": 8.939061433338722e-07, "logits/chosen": -1.6973645687103271, "logits/rejected": -1.2744815349578857, "logps/chosen": -602.6815185546875, "logps/rejected": -683.7181396484375, "loss": 0.4363, "rewards/accuracies": 0.75, "rewards/chosen": -3.169668674468994, "rewards/margins": 1.1083095073699951, "rewards/rejected": -4.277978420257568, "step": 3820 }, { "epoch": 0.75, "learning_rate": 8.808139359531332e-07, "logits/chosen": -1.469407558441162, "logits/rejected": -1.1006393432617188, "logps/chosen": -486.2688903808594, "logps/rejected": -569.169677734375, "loss": 0.4169, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.552382707595825, "rewards/margins": 1.328578233718872, "rewards/rejected": -3.8809609413146973, "step": 3830 }, { "epoch": 0.75, "learning_rate": 8.677977796872541e-07, "logits/chosen": -1.5187252759933472, "logits/rejected": -1.0123035907745361, "logps/chosen": -576.05908203125, "logps/rejected": -661.7920532226562, "loss": 0.5426, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.05285382270813, "rewards/margins": 1.5546613931655884, "rewards/rejected": -4.60751485824585, "step": 3840 }, { "epoch": 0.76, "learning_rate": 8.548582858885787e-07, "logits/chosen": -1.2918612957000732, "logits/rejected": -1.4479345083236694, "logps/chosen": -565.8729248046875, "logps/rejected": -629.3079833984375, "loss": 0.5346, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.0743348598480225, "rewards/margins": 0.7325077056884766, "rewards/rejected": -3.80684232711792, "step": 3850 }, { "epoch": 0.76, "learning_rate": 8.419960623087129e-07, "logits/chosen": -1.4159595966339111, "logits/rejected": -1.4038660526275635, "logps/chosen": -540.0975952148438, "logps/rejected": -623.6644287109375, "loss": 0.4268, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.547733783721924, "rewards/margins": 0.9830751419067383, "rewards/rejected": -3.530808925628662, "step": 3860 }, { "epoch": 0.76, "learning_rate": 8.292117130699767e-07, "logits/chosen": -1.3750841617584229, "logits/rejected": -0.923875629901886, "logps/chosen": -544.1021728515625, "logps/rejected": -588.0936279296875, "loss": 0.5068, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.6934802532196045, "rewards/margins": 0.9981153607368469, "rewards/rejected": -3.6915955543518066, "step": 3870 }, { "epoch": 0.76, "learning_rate": 8.165058386370314e-07, "logits/chosen": -1.3146508932113647, "logits/rejected": -1.3720710277557373, "logps/chosen": -520.4107055664062, "logps/rejected": -645.3299560546875, "loss": 0.4759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5611908435821533, "rewards/margins": 1.5067590475082397, "rewards/rejected": -4.067950248718262, "step": 3880 }, { "epoch": 0.76, "learning_rate": 8.038790357886783e-07, "logits/chosen": -1.5785200595855713, "logits/rejected": -1.4798448085784912, "logps/chosen": -547.60595703125, "logps/rejected": -614.8184814453125, "loss": 0.5455, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.773311138153076, "rewards/margins": 0.8587993383407593, "rewards/rejected": -3.632110118865967, "step": 3890 }, { "epoch": 0.77, "learning_rate": 7.913318975898238e-07, "logits/chosen": -1.518733263015747, "logits/rejected": -1.3821210861206055, "logps/chosen": -609.395263671875, "logps/rejected": -702.1522216796875, "loss": 0.4597, "rewards/accuracies": 0.75, "rewards/chosen": -3.131622791290283, "rewards/margins": 1.0383737087249756, "rewards/rejected": -4.169996738433838, "step": 3900 }, { "epoch": 0.77, "eval_logits/chosen": 0.7636324763298035, "eval_logits/rejected": 1.3349512815475464, "eval_logps/chosen": -549.4085693359375, "eval_logps/rejected": -627.3712158203125, "eval_loss": 0.48964768648147583, "eval_rewards/accuracies": 0.7276785969734192, "eval_rewards/chosen": -2.8616786003112793, "eval_rewards/margins": 1.0808496475219727, "eval_rewards/rejected": -3.942528486251831, "eval_runtime": 474.3988, "eval_samples_per_second": 4.216, "eval_steps_per_second": 0.177, "step": 3900 }, { "epoch": 0.77, "learning_rate": 7.788650133636291e-07, "logits/chosen": -1.2482768297195435, "logits/rejected": -1.1767711639404297, "logps/chosen": -543.4593505859375, "logps/rejected": -596.6537475585938, "loss": 0.5069, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8678791522979736, "rewards/margins": 1.308966875076294, "rewards/rejected": -4.176846504211426, "step": 3910 }, { "epoch": 0.77, "learning_rate": 7.664789686638272e-07, "logits/chosen": -1.4836885929107666, "logits/rejected": -1.18732488155365, "logps/chosen": -538.8760986328125, "logps/rejected": -629.8192138671875, "loss": 0.458, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.757962226867676, "rewards/margins": 1.2660038471221924, "rewards/rejected": -4.023966312408447, "step": 3920 }, { "epoch": 0.77, "learning_rate": 7.541743452472194e-07, "logits/chosen": -1.8099521398544312, "logits/rejected": -1.2252228260040283, "logps/chosen": -555.2513427734375, "logps/rejected": -627.4388427734375, "loss": 0.5447, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7958285808563232, "rewards/margins": 1.049216866493225, "rewards/rejected": -3.845045566558838, "step": 3930 }, { "epoch": 0.77, "learning_rate": 7.41951721046357e-07, "logits/chosen": -1.4922353029251099, "logits/rejected": -1.045175313949585, "logps/chosen": -594.3184814453125, "logps/rejected": -714.8767700195312, "loss": 0.4802, "rewards/accuracies": 0.75, "rewards/chosen": -3.1308560371398926, "rewards/margins": 1.3848567008972168, "rewards/rejected": -4.515712738037109, "step": 3940 }, { "epoch": 0.78, "learning_rate": 7.298116701423874e-07, "logits/chosen": -1.6804969310760498, "logits/rejected": -1.3681905269622803, "logps/chosen": -653.5281982421875, "logps/rejected": -714.237060546875, "loss": 0.4969, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.151542901992798, "rewards/margins": 1.1915686130523682, "rewards/rejected": -4.343111515045166, "step": 3950 }, { "epoch": 0.78, "learning_rate": 7.177547627380987e-07, "logits/chosen": -1.7028090953826904, "logits/rejected": -1.2910749912261963, "logps/chosen": -559.255859375, "logps/rejected": -671.7525634765625, "loss": 0.494, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.8281595706939697, "rewards/margins": 1.2134965658187866, "rewards/rejected": -4.041655540466309, "step": 3960 }, { "epoch": 0.78, "learning_rate": 7.057815651311323e-07, "logits/chosen": -1.2011252641677856, "logits/rejected": -1.1789474487304688, "logps/chosen": -517.1002197265625, "logps/rejected": -617.9232788085938, "loss": 0.4652, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9817757606506348, "rewards/margins": 1.4597489833831787, "rewards/rejected": -4.441524505615234, "step": 3970 }, { "epoch": 0.78, "learning_rate": 6.93892639687386e-07, "logits/chosen": -1.2308969497680664, "logits/rejected": -1.37088143825531, "logps/chosen": -484.86016845703125, "logps/rejected": -606.0040283203125, "loss": 0.4863, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.809051036834717, "rewards/margins": 0.9818013906478882, "rewards/rejected": -3.7908523082733154, "step": 3980 }, { "epoch": 0.78, "learning_rate": 6.820885448146041e-07, "logits/chosen": -1.4448660612106323, "logits/rejected": -1.356806993484497, "logps/chosen": -596.8998413085938, "logps/rejected": -694.8192138671875, "loss": 0.5442, "rewards/accuracies": 0.625, "rewards/chosen": -3.2882466316223145, "rewards/margins": 1.0220199823379517, "rewards/rejected": -4.310266971588135, "step": 3990 }, { "epoch": 0.79, "learning_rate": 6.703698349361437e-07, "logits/chosen": -1.4850349426269531, "logits/rejected": -1.4774724245071411, "logps/chosen": -545.3355712890625, "logps/rejected": -660.8809814453125, "loss": 0.4649, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.9125723838806152, "rewards/margins": 0.9985507130622864, "rewards/rejected": -3.9111227989196777, "step": 4000 }, { "epoch": 0.79, "eval_logits/chosen": 0.7213255167007446, "eval_logits/rejected": 1.2903335094451904, "eval_logps/chosen": -550.0615234375, "eval_logps/rejected": -626.822998046875, "eval_loss": 0.4885156452655792, "eval_rewards/accuracies": 0.7232142686843872, "eval_rewards/chosen": -2.868208408355713, "eval_rewards/margins": 1.0688380002975464, "eval_rewards/rejected": -3.937046527862549, "eval_runtime": 474.6508, "eval_samples_per_second": 4.214, "eval_steps_per_second": 0.177, "step": 4000 }, { "epoch": 0.79, "learning_rate": 6.587370604649373e-07, "logits/chosen": -1.7546825408935547, "logits/rejected": -1.379854440689087, "logps/chosen": -639.4371337890625, "logps/rejected": -648.8902587890625, "loss": 0.4445, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.9304933547973633, "rewards/margins": 0.9470575451850891, "rewards/rejected": -3.877551317214966, "step": 4010 }, { "epoch": 0.79, "learning_rate": 6.471907677776426e-07, "logits/chosen": -1.4482325315475464, "logits/rejected": -1.013795256614685, "logps/chosen": -637.1885986328125, "logps/rejected": -682.3165893554688, "loss": 0.456, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.1467432975769043, "rewards/margins": 1.3057441711425781, "rewards/rejected": -4.452487945556641, "step": 4020 }, { "epoch": 0.79, "learning_rate": 6.357314991889757e-07, "logits/chosen": -1.1399719715118408, "logits/rejected": -1.3395874500274658, "logps/chosen": -552.9012451171875, "logps/rejected": -661.7059326171875, "loss": 0.4233, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0356509685516357, "rewards/margins": 1.240136981010437, "rewards/rejected": -4.275787830352783, "step": 4030 }, { "epoch": 0.79, "learning_rate": 6.243597929262404e-07, "logits/chosen": -1.6289745569229126, "logits/rejected": -1.1411449909210205, "logps/chosen": -638.5223999023438, "logps/rejected": -637.6964111328125, "loss": 0.5376, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.073040008544922, "rewards/margins": 0.9560378193855286, "rewards/rejected": -4.029077529907227, "step": 4040 }, { "epoch": 0.79, "learning_rate": 6.130761831040522e-07, "logits/chosen": -1.2583967447280884, "logits/rejected": -1.3109838962554932, "logps/chosen": -503.34527587890625, "logps/rejected": -617.5255126953125, "loss": 0.497, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.6558876037597656, "rewards/margins": 1.1685144901275635, "rewards/rejected": -3.82440185546875, "step": 4050 }, { "epoch": 0.8, "learning_rate": 6.018811996992455e-07, "logits/chosen": -1.5982011556625366, "logits/rejected": -0.981221079826355, "logps/chosen": -584.2902221679688, "logps/rejected": -656.946044921875, "loss": 0.4365, "rewards/accuracies": 0.75, "rewards/chosen": -3.050417423248291, "rewards/margins": 1.3010116815567017, "rewards/rejected": -4.351428985595703, "step": 4060 }, { "epoch": 0.8, "learning_rate": 5.907753685259865e-07, "logits/chosen": -1.481526494026184, "logits/rejected": -1.2903486490249634, "logps/chosen": -581.9069213867188, "logps/rejected": -682.1727294921875, "loss": 0.5691, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.04373836517334, "rewards/margins": 1.1073167324066162, "rewards/rejected": -4.151054859161377, "step": 4070 }, { "epoch": 0.8, "learning_rate": 5.797592112110734e-07, "logits/chosen": -1.7861783504486084, "logits/rejected": -1.5701062679290771, "logps/chosen": -582.6986083984375, "logps/rejected": -750.8484497070312, "loss": 0.5171, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.053048849105835, "rewards/margins": 1.0844833850860596, "rewards/rejected": -4.137532711029053, "step": 4080 }, { "epoch": 0.8, "learning_rate": 5.688332451694356e-07, "logits/chosen": -1.3515706062316895, "logits/rejected": -1.1656490564346313, "logps/chosen": -552.8885498046875, "logps/rejected": -614.0411376953125, "loss": 0.5751, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.6918797492980957, "rewards/margins": 0.9679886102676392, "rewards/rejected": -3.6598687171936035, "step": 4090 }, { "epoch": 0.8, "learning_rate": 5.579979835798361e-07, "logits/chosen": -1.4821619987487793, "logits/rejected": -1.256227970123291, "logps/chosen": -478.2623596191406, "logps/rejected": -660.5684814453125, "loss": 0.4689, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.627953052520752, "rewards/margins": 1.5396177768707275, "rewards/rejected": -4.167571067810059, "step": 4100 }, { "epoch": 0.8, "eval_logits/chosen": 0.6762639284133911, "eval_logits/rejected": 1.249507188796997, "eval_logps/chosen": -547.4949951171875, "eval_logps/rejected": -623.7166137695312, "eval_loss": 0.48798251152038574, "eval_rewards/accuracies": 0.7232142686843872, "eval_rewards/chosen": -2.842543125152588, "eval_rewards/margins": 1.0634390115737915, "eval_rewards/rejected": -3.905982255935669, "eval_runtime": 475.2471, "eval_samples_per_second": 4.208, "eval_steps_per_second": 0.177, "step": 4100 }, { "epoch": 0.81, "learning_rate": 5.472539353607612e-07, "logits/chosen": -1.6128085851669312, "logits/rejected": -1.0963213443756104, "logps/chosen": -575.4742431640625, "logps/rejected": -641.0703735351562, "loss": 0.4269, "rewards/accuracies": 0.75, "rewards/chosen": -2.5407283306121826, "rewards/margins": 1.1561939716339111, "rewards/rejected": -3.6969223022460938, "step": 4110 }, { "epoch": 0.81, "learning_rate": 5.366016051465245e-07, "logits/chosen": -1.2906649112701416, "logits/rejected": -0.9485572576522827, "logps/chosen": -501.63824462890625, "logps/rejected": -602.8499145507812, "loss": 0.5234, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.7279627323150635, "rewards/margins": 1.3095009326934814, "rewards/rejected": -4.037463188171387, "step": 4120 }, { "epoch": 0.81, "learning_rate": 5.260414932635588e-07, "logits/chosen": -1.6067225933074951, "logits/rejected": -1.5598185062408447, "logps/chosen": -568.68798828125, "logps/rejected": -691.7966918945312, "loss": 0.4267, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.682281970977783, "rewards/margins": 1.302312970161438, "rewards/rejected": -3.9845948219299316, "step": 4130 }, { "epoch": 0.81, "learning_rate": 5.155740957069186e-07, "logits/chosen": -1.5909394025802612, "logits/rejected": -1.1699573993682861, "logps/chosen": -647.465087890625, "logps/rejected": -690.4301147460938, "loss": 0.5324, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0774474143981934, "rewards/margins": 1.313281774520874, "rewards/rejected": -4.390728950500488, "step": 4140 }, { "epoch": 0.81, "learning_rate": 5.051999041169869e-07, "logits/chosen": -1.702135443687439, "logits/rejected": -1.2952522039413452, "logps/chosen": -592.5914916992188, "logps/rejected": -648.1393432617188, "loss": 0.6196, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.9406609535217285, "rewards/margins": 1.1990137100219727, "rewards/rejected": -4.139674663543701, "step": 4150 }, { "epoch": 0.82, "learning_rate": 4.949194057563783e-07, "logits/chosen": -1.4475417137145996, "logits/rejected": -1.2416096925735474, "logps/chosen": -536.6510009765625, "logps/rejected": -681.0042724609375, "loss": 0.4172, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.51859712600708, "rewards/margins": 1.5448805093765259, "rewards/rejected": -4.063477516174316, "step": 4160 }, { "epoch": 0.82, "learning_rate": 4.847330834870551e-07, "logits/chosen": -1.7177613973617554, "logits/rejected": -1.2784730195999146, "logps/chosen": -523.9808349609375, "logps/rejected": -643.7147216796875, "loss": 0.4968, "rewards/accuracies": 0.875, "rewards/chosen": -2.432467460632324, "rewards/margins": 1.484404444694519, "rewards/rejected": -3.916872024536133, "step": 4170 }, { "epoch": 0.82, "learning_rate": 4.746414157476506e-07, "logits/chosen": -1.6163629293441772, "logits/rejected": -1.0138940811157227, "logps/chosen": -532.6239013671875, "logps/rejected": -668.1685791015625, "loss": 0.4849, "rewards/accuracies": 0.75, "rewards/chosen": -2.767432451248169, "rewards/margins": 1.6345386505126953, "rewards/rejected": -4.401970863342285, "step": 4180 }, { "epoch": 0.82, "learning_rate": 4.6464487653099216e-07, "logits/chosen": -1.466838002204895, "logits/rejected": -1.3406709432601929, "logps/chosen": -547.8306274414062, "logps/rejected": -684.18408203125, "loss": 0.5063, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.955652952194214, "rewards/margins": 1.1953189373016357, "rewards/rejected": -4.15097188949585, "step": 4190 }, { "epoch": 0.82, "learning_rate": 4.5474393536184214e-07, "logits/chosen": -1.5597014427185059, "logits/rejected": -1.5640283823013306, "logps/chosen": -577.12451171875, "logps/rejected": -654.9713134765625, "loss": 0.4275, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.894909620285034, "rewards/margins": 0.7931472659111023, "rewards/rejected": -3.6880574226379395, "step": 4200 }, { "epoch": 0.82, "eval_logits/chosen": 0.7330583930015564, "eval_logits/rejected": 1.3066866397857666, "eval_logps/chosen": -549.9531860351562, "eval_logps/rejected": -626.6477661132812, "eval_loss": 0.487714946269989, "eval_rewards/accuracies": 0.7232142686843872, "eval_rewards/chosen": -2.8671252727508545, "eval_rewards/margins": 1.0681687593460083, "eval_rewards/rejected": -3.9352939128875732, "eval_runtime": 467.6637, "eval_samples_per_second": 4.277, "eval_steps_per_second": 0.18, "step": 4200 }, { "epoch": 0.83, "learning_rate": 4.449390572748449e-07, "logits/chosen": -1.133298635482788, "logits/rejected": -0.997468113899231, "logps/chosen": -500.6832580566406, "logps/rejected": -642.2027587890625, "loss": 0.5158, "rewards/accuracies": 0.875, "rewards/chosen": -2.7464077472686768, "rewards/margins": 1.693267583847046, "rewards/rejected": -4.4396748542785645, "step": 4210 }, { "epoch": 0.83, "learning_rate": 4.352307027926828e-07, "logits/chosen": -1.6892297267913818, "logits/rejected": -1.145814299583435, "logps/chosen": -536.3302001953125, "logps/rejected": -664.6512451171875, "loss": 0.3924, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.630311965942383, "rewards/margins": 1.869051218032837, "rewards/rejected": -4.499362945556641, "step": 4220 }, { "epoch": 0.83, "learning_rate": 4.2561932790444597e-07, "logits/chosen": -1.4355401992797852, "logits/rejected": -1.4584405422210693, "logps/chosen": -499.212158203125, "logps/rejected": -660.339111328125, "loss": 0.5618, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6407253742218018, "rewards/margins": 1.5666038990020752, "rewards/rejected": -4.207329273223877, "step": 4230 }, { "epoch": 0.83, "learning_rate": 4.1610538404421837e-07, "logits/chosen": -1.648999810218811, "logits/rejected": -1.5709302425384521, "logps/chosen": -538.1671752929688, "logps/rejected": -669.4725341796875, "loss": 0.4689, "rewards/accuracies": 0.75, "rewards/chosen": -2.68080472946167, "rewards/margins": 1.298196792602539, "rewards/rejected": -3.979001998901367, "step": 4240 }, { "epoch": 0.83, "learning_rate": 4.0668931806987e-07, "logits/chosen": -1.6326240301132202, "logits/rejected": -1.6104761362075806, "logps/chosen": -607.5662231445312, "logps/rejected": -701.7105102539062, "loss": 0.4732, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.885305166244507, "rewards/margins": 1.4834986925125122, "rewards/rejected": -4.368803977966309, "step": 4250 }, { "epoch": 0.84, "learning_rate": 3.9737157224207265e-07, "logits/chosen": -1.6464459896087646, "logits/rejected": -1.6791051626205444, "logps/chosen": -548.1013793945312, "logps/rejected": -617.052734375, "loss": 0.4732, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.502802848815918, "rewards/margins": 1.1017303466796875, "rewards/rejected": -3.6045334339141846, "step": 4260 }, { "epoch": 0.84, "learning_rate": 3.8815258420352385e-07, "logits/chosen": -1.4971342086791992, "logits/rejected": -1.3662328720092773, "logps/chosen": -561.4806518554688, "logps/rejected": -697.7352905273438, "loss": 0.411, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7723212242126465, "rewards/margins": 1.7536299228668213, "rewards/rejected": -4.525950908660889, "step": 4270 }, { "epoch": 0.84, "learning_rate": 3.7903278695839456e-07, "logits/chosen": -1.6506578922271729, "logits/rejected": -1.4599170684814453, "logps/chosen": -518.5300903320312, "logps/rejected": -640.587646484375, "loss": 0.4808, "rewards/accuracies": 0.75, "rewards/chosen": -2.7276813983917236, "rewards/margins": 1.2205212116241455, "rewards/rejected": -3.9482028484344482, "step": 4280 }, { "epoch": 0.84, "learning_rate": 3.7001260885198925e-07, "logits/chosen": -1.7103252410888672, "logits/rejected": -1.1314291954040527, "logps/chosen": -595.9619750976562, "logps/rejected": -664.2386474609375, "loss": 0.4952, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.961540937423706, "rewards/margins": 1.683045744895935, "rewards/rejected": -4.644586086273193, "step": 4290 }, { "epoch": 0.84, "learning_rate": 3.610924735506274e-07, "logits/chosen": -1.5473103523254395, "logits/rejected": -1.102410078048706, "logps/chosen": -491.91522216796875, "logps/rejected": -658.9654541015625, "loss": 0.5325, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.771312713623047, "rewards/margins": 1.5326588153839111, "rewards/rejected": -4.303971290588379, "step": 4300 }, { "epoch": 0.84, "eval_logits/chosen": 0.8069880604743958, "eval_logits/rejected": 1.3795324563980103, "eval_logps/chosen": -551.7904663085938, "eval_logps/rejected": -629.420166015625, "eval_loss": 0.4881390929222107, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -2.885497570037842, "eval_rewards/margins": 1.0775203704833984, "eval_rewards/rejected": -3.9630184173583984, "eval_runtime": 468.0535, "eval_samples_per_second": 4.273, "eval_steps_per_second": 0.179, "step": 4300 }, { "epoch": 0.85, "learning_rate": 3.5227280002174626e-07, "logits/chosen": -1.4590882062911987, "logits/rejected": -1.0051017999649048, "logps/chosen": -617.4918823242188, "logps/rejected": -654.9302978515625, "loss": 0.5026, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.8475120067596436, "rewards/margins": 1.0741074085235596, "rewards/rejected": -3.9216198921203613, "step": 4310 }, { "epoch": 0.85, "learning_rate": 3.4355400251421977e-07, "logits/chosen": -1.5790798664093018, "logits/rejected": -1.4507157802581787, "logps/chosen": -529.469970703125, "logps/rejected": -670.0650634765625, "loss": 0.4777, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.685770273208618, "rewards/margins": 1.1587498188018799, "rewards/rejected": -3.844520092010498, "step": 4320 }, { "epoch": 0.85, "learning_rate": 3.3493649053890325e-07, "logits/chosen": -1.507348656654358, "logits/rejected": -1.4271577596664429, "logps/chosen": -546.6571655273438, "logps/rejected": -621.9240112304688, "loss": 0.4806, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.7799060344696045, "rewards/margins": 0.9522191882133484, "rewards/rejected": -3.732125759124756, "step": 4330 }, { "epoch": 0.85, "learning_rate": 3.2642066884940064e-07, "logits/chosen": -1.4219774007797241, "logits/rejected": -1.054166316986084, "logps/chosen": -609.1405639648438, "logps/rejected": -705.8851928710938, "loss": 0.6196, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.914445161819458, "rewards/margins": 1.4480302333831787, "rewards/rejected": -4.362475395202637, "step": 4340 }, { "epoch": 0.85, "learning_rate": 3.1800693742305074e-07, "logits/chosen": -1.2960752248764038, "logits/rejected": -1.182204008102417, "logps/chosen": -497.4002990722656, "logps/rejected": -645.9159545898438, "loss": 0.4492, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.8490395545959473, "rewards/margins": 1.3345952033996582, "rewards/rejected": -4.1836347579956055, "step": 4350 }, { "epoch": 0.86, "learning_rate": 3.0969569144214147e-07, "logits/chosen": -1.3932464122772217, "logits/rejected": -1.5933971405029297, "logps/chosen": -510.855712890625, "logps/rejected": -639.6701049804688, "loss": 0.5679, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.6335511207580566, "rewards/margins": 1.0062687397003174, "rewards/rejected": -3.639819383621216, "step": 4360 }, { "epoch": 0.86, "learning_rate": 3.014873212753516e-07, "logits/chosen": -1.4604628086090088, "logits/rejected": -1.1423250436782837, "logps/chosen": -519.5737915039062, "logps/rejected": -592.70458984375, "loss": 0.4433, "rewards/accuracies": 0.625, "rewards/chosen": -3.1641898155212402, "rewards/margins": 0.8947893381118774, "rewards/rejected": -4.0589799880981445, "step": 4370 }, { "epoch": 0.86, "learning_rate": 2.933822124594124e-07, "logits/chosen": -1.4781441688537598, "logits/rejected": -1.0364634990692139, "logps/chosen": -474.93829345703125, "logps/rejected": -620.2733154296875, "loss": 0.4233, "rewards/accuracies": 0.875, "rewards/chosen": -2.6123297214508057, "rewards/margins": 1.690799355506897, "rewards/rejected": -4.303129196166992, "step": 4380 }, { "epoch": 0.86, "learning_rate": 2.8538074568099954e-07, "logits/chosen": -1.3508070707321167, "logits/rejected": -0.8607912063598633, "logps/chosen": -572.3214721679688, "logps/rejected": -780.388671875, "loss": 0.5175, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.1330060958862305, "rewards/margins": 1.9722713232040405, "rewards/rejected": -5.1052775382995605, "step": 4390 }, { "epoch": 0.86, "learning_rate": 2.774832967588556e-07, "logits/chosen": -1.3934319019317627, "logits/rejected": -1.3498982191085815, "logps/chosen": -520.5974731445312, "logps/rejected": -616.978271484375, "loss": 0.532, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.889716625213623, "rewards/margins": 0.6850132942199707, "rewards/rejected": -3.5747299194335938, "step": 4400 }, { "epoch": 0.86, "eval_logits/chosen": 0.7732266187667847, "eval_logits/rejected": 1.3435120582580566, "eval_logps/chosen": -549.9609985351562, "eval_logps/rejected": -627.1785278320312, "eval_loss": 0.48813939094543457, "eval_rewards/accuracies": 0.7276785969734192, "eval_rewards/chosen": -2.8672022819519043, "eval_rewards/margins": 1.073399543762207, "eval_rewards/rejected": -3.9406018257141113, "eval_runtime": 470.3533, "eval_samples_per_second": 4.252, "eval_steps_per_second": 0.179, "step": 4400 }, { "epoch": 0.87, "learning_rate": 2.6969023662613473e-07, "logits/chosen": -1.2395694255828857, "logits/rejected": -1.2353589534759521, "logps/chosen": -604.3405151367188, "logps/rejected": -702.7529296875, "loss": 0.4027, "rewards/accuracies": 0.75, "rewards/chosen": -3.2787222862243652, "rewards/margins": 1.1157318353652954, "rewards/rejected": -4.394454002380371, "step": 4410 }, { "epoch": 0.87, "learning_rate": 2.6200193131298376e-07, "logits/chosen": -1.449593186378479, "logits/rejected": -1.4640024900436401, "logps/chosen": -543.8770751953125, "logps/rejected": -664.721923828125, "loss": 0.4854, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8391997814178467, "rewards/margins": 1.157238245010376, "rewards/rejected": -3.9964377880096436, "step": 4420 }, { "epoch": 0.87, "learning_rate": 2.544187419293462e-07, "logits/chosen": -1.5657931566238403, "logits/rejected": -1.2472331523895264, "logps/chosen": -567.7420654296875, "logps/rejected": -664.3908081054688, "loss": 0.5379, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.110095500946045, "rewards/margins": 1.1092383861541748, "rewards/rejected": -4.219334125518799, "step": 4430 }, { "epoch": 0.87, "learning_rate": 2.469410246480067e-07, "logits/chosen": -1.2664387226104736, "logits/rejected": -1.014687180519104, "logps/chosen": -577.5288696289062, "logps/rejected": -627.8881225585938, "loss": 0.505, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.1275877952575684, "rewards/margins": 1.2311753034591675, "rewards/rejected": -4.358763217926025, "step": 4440 }, { "epoch": 0.87, "learning_rate": 2.3956913068785697e-07, "logits/chosen": -1.8588111400604248, "logits/rejected": -1.5784744024276733, "logps/chosen": -606.2607421875, "logps/rejected": -602.3804931640625, "loss": 0.5029, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.689060926437378, "rewards/margins": 0.9545005559921265, "rewards/rejected": -3.643561601638794, "step": 4450 }, { "epoch": 0.88, "learning_rate": 2.3230340629740166e-07, "logits/chosen": -1.300377607345581, "logits/rejected": -1.225534200668335, "logps/chosen": -591.2781372070312, "logps/rejected": -612.3079223632812, "loss": 0.5624, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.157484769821167, "rewards/margins": 0.8397257924079895, "rewards/rejected": -3.9972102642059326, "step": 4460 }, { "epoch": 0.88, "learning_rate": 2.2514419273849674e-07, "logits/chosen": -1.5014206171035767, "logits/rejected": -1.5819671154022217, "logps/chosen": -518.7572021484375, "logps/rejected": -617.6094360351562, "loss": 0.5182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.928231954574585, "rewards/margins": 0.8245126605033875, "rewards/rejected": -3.752744197845459, "step": 4470 }, { "epoch": 0.88, "learning_rate": 2.1809182627031883e-07, "logits/chosen": -1.4660804271697998, "logits/rejected": -1.550597071647644, "logps/chosen": -489.97802734375, "logps/rejected": -606.5343017578125, "loss": 0.4504, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7580807209014893, "rewards/margins": 1.096167802810669, "rewards/rejected": -3.854248523712158, "step": 4480 }, { "epoch": 0.88, "learning_rate": 2.111466381335714e-07, "logits/chosen": -1.643610954284668, "logits/rejected": -1.324254035949707, "logps/chosen": -560.3173828125, "logps/rejected": -619.2115478515625, "loss": 0.5088, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.0471415519714355, "rewards/margins": 1.023773193359375, "rewards/rejected": -4.070915222167969, "step": 4490 }, { "epoch": 0.88, "learning_rate": 2.0430895453492944e-07, "logits/chosen": -1.7354183197021484, "logits/rejected": -1.5083003044128418, "logps/chosen": -547.43798828125, "logps/rejected": -576.88330078125, "loss": 0.4558, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.6389975547790527, "rewards/margins": 0.869195818901062, "rewards/rejected": -3.5081934928894043, "step": 4500 }, { "epoch": 0.88, "eval_logits/chosen": 0.7711246013641357, "eval_logits/rejected": 1.3411438465118408, "eval_logps/chosen": -548.8391723632812, "eval_logps/rejected": -625.7067260742188, "eval_loss": 0.4879080057144165, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -2.855985403060913, "eval_rewards/margins": 1.0698983669281006, "eval_rewards/rejected": -3.9258840084075928, "eval_runtime": 468.3034, "eval_samples_per_second": 4.271, "eval_steps_per_second": 0.179, "step": 4500 }, { "epoch": 0.89, "learning_rate": 1.9757909663171508e-07, "logits/chosen": -1.497092604637146, "logits/rejected": -1.3735014200210571, "logps/chosen": -544.7706909179688, "logps/rejected": -599.2404174804688, "loss": 0.4727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.603917360305786, "rewards/margins": 1.5174095630645752, "rewards/rejected": -4.121326446533203, "step": 4510 }, { "epoch": 0.89, "learning_rate": 1.9095738051681412e-07, "logits/chosen": -1.1327025890350342, "logits/rejected": -1.2740120887756348, "logps/chosen": -515.599609375, "logps/rejected": -598.8855590820312, "loss": 0.5864, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.005495309829712, "rewards/margins": 0.9244306683540344, "rewards/rejected": -3.9299259185791016, "step": 4520 }, { "epoch": 0.89, "learning_rate": 1.844441172038311e-07, "logits/chosen": -1.7061221599578857, "logits/rejected": -1.4002907276153564, "logps/chosen": -560.3880615234375, "logps/rejected": -678.8825073242188, "loss": 0.3871, "rewards/accuracies": 0.75, "rewards/chosen": -2.8229405879974365, "rewards/margins": 1.5331408977508545, "rewards/rejected": -4.356081485748291, "step": 4530 }, { "epoch": 0.89, "learning_rate": 1.7803961261247864e-07, "logits/chosen": -1.7175147533416748, "logits/rejected": -1.5015738010406494, "logps/chosen": -564.5352783203125, "logps/rejected": -642.4002075195312, "loss": 0.5053, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.874131441116333, "rewards/margins": 1.224721908569336, "rewards/rejected": -4.09885311126709, "step": 4540 }, { "epoch": 0.89, "learning_rate": 1.717441675542106e-07, "logits/chosen": -1.5754218101501465, "logits/rejected": -1.1890827417373657, "logps/chosen": -534.8074340820312, "logps/rejected": -595.2333374023438, "loss": 0.5414, "rewards/accuracies": 0.75, "rewards/chosen": -2.6678903102874756, "rewards/margins": 1.0343869924545288, "rewards/rejected": -3.7022769451141357, "step": 4550 }, { "epoch": 0.9, "learning_rate": 1.6555807771809375e-07, "logits/chosen": -1.4952316284179688, "logits/rejected": -1.4921852350234985, "logps/chosen": -543.2953491210938, "logps/rejected": -644.8867797851562, "loss": 0.449, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.997863531112671, "rewards/margins": 0.9223406910896301, "rewards/rejected": -3.9202046394348145, "step": 4560 }, { "epoch": 0.9, "learning_rate": 1.5948163365691798e-07, "logits/chosen": -1.542854905128479, "logits/rejected": -1.1436667442321777, "logps/chosen": -567.2286376953125, "logps/rejected": -647.6072387695312, "loss": 0.4908, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.061537027359009, "rewards/margins": 0.9950931668281555, "rewards/rejected": -4.0566301345825195, "step": 4570 }, { "epoch": 0.9, "learning_rate": 1.5351512077355024e-07, "logits/chosen": -1.57053542137146, "logits/rejected": -1.3311067819595337, "logps/chosen": -661.517578125, "logps/rejected": -673.8138427734375, "loss": 0.4326, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.0921597480773926, "rewards/margins": 1.1435569524765015, "rewards/rejected": -4.235716819763184, "step": 4580 }, { "epoch": 0.9, "learning_rate": 1.4765881930752983e-07, "logits/chosen": -1.4779008626937866, "logits/rejected": -1.4968267679214478, "logps/chosen": -565.7744750976562, "logps/rejected": -658.5062255859375, "loss": 0.491, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -3.344609498977661, "rewards/margins": 0.8472648859024048, "rewards/rejected": -4.1918745040893555, "step": 4590 }, { "epoch": 0.9, "learning_rate": 1.4191300432190634e-07, "logits/chosen": -1.478987455368042, "logits/rejected": -1.0707805156707764, "logps/chosen": -554.4490966796875, "logps/rejected": -645.3145751953125, "loss": 0.5541, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.8828506469726562, "rewards/margins": 1.1082347631454468, "rewards/rejected": -3.9910855293273926, "step": 4600 }, { "epoch": 0.9, "eval_logits/chosen": 0.7729079127311707, "eval_logits/rejected": 1.342787742614746, "eval_logps/chosen": -549.2481079101562, "eval_logps/rejected": -626.0703735351562, "eval_loss": 0.4881538152694702, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -2.8600735664367676, "eval_rewards/margins": 1.0694462060928345, "eval_rewards/rejected": -3.9295201301574707, "eval_runtime": 470.2505, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.179, "step": 4600 }, { "epoch": 0.9, "learning_rate": 1.362779456903182e-07, "logits/chosen": -1.3034653663635254, "logits/rejected": -1.1462644338607788, "logps/chosen": -512.8648681640625, "logps/rejected": -576.4937133789062, "loss": 0.4791, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.9724960327148438, "rewards/margins": 0.8336421251296997, "rewards/rejected": -3.806138277053833, "step": 4610 }, { "epoch": 0.91, "learning_rate": 1.3075390808431897e-07, "logits/chosen": -1.6049964427947998, "logits/rejected": -1.38681960105896, "logps/chosen": -504.82086181640625, "logps/rejected": -620.4871826171875, "loss": 0.4517, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.6851613521575928, "rewards/margins": 1.6643813848495483, "rewards/rejected": -4.34954309463501, "step": 4620 }, { "epoch": 0.91, "learning_rate": 1.253411509609459e-07, "logits/chosen": -1.4405021667480469, "logits/rejected": -1.1606242656707764, "logps/chosen": -493.46160888671875, "logps/rejected": -642.3789672851562, "loss": 0.534, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.7728381156921387, "rewards/margins": 1.5827041864395142, "rewards/rejected": -4.355542182922363, "step": 4630 }, { "epoch": 0.91, "learning_rate": 1.2003992855053326e-07, "logits/chosen": -1.223035454750061, "logits/rejected": -0.8624464273452759, "logps/chosen": -497.0065002441406, "logps/rejected": -650.3905639648438, "loss": 0.44, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.5264394283294678, "rewards/margins": 1.526515245437622, "rewards/rejected": -4.05295467376709, "step": 4640 }, { "epoch": 0.91, "learning_rate": 1.1485048984476998e-07, "logits/chosen": -1.4365018606185913, "logits/rejected": -1.4451005458831787, "logps/chosen": -546.9738159179688, "logps/rejected": -685.6676025390625, "loss": 0.4365, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7223660945892334, "rewards/margins": 1.5035502910614014, "rewards/rejected": -4.225916385650635, "step": 4650 }, { "epoch": 0.91, "learning_rate": 1.0977307858500818e-07, "logits/chosen": -1.23599112033844, "logits/rejected": -1.4569393396377563, "logps/chosen": -502.11016845703125, "logps/rejected": -630.4044189453125, "loss": 0.4859, "rewards/accuracies": 0.75, "rewards/chosen": -3.0557899475097656, "rewards/margins": 1.1950440406799316, "rewards/rejected": -4.250833511352539, "step": 4660 }, { "epoch": 0.92, "learning_rate": 1.0480793325081174e-07, "logits/chosen": -1.729148507118225, "logits/rejected": -1.2992174625396729, "logps/chosen": -502.6327209472656, "logps/rejected": -604.9547119140625, "loss": 0.4806, "rewards/accuracies": 0.875, "rewards/chosen": -2.1948771476745605, "rewards/margins": 1.5025956630706787, "rewards/rejected": -3.6974730491638184, "step": 4670 }, { "epoch": 0.92, "learning_rate": 9.995528704875635e-08, "logits/chosen": -1.5218055248260498, "logits/rejected": -1.5403430461883545, "logps/chosen": -569.9359130859375, "logps/rejected": -700.4747314453125, "loss": 0.5107, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7318825721740723, "rewards/margins": 1.2422831058502197, "rewards/rejected": -3.974165678024292, "step": 4680 }, { "epoch": 0.92, "learning_rate": 9.521536790147722e-08, "logits/chosen": -1.601231336593628, "logits/rejected": -1.397159457206726, "logps/chosen": -591.1676025390625, "logps/rejected": -694.2679443359375, "loss": 0.4968, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.1166365146636963, "rewards/margins": 1.142465353012085, "rewards/rejected": -4.259101867675781, "step": 4690 }, { "epoch": 0.92, "learning_rate": 9.058839843696237e-08, "logits/chosen": -1.6953855752944946, "logits/rejected": -1.4888617992401123, "logps/chosen": -580.9130859375, "logps/rejected": -659.9354248046875, "loss": 0.5743, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.9675400257110596, "rewards/margins": 1.20027494430542, "rewards/rejected": -4.1678147315979, "step": 4700 }, { "epoch": 0.92, "eval_logits/chosen": 0.7755272388458252, "eval_logits/rejected": 1.3445079326629639, "eval_logps/chosen": -549.6526489257812, "eval_logps/rejected": -626.5550537109375, "eval_loss": 0.4878641664981842, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -2.8641197681427, "eval_rewards/margins": 1.0702478885650635, "eval_rewards/rejected": -3.9343676567077637, "eval_runtime": 471.0127, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.178, "step": 4700 }, { "epoch": 0.92, "learning_rate": 8.607459597809565e-08, "logits/chosen": -1.82345712184906, "logits/rejected": -1.6927080154418945, "logps/chosen": -487.48687744140625, "logps/rejected": -665.341796875, "loss": 0.429, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.701889991760254, "rewards/margins": 1.1798779964447021, "rewards/rejected": -3.881767749786377, "step": 4710 }, { "epoch": 0.93, "learning_rate": 8.167417253245213e-08, "logits/chosen": -1.5453943014144897, "logits/rejected": -1.4755821228027344, "logps/chosen": -578.2698974609375, "logps/rejected": -590.1826171875, "loss": 0.5955, "rewards/accuracies": 0.625, "rewards/chosen": -2.737913131713867, "rewards/margins": 0.9511528015136719, "rewards/rejected": -3.6890664100646973, "step": 4720 }, { "epoch": 0.93, "learning_rate": 7.738733478233673e-08, "logits/chosen": -1.5097318887710571, "logits/rejected": -1.0241183042526245, "logps/chosen": -550.3403930664062, "logps/rejected": -640.8431396484375, "loss": 0.5177, "rewards/accuracies": 0.75, "rewards/chosen": -2.93200945854187, "rewards/margins": 1.2537683248519897, "rewards/rejected": -4.185778617858887, "step": 4730 }, { "epoch": 0.93, "learning_rate": 7.32142840750788e-08, "logits/chosen": -1.4939231872558594, "logits/rejected": -1.254494309425354, "logps/chosen": -608.97119140625, "logps/rejected": -694.7242431640625, "loss": 0.4677, "rewards/accuracies": 0.625, "rewards/chosen": -3.08970308303833, "rewards/margins": 0.9346927404403687, "rewards/rejected": -4.024395942687988, "step": 4740 }, { "epoch": 0.93, "learning_rate": 6.915521641357504e-08, "logits/chosen": -1.3975909948349, "logits/rejected": -1.4619452953338623, "logps/chosen": -559.6480712890625, "logps/rejected": -671.4445190429688, "loss": 0.4622, "rewards/accuracies": 0.75, "rewards/chosen": -2.970442295074463, "rewards/margins": 1.1618378162384033, "rewards/rejected": -4.132280349731445, "step": 4750 }, { "epoch": 0.93, "learning_rate": 6.521032244708375e-08, "logits/chosen": -1.5189892053604126, "logits/rejected": -1.4619439840316772, "logps/chosen": -530.2403564453125, "logps/rejected": -606.9569091796875, "loss": 0.5584, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.9585671424865723, "rewards/margins": 0.770112156867981, "rewards/rejected": -3.728679656982422, "step": 4760 }, { "epoch": 0.94, "learning_rate": 6.137978746226848e-08, "logits/chosen": -1.2548673152923584, "logits/rejected": -1.4840552806854248, "logps/chosen": -565.2719116210938, "logps/rejected": -694.0643920898438, "loss": 0.4721, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.204113721847534, "rewards/margins": 1.1963703632354736, "rewards/rejected": -4.400484085083008, "step": 4770 }, { "epoch": 0.94, "learning_rate": 5.766379137449624e-08, "logits/chosen": -1.4481416940689087, "logits/rejected": -1.0816242694854736, "logps/chosen": -640.5113525390625, "logps/rejected": -766.0667724609375, "loss": 0.4272, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9921982288360596, "rewards/margins": 1.9512176513671875, "rewards/rejected": -4.943415641784668, "step": 4780 }, { "epoch": 0.94, "learning_rate": 5.406250871938912e-08, "logits/chosen": -1.626961350440979, "logits/rejected": -1.5448499917984009, "logps/chosen": -551.1680908203125, "logps/rejected": -574.9620971679688, "loss": 0.5034, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.941655397415161, "rewards/margins": 0.6056486964225769, "rewards/rejected": -3.547304153442383, "step": 4790 }, { "epoch": 0.94, "learning_rate": 5.0576108644623536e-08, "logits/chosen": -1.4706220626831055, "logits/rejected": -1.495298981666565, "logps/chosen": -528.7744140625, "logps/rejected": -635.4783935546875, "loss": 0.4657, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.6470999717712402, "rewards/margins": 1.1267454624176025, "rewards/rejected": -3.7738451957702637, "step": 4800 }, { "epoch": 0.94, "eval_logits/chosen": 0.7749411463737488, "eval_logits/rejected": 1.3436989784240723, "eval_logps/chosen": -549.4993286132812, "eval_logps/rejected": -626.3385620117188, "eval_loss": 0.4880455732345581, "eval_rewards/accuracies": 0.7291666865348816, "eval_rewards/chosen": -2.8625855445861816, "eval_rewards/margins": 1.069616436958313, "eval_rewards/rejected": -3.9322023391723633, "eval_runtime": 471.6194, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.178, "step": 4800 }, { "epoch": 0.94, "learning_rate": 4.720475490198634e-08, "logits/chosen": -1.3117965459823608, "logits/rejected": -1.2122046947479248, "logps/chosen": -583.239990234375, "logps/rejected": -767.133544921875, "loss": 0.446, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.2251923084259033, "rewards/margins": 1.360032558441162, "rewards/rejected": -4.5852251052856445, "step": 4810 }, { "epoch": 0.95, "learning_rate": 4.394860583968624e-08, "logits/chosen": -1.5553256273269653, "logits/rejected": -1.46771240234375, "logps/chosen": -525.3215942382812, "logps/rejected": -673.70849609375, "loss": 0.4399, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.5879640579223633, "rewards/margins": 1.598061203956604, "rewards/rejected": -4.186025142669678, "step": 4820 }, { "epoch": 0.95, "learning_rate": 4.0807814394911996e-08, "logits/chosen": -1.505833387374878, "logits/rejected": -1.222219467163086, "logps/chosen": -514.3837280273438, "logps/rejected": -615.126220703125, "loss": 0.524, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.7017548084259033, "rewards/margins": 1.5507588386535645, "rewards/rejected": -4.252513408660889, "step": 4830 }, { "epoch": 0.95, "learning_rate": 3.778252808665284e-08, "logits/chosen": -1.4378811120986938, "logits/rejected": -1.2144570350646973, "logps/chosen": -524.7705078125, "logps/rejected": -648.2017822265625, "loss": 0.4667, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.9504950046539307, "rewards/margins": 1.5240049362182617, "rewards/rejected": -4.474499702453613, "step": 4840 }, { "epoch": 0.95, "learning_rate": 3.4872889008767954e-08, "logits/chosen": -1.5345133543014526, "logits/rejected": -1.3139573335647583, "logps/chosen": -548.332275390625, "logps/rejected": -588.4432373046875, "loss": 0.5319, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.756354808807373, "rewards/margins": 0.9404181241989136, "rewards/rejected": -3.696773052215576, "step": 4850 }, { "epoch": 0.95, "learning_rate": 3.207903382331262e-08, "logits/chosen": -1.3094028234481812, "logits/rejected": -0.9478843808174133, "logps/chosen": -526.9425659179688, "logps/rejected": -672.6736450195312, "loss": 0.4164, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.8847177028656006, "rewards/margins": 1.873437523841858, "rewards/rejected": -4.758155345916748, "step": 4860 }, { "epoch": 0.96, "learning_rate": 2.940109375411976e-08, "logits/chosen": -1.6699939966201782, "logits/rejected": -1.1226723194122314, "logps/chosen": -588.523193359375, "logps/rejected": -711.6766357421875, "loss": 0.5051, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.0758986473083496, "rewards/margins": 1.7069766521453857, "rewards/rejected": -4.782876014709473, "step": 4870 }, { "epoch": 0.96, "learning_rate": 2.683919458063705e-08, "logits/chosen": -1.4859200716018677, "logits/rejected": -1.0420989990234375, "logps/chosen": -515.8519897460938, "logps/rejected": -623.1054077148438, "loss": 0.4612, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.6491692066192627, "rewards/margins": 1.424944519996643, "rewards/rejected": -4.074113368988037, "step": 4880 }, { "epoch": 0.96, "learning_rate": 2.4393456632016977e-08, "logits/chosen": -1.6544864177703857, "logits/rejected": -1.0563756227493286, "logps/chosen": -636.07568359375, "logps/rejected": -661.3756103515625, "loss": 0.4991, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -3.177685260772705, "rewards/margins": 1.1920883655548096, "rewards/rejected": -4.369773864746094, "step": 4890 }, { "epoch": 0.96, "learning_rate": 2.2063994781468256e-08, "logits/chosen": -1.73834228515625, "logits/rejected": -1.2774205207824707, "logps/chosen": -620.8421020507812, "logps/rejected": -687.5242919921875, "loss": 0.5126, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.0707154273986816, "rewards/margins": 1.0745410919189453, "rewards/rejected": -4.145256519317627, "step": 4900 }, { "epoch": 0.96, "eval_logits/chosen": 0.7748141288757324, "eval_logits/rejected": 1.3439706563949585, "eval_logps/chosen": -549.604248046875, "eval_logps/rejected": -626.5125732421875, "eval_loss": 0.4879511594772339, "eval_rewards/accuracies": 0.7276785969734192, "eval_rewards/chosen": -2.863635540008545, "eval_rewards/margins": 1.0703070163726807, "eval_rewards/rejected": -3.9339427947998047, "eval_runtime": 474.6902, "eval_samples_per_second": 4.213, "eval_steps_per_second": 0.177, "step": 4900 }, { "epoch": 0.96, "learning_rate": 1.985091844085796e-08, "logits/chosen": -1.444320559501648, "logits/rejected": -1.05115807056427, "logps/chosen": -531.5235595703125, "logps/rejected": -714.3778076171875, "loss": 0.4811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.1124281883239746, "rewards/margins": 1.223809003829956, "rewards/rejected": -4.33623743057251, "step": 4910 }, { "epoch": 0.97, "learning_rate": 1.7754331555573656e-08, "logits/chosen": -1.3569526672363281, "logits/rejected": -1.4361093044281006, "logps/chosen": -560.3422241210938, "logps/rejected": -637.2498168945312, "loss": 0.4647, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.958449602127075, "rewards/margins": 1.0478699207305908, "rewards/rejected": -4.006319046020508, "step": 4920 }, { "epoch": 0.97, "learning_rate": 1.5774332599641228e-08, "logits/chosen": -1.3948105573654175, "logits/rejected": -1.3578966856002808, "logps/chosen": -559.6775512695312, "logps/rejected": -650.9224243164062, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": -3.05662202835083, "rewards/margins": 0.9975637197494507, "rewards/rejected": -4.0541863441467285, "step": 4930 }, { "epoch": 0.97, "learning_rate": 1.3911014571098835e-08, "logits/chosen": -1.3849412202835083, "logits/rejected": -1.3447059392929077, "logps/chosen": -466.18218994140625, "logps/rejected": -612.2434692382812, "loss": 0.5102, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.5744361877441406, "rewards/margins": 1.5182634592056274, "rewards/rejected": -4.0926995277404785, "step": 4940 }, { "epoch": 0.97, "learning_rate": 1.2164464987630131e-08, "logits/chosen": -1.3624558448791504, "logits/rejected": -1.4019831418991089, "logps/chosen": -501.2599182128906, "logps/rejected": -611.615966796875, "loss": 0.4275, "rewards/accuracies": 0.75, "rewards/chosen": -2.9041037559509277, "rewards/margins": 0.9574500918388367, "rewards/rejected": -3.86155366897583, "step": 4950 }, { "epoch": 0.97, "learning_rate": 1.0534765882453113e-08, "logits/chosen": -1.3500089645385742, "logits/rejected": -1.2703732252120972, "logps/chosen": -562.4613037109375, "logps/rejected": -663.7322387695312, "loss": 0.4457, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.9094297885894775, "rewards/margins": 1.388083577156067, "rewards/rejected": -4.297513008117676, "step": 4960 }, { "epoch": 0.98, "learning_rate": 9.021993800466256e-09, "logits/chosen": -1.2833272218704224, "logits/rejected": -1.3957890272140503, "logps/chosen": -558.2420654296875, "logps/rejected": -692.7595825195312, "loss": 0.5062, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.962353467941284, "rewards/margins": 1.226319670677185, "rewards/rejected": -4.188673496246338, "step": 4970 }, { "epoch": 0.98, "learning_rate": 7.626219794655553e-09, "logits/chosen": -1.369972825050354, "logits/rejected": -1.2614896297454834, "logps/chosen": -567.4708251953125, "logps/rejected": -585.6512451171875, "loss": 0.5181, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.9253811836242676, "rewards/margins": 0.8255603909492493, "rewards/rejected": -3.750941514968872, "step": 4980 }, { "epoch": 0.98, "learning_rate": 6.347509422754139e-09, "logits/chosen": -1.561457633972168, "logits/rejected": -1.3234002590179443, "logps/chosen": -571.8287353515625, "logps/rejected": -671.9757690429688, "loss": 0.5035, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.9173166751861572, "rewards/margins": 1.3114066123962402, "rewards/rejected": -4.228723049163818, "step": 4990 }, { "epoch": 0.98, "learning_rate": 5.185922744166128e-09, "logits/chosen": -1.435817003250122, "logits/rejected": -1.3162662982940674, "logps/chosen": -520.7555541992188, "logps/rejected": -686.9046630859375, "loss": 0.3967, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.884249448776245, "rewards/margins": 1.8164691925048828, "rewards/rejected": -4.700718879699707, "step": 5000 }, { "epoch": 0.98, "eval_logits/chosen": 0.7736020088195801, "eval_logits/rejected": 1.342397928237915, "eval_logps/chosen": -549.6658325195312, "eval_logps/rejected": -626.5614013671875, "eval_loss": 0.48798322677612305, "eval_rewards/accuracies": 0.726190447807312, "eval_rewards/chosen": -2.864250898361206, "eval_rewards/margins": 1.0701801776885986, "eval_rewards/rejected": -3.9344310760498047, "eval_runtime": 476.236, "eval_samples_per_second": 4.2, "eval_steps_per_second": 0.176, "step": 5000 }, { "epoch": 0.98, "learning_rate": 4.1415143171436026e-09, "logits/chosen": -1.745918869972229, "logits/rejected": -1.5587217807769775, "logps/chosen": -586.3348388671875, "logps/rejected": -672.9236450195312, "loss": 0.4885, "rewards/accuracies": 0.75, "rewards/chosen": -2.848877429962158, "rewards/margins": 1.149807333946228, "rewards/rejected": -3.998685121536255, "step": 5010 }, { "epoch": 0.99, "learning_rate": 3.2143331962256053e-09, "logits/chosen": -1.6162992715835571, "logits/rejected": -1.0427744388580322, "logps/chosen": -599.6419677734375, "logps/rejected": -640.3694458007812, "loss": 0.3889, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6251840591430664, "rewards/margins": 1.4099795818328857, "rewards/rejected": -4.035163879394531, "step": 5020 }, { "epoch": 0.99, "learning_rate": 2.404422929932204e-09, "logits/chosen": -1.2071707248687744, "logits/rejected": -0.8376060724258423, "logps/chosen": -549.0296630859375, "logps/rejected": -699.3610229492188, "loss": 0.5418, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.2370543479919434, "rewards/margins": 1.4369432926177979, "rewards/rejected": -4.673997402191162, "step": 5030 }, { "epoch": 0.99, "learning_rate": 1.711821558721405e-09, "logits/chosen": -1.4277589321136475, "logits/rejected": -1.4227768182754517, "logps/chosen": -532.2887573242188, "logps/rejected": -689.0106201171875, "loss": 0.5386, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.7164626121520996, "rewards/margins": 1.2939804792404175, "rewards/rejected": -4.010442733764648, "step": 5040 }, { "epoch": 0.99, "learning_rate": 1.1365616132008595e-09, "logits/chosen": -1.5623667240142822, "logits/rejected": -1.4521827697753906, "logps/chosen": -545.3251953125, "logps/rejected": -688.9048461914062, "loss": 0.5253, "rewards/accuracies": 0.75, "rewards/chosen": -3.058255672454834, "rewards/margins": 1.6055889129638672, "rewards/rejected": -4.663844585418701, "step": 5050 }, { "epoch": 0.99, "learning_rate": 6.786701125999218e-10, "logits/chosen": -1.2991487979888916, "logits/rejected": -1.0468859672546387, "logps/chosen": -624.85302734375, "logps/rejected": -665.943115234375, "loss": 0.4871, "rewards/accuracies": 0.75, "rewards/chosen": -3.1303889751434326, "rewards/margins": 0.9218120574951172, "rewards/rejected": -4.052201271057129, "step": 5060 }, { "epoch": 1.0, "learning_rate": 3.3816856350177284e-10, "logits/chosen": -1.112547755241394, "logits/rejected": -1.2372267246246338, "logps/chosen": -536.0859985351562, "logps/rejected": -682.0577392578125, "loss": 0.526, "rewards/accuracies": 0.75, "rewards/chosen": -3.1959328651428223, "rewards/margins": 1.218519926071167, "rewards/rejected": -4.41445255279541, "step": 5070 }, { "epoch": 1.0, "learning_rate": 1.1507295883145253e-10, "logits/chosen": -1.5622161626815796, "logits/rejected": -1.194392204284668, "logps/chosen": -590.2625122070312, "logps/rejected": -675.1867065429688, "loss": 0.4111, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.098064422607422, "rewards/margins": 0.7926515340805054, "rewards/rejected": -3.8907153606414795, "step": 5080 }, { "epoch": 1.0, "learning_rate": 9.393777107291614e-12, "logits/chosen": -1.6349897384643555, "logits/rejected": -1.5304569005966187, "logps/chosen": -610.5177612304688, "logps/rejected": -668.1381225585938, "loss": 0.536, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.0565178394317627, "rewards/margins": 0.9504305124282837, "rewards/rejected": -4.006947994232178, "step": 5090 }, { "epoch": 1.0, "step": 5094, "total_flos": 0.0, "train_loss": 0.5211080308048501, "train_runtime": 54683.4674, "train_samples_per_second": 1.118, "train_steps_per_second": 0.093 } ], "logging_steps": 10, "max_steps": 5094, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }