{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9994666666666666, "eval_steps": 100, "global_step": 937, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.319148936170213e-08, "logits/chosen": -0.5045956373214722, "logits/rejected": -0.805889368057251, "logps/chosen": -165.41160583496094, "logps/rejected": -172.8127899169922, "loss": 0.0848, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 5.319148936170213e-07, "logits/chosen": -0.851473867893219, "logits/rejected": -0.8214991092681885, "logps/chosen": -258.1239013671875, "logps/rejected": -255.48716735839844, "loss": 0.0877, "rewards/accuracies": 0.2986111044883728, "rewards/chosen": 0.0002587677154224366, "rewards/margins": 0.00023072944895830005, "rewards/rejected": 2.803823554131668e-05, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.0638297872340427e-06, "logits/chosen": -0.8987849354743958, "logits/rejected": -0.7349363565444946, "logps/chosen": -260.9398193359375, "logps/rejected": -253.32925415039062, "loss": 0.0893, "rewards/accuracies": 0.29374998807907104, "rewards/chosen": -0.000219681765884161, "rewards/margins": -3.2768032269814285e-06, "rewards/rejected": -0.00021640490740537643, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.595744680851064e-06, "logits/chosen": -0.9162956476211548, "logits/rejected": -0.7800331115722656, "logps/chosen": -240.79800415039062, "logps/rejected": -235.59182739257812, "loss": 0.0783, "rewards/accuracies": 0.2874999940395355, "rewards/chosen": -0.00024258208577521145, "rewards/margins": 0.00012204260565340519, "rewards/rejected": -0.0003646246623247862, "step": 30 }, { "epoch": 0.04, "learning_rate": 2.1276595744680853e-06, "logits/chosen": -0.8354488611221313, "logits/rejected": -0.8405616879463196, "logps/chosen": -255.01931762695312, "logps/rejected": -224.09188842773438, "loss": 0.0749, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": 0.0001305304904235527, "rewards/margins": 0.0002960737328976393, "rewards/rejected": -0.0001655431988183409, "step": 40 }, { "epoch": 0.05, "learning_rate": 2.6595744680851065e-06, "logits/chosen": -0.9115155935287476, "logits/rejected": -0.7566107511520386, "logps/chosen": -295.87884521484375, "logps/rejected": -261.06951904296875, "loss": 0.0713, "rewards/accuracies": 0.2874999940395355, "rewards/chosen": -0.0011757513275370002, "rewards/margins": -0.0006419935962185264, "rewards/rejected": -0.0005337577313184738, "step": 50 }, { "epoch": 0.06, "learning_rate": 3.191489361702128e-06, "logits/chosen": -0.9281567335128784, "logits/rejected": -0.8129026293754578, "logps/chosen": -261.63751220703125, "logps/rejected": -261.89483642578125, "loss": 0.0779, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -0.000994718400761485, "rewards/margins": 0.00043715062201954424, "rewards/rejected": -0.0014318691100925207, "step": 60 }, { "epoch": 0.07, "learning_rate": 3.723404255319149e-06, "logits/chosen": -0.8597652316093445, "logits/rejected": -0.8151811361312866, "logps/chosen": -271.51458740234375, "logps/rejected": -241.4061279296875, "loss": 0.0898, "rewards/accuracies": 0.3062500059604645, "rewards/chosen": -0.0014379310887306929, "rewards/margins": 0.0002869053860194981, "rewards/rejected": -0.0017248367657884955, "step": 70 }, { "epoch": 0.09, "learning_rate": 4.255319148936171e-06, "logits/chosen": -0.8520501255989075, "logits/rejected": -0.811953067779541, "logps/chosen": -311.61431884765625, "logps/rejected": -305.77520751953125, "loss": 0.0795, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.0034166008699685335, "rewards/margins": 0.00010135892080143094, "rewards/rejected": -0.003517959965392947, "step": 80 }, { "epoch": 0.1, "learning_rate": 4.787234042553192e-06, "logits/chosen": -0.9568193554878235, "logits/rejected": -0.8735030293464661, "logps/chosen": -277.09405517578125, "logps/rejected": -237.3052978515625, "loss": 0.0831, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.0037826255429536104, "rewards/margins": 0.0007604987476952374, "rewards/rejected": -0.0045431237667799, "step": 90 }, { "epoch": 0.11, "learning_rate": 4.999375059004058e-06, "logits/chosen": -0.8838983774185181, "logits/rejected": -0.8245723843574524, "logps/chosen": -274.2312927246094, "logps/rejected": -233.004638671875, "loss": 0.0982, "rewards/accuracies": 0.375, "rewards/chosen": -0.004330983851104975, "rewards/margins": 0.0017646064516156912, "rewards/rejected": -0.00609559053555131, "step": 100 }, { "epoch": 0.11, "eval_logits/chosen": -0.8696709871292114, "eval_logits/rejected": -0.7816442847251892, "eval_logps/chosen": -404.4459228515625, "eval_logps/rejected": -377.37725830078125, "eval_loss": 0.05261076241731644, "eval_rewards/accuracies": 0.5189999938011169, "eval_rewards/chosen": -0.008140643127262592, "eval_rewards/margins": 0.0020011626183986664, "eval_rewards/rejected": -0.010141806676983833, "eval_runtime": 545.9504, "eval_samples_per_second": 3.663, "eval_steps_per_second": 0.916, "step": 100 }, { "epoch": 0.12, "learning_rate": 4.9955571065548795e-06, "logits/chosen": -0.9740250706672668, "logits/rejected": -0.8206865191459656, "logps/chosen": -308.79986572265625, "logps/rejected": -279.56817626953125, "loss": 0.0729, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.006666774861514568, "rewards/margins": 0.001093443250283599, "rewards/rejected": -0.0077602192759513855, "step": 110 }, { "epoch": 0.13, "learning_rate": 4.9882736864879e-06, "logits/chosen": -0.9458662271499634, "logits/rejected": -0.8622045516967773, "logps/chosen": -269.64190673828125, "logps/rejected": -255.1685028076172, "loss": 0.0678, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.006551130209118128, "rewards/margins": 0.0029696193523705006, "rewards/rejected": -0.009520749561488628, "step": 120 }, { "epoch": 0.14, "learning_rate": 4.977534912960124e-06, "logits/chosen": -1.0262787342071533, "logits/rejected": -0.9416742324829102, "logps/chosen": -228.7926788330078, "logps/rejected": -229.67898559570312, "loss": 0.0929, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -0.008367964997887611, "rewards/margins": 0.0024298636708408594, "rewards/rejected": -0.010797828435897827, "step": 130 }, { "epoch": 0.15, "learning_rate": 4.963355698422092e-06, "logits/chosen": -0.9834293127059937, "logits/rejected": -0.9608744382858276, "logps/chosen": -244.4986572265625, "logps/rejected": -238.37118530273438, "loss": 0.092, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.010878035798668861, "rewards/margins": 0.003188747214153409, "rewards/rejected": -0.014066783711314201, "step": 140 }, { "epoch": 0.16, "learning_rate": 4.945755732909625e-06, "logits/chosen": -1.0271694660186768, "logits/rejected": -0.8658772706985474, "logps/chosen": -303.09539794921875, "logps/rejected": -265.5880126953125, "loss": 0.0767, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.013803419657051563, "rewards/margins": 0.005266121588647366, "rewards/rejected": -0.01906954124569893, "step": 150 }, { "epoch": 0.17, "learning_rate": 4.924759456701167e-06, "logits/chosen": -1.1086270809173584, "logits/rejected": -1.041982650756836, "logps/chosen": -317.58245849609375, "logps/rejected": -280.8768310546875, "loss": 0.0902, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.021537428721785545, "rewards/margins": 0.0052458057180047035, "rewards/rejected": -0.026783233508467674, "step": 160 }, { "epoch": 0.18, "learning_rate": 4.900396026378671e-06, "logits/chosen": -1.1098581552505493, "logits/rejected": -0.9803470373153687, "logps/chosen": -335.6375427246094, "logps/rejected": -316.95733642578125, "loss": 0.0536, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.026775449514389038, "rewards/margins": 0.008162637241184711, "rewards/rejected": -0.03493808954954147, "step": 170 }, { "epoch": 0.19, "learning_rate": 4.872699274339169e-06, "logits/chosen": -1.0897270441055298, "logits/rejected": -1.001300573348999, "logps/chosen": -296.06353759765625, "logps/rejected": -265.63751220703125, "loss": 0.0746, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.022861812263727188, "rewards/margins": 0.005839685909450054, "rewards/rejected": -0.02870149537920952, "step": 180 }, { "epoch": 0.2, "learning_rate": 4.8417076618132434e-06, "logits/chosen": -1.0900896787643433, "logits/rejected": -1.0583564043045044, "logps/chosen": -266.21160888671875, "logps/rejected": -245.45376586914062, "loss": 0.0942, "rewards/accuracies": 0.2874999940395355, "rewards/chosen": -0.018833670765161514, "rewards/margins": 0.00439481670036912, "rewards/rejected": -0.023228485137224197, "step": 190 }, { "epoch": 0.21, "learning_rate": 4.807464225455655e-06, "logits/chosen": -1.0679035186767578, "logits/rejected": -1.003225564956665, "logps/chosen": -345.30181884765625, "logps/rejected": -323.2543640136719, "loss": 0.0846, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.02550928294658661, "rewards/margins": 0.003808406414464116, "rewards/rejected": -0.029317688196897507, "step": 200 }, { "epoch": 0.21, "eval_logits/chosen": -1.0426863431930542, "eval_logits/rejected": -0.9791997671127319, "eval_logps/chosen": -422.8199462890625, "eval_logps/rejected": -407.46539306640625, "eval_loss": 0.048517368733882904, "eval_rewards/accuracies": 0.5529999732971191, "eval_rewards/chosen": -0.026514720171689987, "eval_rewards/margins": 0.013715260662138462, "eval_rewards/rejected": -0.040229979902505875, "eval_runtime": 545.8919, "eval_samples_per_second": 3.664, "eval_steps_per_second": 0.916, "step": 200 }, { "epoch": 0.22, "learning_rate": 4.770016517582283e-06, "logits/chosen": -1.0943100452423096, "logits/rejected": -1.0767104625701904, "logps/chosen": -325.3826904296875, "logps/rejected": -329.48663330078125, "loss": 0.0676, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.018910493701696396, "rewards/margins": 0.013479876331984997, "rewards/rejected": -0.032390374690294266, "step": 210 }, { "epoch": 0.23, "learning_rate": 4.7294165401363616e-06, "logits/chosen": -1.0735843181610107, "logits/rejected": -1.0250236988067627, "logps/chosen": -313.1111145019531, "logps/rejected": -292.7440490722656, "loss": 0.0745, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.018653307110071182, "rewards/margins": 0.01024434994906187, "rewards/rejected": -0.028897657990455627, "step": 220 }, { "epoch": 0.25, "learning_rate": 4.68572067247573e-06, "logits/chosen": -1.1854560375213623, "logits/rejected": -1.0616133213043213, "logps/chosen": -281.5065612792969, "logps/rejected": -277.0508728027344, "loss": 0.0699, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.02585085853934288, "rewards/margins": 0.01193526666611433, "rewards/rejected": -0.037786126136779785, "step": 230 }, { "epoch": 0.26, "learning_rate": 4.638989593081364e-06, "logits/chosen": -1.1488720178604126, "logits/rejected": -1.0367449522018433, "logps/chosen": -283.6094970703125, "logps/rejected": -252.0610809326172, "loss": 0.0894, "rewards/accuracies": 0.28125, "rewards/chosen": -0.020607244223356247, "rewards/margins": 0.006351941730827093, "rewards/rejected": -0.026959186419844627, "step": 240 }, { "epoch": 0.27, "learning_rate": 4.5892881952959015e-06, "logits/chosen": -1.1442514657974243, "logits/rejected": -1.1018245220184326, "logps/chosen": -303.63458251953125, "logps/rejected": -305.7111511230469, "loss": 0.0878, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.02438071370124817, "rewards/margins": 0.009189085103571415, "rewards/rejected": -0.03356979787349701, "step": 250 }, { "epoch": 0.28, "learning_rate": 4.536685497209182e-06, "logits/chosen": -1.134932041168213, "logits/rejected": -1.106890082359314, "logps/chosen": -300.16815185546875, "logps/rejected": -284.40765380859375, "loss": 0.0921, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.025658372789621353, "rewards/margins": 0.005949888378381729, "rewards/rejected": -0.03160826116800308, "step": 260 }, { "epoch": 0.29, "learning_rate": 4.481254545815943e-06, "logits/chosen": -1.1486608982086182, "logits/rejected": -1.0326900482177734, "logps/chosen": -294.9302673339844, "logps/rejected": -285.3538513183594, "loss": 0.0814, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.022086424753069878, "rewards/margins": 0.010328343138098717, "rewards/rejected": -0.03241477161645889, "step": 270 }, { "epoch": 0.3, "learning_rate": 4.42307231557875e-06, "logits/chosen": -1.121669888496399, "logits/rejected": -1.0683071613311768, "logps/chosen": -310.044189453125, "logps/rejected": -306.346435546875, "loss": 0.0916, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.02136383019387722, "rewards/margins": 0.012828357517719269, "rewards/rejected": -0.03419218957424164, "step": 280 }, { "epoch": 0.31, "learning_rate": 4.3622196015370305e-06, "logits/chosen": -1.1434075832366943, "logits/rejected": -1.1294233798980713, "logps/chosen": -259.6036682128906, "logps/rejected": -267.0892333984375, "loss": 0.0856, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -0.01935209520161152, "rewards/margins": 0.008529379032552242, "rewards/rejected": -0.027881473302841187, "step": 290 }, { "epoch": 0.32, "learning_rate": 4.298780907110648e-06, "logits/chosen": -1.214237928390503, "logits/rejected": -1.1189312934875488, "logps/chosen": -250.4637451171875, "logps/rejected": -255.03079223632812, "loss": 0.0859, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.014152693562209606, "rewards/margins": 0.011050628498196602, "rewards/rejected": -0.025203322991728783, "step": 300 }, { "epoch": 0.32, "eval_logits/chosen": -1.11543869972229, "eval_logits/rejected": -1.0612365007400513, "eval_logps/chosen": -422.0489807128906, "eval_logps/rejected": -413.28131103515625, "eval_loss": 0.046408262103796005, "eval_rewards/accuracies": 0.5724999904632568, "eval_rewards/chosen": -0.02574371173977852, "eval_rewards/margins": 0.02030220627784729, "eval_rewards/rejected": -0.04604591801762581, "eval_runtime": 546.0923, "eval_samples_per_second": 3.662, "eval_steps_per_second": 0.916, "step": 300 }, { "epoch": 0.33, "learning_rate": 4.23284432675381e-06, "logits/chosen": -1.1313543319702148, "logits/rejected": -1.0724719762802124, "logps/chosen": -262.6627502441406, "logps/rejected": -277.54632568359375, "loss": 0.0916, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.019768275320529938, "rewards/margins": 0.014759126119315624, "rewards/rejected": -0.03452740237116814, "step": 310 }, { "epoch": 0.34, "learning_rate": 4.164501423622277e-06, "logits/chosen": -1.246671199798584, "logits/rejected": -1.1658015251159668, "logps/chosen": -264.7757873535156, "logps/rejected": -277.6180114746094, "loss": 0.0774, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.020890336483716965, "rewards/margins": 0.011497320607304573, "rewards/rejected": -0.03238765895366669, "step": 320 }, { "epoch": 0.35, "learning_rate": 4.0938471024237355e-06, "logits/chosen": -1.222037672996521, "logits/rejected": -1.156553030014038, "logps/chosen": -256.74359130859375, "logps/rejected": -277.230224609375, "loss": 0.0805, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.015277216210961342, "rewards/margins": 0.02550993300974369, "rewards/rejected": -0.04078715294599533, "step": 330 }, { "epoch": 0.36, "learning_rate": 4.020979477627907e-06, "logits/chosen": -1.1742956638336182, "logits/rejected": -1.1521165370941162, "logps/chosen": -239.5863494873047, "logps/rejected": -247.32522583007812, "loss": 0.1015, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.01359327882528305, "rewards/margins": 0.015490619465708733, "rewards/rejected": -0.029083898290991783, "step": 340 }, { "epoch": 0.37, "learning_rate": 3.9459997372194105e-06, "logits/chosen": -1.1355948448181152, "logits/rejected": -1.0996363162994385, "logps/chosen": -268.62945556640625, "logps/rejected": -278.85870361328125, "loss": 0.0786, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.013594739139080048, "rewards/margins": 0.00844600610435009, "rewards/rejected": -0.02204074338078499, "step": 350 }, { "epoch": 0.38, "learning_rate": 3.869012002182573e-06, "logits/chosen": -1.2170326709747314, "logits/rejected": -1.1180956363677979, "logps/chosen": -265.72393798828125, "logps/rejected": -233.3331298828125, "loss": 0.0879, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.01117833610624075, "rewards/margins": 0.015456246212124825, "rewards/rejected": -0.02663458324968815, "step": 360 }, { "epoch": 0.39, "learning_rate": 3.7901231819133104e-06, "logits/chosen": -1.1366071701049805, "logits/rejected": -1.0916543006896973, "logps/chosen": -266.32037353515625, "logps/rejected": -267.02313232421875, "loss": 0.0726, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.01092919148504734, "rewards/margins": 0.01567809283733368, "rewards/rejected": -0.02660728432238102, "step": 370 }, { "epoch": 0.41, "learning_rate": 3.709442825758875e-06, "logits/chosen": -1.089163064956665, "logits/rejected": -1.0779684782028198, "logps/chosen": -259.6034240722656, "logps/rejected": -261.5791015625, "loss": 0.0859, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.008027950301766396, "rewards/margins": 0.02001611702144146, "rewards/rejected": -0.028044065460562706, "step": 380 }, { "epoch": 0.42, "learning_rate": 3.6270829708916113e-06, "logits/chosen": -1.0756354331970215, "logits/rejected": -1.0809965133666992, "logps/chosen": -317.52227783203125, "logps/rejected": -299.58990478515625, "loss": 0.066, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.012966620735824108, "rewards/margins": 0.015139798633754253, "rewards/rejected": -0.02810642123222351, "step": 390 }, { "epoch": 0.43, "learning_rate": 3.543157986727991e-06, "logits/chosen": -1.1576625108718872, "logits/rejected": -1.0739920139312744, "logps/chosen": -259.60516357421875, "logps/rejected": -274.40240478515625, "loss": 0.0957, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.006962036248296499, "rewards/margins": 0.03462858498096466, "rewards/rejected": -0.041590623557567596, "step": 400 }, { "epoch": 0.43, "eval_logits/chosen": -1.0983970165252686, "eval_logits/rejected": -1.0450440645217896, "eval_logps/chosen": -417.00225830078125, "eval_logps/rejected": -415.34869384765625, "eval_loss": 0.04426228255033493, "eval_rewards/accuracies": 0.578000009059906, "eval_rewards/chosen": -0.02069696970283985, "eval_rewards/margins": 0.027416307479143143, "eval_rewards/rejected": -0.04811327904462814, "eval_runtime": 545.9497, "eval_samples_per_second": 3.663, "eval_steps_per_second": 0.916, "step": 400 }, { "epoch": 0.44, "learning_rate": 3.4577844161089614e-06, "logits/chosen": -1.0938787460327148, "logits/rejected": -1.0761361122131348, "logps/chosen": -252.89974975585938, "logps/rejected": -279.4630126953125, "loss": 0.0703, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.017448369413614273, "rewards/margins": 0.02003558911383152, "rewards/rejected": -0.03748396039009094, "step": 410 }, { "epoch": 0.45, "learning_rate": 3.3710808134621577e-06, "logits/chosen": -1.1595834493637085, "logits/rejected": -1.126773476600647, "logps/chosen": -298.33538818359375, "logps/rejected": -295.24468994140625, "loss": 0.0549, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.016742903739213943, "rewards/margins": 0.022577572613954544, "rewards/rejected": -0.03932047635316849, "step": 420 }, { "epoch": 0.46, "learning_rate": 3.2831675801707126e-06, "logits/chosen": -1.1363260746002197, "logits/rejected": -1.1643562316894531, "logps/chosen": -259.0823669433594, "logps/rejected": -262.0459899902344, "loss": 0.08, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.01677551493048668, "rewards/margins": 0.012358926236629486, "rewards/rejected": -0.029134441167116165, "step": 430 }, { "epoch": 0.47, "learning_rate": 3.194166797377289e-06, "logits/chosen": -1.0814932584762573, "logits/rejected": -1.0023730993270874, "logps/chosen": -267.9959411621094, "logps/rejected": -241.27685546875, "loss": 0.086, "rewards/accuracies": 0.3125, "rewards/chosen": -0.014996061101555824, "rewards/margins": 0.004354935139417648, "rewards/rejected": -0.019350996240973473, "step": 440 }, { "epoch": 0.48, "learning_rate": 3.104202056455501e-06, "logits/chosen": -1.1119335889816284, "logits/rejected": -1.036833643913269, "logps/chosen": -264.6695251464844, "logps/rejected": -294.46173095703125, "loss": 0.0732, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.00541292130947113, "rewards/margins": 0.018471624702215195, "rewards/rejected": -0.023884546011686325, "step": 450 }, { "epoch": 0.49, "learning_rate": 3.013398287384144e-06, "logits/chosen": -1.100239872932434, "logits/rejected": -0.9833891987800598, "logps/chosen": -265.06292724609375, "logps/rejected": -251.05789184570312, "loss": 0.0769, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.00949514377862215, "rewards/margins": 0.011730840429663658, "rewards/rejected": -0.021225983276963234, "step": 460 }, { "epoch": 0.5, "learning_rate": 2.9218815852625717e-06, "logits/chosen": -1.0965584516525269, "logits/rejected": -1.1050448417663574, "logps/chosen": -245.2798309326172, "logps/rejected": -247.6891632080078, "loss": 0.0867, "rewards/accuracies": 0.375, "rewards/chosen": -0.00036931521026417613, "rewards/margins": 0.01472887396812439, "rewards/rejected": -0.015098191797733307, "step": 470 }, { "epoch": 0.51, "learning_rate": 2.829779035208113e-06, "logits/chosen": -1.1267783641815186, "logits/rejected": -1.0608749389648438, "logps/chosen": -276.13861083984375, "logps/rejected": -242.2423858642578, "loss": 0.0866, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.003509046044200659, "rewards/margins": 0.01866857148706913, "rewards/rejected": -0.015159524977207184, "step": 480 }, { "epoch": 0.52, "learning_rate": 2.737218535878705e-06, "logits/chosen": -1.1088229417800903, "logits/rejected": -1.0114442110061646, "logps/chosen": -264.47747802734375, "logps/rejected": -256.66265869140625, "loss": 0.0697, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.0013200236717239022, "rewards/margins": 0.02153395116329193, "rewards/rejected": -0.022853974252939224, "step": 490 }, { "epoch": 0.53, "learning_rate": 2.64432862186579e-06, "logits/chosen": -1.112363338470459, "logits/rejected": -1.0634427070617676, "logps/chosen": -263.20428466796875, "logps/rejected": -262.7369079589844, "loss": 0.068, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.0034271120093762875, "rewards/margins": 0.013932084664702415, "rewards/rejected": -0.01735919900238514, "step": 500 }, { "epoch": 0.53, "eval_logits/chosen": -1.0329285860061646, "eval_logits/rejected": -0.9790877103805542, "eval_logps/chosen": -402.9732360839844, "eval_logps/rejected": -399.08111572265625, "eval_loss": 0.04319905489683151, "eval_rewards/accuracies": 0.5954999923706055, "eval_rewards/chosen": -0.006668027024716139, "eval_rewards/margins": 0.025177694857120514, "eval_rewards/rejected": -0.03184572234749794, "eval_runtime": 545.9259, "eval_samples_per_second": 3.664, "eval_steps_per_second": 0.916, "step": 500 }, { "epoch": 0.54, "learning_rate": 2.551238285204126e-06, "logits/chosen": -1.0819242000579834, "logits/rejected": -1.0868195295333862, "logps/chosen": -204.0397186279297, "logps/rejected": -212.2799835205078, "loss": 0.0808, "rewards/accuracies": 0.375, "rewards/chosen": -0.003992350306361914, "rewards/margins": 0.010435246862471104, "rewards/rejected": -0.01442759484052658, "step": 510 }, { "epoch": 0.55, "learning_rate": 2.4580767962463688e-06, "logits/chosen": -1.1004348993301392, "logits/rejected": -1.0923566818237305, "logps/chosen": -287.0587463378906, "logps/rejected": -295.1195373535156, "loss": 0.0783, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.005773237906396389, "rewards/margins": 0.016825079917907715, "rewards/rejected": -0.02259831875562668, "step": 520 }, { "epoch": 0.57, "learning_rate": 2.3649735241511546e-06, "logits/chosen": -1.025914192199707, "logits/rejected": -1.0844639539718628, "logps/chosen": -233.93783569335938, "logps/rejected": -278.87310791015625, "loss": 0.0701, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0003628075937740505, "rewards/margins": 0.019406834617257118, "rewards/rejected": -0.019044026732444763, "step": 530 }, { "epoch": 0.58, "learning_rate": 2.2720577572339914e-06, "logits/chosen": -1.0895086526870728, "logits/rejected": -0.9644759297370911, "logps/chosen": -313.9569396972656, "logps/rejected": -267.2975158691406, "loss": 0.0828, "rewards/accuracies": 0.375, "rewards/chosen": -0.009684056974947453, "rewards/margins": 0.013868686743080616, "rewards/rejected": -0.02355274185538292, "step": 540 }, { "epoch": 0.59, "learning_rate": 2.1794585234303995e-06, "logits/chosen": -1.039159893989563, "logits/rejected": -1.0821508169174194, "logps/chosen": -257.9473571777344, "logps/rejected": -274.3190002441406, "loss": 0.0743, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.0062354551628232, "rewards/margins": 0.01928626373410225, "rewards/rejected": -0.025521719828248024, "step": 550 }, { "epoch": 0.6, "learning_rate": 2.0873044111206407e-06, "logits/chosen": -1.0668865442276, "logits/rejected": -1.0260220766067505, "logps/chosen": -271.9998474121094, "logps/rejected": -282.83038330078125, "loss": 0.0714, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.0038192705251276493, "rewards/margins": 0.019924623891711235, "rewards/rejected": -0.016105355694890022, "step": 560 }, { "epoch": 0.61, "learning_rate": 1.9957233905648293e-06, "logits/chosen": -1.054386019706726, "logits/rejected": -1.042152762413025, "logps/chosen": -284.9267272949219, "logps/rejected": -249.73110961914062, "loss": 0.0739, "rewards/accuracies": 0.40625, "rewards/chosen": 0.0018435310339555144, "rewards/margins": 0.013795648701488972, "rewards/rejected": -0.011952118948101997, "step": 570 }, { "epoch": 0.62, "learning_rate": 1.904842636196402e-06, "logits/chosen": -1.027489185333252, "logits/rejected": -1.0255085229873657, "logps/chosen": -250.985107421875, "logps/rejected": -246.9531707763672, "loss": 0.0877, "rewards/accuracies": 0.40625, "rewards/chosen": -0.00342792016454041, "rewards/margins": 0.010951442644000053, "rewards/rejected": -0.01437936257570982, "step": 580 }, { "epoch": 0.63, "learning_rate": 1.814788350020726e-06, "logits/chosen": -1.0244547128677368, "logits/rejected": -1.0658903121948242, "logps/chosen": -295.65191650390625, "logps/rejected": -290.0276184082031, "loss": 0.0697, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.00019127638370264322, "rewards/margins": 0.02073330618441105, "rewards/rejected": -0.020924581214785576, "step": 590 }, { "epoch": 0.64, "learning_rate": 1.725685586364051e-06, "logits/chosen": -1.0251004695892334, "logits/rejected": -1.0600395202636719, "logps/chosen": -244.59384155273438, "logps/rejected": -243.28256225585938, "loss": 0.0847, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.001342198345810175, "rewards/margins": 0.01429178100079298, "rewards/rejected": -0.015633979812264442, "step": 600 }, { "epoch": 0.64, "eval_logits/chosen": -1.0363810062408447, "eval_logits/rejected": -0.9837189316749573, "eval_logps/chosen": -401.28790283203125, "eval_logps/rejected": -398.47442626953125, "eval_loss": 0.042725615203380585, "eval_rewards/accuracies": 0.5945000052452087, "eval_rewards/chosen": -0.0049826642498373985, "eval_rewards/margins": 0.026256347075104713, "eval_rewards/rejected": -0.031239010393619537, "eval_runtime": 546.1434, "eval_samples_per_second": 3.662, "eval_steps_per_second": 0.916, "step": 600 }, { "epoch": 0.65, "learning_rate": 1.6376580782162172e-06, "logits/chosen": -1.03734290599823, "logits/rejected": -0.9538782238960266, "logps/chosen": -302.88079833984375, "logps/rejected": -275.7502136230469, "loss": 0.07, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.003554628463461995, "rewards/margins": 0.016584644094109535, "rewards/rejected": -0.020139271393418312, "step": 610 }, { "epoch": 0.66, "learning_rate": 1.550828065408227e-06, "logits/chosen": -1.0758119821548462, "logits/rejected": -0.9793124198913574, "logps/chosen": -256.09783935546875, "logps/rejected": -237.6195831298828, "loss": 0.0857, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.004253728315234184, "rewards/margins": 0.020320799201726913, "rewards/rejected": -0.024574527516961098, "step": 620 }, { "epoch": 0.67, "learning_rate": 1.4653161248633053e-06, "logits/chosen": -1.1112511157989502, "logits/rejected": -1.0120253562927246, "logps/chosen": -217.60305786132812, "logps/rejected": -254.78884887695312, "loss": 0.099, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.007809498347342014, "rewards/margins": 0.020094871520996094, "rewards/rejected": -0.027904370799660683, "step": 630 }, { "epoch": 0.68, "learning_rate": 1.381241003157162e-06, "logits/chosen": -1.1298284530639648, "logits/rejected": -1.0621023178100586, "logps/chosen": -286.02459716796875, "logps/rejected": -299.3070373535156, "loss": 0.0677, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.011842104606330395, "rewards/margins": 0.014377683401107788, "rewards/rejected": -0.026219788938760757, "step": 640 }, { "epoch": 0.69, "learning_rate": 1.298719451619979e-06, "logits/chosen": -1.0666725635528564, "logits/rejected": -1.0321277379989624, "logps/chosen": -300.1710205078125, "logps/rejected": -384.95751953125, "loss": 0.073, "rewards/accuracies": 0.5, "rewards/chosen": -0.0033969897776842117, "rewards/margins": 0.039332348853349686, "rewards/rejected": -0.04272934049367905, "step": 650 }, { "epoch": 0.7, "learning_rate": 1.2178660642091036e-06, "logits/chosen": -1.062753677368164, "logits/rejected": -1.034977674484253, "logps/chosen": -275.3869934082031, "logps/rejected": -275.52667236328125, "loss": 0.0798, "rewards/accuracies": 0.46875, "rewards/chosen": -0.0005760884960182011, "rewards/margins": 0.0214390866458416, "rewards/rejected": -0.022015176713466644, "step": 660 }, { "epoch": 0.71, "learning_rate": 1.1387931183775821e-06, "logits/chosen": -1.072608470916748, "logits/rejected": -1.0768264532089233, "logps/chosen": -300.6933288574219, "logps/rejected": -286.68951416015625, "loss": 0.0876, "rewards/accuracies": 0.40625, "rewards/chosen": -0.005371665116399527, "rewards/margins": 0.01708284579217434, "rewards/rejected": -0.022454511374235153, "step": 670 }, { "epoch": 0.73, "learning_rate": 1.061610419159532e-06, "logits/chosen": -1.1137611865997314, "logits/rejected": -1.0089493989944458, "logps/chosen": -284.4278259277344, "logps/rejected": -263.4560546875, "loss": 0.0803, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0019712348002940416, "rewards/margins": 0.015994885936379433, "rewards/rejected": -0.017966121435165405, "step": 680 }, { "epoch": 0.74, "learning_rate": 9.864251466888364e-07, "logits/chosen": -1.0998120307922363, "logits/rejected": -1.0427272319793701, "logps/chosen": -273.5634460449219, "logps/rejected": -273.6974792480469, "loss": 0.0779, "rewards/accuracies": 0.40625, "rewards/chosen": -0.00520918658003211, "rewards/margins": 0.018212206661701202, "rewards/rejected": -0.02342139557003975, "step": 690 }, { "epoch": 0.75, "learning_rate": 9.133417073629288e-07, "logits/chosen": -1.0788428783416748, "logits/rejected": -1.0710010528564453, "logps/chosen": -295.2659912109375, "logps/rejected": -301.9012145996094, "loss": 0.0519, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.006222005933523178, "rewards/margins": 0.023821452632546425, "rewards/rejected": -0.030043456703424454, "step": 700 }, { "epoch": 0.75, "eval_logits/chosen": -1.0359783172607422, "eval_logits/rejected": -0.9871743321418762, "eval_logps/chosen": -404.5330505371094, "eval_logps/rejected": -404.9790954589844, "eval_loss": 0.04230288788676262, "eval_rewards/accuracies": 0.590499997138977, "eval_rewards/chosen": -0.008227824233472347, "eval_rewards/margins": 0.02951584756374359, "eval_rewards/rejected": -0.03774367272853851, "eval_runtime": 545.9714, "eval_samples_per_second": 3.663, "eval_steps_per_second": 0.916, "step": 700 }, { "epoch": 0.76, "learning_rate": 8.424615888583332e-07, "logits/chosen": -1.0752325057983398, "logits/rejected": -1.005172848701477, "logps/chosen": -263.06549072265625, "logps/rejected": -263.9055480957031, "loss": 0.0702, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.0012165942462161183, "rewards/margins": 0.0245666466653347, "rewards/rejected": -0.025783240795135498, "step": 710 }, { "epoch": 0.77, "learning_rate": 7.738832191993092e-07, "logits/chosen": -1.0686012506484985, "logits/rejected": -1.0680710077285767, "logps/chosen": -260.68853759765625, "logps/rejected": -299.7347717285156, "loss": 0.0669, "rewards/accuracies": 0.375, "rewards/chosen": -0.006182204931974411, "rewards/margins": 0.016528166830539703, "rewards/rejected": -0.022710371762514114, "step": 720 }, { "epoch": 0.78, "learning_rate": 7.077018300752917e-07, "logits/chosen": -1.1110173463821411, "logits/rejected": -1.0842745304107666, "logps/chosen": -270.8677673339844, "logps/rejected": -273.29339599609375, "loss": 0.07, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.0033080249559134245, "rewards/margins": 0.02419392392039299, "rewards/rejected": -0.027501946315169334, "step": 730 }, { "epoch": 0.79, "learning_rate": 6.440093245969342e-07, "logits/chosen": -1.1270530223846436, "logits/rejected": -1.0608434677124023, "logps/chosen": -296.01556396484375, "logps/rejected": -293.2162170410156, "loss": 0.0753, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.004587044008076191, "rewards/margins": 0.017461195588111877, "rewards/rejected": -0.022048238664865494, "step": 740 }, { "epoch": 0.8, "learning_rate": 5.828941496744075e-07, "logits/chosen": -1.1132383346557617, "logits/rejected": -1.0976136922836304, "logps/chosen": -258.3694763183594, "logps/rejected": -271.54095458984375, "loss": 0.0907, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.012257089838385582, "rewards/margins": 0.019415555521845818, "rewards/rejected": -0.0316726490855217, "step": 750 }, { "epoch": 0.81, "learning_rate": 5.244411731951671e-07, "logits/chosen": -1.1247339248657227, "logits/rejected": -1.0423662662506104, "logps/chosen": -287.46160888671875, "logps/rejected": -290.23736572265625, "loss": 0.0781, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.008325648494064808, "rewards/margins": 0.021062636747956276, "rewards/rejected": -0.02938828244805336, "step": 760 }, { "epoch": 0.82, "learning_rate": 4.6873156617173594e-07, "logits/chosen": -1.1069273948669434, "logits/rejected": -1.086753010749817, "logps/chosen": -298.63433837890625, "logps/rejected": -294.5317077636719, "loss": 0.0679, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.007812710478901863, "rewards/margins": 0.02419663593173027, "rewards/rejected": -0.032009344547986984, "step": 770 }, { "epoch": 0.83, "learning_rate": 4.1584269002318653e-07, "logits/chosen": -1.1254509687423706, "logits/rejected": -1.0976530313491821, "logps/chosen": -287.83282470703125, "logps/rejected": -289.3851623535156, "loss": 0.0738, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.009489515796303749, "rewards/margins": 0.02022114023566246, "rewards/rejected": -0.029710659757256508, "step": 780 }, { "epoch": 0.84, "learning_rate": 3.658479891468258e-07, "logits/chosen": -1.1135103702545166, "logits/rejected": -1.0031986236572266, "logps/chosen": -292.74835205078125, "logps/rejected": -297.3404846191406, "loss": 0.0784, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.003799052909016609, "rewards/margins": 0.016090305522084236, "rewards/rejected": -0.019889358431100845, "step": 790 }, { "epoch": 0.85, "learning_rate": 3.18816888929272e-07, "logits/chosen": -1.0912225246429443, "logits/rejected": -1.0440576076507568, "logps/chosen": -273.9382019042969, "logps/rejected": -279.7242126464844, "loss": 0.0742, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.00514855096116662, "rewards/margins": 0.019596170634031296, "rewards/rejected": -0.024744722992181778, "step": 800 }, { "epoch": 0.85, "eval_logits/chosen": -1.055617332458496, "eval_logits/rejected": -1.0109219551086426, "eval_logps/chosen": -406.80352783203125, "eval_logps/rejected": -409.2462158203125, "eval_loss": 0.04217638820409775, "eval_rewards/accuracies": 0.6000000238418579, "eval_rewards/chosen": -0.010498268529772758, "eval_rewards/margins": 0.0315125547349453, "eval_rewards/rejected": -0.042010821402072906, "eval_runtime": 546.1165, "eval_samples_per_second": 3.662, "eval_steps_per_second": 0.916, "step": 800 }, { "epoch": 0.86, "learning_rate": 2.748146993385484e-07, "logits/chosen": -1.118160367012024, "logits/rejected": -1.0629098415374756, "logps/chosen": -223.4706573486328, "logps/rejected": -247.4434356689453, "loss": 0.0963, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.0038195624947547913, "rewards/margins": 0.02616865560412407, "rewards/rejected": -0.029988214373588562, "step": 810 }, { "epoch": 0.87, "learning_rate": 2.3390252423108077e-07, "logits/chosen": -1.1013834476470947, "logits/rejected": -1.1045656204223633, "logps/chosen": -240.3532257080078, "logps/rejected": -258.44989013671875, "loss": 0.0812, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.002859788713976741, "rewards/margins": 0.029717862606048584, "rewards/rejected": -0.03257765248417854, "step": 820 }, { "epoch": 0.89, "learning_rate": 1.961371764995243e-07, "logits/chosen": -1.1316301822662354, "logits/rejected": -1.1215277910232544, "logps/chosen": -266.54931640625, "logps/rejected": -260.0931701660156, "loss": 0.0782, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.006319983396679163, "rewards/margins": 0.014086413197219372, "rewards/rejected": -0.020406395196914673, "step": 830 }, { "epoch": 0.9, "learning_rate": 1.61571099179261e-07, "logits/chosen": -1.1706424951553345, "logits/rejected": -1.0354385375976562, "logps/chosen": -278.2705993652344, "logps/rejected": -276.22967529296875, "loss": 0.0814, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.009933208115398884, "rewards/margins": 0.019498441368341446, "rewards/rejected": -0.029431650415062904, "step": 840 }, { "epoch": 0.91, "learning_rate": 1.3025229262312367e-07, "logits/chosen": -1.0535192489624023, "logits/rejected": -1.0249392986297607, "logps/chosen": -238.9296875, "logps/rejected": -260.4687805175781, "loss": 0.0732, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0005777518963441253, "rewards/margins": 0.021599723026156425, "rewards/rejected": -0.022177476435899734, "step": 850 }, { "epoch": 0.92, "learning_rate": 1.0222424784546853e-07, "logits/chosen": -1.1237046718597412, "logits/rejected": -1.1744358539581299, "logps/chosen": -290.61187744140625, "logps/rejected": -289.50775146484375, "loss": 0.0682, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.00292245764285326, "rewards/margins": 0.022101474925875664, "rewards/rejected": -0.0250239335000515, "step": 860 }, { "epoch": 0.93, "learning_rate": 7.752588612816553e-08, "logits/chosen": -1.0622873306274414, "logits/rejected": -1.047603964805603, "logps/chosen": -290.8896484375, "logps/rejected": -276.6893005371094, "loss": 0.0607, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.005569613538682461, "rewards/margins": 0.024782858788967133, "rewards/rejected": -0.030352476984262466, "step": 870 }, { "epoch": 0.94, "learning_rate": 5.619150497236991e-08, "logits/chosen": -1.1374906301498413, "logits/rejected": -1.079929232597351, "logps/chosen": -223.29623413085938, "logps/rejected": -222.3146209716797, "loss": 0.0661, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.0003207772097084671, "rewards/margins": 0.016658511012792587, "rewards/rejected": -0.016979288309812546, "step": 880 }, { "epoch": 0.95, "learning_rate": 3.825073047112743e-08, "logits/chosen": -1.0372432470321655, "logits/rejected": -1.0072650909423828, "logps/chosen": -297.5245056152344, "logps/rejected": -319.53076171875, "loss": 0.0776, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.007688297424465418, "rewards/margins": 0.022589299827814102, "rewards/rejected": -0.030277591198682785, "step": 890 }, { "epoch": 0.96, "learning_rate": 2.372847616895685e-08, "logits/chosen": -1.096644639968872, "logits/rejected": -1.0533974170684814, "logps/chosen": -268.70343017578125, "logps/rejected": -255.3015594482422, "loss": 0.0768, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.003904106793925166, "rewards/margins": 0.022419685497879982, "rewards/rejected": -0.02632378600537777, "step": 900 }, { "epoch": 0.96, "eval_logits/chosen": -1.0502386093139648, "eval_logits/rejected": -1.0050266981124878, "eval_logps/chosen": -406.3475341796875, "eval_logps/rejected": -408.7396545410156, "eval_loss": 0.04211420938372612, "eval_rewards/accuracies": 0.5929999947547913, "eval_rewards/chosen": -0.01004225667566061, "eval_rewards/margins": 0.031461965292692184, "eval_rewards/rejected": -0.04150421544909477, "eval_runtime": 545.8034, "eval_samples_per_second": 3.664, "eval_steps_per_second": 0.916, "step": 900 }, { "epoch": 0.97, "learning_rate": 1.264490846553279e-08, "logits/chosen": -1.1268881559371948, "logits/rejected": -1.0797778367996216, "logps/chosen": -271.73004150390625, "logps/rejected": -292.3177490234375, "loss": 0.0679, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.008194219321012497, "rewards/margins": 0.016915880143642426, "rewards/rejected": -0.025110099464654922, "step": 910 }, { "epoch": 0.98, "learning_rate": 5.015418611516165e-09, "logits/chosen": -1.13059401512146, "logits/rejected": -1.099302887916565, "logps/chosen": -252.9181365966797, "logps/rejected": -257.0358581542969, "loss": 0.0898, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.005635020788758993, "rewards/margins": 0.021289747208356857, "rewards/rejected": -0.026924768462777138, "step": 920 }, { "epoch": 0.99, "learning_rate": 8.506013354186993e-10, "logits/chosen": -1.0815865993499756, "logits/rejected": -1.0594186782836914, "logps/chosen": -270.22955322265625, "logps/rejected": -268.7232360839844, "loss": 0.0661, "rewards/accuracies": 0.40625, "rewards/chosen": 0.0016765497857704759, "rewards/margins": 0.02203894779086113, "rewards/rejected": -0.020362399518489838, "step": 930 }, { "epoch": 1.0, "step": 937, "total_flos": 0.0, "train_loss": 0.07849642387894454, "train_runtime": 13138.6455, "train_samples_per_second": 1.142, "train_steps_per_second": 0.071 } ], "logging_steps": 10, "max_steps": 937, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }