|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 100, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": -0.5045956373214722, |
|
"logits/rejected": -0.805889368057251, |
|
"logps/chosen": -165.41160583496094, |
|
"logps/rejected": -172.8127899169922, |
|
"loss": 0.0848, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": -0.851473867893219, |
|
"logits/rejected": -0.8214991092681885, |
|
"logps/chosen": -258.1239013671875, |
|
"logps/rejected": -255.48716735839844, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.2986111044883728, |
|
"rewards/chosen": 0.0002587677154224366, |
|
"rewards/margins": 0.00023072944895830005, |
|
"rewards/rejected": 2.803823554131668e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": -0.8987849354743958, |
|
"logits/rejected": -0.7349363565444946, |
|
"logps/chosen": -260.9398193359375, |
|
"logps/rejected": -253.32925415039062, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.000219681765884161, |
|
"rewards/margins": -3.2768032269814285e-06, |
|
"rewards/rejected": -0.00021640490740537643, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": -0.9162956476211548, |
|
"logits/rejected": -0.7800331115722656, |
|
"logps/chosen": -240.79800415039062, |
|
"logps/rejected": -235.59182739257812, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.00024258208577521145, |
|
"rewards/margins": 0.00012204260565340519, |
|
"rewards/rejected": -0.0003646246623247862, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": -0.8354488611221313, |
|
"logits/rejected": -0.8405616879463196, |
|
"logps/chosen": -255.01931762695312, |
|
"logps/rejected": -224.09188842773438, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": 0.0001305304904235527, |
|
"rewards/margins": 0.0002960737328976393, |
|
"rewards/rejected": -0.0001655431988183409, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": -0.9115155935287476, |
|
"logits/rejected": -0.7566107511520386, |
|
"logps/chosen": -295.87884521484375, |
|
"logps/rejected": -261.06951904296875, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.0011757513275370002, |
|
"rewards/margins": -0.0006419935962185264, |
|
"rewards/rejected": -0.0005337577313184738, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": -0.9281567335128784, |
|
"logits/rejected": -0.8129026293754578, |
|
"logps/chosen": -261.63751220703125, |
|
"logps/rejected": -261.89483642578125, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.000994718400761485, |
|
"rewards/margins": 0.00043715062201954424, |
|
"rewards/rejected": -0.0014318691100925207, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": -0.8597652316093445, |
|
"logits/rejected": -0.8151811361312866, |
|
"logps/chosen": -271.51458740234375, |
|
"logps/rejected": -241.4061279296875, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.0014379310887306929, |
|
"rewards/margins": 0.0002869053860194981, |
|
"rewards/rejected": -0.0017248367657884955, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": -0.8520501255989075, |
|
"logits/rejected": -0.811953067779541, |
|
"logps/chosen": -311.61431884765625, |
|
"logps/rejected": -305.77520751953125, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.0034166008699685335, |
|
"rewards/margins": 0.00010135892080143094, |
|
"rewards/rejected": -0.003517959965392947, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": -0.9568193554878235, |
|
"logits/rejected": -0.8735030293464661, |
|
"logps/chosen": -277.09405517578125, |
|
"logps/rejected": -237.3052978515625, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0037826255429536104, |
|
"rewards/margins": 0.0007604987476952374, |
|
"rewards/rejected": -0.0045431237667799, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": -0.8838983774185181, |
|
"logits/rejected": -0.8245723843574524, |
|
"logps/chosen": -274.2312927246094, |
|
"logps/rejected": -233.004638671875, |
|
"loss": 0.0982, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.004330983851104975, |
|
"rewards/margins": 0.0017646064516156912, |
|
"rewards/rejected": -0.00609559053555131, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_logits/chosen": -0.8696709871292114, |
|
"eval_logits/rejected": -0.7816442847251892, |
|
"eval_logps/chosen": -404.4459228515625, |
|
"eval_logps/rejected": -377.37725830078125, |
|
"eval_loss": 0.05261076241731644, |
|
"eval_rewards/accuracies": 0.5189999938011169, |
|
"eval_rewards/chosen": -0.008140643127262592, |
|
"eval_rewards/margins": 0.0020011626183986664, |
|
"eval_rewards/rejected": -0.010141806676983833, |
|
"eval_runtime": 545.9504, |
|
"eval_samples_per_second": 3.663, |
|
"eval_steps_per_second": 0.916, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": -0.9740250706672668, |
|
"logits/rejected": -0.8206865191459656, |
|
"logps/chosen": -308.79986572265625, |
|
"logps/rejected": -279.56817626953125, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.006666774861514568, |
|
"rewards/margins": 0.001093443250283599, |
|
"rewards/rejected": -0.0077602192759513855, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": -0.9458662271499634, |
|
"logits/rejected": -0.8622045516967773, |
|
"logps/chosen": -269.64190673828125, |
|
"logps/rejected": -255.1685028076172, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.006551130209118128, |
|
"rewards/margins": 0.0029696193523705006, |
|
"rewards/rejected": -0.009520749561488628, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": -1.0262787342071533, |
|
"logits/rejected": -0.9416742324829102, |
|
"logps/chosen": -228.7926788330078, |
|
"logps/rejected": -229.67898559570312, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.008367964997887611, |
|
"rewards/margins": 0.0024298636708408594, |
|
"rewards/rejected": -0.010797828435897827, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": -0.9834293127059937, |
|
"logits/rejected": -0.9608744382858276, |
|
"logps/chosen": -244.4986572265625, |
|
"logps/rejected": -238.37118530273438, |
|
"loss": 0.092, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.010878035798668861, |
|
"rewards/margins": 0.003188747214153409, |
|
"rewards/rejected": -0.014066783711314201, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": -1.0271694660186768, |
|
"logits/rejected": -0.8658772706985474, |
|
"logps/chosen": -303.09539794921875, |
|
"logps/rejected": -265.5880126953125, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.013803419657051563, |
|
"rewards/margins": 0.005266121588647366, |
|
"rewards/rejected": -0.01906954124569893, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": -1.1086270809173584, |
|
"logits/rejected": -1.041982650756836, |
|
"logps/chosen": -317.58245849609375, |
|
"logps/rejected": -280.8768310546875, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.021537428721785545, |
|
"rewards/margins": 0.0052458057180047035, |
|
"rewards/rejected": -0.026783233508467674, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": -1.1098581552505493, |
|
"logits/rejected": -0.9803470373153687, |
|
"logps/chosen": -335.6375427246094, |
|
"logps/rejected": -316.95733642578125, |
|
"loss": 0.0536, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.026775449514389038, |
|
"rewards/margins": 0.008162637241184711, |
|
"rewards/rejected": -0.03493808954954147, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": -1.0897270441055298, |
|
"logits/rejected": -1.001300573348999, |
|
"logps/chosen": -296.06353759765625, |
|
"logps/rejected": -265.63751220703125, |
|
"loss": 0.0746, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.022861812263727188, |
|
"rewards/margins": 0.005839685909450054, |
|
"rewards/rejected": -0.02870149537920952, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": -1.0900896787643433, |
|
"logits/rejected": -1.0583564043045044, |
|
"logps/chosen": -266.21160888671875, |
|
"logps/rejected": -245.45376586914062, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.018833670765161514, |
|
"rewards/margins": 0.00439481670036912, |
|
"rewards/rejected": -0.023228485137224197, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -1.0679035186767578, |
|
"logits/rejected": -1.003225564956665, |
|
"logps/chosen": -345.30181884765625, |
|
"logps/rejected": -323.2543640136719, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.02550928294658661, |
|
"rewards/margins": 0.003808406414464116, |
|
"rewards/rejected": -0.029317688196897507, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.0426863431930542, |
|
"eval_logits/rejected": -0.9791997671127319, |
|
"eval_logps/chosen": -422.8199462890625, |
|
"eval_logps/rejected": -407.46539306640625, |
|
"eval_loss": 0.048517368733882904, |
|
"eval_rewards/accuracies": 0.5529999732971191, |
|
"eval_rewards/chosen": -0.026514720171689987, |
|
"eval_rewards/margins": 0.013715260662138462, |
|
"eval_rewards/rejected": -0.040229979902505875, |
|
"eval_runtime": 545.8919, |
|
"eval_samples_per_second": 3.664, |
|
"eval_steps_per_second": 0.916, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": -1.0943100452423096, |
|
"logits/rejected": -1.0767104625701904, |
|
"logps/chosen": -325.3826904296875, |
|
"logps/rejected": -329.48663330078125, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.018910493701696396, |
|
"rewards/margins": 0.013479876331984997, |
|
"rewards/rejected": -0.032390374690294266, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": -1.0735843181610107, |
|
"logits/rejected": -1.0250236988067627, |
|
"logps/chosen": -313.1111145019531, |
|
"logps/rejected": -292.7440490722656, |
|
"loss": 0.0745, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.018653307110071182, |
|
"rewards/margins": 0.01024434994906187, |
|
"rewards/rejected": -0.028897657990455627, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -1.1854560375213623, |
|
"logits/rejected": -1.0616133213043213, |
|
"logps/chosen": -281.5065612792969, |
|
"logps/rejected": -277.0508728027344, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.02585085853934288, |
|
"rewards/margins": 0.01193526666611433, |
|
"rewards/rejected": -0.037786126136779785, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -1.1488720178604126, |
|
"logits/rejected": -1.0367449522018433, |
|
"logps/chosen": -283.6094970703125, |
|
"logps/rejected": -252.0610809326172, |
|
"loss": 0.0894, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.020607244223356247, |
|
"rewards/margins": 0.006351941730827093, |
|
"rewards/rejected": -0.026959186419844627, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -1.1442514657974243, |
|
"logits/rejected": -1.1018245220184326, |
|
"logps/chosen": -303.63458251953125, |
|
"logps/rejected": -305.7111511230469, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.02438071370124817, |
|
"rewards/margins": 0.009189085103571415, |
|
"rewards/rejected": -0.03356979787349701, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -1.134932041168213, |
|
"logits/rejected": -1.106890082359314, |
|
"logps/chosen": -300.16815185546875, |
|
"logps/rejected": -284.40765380859375, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.025658372789621353, |
|
"rewards/margins": 0.005949888378381729, |
|
"rewards/rejected": -0.03160826116800308, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -1.1486608982086182, |
|
"logits/rejected": -1.0326900482177734, |
|
"logps/chosen": -294.9302673339844, |
|
"logps/rejected": -285.3538513183594, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.022086424753069878, |
|
"rewards/margins": 0.010328343138098717, |
|
"rewards/rejected": -0.03241477161645889, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -1.121669888496399, |
|
"logits/rejected": -1.0683071613311768, |
|
"logps/chosen": -310.044189453125, |
|
"logps/rejected": -306.346435546875, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.02136383019387722, |
|
"rewards/margins": 0.012828357517719269, |
|
"rewards/rejected": -0.03419218957424164, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -1.1434075832366943, |
|
"logits/rejected": -1.1294233798980713, |
|
"logps/chosen": -259.6036682128906, |
|
"logps/rejected": -267.0892333984375, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.01935209520161152, |
|
"rewards/margins": 0.008529379032552242, |
|
"rewards/rejected": -0.027881473302841187, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -1.214237928390503, |
|
"logits/rejected": -1.1189312934875488, |
|
"logps/chosen": -250.4637451171875, |
|
"logps/rejected": -255.03079223632812, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.014152693562209606, |
|
"rewards/margins": 0.011050628498196602, |
|
"rewards/rejected": -0.025203322991728783, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -1.11543869972229, |
|
"eval_logits/rejected": -1.0612365007400513, |
|
"eval_logps/chosen": -422.0489807128906, |
|
"eval_logps/rejected": -413.28131103515625, |
|
"eval_loss": 0.046408262103796005, |
|
"eval_rewards/accuracies": 0.5724999904632568, |
|
"eval_rewards/chosen": -0.02574371173977852, |
|
"eval_rewards/margins": 0.02030220627784729, |
|
"eval_rewards/rejected": -0.04604591801762581, |
|
"eval_runtime": 546.0923, |
|
"eval_samples_per_second": 3.662, |
|
"eval_steps_per_second": 0.916, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -1.1313543319702148, |
|
"logits/rejected": -1.0724719762802124, |
|
"logps/chosen": -262.6627502441406, |
|
"logps/rejected": -277.54632568359375, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.019768275320529938, |
|
"rewards/margins": 0.014759126119315624, |
|
"rewards/rejected": -0.03452740237116814, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -1.246671199798584, |
|
"logits/rejected": -1.1658015251159668, |
|
"logps/chosen": -264.7757873535156, |
|
"logps/rejected": -277.6180114746094, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.020890336483716965, |
|
"rewards/margins": 0.011497320607304573, |
|
"rewards/rejected": -0.03238765895366669, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -1.222037672996521, |
|
"logits/rejected": -1.156553030014038, |
|
"logps/chosen": -256.74359130859375, |
|
"logps/rejected": -277.230224609375, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.015277216210961342, |
|
"rewards/margins": 0.02550993300974369, |
|
"rewards/rejected": -0.04078715294599533, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -1.1742956638336182, |
|
"logits/rejected": -1.1521165370941162, |
|
"logps/chosen": -239.5863494873047, |
|
"logps/rejected": -247.32522583007812, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.01359327882528305, |
|
"rewards/margins": 0.015490619465708733, |
|
"rewards/rejected": -0.029083898290991783, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -1.1355948448181152, |
|
"logits/rejected": -1.0996363162994385, |
|
"logps/chosen": -268.62945556640625, |
|
"logps/rejected": -278.85870361328125, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.013594739139080048, |
|
"rewards/margins": 0.00844600610435009, |
|
"rewards/rejected": -0.02204074338078499, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -1.2170326709747314, |
|
"logits/rejected": -1.1180956363677979, |
|
"logps/chosen": -265.72393798828125, |
|
"logps/rejected": -233.3331298828125, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.01117833610624075, |
|
"rewards/margins": 0.015456246212124825, |
|
"rewards/rejected": -0.02663458324968815, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -1.1366071701049805, |
|
"logits/rejected": -1.0916543006896973, |
|
"logps/chosen": -266.32037353515625, |
|
"logps/rejected": -267.02313232421875, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.01092919148504734, |
|
"rewards/margins": 0.01567809283733368, |
|
"rewards/rejected": -0.02660728432238102, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -1.089163064956665, |
|
"logits/rejected": -1.0779684782028198, |
|
"logps/chosen": -259.6034240722656, |
|
"logps/rejected": -261.5791015625, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.008027950301766396, |
|
"rewards/margins": 0.02001611702144146, |
|
"rewards/rejected": -0.028044065460562706, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -1.0756354331970215, |
|
"logits/rejected": -1.0809965133666992, |
|
"logps/chosen": -317.52227783203125, |
|
"logps/rejected": -299.58990478515625, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.012966620735824108, |
|
"rewards/margins": 0.015139798633754253, |
|
"rewards/rejected": -0.02810642123222351, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -1.1576625108718872, |
|
"logits/rejected": -1.0739920139312744, |
|
"logps/chosen": -259.60516357421875, |
|
"logps/rejected": -274.40240478515625, |
|
"loss": 0.0957, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.006962036248296499, |
|
"rewards/margins": 0.03462858498096466, |
|
"rewards/rejected": -0.041590623557567596, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_logits/chosen": -1.0983970165252686, |
|
"eval_logits/rejected": -1.0450440645217896, |
|
"eval_logps/chosen": -417.00225830078125, |
|
"eval_logps/rejected": -415.34869384765625, |
|
"eval_loss": 0.04426228255033493, |
|
"eval_rewards/accuracies": 0.578000009059906, |
|
"eval_rewards/chosen": -0.02069696970283985, |
|
"eval_rewards/margins": 0.027416307479143143, |
|
"eval_rewards/rejected": -0.04811327904462814, |
|
"eval_runtime": 545.9497, |
|
"eval_samples_per_second": 3.663, |
|
"eval_steps_per_second": 0.916, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": -1.0938787460327148, |
|
"logits/rejected": -1.0761361122131348, |
|
"logps/chosen": -252.89974975585938, |
|
"logps/rejected": -279.4630126953125, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.017448369413614273, |
|
"rewards/margins": 0.02003558911383152, |
|
"rewards/rejected": -0.03748396039009094, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -1.1595834493637085, |
|
"logits/rejected": -1.126773476600647, |
|
"logps/chosen": -298.33538818359375, |
|
"logps/rejected": -295.24468994140625, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.016742903739213943, |
|
"rewards/margins": 0.022577572613954544, |
|
"rewards/rejected": -0.03932047635316849, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -1.1363260746002197, |
|
"logits/rejected": -1.1643562316894531, |
|
"logps/chosen": -259.0823669433594, |
|
"logps/rejected": -262.0459899902344, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.01677551493048668, |
|
"rewards/margins": 0.012358926236629486, |
|
"rewards/rejected": -0.029134441167116165, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -1.0814932584762573, |
|
"logits/rejected": -1.0023730993270874, |
|
"logps/chosen": -267.9959411621094, |
|
"logps/rejected": -241.27685546875, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.014996061101555824, |
|
"rewards/margins": 0.004354935139417648, |
|
"rewards/rejected": -0.019350996240973473, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -1.1119335889816284, |
|
"logits/rejected": -1.036833643913269, |
|
"logps/chosen": -264.6695251464844, |
|
"logps/rejected": -294.46173095703125, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.00541292130947113, |
|
"rewards/margins": 0.018471624702215195, |
|
"rewards/rejected": -0.023884546011686325, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -1.100239872932434, |
|
"logits/rejected": -0.9833891987800598, |
|
"logps/chosen": -265.06292724609375, |
|
"logps/rejected": -251.05789184570312, |
|
"loss": 0.0769, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.00949514377862215, |
|
"rewards/margins": 0.011730840429663658, |
|
"rewards/rejected": -0.021225983276963234, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -1.0965584516525269, |
|
"logits/rejected": -1.1050448417663574, |
|
"logps/chosen": -245.2798309326172, |
|
"logps/rejected": -247.6891632080078, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.00036931521026417613, |
|
"rewards/margins": 0.01472887396812439, |
|
"rewards/rejected": -0.015098191797733307, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -1.1267783641815186, |
|
"logits/rejected": -1.0608749389648438, |
|
"logps/chosen": -276.13861083984375, |
|
"logps/rejected": -242.2423858642578, |
|
"loss": 0.0866, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.003509046044200659, |
|
"rewards/margins": 0.01866857148706913, |
|
"rewards/rejected": -0.015159524977207184, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -1.1088229417800903, |
|
"logits/rejected": -1.0114442110061646, |
|
"logps/chosen": -264.47747802734375, |
|
"logps/rejected": -256.66265869140625, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0013200236717239022, |
|
"rewards/margins": 0.02153395116329193, |
|
"rewards/rejected": -0.022853974252939224, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -1.112363338470459, |
|
"logits/rejected": -1.0634427070617676, |
|
"logps/chosen": -263.20428466796875, |
|
"logps/rejected": -262.7369079589844, |
|
"loss": 0.068, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0034271120093762875, |
|
"rewards/margins": 0.013932084664702415, |
|
"rewards/rejected": -0.01735919900238514, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_logits/chosen": -1.0329285860061646, |
|
"eval_logits/rejected": -0.9790877103805542, |
|
"eval_logps/chosen": -402.9732360839844, |
|
"eval_logps/rejected": -399.08111572265625, |
|
"eval_loss": 0.04319905489683151, |
|
"eval_rewards/accuracies": 0.5954999923706055, |
|
"eval_rewards/chosen": -0.006668027024716139, |
|
"eval_rewards/margins": 0.025177694857120514, |
|
"eval_rewards/rejected": -0.03184572234749794, |
|
"eval_runtime": 545.9259, |
|
"eval_samples_per_second": 3.664, |
|
"eval_steps_per_second": 0.916, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -1.0819242000579834, |
|
"logits/rejected": -1.0868195295333862, |
|
"logps/chosen": -204.0397186279297, |
|
"logps/rejected": -212.2799835205078, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.003992350306361914, |
|
"rewards/margins": 0.010435246862471104, |
|
"rewards/rejected": -0.01442759484052658, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -1.1004348993301392, |
|
"logits/rejected": -1.0923566818237305, |
|
"logps/chosen": -287.0587463378906, |
|
"logps/rejected": -295.1195373535156, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.005773237906396389, |
|
"rewards/margins": 0.016825079917907715, |
|
"rewards/rejected": -0.02259831875562668, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -1.025914192199707, |
|
"logits/rejected": -1.0844639539718628, |
|
"logps/chosen": -233.93783569335938, |
|
"logps/rejected": -278.87310791015625, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0003628075937740505, |
|
"rewards/margins": 0.019406834617257118, |
|
"rewards/rejected": -0.019044026732444763, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -1.0895086526870728, |
|
"logits/rejected": -0.9644759297370911, |
|
"logps/chosen": -313.9569396972656, |
|
"logps/rejected": -267.2975158691406, |
|
"loss": 0.0828, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.009684056974947453, |
|
"rewards/margins": 0.013868686743080616, |
|
"rewards/rejected": -0.02355274185538292, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -1.039159893989563, |
|
"logits/rejected": -1.0821508169174194, |
|
"logps/chosen": -257.9473571777344, |
|
"logps/rejected": -274.3190002441406, |
|
"loss": 0.0743, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0062354551628232, |
|
"rewards/margins": 0.01928626373410225, |
|
"rewards/rejected": -0.025521719828248024, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -1.0668865442276, |
|
"logits/rejected": -1.0260220766067505, |
|
"logps/chosen": -271.9998474121094, |
|
"logps/rejected": -282.83038330078125, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0038192705251276493, |
|
"rewards/margins": 0.019924623891711235, |
|
"rewards/rejected": -0.016105355694890022, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -1.054386019706726, |
|
"logits/rejected": -1.042152762413025, |
|
"logps/chosen": -284.9267272949219, |
|
"logps/rejected": -249.73110961914062, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0018435310339555144, |
|
"rewards/margins": 0.013795648701488972, |
|
"rewards/rejected": -0.011952118948101997, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -1.027489185333252, |
|
"logits/rejected": -1.0255085229873657, |
|
"logps/chosen": -250.985107421875, |
|
"logps/rejected": -246.9531707763672, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.00342792016454041, |
|
"rewards/margins": 0.010951442644000053, |
|
"rewards/rejected": -0.01437936257570982, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -1.0244547128677368, |
|
"logits/rejected": -1.0658903121948242, |
|
"logps/chosen": -295.65191650390625, |
|
"logps/rejected": -290.0276184082031, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.00019127638370264322, |
|
"rewards/margins": 0.02073330618441105, |
|
"rewards/rejected": -0.020924581214785576, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -1.0251004695892334, |
|
"logits/rejected": -1.0600395202636719, |
|
"logps/chosen": -244.59384155273438, |
|
"logps/rejected": -243.28256225585938, |
|
"loss": 0.0847, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.001342198345810175, |
|
"rewards/margins": 0.01429178100079298, |
|
"rewards/rejected": -0.015633979812264442, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -1.0363810062408447, |
|
"eval_logits/rejected": -0.9837189316749573, |
|
"eval_logps/chosen": -401.28790283203125, |
|
"eval_logps/rejected": -398.47442626953125, |
|
"eval_loss": 0.042725615203380585, |
|
"eval_rewards/accuracies": 0.5945000052452087, |
|
"eval_rewards/chosen": -0.0049826642498373985, |
|
"eval_rewards/margins": 0.026256347075104713, |
|
"eval_rewards/rejected": -0.031239010393619537, |
|
"eval_runtime": 546.1434, |
|
"eval_samples_per_second": 3.662, |
|
"eval_steps_per_second": 0.916, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -1.03734290599823, |
|
"logits/rejected": -0.9538782238960266, |
|
"logps/chosen": -302.88079833984375, |
|
"logps/rejected": -275.7502136230469, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.003554628463461995, |
|
"rewards/margins": 0.016584644094109535, |
|
"rewards/rejected": -0.020139271393418312, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -1.0758119821548462, |
|
"logits/rejected": -0.9793124198913574, |
|
"logps/chosen": -256.09783935546875, |
|
"logps/rejected": -237.6195831298828, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.004253728315234184, |
|
"rewards/margins": 0.020320799201726913, |
|
"rewards/rejected": -0.024574527516961098, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -1.1112511157989502, |
|
"logits/rejected": -1.0120253562927246, |
|
"logps/chosen": -217.60305786132812, |
|
"logps/rejected": -254.78884887695312, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.007809498347342014, |
|
"rewards/margins": 0.020094871520996094, |
|
"rewards/rejected": -0.027904370799660683, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -1.1298284530639648, |
|
"logits/rejected": -1.0621023178100586, |
|
"logps/chosen": -286.02459716796875, |
|
"logps/rejected": -299.3070373535156, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.011842104606330395, |
|
"rewards/margins": 0.014377683401107788, |
|
"rewards/rejected": -0.026219788938760757, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -1.0666725635528564, |
|
"logits/rejected": -1.0321277379989624, |
|
"logps/chosen": -300.1710205078125, |
|
"logps/rejected": -384.95751953125, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0033969897776842117, |
|
"rewards/margins": 0.039332348853349686, |
|
"rewards/rejected": -0.04272934049367905, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -1.062753677368164, |
|
"logits/rejected": -1.034977674484253, |
|
"logps/chosen": -275.3869934082031, |
|
"logps/rejected": -275.52667236328125, |
|
"loss": 0.0798, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0005760884960182011, |
|
"rewards/margins": 0.0214390866458416, |
|
"rewards/rejected": -0.022015176713466644, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -1.072608470916748, |
|
"logits/rejected": -1.0768264532089233, |
|
"logps/chosen": -300.6933288574219, |
|
"logps/rejected": -286.68951416015625, |
|
"loss": 0.0876, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.005371665116399527, |
|
"rewards/margins": 0.01708284579217434, |
|
"rewards/rejected": -0.022454511374235153, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -1.1137611865997314, |
|
"logits/rejected": -1.0089493989944458, |
|
"logps/chosen": -284.4278259277344, |
|
"logps/rejected": -263.4560546875, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0019712348002940416, |
|
"rewards/margins": 0.015994885936379433, |
|
"rewards/rejected": -0.017966121435165405, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -1.0998120307922363, |
|
"logits/rejected": -1.0427272319793701, |
|
"logps/chosen": -273.5634460449219, |
|
"logps/rejected": -273.6974792480469, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.00520918658003211, |
|
"rewards/margins": 0.018212206661701202, |
|
"rewards/rejected": -0.02342139557003975, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -1.0788428783416748, |
|
"logits/rejected": -1.0710010528564453, |
|
"logps/chosen": -295.2659912109375, |
|
"logps/rejected": -301.9012145996094, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.006222005933523178, |
|
"rewards/margins": 0.023821452632546425, |
|
"rewards/rejected": -0.030043456703424454, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -1.0359783172607422, |
|
"eval_logits/rejected": -0.9871743321418762, |
|
"eval_logps/chosen": -404.5330505371094, |
|
"eval_logps/rejected": -404.9790954589844, |
|
"eval_loss": 0.04230288788676262, |
|
"eval_rewards/accuracies": 0.590499997138977, |
|
"eval_rewards/chosen": -0.008227824233472347, |
|
"eval_rewards/margins": 0.02951584756374359, |
|
"eval_rewards/rejected": -0.03774367272853851, |
|
"eval_runtime": 545.9714, |
|
"eval_samples_per_second": 3.663, |
|
"eval_steps_per_second": 0.916, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -1.0752325057983398, |
|
"logits/rejected": -1.005172848701477, |
|
"logps/chosen": -263.06549072265625, |
|
"logps/rejected": -263.9055480957031, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0012165942462161183, |
|
"rewards/margins": 0.0245666466653347, |
|
"rewards/rejected": -0.025783240795135498, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -1.0686012506484985, |
|
"logits/rejected": -1.0680710077285767, |
|
"logps/chosen": -260.68853759765625, |
|
"logps/rejected": -299.7347717285156, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.006182204931974411, |
|
"rewards/margins": 0.016528166830539703, |
|
"rewards/rejected": -0.022710371762514114, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -1.1110173463821411, |
|
"logits/rejected": -1.0842745304107666, |
|
"logps/chosen": -270.8677673339844, |
|
"logps/rejected": -273.29339599609375, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0033080249559134245, |
|
"rewards/margins": 0.02419392392039299, |
|
"rewards/rejected": -0.027501946315169334, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -1.1270530223846436, |
|
"logits/rejected": -1.0608434677124023, |
|
"logps/chosen": -296.01556396484375, |
|
"logps/rejected": -293.2162170410156, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.004587044008076191, |
|
"rewards/margins": 0.017461195588111877, |
|
"rewards/rejected": -0.022048238664865494, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -1.1132383346557617, |
|
"logits/rejected": -1.0976136922836304, |
|
"logps/chosen": -258.3694763183594, |
|
"logps/rejected": -271.54095458984375, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.012257089838385582, |
|
"rewards/margins": 0.019415555521845818, |
|
"rewards/rejected": -0.0316726490855217, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -1.1247339248657227, |
|
"logits/rejected": -1.0423662662506104, |
|
"logps/chosen": -287.46160888671875, |
|
"logps/rejected": -290.23736572265625, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.008325648494064808, |
|
"rewards/margins": 0.021062636747956276, |
|
"rewards/rejected": -0.02938828244805336, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -1.1069273948669434, |
|
"logits/rejected": -1.086753010749817, |
|
"logps/chosen": -298.63433837890625, |
|
"logps/rejected": -294.5317077636719, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.007812710478901863, |
|
"rewards/margins": 0.02419663593173027, |
|
"rewards/rejected": -0.032009344547986984, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -1.1254509687423706, |
|
"logits/rejected": -1.0976530313491821, |
|
"logps/chosen": -287.83282470703125, |
|
"logps/rejected": -289.3851623535156, |
|
"loss": 0.0738, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.009489515796303749, |
|
"rewards/margins": 0.02022114023566246, |
|
"rewards/rejected": -0.029710659757256508, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -1.1135103702545166, |
|
"logits/rejected": -1.0031986236572266, |
|
"logps/chosen": -292.74835205078125, |
|
"logps/rejected": -297.3404846191406, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.003799052909016609, |
|
"rewards/margins": 0.016090305522084236, |
|
"rewards/rejected": -0.019889358431100845, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -1.0912225246429443, |
|
"logits/rejected": -1.0440576076507568, |
|
"logps/chosen": -273.9382019042969, |
|
"logps/rejected": -279.7242126464844, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.00514855096116662, |
|
"rewards/margins": 0.019596170634031296, |
|
"rewards/rejected": -0.024744722992181778, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -1.055617332458496, |
|
"eval_logits/rejected": -1.0109219551086426, |
|
"eval_logps/chosen": -406.80352783203125, |
|
"eval_logps/rejected": -409.2462158203125, |
|
"eval_loss": 0.04217638820409775, |
|
"eval_rewards/accuracies": 0.6000000238418579, |
|
"eval_rewards/chosen": -0.010498268529772758, |
|
"eval_rewards/margins": 0.0315125547349453, |
|
"eval_rewards/rejected": -0.042010821402072906, |
|
"eval_runtime": 546.1165, |
|
"eval_samples_per_second": 3.662, |
|
"eval_steps_per_second": 0.916, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -1.118160367012024, |
|
"logits/rejected": -1.0629098415374756, |
|
"logps/chosen": -223.4706573486328, |
|
"logps/rejected": -247.4434356689453, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.0038195624947547913, |
|
"rewards/margins": 0.02616865560412407, |
|
"rewards/rejected": -0.029988214373588562, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -1.1013834476470947, |
|
"logits/rejected": -1.1045656204223633, |
|
"logps/chosen": -240.3532257080078, |
|
"logps/rejected": -258.44989013671875, |
|
"loss": 0.0812, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.002859788713976741, |
|
"rewards/margins": 0.029717862606048584, |
|
"rewards/rejected": -0.03257765248417854, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -1.1316301822662354, |
|
"logits/rejected": -1.1215277910232544, |
|
"logps/chosen": -266.54931640625, |
|
"logps/rejected": -260.0931701660156, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.006319983396679163, |
|
"rewards/margins": 0.014086413197219372, |
|
"rewards/rejected": -0.020406395196914673, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -1.1706424951553345, |
|
"logits/rejected": -1.0354385375976562, |
|
"logps/chosen": -278.2705993652344, |
|
"logps/rejected": -276.22967529296875, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.009933208115398884, |
|
"rewards/margins": 0.019498441368341446, |
|
"rewards/rejected": -0.029431650415062904, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -1.0535192489624023, |
|
"logits/rejected": -1.0249392986297607, |
|
"logps/chosen": -238.9296875, |
|
"logps/rejected": -260.4687805175781, |
|
"loss": 0.0732, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0005777518963441253, |
|
"rewards/margins": 0.021599723026156425, |
|
"rewards/rejected": -0.022177476435899734, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -1.1237046718597412, |
|
"logits/rejected": -1.1744358539581299, |
|
"logps/chosen": -290.61187744140625, |
|
"logps/rejected": -289.50775146484375, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.00292245764285326, |
|
"rewards/margins": 0.022101474925875664, |
|
"rewards/rejected": -0.0250239335000515, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -1.0622873306274414, |
|
"logits/rejected": -1.047603964805603, |
|
"logps/chosen": -290.8896484375, |
|
"logps/rejected": -276.6893005371094, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.005569613538682461, |
|
"rewards/margins": 0.024782858788967133, |
|
"rewards/rejected": -0.030352476984262466, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -1.1374906301498413, |
|
"logits/rejected": -1.079929232597351, |
|
"logps/chosen": -223.29623413085938, |
|
"logps/rejected": -222.3146209716797, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0003207772097084671, |
|
"rewards/margins": 0.016658511012792587, |
|
"rewards/rejected": -0.016979288309812546, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -1.0372432470321655, |
|
"logits/rejected": -1.0072650909423828, |
|
"logps/chosen": -297.5245056152344, |
|
"logps/rejected": -319.53076171875, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.007688297424465418, |
|
"rewards/margins": 0.022589299827814102, |
|
"rewards/rejected": -0.030277591198682785, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -1.096644639968872, |
|
"logits/rejected": -1.0533974170684814, |
|
"logps/chosen": -268.70343017578125, |
|
"logps/rejected": -255.3015594482422, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.003904106793925166, |
|
"rewards/margins": 0.022419685497879982, |
|
"rewards/rejected": -0.02632378600537777, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -1.0502386093139648, |
|
"eval_logits/rejected": -1.0050266981124878, |
|
"eval_logps/chosen": -406.3475341796875, |
|
"eval_logps/rejected": -408.7396545410156, |
|
"eval_loss": 0.04211420938372612, |
|
"eval_rewards/accuracies": 0.5929999947547913, |
|
"eval_rewards/chosen": -0.01004225667566061, |
|
"eval_rewards/margins": 0.031461965292692184, |
|
"eval_rewards/rejected": -0.04150421544909477, |
|
"eval_runtime": 545.8034, |
|
"eval_samples_per_second": 3.664, |
|
"eval_steps_per_second": 0.916, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -1.1268881559371948, |
|
"logits/rejected": -1.0797778367996216, |
|
"logps/chosen": -271.73004150390625, |
|
"logps/rejected": -292.3177490234375, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.008194219321012497, |
|
"rewards/margins": 0.016915880143642426, |
|
"rewards/rejected": -0.025110099464654922, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -1.13059401512146, |
|
"logits/rejected": -1.099302887916565, |
|
"logps/chosen": -252.9181365966797, |
|
"logps/rejected": -257.0358581542969, |
|
"loss": 0.0898, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.005635020788758993, |
|
"rewards/margins": 0.021289747208356857, |
|
"rewards/rejected": -0.026924768462777138, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -1.0815865993499756, |
|
"logits/rejected": -1.0594186782836914, |
|
"logps/chosen": -270.22955322265625, |
|
"logps/rejected": -268.7232360839844, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0016765497857704759, |
|
"rewards/margins": 0.02203894779086113, |
|
"rewards/rejected": -0.020362399518489838, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.07849642387894454, |
|
"train_runtime": 13138.6455, |
|
"train_samples_per_second": 1.142, |
|
"train_steps_per_second": 0.071 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|