|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.954476479514415, |
|
"eval_steps": 250, |
|
"global_step": 2050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.4390243902439025e-09, |
|
"logits/chosen": 23.83146095275879, |
|
"logits/rejected": 24.366979598999023, |
|
"logps/chosen": -513.1724243164062, |
|
"logps/rejected": -471.20977783203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4390243902439023e-08, |
|
"logits/chosen": 25.113672256469727, |
|
"logits/rejected": 25.29496955871582, |
|
"logps/chosen": -440.1910400390625, |
|
"logps/rejected": -464.28997802734375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.0020149427000433207, |
|
"rewards/margins": 0.00028653975459747016, |
|
"rewards/rejected": 0.0017284027999266982, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.878048780487805e-08, |
|
"logits/chosen": 24.36227035522461, |
|
"logits/rejected": 23.910043716430664, |
|
"logps/chosen": -433.28289794921875, |
|
"logps/rejected": -493.90667724609375, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.00046742885024286807, |
|
"rewards/margins": 0.0003720354288816452, |
|
"rewards/rejected": -0.0008394649485126138, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.317073170731706e-08, |
|
"logits/chosen": 24.743637084960938, |
|
"logits/rejected": 24.689483642578125, |
|
"logps/chosen": -507.70458984375, |
|
"logps/rejected": -543.5693969726562, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.0006420022109523416, |
|
"rewards/margins": -0.0008240239694714546, |
|
"rewards/rejected": 0.001466025598347187, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.75609756097561e-08, |
|
"logits/chosen": 26.19744873046875, |
|
"logits/rejected": 25.904888153076172, |
|
"logps/chosen": -483.9391174316406, |
|
"logps/rejected": -522.8155517578125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.000658182892948389, |
|
"rewards/margins": 0.0012660929933190346, |
|
"rewards/rejected": -0.0006079099839553237, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.219512195121951e-07, |
|
"logits/chosen": 25.00406265258789, |
|
"logits/rejected": 25.312259674072266, |
|
"logps/chosen": -505.9356384277344, |
|
"logps/rejected": -498.18035888671875, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": 0.0002137926931027323, |
|
"rewards/margins": -0.0007602882687933743, |
|
"rewards/rejected": 0.0009740809909999371, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4634146341463413e-07, |
|
"logits/chosen": 24.721242904663086, |
|
"logits/rejected": 24.36328887939453, |
|
"logps/chosen": -491.00518798828125, |
|
"logps/rejected": -569.5256958007812, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0011952219065278769, |
|
"rewards/margins": -0.001965393777936697, |
|
"rewards/rejected": 0.000770171987824142, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7073170731707317e-07, |
|
"logits/chosen": 24.990764617919922, |
|
"logits/rejected": 24.78268814086914, |
|
"logps/chosen": -418.36761474609375, |
|
"logps/rejected": -448.0746154785156, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0008768116822466254, |
|
"rewards/margins": 0.0001256523682968691, |
|
"rewards/rejected": 0.0007511593285016716, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.951219512195122e-07, |
|
"logits/chosen": 25.365007400512695, |
|
"logits/rejected": 24.888202667236328, |
|
"logps/chosen": -414.1890563964844, |
|
"logps/rejected": -434.57550048828125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0006678829668089747, |
|
"rewards/margins": -0.001990665215998888, |
|
"rewards/rejected": 0.0013227818999439478, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.195121951219512e-07, |
|
"logits/chosen": 25.361377716064453, |
|
"logits/rejected": 25.293743133544922, |
|
"logps/chosen": -452.96783447265625, |
|
"logps/rejected": -467.5086975097656, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0010917505715042353, |
|
"rewards/margins": 0.004853174090385437, |
|
"rewards/rejected": -0.0037614230532199144, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.439024390243902e-07, |
|
"logits/chosen": 24.833248138427734, |
|
"logits/rejected": 24.911867141723633, |
|
"logps/chosen": -463.259033203125, |
|
"logps/rejected": -451.12750244140625, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.001009153900668025, |
|
"rewards/margins": -0.000572516699321568, |
|
"rewards/rejected": 0.0015816707164049149, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.682926829268293e-07, |
|
"logits/chosen": 25.980321884155273, |
|
"logits/rejected": 26.036426544189453, |
|
"logps/chosen": -485.67852783203125, |
|
"logps/rejected": -501.48291015625, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.003403586568310857, |
|
"rewards/margins": 0.007840663194656372, |
|
"rewards/rejected": -0.004437076393514872, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.9268292682926825e-07, |
|
"logits/chosen": 24.954368591308594, |
|
"logits/rejected": 25.147586822509766, |
|
"logps/chosen": -493.1083984375, |
|
"logps/rejected": -499.5880432128906, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.001717461971566081, |
|
"rewards/margins": -0.0020078145898878574, |
|
"rewards/rejected": 0.00029035229817964137, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.170731707317073e-07, |
|
"logits/chosen": 25.536571502685547, |
|
"logits/rejected": 25.209754943847656, |
|
"logps/chosen": -443.41363525390625, |
|
"logps/rejected": -473.89483642578125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0012768972665071487, |
|
"rewards/margins": -0.0017949879402294755, |
|
"rewards/rejected": 0.0005180907319299877, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.4146341463414634e-07, |
|
"logits/chosen": 24.778900146484375, |
|
"logits/rejected": 24.485666275024414, |
|
"logps/chosen": -463.8463439941406, |
|
"logps/rejected": -505.55657958984375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.003415347309783101, |
|
"rewards/margins": -0.0018013190710917115, |
|
"rewards/rejected": -0.0016140276566147804, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.6585365853658536e-07, |
|
"logits/chosen": 26.186752319335938, |
|
"logits/rejected": 26.14641761779785, |
|
"logps/chosen": -454.46356201171875, |
|
"logps/rejected": -493.9950256347656, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0016146504785865545, |
|
"rewards/margins": 0.0017917368095368147, |
|
"rewards/rejected": -0.00017708637460600585, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.902439024390244e-07, |
|
"logits/chosen": 26.05021095275879, |
|
"logits/rejected": 25.78249740600586, |
|
"logps/chosen": -446.4471740722656, |
|
"logps/rejected": -482.791015625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.0032919864170253277, |
|
"rewards/margins": -0.0010836247820407152, |
|
"rewards/rejected": -0.002208361867815256, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.146341463414634e-07, |
|
"logits/chosen": 25.29595184326172, |
|
"logits/rejected": 24.819242477416992, |
|
"logps/chosen": -449.38006591796875, |
|
"logps/rejected": -485.6714782714844, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.004366643726825714, |
|
"rewards/margins": 0.0014269489329308271, |
|
"rewards/rejected": -0.00579359196126461, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.390243902439024e-07, |
|
"logits/chosen": 25.511014938354492, |
|
"logits/rejected": 25.20175552368164, |
|
"logps/chosen": -485.9794006347656, |
|
"logps/rejected": -547.3993530273438, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -9.034853428602219e-05, |
|
"rewards/margins": 0.007031626999378204, |
|
"rewards/rejected": -0.007121975068002939, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.634146341463415e-07, |
|
"logits/chosen": 25.762130737304688, |
|
"logits/rejected": 25.043359756469727, |
|
"logps/chosen": -436.8501892089844, |
|
"logps/rejected": -504.27337646484375, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0031238270457834005, |
|
"rewards/margins": 0.0019209437305107713, |
|
"rewards/rejected": -0.0050447722896933556, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.878048780487804e-07, |
|
"logits/chosen": 25.300086975097656, |
|
"logits/rejected": 25.226686477661133, |
|
"logps/chosen": -411.12091064453125, |
|
"logps/rejected": -453.89599609375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0008737426251173019, |
|
"rewards/margins": 0.0030270516872406006, |
|
"rewards/rejected": -0.0021533078979700804, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.999909394533081e-07, |
|
"logits/chosen": 25.390342712402344, |
|
"logits/rejected": 24.91876983642578, |
|
"logps/chosen": -444.7391052246094, |
|
"logps/rejected": -464.7679138183594, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0024602802004665136, |
|
"rewards/margins": 0.011555373668670654, |
|
"rewards/rejected": -0.014015654101967812, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.99918459020214e-07, |
|
"logits/chosen": 25.847061157226562, |
|
"logits/rejected": 26.030590057373047, |
|
"logps/chosen": -463.7713928222656, |
|
"logps/rejected": -441.03118896484375, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.005838096607476473, |
|
"rewards/margins": 0.0016079202760010958, |
|
"rewards/rejected": -0.007446016184985638, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.997735191684404e-07, |
|
"logits/chosen": 24.9683837890625, |
|
"logits/rejected": 24.894073486328125, |
|
"logps/chosen": -421.6605529785156, |
|
"logps/rejected": -454.12445068359375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0041381316259503365, |
|
"rewards/margins": 0.0008476437069475651, |
|
"rewards/rejected": -0.004985774867236614, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.995561619207226e-07, |
|
"logits/chosen": 24.7789249420166, |
|
"logits/rejected": 24.977474212646484, |
|
"logps/chosen": -551.719482421875, |
|
"logps/rejected": -586.6744995117188, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.019014570862054825, |
|
"rewards/margins": 0.008881723508238792, |
|
"rewards/rejected": -0.027896294370293617, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.992664502959351e-07, |
|
"logits/chosen": 24.666425704956055, |
|
"logits/rejected": 24.653018951416016, |
|
"logps/chosen": -446.96844482421875, |
|
"logps/rejected": -463.4334411621094, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.006912347860634327, |
|
"rewards/margins": 0.005273967050015926, |
|
"rewards/rejected": -0.012186313979327679, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.989044682908178e-07, |
|
"logits/chosen": 25.226409912109375, |
|
"logits/rejected": 24.97530174255371, |
|
"logps/chosen": -491.82000732421875, |
|
"logps/rejected": -502.476318359375, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.005164532456547022, |
|
"rewards/margins": 0.008609605953097343, |
|
"rewards/rejected": -0.013774137012660503, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.984703208556244e-07, |
|
"logits/chosen": 24.473495483398438, |
|
"logits/rejected": 24.27614974975586, |
|
"logps/chosen": -405.4839782714844, |
|
"logps/rejected": -428.57958984375, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.004830534104257822, |
|
"rewards/margins": 0.00787128135561943, |
|
"rewards/rejected": -0.012701814994215965, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.979641338636934e-07, |
|
"logits/chosen": 25.67917823791504, |
|
"logits/rejected": 25.326581954956055, |
|
"logps/chosen": -449.4097595214844, |
|
"logps/rejected": -484.1585998535156, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.006435055285692215, |
|
"rewards/margins": 0.010736002586781979, |
|
"rewards/rejected": -0.017171058803796768, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.973860540749533e-07, |
|
"logits/chosen": 25.372289657592773, |
|
"logits/rejected": 25.274517059326172, |
|
"logps/chosen": -453.90704345703125, |
|
"logps/rejected": -508.42364501953125, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.017870599403977394, |
|
"rewards/margins": 0.010955670848488808, |
|
"rewards/rejected": -0.028826270252466202, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.967362490933723e-07, |
|
"logits/chosen": 24.464265823364258, |
|
"logits/rejected": 24.45247459411621, |
|
"logps/chosen": -483.70562744140625, |
|
"logps/rejected": -502.24139404296875, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.013566325418651104, |
|
"rewards/margins": 0.0146838603541255, |
|
"rewards/rejected": -0.028250187635421753, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.960149073183643e-07, |
|
"logits/chosen": 24.628841400146484, |
|
"logits/rejected": 24.8803768157959, |
|
"logps/chosen": -490.34234619140625, |
|
"logps/rejected": -478.694580078125, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.01605142280459404, |
|
"rewards/margins": 0.00866149552166462, |
|
"rewards/rejected": -0.02471291646361351, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.95222237890166e-07, |
|
"logits/chosen": 24.773128509521484, |
|
"logits/rejected": 25.139238357543945, |
|
"logps/chosen": -479.4823303222656, |
|
"logps/rejected": -452.5008850097656, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.013470214791595936, |
|
"rewards/margins": 0.006181957200169563, |
|
"rewards/rejected": -0.019652169197797775, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.943584706292005e-07, |
|
"logits/chosen": 23.42044448852539, |
|
"logits/rejected": 23.604700088500977, |
|
"logps/chosen": -449.1239318847656, |
|
"logps/rejected": -457.1341247558594, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.015130296349525452, |
|
"rewards/margins": 0.01666988618671894, |
|
"rewards/rejected": -0.03180018067359924, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.934238559694447e-07, |
|
"logits/chosen": 24.050968170166016, |
|
"logits/rejected": 23.906604766845703, |
|
"logps/chosen": -468.267333984375, |
|
"logps/rejected": -493.30780029296875, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.024675091728568077, |
|
"rewards/margins": 0.010691693052649498, |
|
"rewards/rejected": -0.035366784781217575, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.924186648858207e-07, |
|
"logits/chosen": 24.65400505065918, |
|
"logits/rejected": 24.585697174072266, |
|
"logps/chosen": -400.6718444824219, |
|
"logps/rejected": -425.316650390625, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.009073630906641483, |
|
"rewards/margins": 0.01387846004217863, |
|
"rewards/rejected": -0.022952087223529816, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.913431888156309e-07, |
|
"logits/chosen": 23.99672508239746, |
|
"logits/rejected": 24.127235412597656, |
|
"logps/chosen": -453.24615478515625, |
|
"logps/rejected": -438.322509765625, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.027061685919761658, |
|
"rewards/margins": 0.006015921477228403, |
|
"rewards/rejected": -0.03307760879397392, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.901977395740619e-07, |
|
"logits/chosen": 25.19208526611328, |
|
"logits/rejected": 24.945858001708984, |
|
"logps/chosen": -420.7489318847656, |
|
"logps/rejected": -421.52813720703125, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.013960194773972034, |
|
"rewards/margins": 0.031657829880714417, |
|
"rewards/rejected": -0.045618023723363876, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.889826492637781e-07, |
|
"logits/chosen": 24.140026092529297, |
|
"logits/rejected": 24.18136978149414, |
|
"logps/chosen": -455.3021545410156, |
|
"logps/rejected": -455.71685791015625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.02444566786289215, |
|
"rewards/margins": 0.021536489948630333, |
|
"rewards/rejected": -0.04598215967416763, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.876982701786351e-07, |
|
"logits/chosen": 24.755725860595703, |
|
"logits/rejected": 24.589107513427734, |
|
"logps/chosen": -481.8275451660156, |
|
"logps/rejected": -526.6813354492188, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.02834920585155487, |
|
"rewards/margins": 0.04028704762458801, |
|
"rewards/rejected": -0.06863625347614288, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.863449747015383e-07, |
|
"logits/chosen": 24.02549171447754, |
|
"logits/rejected": 23.976415634155273, |
|
"logps/chosen": -439.884765625, |
|
"logps/rejected": -450.75079345703125, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.031968437135219574, |
|
"rewards/margins": 0.012536533176898956, |
|
"rewards/rejected": -0.04450497403740883, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.849231551964771e-07, |
|
"logits/chosen": 23.458415985107422, |
|
"logits/rejected": 23.864471435546875, |
|
"logps/chosen": -444.437744140625, |
|
"logps/rejected": -470.9718322753906, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.027122044935822487, |
|
"rewards/margins": 0.01693027839064598, |
|
"rewards/rejected": -0.04405232518911362, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.834332238947655e-07, |
|
"logits/chosen": 25.05409812927246, |
|
"logits/rejected": 25.178359985351562, |
|
"logps/chosen": -397.0162658691406, |
|
"logps/rejected": -431.53997802734375, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.016871152445673943, |
|
"rewards/margins": 0.027827546000480652, |
|
"rewards/rejected": -0.044698696583509445, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.818756127755237e-07, |
|
"logits/chosen": 24.50407600402832, |
|
"logits/rejected": 24.129384994506836, |
|
"logps/chosen": -440.77374267578125, |
|
"logps/rejected": -477.9395446777344, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.027348514646291733, |
|
"rewards/margins": 0.043705716729164124, |
|
"rewards/rejected": -0.07105423510074615, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.802507734404325e-07, |
|
"logits/chosen": 24.7203426361084, |
|
"logits/rejected": 24.716331481933594, |
|
"logps/chosen": -503.48583984375, |
|
"logps/rejected": -516.9214477539062, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.025318074971437454, |
|
"rewards/margins": 0.05121081322431564, |
|
"rewards/rejected": -0.0765288919210434, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 4.785591769828005e-07, |
|
"logits/chosen": 24.545543670654297, |
|
"logits/rejected": 24.529743194580078, |
|
"logps/chosen": -462.376953125, |
|
"logps/rejected": -532.814453125, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.036473408341407776, |
|
"rewards/margins": 0.03656883165240288, |
|
"rewards/rejected": -0.07304224371910095, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.76801313850978e-07, |
|
"logits/chosen": 22.99500274658203, |
|
"logits/rejected": 22.959396362304688, |
|
"logps/chosen": -456.98760986328125, |
|
"logps/rejected": -483.37432861328125, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.036379098892211914, |
|
"rewards/margins": 0.05310980603098869, |
|
"rewards/rejected": -0.0894889086484909, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.749776937061606e-07, |
|
"logits/chosen": 24.91106414794922, |
|
"logits/rejected": 24.693490982055664, |
|
"logps/chosen": -492.05322265625, |
|
"logps/rejected": -560.6348876953125, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.06551917642354965, |
|
"rewards/margins": 0.06626399606466293, |
|
"rewards/rejected": -0.13178317248821259, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.730888452746222e-07, |
|
"logits/chosen": 23.304880142211914, |
|
"logits/rejected": 23.373096466064453, |
|
"logps/chosen": -489.0440979003906, |
|
"logps/rejected": -516.6759033203125, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.05236157774925232, |
|
"rewards/margins": 0.04353173449635506, |
|
"rewards/rejected": -0.09589330852031708, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.711353161944198e-07, |
|
"logits/chosen": 24.48396873474121, |
|
"logits/rejected": 24.153030395507812, |
|
"logps/chosen": -482.59100341796875, |
|
"logps/rejected": -511.71112060546875, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.06634119898080826, |
|
"rewards/margins": 0.05835053324699402, |
|
"rewards/rejected": -0.12469172477722168, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.6911767285661583e-07, |
|
"logits/chosen": 23.99973487854004, |
|
"logits/rejected": 23.886028289794922, |
|
"logps/chosen": -451.34564208984375, |
|
"logps/rejected": -481.80126953125, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04214207082986832, |
|
"rewards/margins": 0.0447273924946785, |
|
"rewards/rejected": -0.08686945587396622, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.6703650024106324e-07, |
|
"logits/chosen": 24.31854248046875, |
|
"logits/rejected": 24.71249771118164, |
|
"logps/chosen": -512.5477294921875, |
|
"logps/rejected": -497.3389587402344, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.05323835089802742, |
|
"rewards/margins": 0.03783535957336426, |
|
"rewards/rejected": -0.09107370674610138, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.6489240174680026e-07, |
|
"logits/chosen": 23.387109756469727, |
|
"logits/rejected": 23.42694664001465, |
|
"logps/chosen": -469.442626953125, |
|
"logps/rejected": -464.22137451171875, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.06812478601932526, |
|
"rewards/margins": 0.04352138563990593, |
|
"rewards/rejected": -0.11164617538452148, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.626859990171067e-07, |
|
"logits/chosen": 22.03474998474121, |
|
"logits/rejected": 22.551021575927734, |
|
"logps/chosen": -503.203369140625, |
|
"logps/rejected": -512.3323364257812, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.07380495220422745, |
|
"rewards/margins": 0.0384240485727787, |
|
"rewards/rejected": -0.11222900450229645, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.604179317592686e-07, |
|
"logits/chosen": 22.982210159301758, |
|
"logits/rejected": 23.15829086303711, |
|
"logps/chosen": -482.0206604003906, |
|
"logps/rejected": -482.66864013671875, |
|
"loss": 0.6695, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.07758830487728119, |
|
"rewards/margins": 0.05295907333493233, |
|
"rewards/rejected": -0.13054737448692322, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.5808885755910673e-07, |
|
"logits/chosen": 23.076644897460938, |
|
"logits/rejected": 23.11397361755371, |
|
"logps/chosen": -428.58734130859375, |
|
"logps/rejected": -437.57757568359375, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.05579688400030136, |
|
"rewards/margins": 0.037121035158634186, |
|
"rewards/rejected": -0.09291792660951614, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.5569945169032164e-07, |
|
"logits/chosen": 23.258991241455078, |
|
"logits/rejected": 23.74163818359375, |
|
"logps/chosen": -438.8357849121094, |
|
"logps/rejected": -455.6856384277344, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06992115080356598, |
|
"rewards/margins": 0.06130353733897209, |
|
"rewards/rejected": -0.13122470676898956, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.532504069187094e-07, |
|
"logits/chosen": 23.573583602905273, |
|
"logits/rejected": 23.63486099243164, |
|
"logps/chosen": -433.8453063964844, |
|
"logps/rejected": -453.99713134765625, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05322835594415665, |
|
"rewards/margins": 0.038681793957948685, |
|
"rewards/rejected": -0.09191014617681503, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.507424333013069e-07, |
|
"logits/chosen": 22.418657302856445, |
|
"logits/rejected": 22.448223114013672, |
|
"logps/chosen": -407.6749267578125, |
|
"logps/rejected": -427.78485107421875, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.08355311304330826, |
|
"rewards/margins": 0.053498029708862305, |
|
"rewards/rejected": -0.13705115020275116, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.481762579805232e-07, |
|
"logits/chosen": 22.880319595336914, |
|
"logits/rejected": 22.28330421447754, |
|
"logps/chosen": -441.55804443359375, |
|
"logps/rejected": -507.30206298828125, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.08028487116098404, |
|
"rewards/margins": 0.10107441991567612, |
|
"rewards/rejected": -0.18135927617549896, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.455526249733178e-07, |
|
"logits/chosen": 22.544607162475586, |
|
"logits/rejected": 22.759769439697266, |
|
"logps/chosen": -467.66009521484375, |
|
"logps/rejected": -458.80419921875, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.06196471303701401, |
|
"rewards/margins": 0.0634593591094017, |
|
"rewards/rejected": -0.12542405724525452, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.4287229495548573e-07, |
|
"logits/chosen": 23.092634201049805, |
|
"logits/rejected": 22.90291976928711, |
|
"logps/chosen": -462.6261291503906, |
|
"logps/rejected": -483.6346130371094, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.09866807609796524, |
|
"rewards/margins": 0.11319071054458618, |
|
"rewards/rejected": -0.21185874938964844, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.4013604504111347e-07, |
|
"logits/chosen": 23.19097328186035, |
|
"logits/rejected": 22.682292938232422, |
|
"logps/chosen": -420.0496520996094, |
|
"logps/rejected": -469.2996520996094, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.059682805091142654, |
|
"rewards/margins": 0.0631704181432724, |
|
"rewards/rejected": -0.12285321950912476, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.3734466855726823e-07, |
|
"logits/chosen": 23.40199089050293, |
|
"logits/rejected": 23.664241790771484, |
|
"logps/chosen": -521.8033447265625, |
|
"logps/rejected": -551.7887573242188, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10334376245737076, |
|
"rewards/margins": 0.07836399972438812, |
|
"rewards/rejected": -0.18170776963233948, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 4.344989748139873e-07, |
|
"logits/chosen": 22.226797103881836, |
|
"logits/rejected": 21.844837188720703, |
|
"logps/chosen": -438.06756591796875, |
|
"logps/rejected": -524.7212524414062, |
|
"loss": 0.6528, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08786562830209732, |
|
"rewards/margins": 0.17397987842559814, |
|
"rewards/rejected": -0.26184552907943726, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.315997888696322e-07, |
|
"logits/chosen": 22.644672393798828, |
|
"logits/rejected": 22.466157913208008, |
|
"logps/chosen": -456.107666015625, |
|
"logps/rejected": -488.3030700683594, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.09486567229032516, |
|
"rewards/margins": 0.18466800451278687, |
|
"rewards/rejected": -0.2795336842536926, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.2864795129167865e-07, |
|
"logits/chosen": 21.293453216552734, |
|
"logits/rejected": 21.037721633911133, |
|
"logps/chosen": -492.1611328125, |
|
"logps/rejected": -532.6552734375, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.148245707154274, |
|
"rewards/margins": 0.15969568490982056, |
|
"rewards/rejected": -0.30794137716293335, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.25644317913008e-07, |
|
"logits/chosen": 22.722232818603516, |
|
"logits/rejected": 22.16741943359375, |
|
"logps/chosen": -501.4042053222656, |
|
"logps/rejected": -566.7221069335938, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13956955075263977, |
|
"rewards/margins": 0.29204845428466797, |
|
"rewards/rejected": -0.4316180348396301, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.2258975958377437e-07, |
|
"logits/chosen": 22.7330379486084, |
|
"logits/rejected": 22.67351722717285, |
|
"logps/chosen": -489.033935546875, |
|
"logps/rejected": -503.78778076171875, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.12569200992584229, |
|
"rewards/margins": 0.0982416495680809, |
|
"rewards/rejected": -0.22393366694450378, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.194851619189169e-07, |
|
"logits/chosen": 21.356544494628906, |
|
"logits/rejected": 20.965524673461914, |
|
"logps/chosen": -456.1627502441406, |
|
"logps/rejected": -518.4822387695312, |
|
"loss": 0.6516, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12254482507705688, |
|
"rewards/margins": 0.10786397755146027, |
|
"rewards/rejected": -0.23040878772735596, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.163314250413913e-07, |
|
"logits/chosen": 22.082752227783203, |
|
"logits/rejected": 22.4614315032959, |
|
"logps/chosen": -464.14080810546875, |
|
"logps/rejected": -480.8935546875, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.137410506606102, |
|
"rewards/margins": 0.1056610569357872, |
|
"rewards/rejected": -0.24307158589363098, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.131294633211954e-07, |
|
"logits/chosen": 22.201583862304688, |
|
"logits/rejected": 22.151172637939453, |
|
"logps/chosen": -499.27032470703125, |
|
"logps/rejected": -530.463623046875, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1417187750339508, |
|
"rewards/margins": 0.10248730331659317, |
|
"rewards/rejected": -0.24420607089996338, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.098802051102635e-07, |
|
"logits/chosen": 21.357421875, |
|
"logits/rejected": 21.796932220458984, |
|
"logps/chosen": -467.2061462402344, |
|
"logps/rejected": -489.28143310546875, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14098785817623138, |
|
"rewards/margins": 0.11554199457168579, |
|
"rewards/rejected": -0.25652986764907837, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.065845924733076e-07, |
|
"logits/chosen": 21.877614974975586, |
|
"logits/rejected": 21.869991302490234, |
|
"logps/chosen": -525.2653198242188, |
|
"logps/rejected": -533.6386108398438, |
|
"loss": 0.65, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.14704535901546478, |
|
"rewards/margins": 0.15783420205116272, |
|
"rewards/rejected": -0.3048795163631439, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 4.0324358091468226e-07, |
|
"logits/chosen": 21.090862274169922, |
|
"logits/rejected": 20.943960189819336, |
|
"logps/chosen": -482.4337463378906, |
|
"logps/rejected": -536.8486328125, |
|
"loss": 0.6535, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.21552202105522156, |
|
"rewards/margins": 0.1812928169965744, |
|
"rewards/rejected": -0.39681482315063477, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.99858139101353e-07, |
|
"logits/chosen": 21.675159454345703, |
|
"logits/rejected": 21.479019165039062, |
|
"logps/chosen": -580.7256469726562, |
|
"logps/rejected": -579.4886474609375, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1938856542110443, |
|
"rewards/margins": 0.13946060836315155, |
|
"rewards/rejected": -0.33334627747535706, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.964292485820487e-07, |
|
"logits/chosen": 21.661422729492188, |
|
"logits/rejected": 21.30933380126953, |
|
"logps/chosen": -474.04315185546875, |
|
"logps/rejected": -520.8201904296875, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.13717155158519745, |
|
"rewards/margins": 0.17133645713329315, |
|
"rewards/rejected": -0.3085080087184906, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.929579035026788e-07, |
|
"logits/chosen": 22.402542114257812, |
|
"logits/rejected": 22.312419891357422, |
|
"logps/chosen": -430.6495056152344, |
|
"logps/rejected": -463.7230529785156, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.07999514788389206, |
|
"rewards/margins": 0.18375879526138306, |
|
"rewards/rejected": -0.2637539505958557, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 3.8944511031809865e-07, |
|
"logits/chosen": 21.463491439819336, |
|
"logits/rejected": 21.453006744384766, |
|
"logps/chosen": -502.7759704589844, |
|
"logps/rejected": -547.2340087890625, |
|
"loss": 0.6575, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1663341373205185, |
|
"rewards/margins": 0.12559418380260468, |
|
"rewards/rejected": -0.29192835092544556, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 3.858918875003053e-07, |
|
"logits/chosen": 21.34784698486328, |
|
"logits/rejected": 21.8105525970459, |
|
"logps/chosen": -517.6514282226562, |
|
"logps/rejected": -540.4078979492188, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16342517733573914, |
|
"rewards/margins": 0.11599358171224594, |
|
"rewards/rejected": -0.2794187664985657, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 3.8229926524315013e-07, |
|
"logits/chosen": 22.361557006835938, |
|
"logits/rejected": 22.051918029785156, |
|
"logps/chosen": -479.0370178222656, |
|
"logps/rejected": -508.67523193359375, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.17205052077770233, |
|
"rewards/margins": 0.13092947006225586, |
|
"rewards/rejected": -0.3029800057411194, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.7866828516365223e-07, |
|
"logits/chosen": 21.549245834350586, |
|
"logits/rejected": 21.51742172241211, |
|
"logps/chosen": -488.44146728515625, |
|
"logps/rejected": -505.7699279785156, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1810511201620102, |
|
"rewards/margins": 0.11019665002822876, |
|
"rewards/rejected": -0.29124775528907776, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 21.30332374572754, |
|
"logits/rejected": 21.18606185913086, |
|
"logps/chosen": -468.67852783203125, |
|
"logps/rejected": -542.9542236328125, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.14327475428581238, |
|
"rewards/margins": 0.19075681269168854, |
|
"rewards/rejected": -0.3340315818786621, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.712954733063284e-07, |
|
"logits/chosen": 20.62077522277832, |
|
"logits/rejected": 20.85430908203125, |
|
"logps/chosen": -418.8779296875, |
|
"logps/rejected": -455.548095703125, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11488916724920273, |
|
"rewards/margins": 0.23587051033973694, |
|
"rewards/rejected": -0.3507596552371979, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.6755577914436054e-07, |
|
"logits/chosen": 21.663427352905273, |
|
"logits/rejected": 21.377941131591797, |
|
"logps/chosen": -541.1193237304688, |
|
"logps/rejected": -579.5726318359375, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.19904150068759918, |
|
"rewards/margins": 0.19058963656425476, |
|
"rewards/rejected": -0.3896311819553375, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.637820017720022e-07, |
|
"logits/chosen": 20.406190872192383, |
|
"logits/rejected": 19.97982406616211, |
|
"logps/chosen": -452.04754638671875, |
|
"logps/rejected": -489.46826171875, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16855832934379578, |
|
"rewards/margins": 0.13646240532398224, |
|
"rewards/rejected": -0.3050207495689392, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.599752353289808e-07, |
|
"logits/chosen": 20.002662658691406, |
|
"logits/rejected": 20.125558853149414, |
|
"logps/chosen": -461.048095703125, |
|
"logps/rejected": -485.640869140625, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18868403136730194, |
|
"rewards/margins": 0.16586804389953613, |
|
"rewards/rejected": -0.35455209016799927, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.56136583519619e-07, |
|
"logits/chosen": 20.461118698120117, |
|
"logits/rejected": 20.211368560791016, |
|
"logps/chosen": -473.6031188964844, |
|
"logps/rejected": -509.35565185546875, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1867034137248993, |
|
"rewards/margins": 0.1752786636352539, |
|
"rewards/rejected": -0.3619820773601532, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3.52267159292835e-07, |
|
"logits/chosen": 20.3443603515625, |
|
"logits/rejected": 20.236209869384766, |
|
"logps/chosen": -500.35247802734375, |
|
"logps/rejected": -538.0313720703125, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2049115002155304, |
|
"rewards/margins": 0.20946213603019714, |
|
"rewards/rejected": -0.41437363624572754, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 3.483680845194629e-07, |
|
"logits/chosen": 19.040111541748047, |
|
"logits/rejected": 19.063495635986328, |
|
"logps/chosen": -503.2266540527344, |
|
"logps/rejected": -557.2120361328125, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2737266421318054, |
|
"rewards/margins": 0.1703735888004303, |
|
"rewards/rejected": -0.4441002309322357, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 3.444404896669864e-07, |
|
"logits/chosen": 19.740901947021484, |
|
"logits/rejected": 19.64006233215332, |
|
"logps/chosen": -513.2991333007812, |
|
"logps/rejected": -551.9752807617188, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22840385138988495, |
|
"rewards/margins": 0.22724993526935577, |
|
"rewards/rejected": -0.45565375685691833, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 3.4048551347177943e-07, |
|
"logits/chosen": 19.359893798828125, |
|
"logits/rejected": 19.82449722290039, |
|
"logps/chosen": -478.95452880859375, |
|
"logps/rejected": -482.80731201171875, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.21430854499340057, |
|
"rewards/margins": 0.14684443175792694, |
|
"rewards/rejected": -0.3611529767513275, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3.365043026089501e-07, |
|
"logits/chosen": 19.821773529052734, |
|
"logits/rejected": 19.241750717163086, |
|
"logps/chosen": -502.0999450683594, |
|
"logps/rejected": -568.6721801757812, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2433268278837204, |
|
"rewards/margins": 0.28582024574279785, |
|
"rewards/rejected": -0.5291470289230347, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 3.3249801135988236e-07, |
|
"logits/chosen": 20.231494903564453, |
|
"logits/rejected": 20.169645309448242, |
|
"logps/chosen": -473.277099609375, |
|
"logps/rejected": -506.4814453125, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.19130480289459229, |
|
"rewards/margins": 0.21689121425151825, |
|
"rewards/rejected": -0.40819603204727173, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 3.284678012775727e-07, |
|
"logits/chosen": 20.264537811279297, |
|
"logits/rejected": 19.87087631225586, |
|
"logps/chosen": -489.9398498535156, |
|
"logps/rejected": -575.63671875, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2465265691280365, |
|
"rewards/margins": 0.2502959966659546, |
|
"rewards/rejected": -0.4968225359916687, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3.2441484084985866e-07, |
|
"logits/chosen": 19.91929054260254, |
|
"logits/rejected": 19.753122329711914, |
|
"logps/chosen": -473.8438415527344, |
|
"logps/rejected": -550.4216918945312, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2036692351102829, |
|
"rewards/margins": 0.2004026174545288, |
|
"rewards/rejected": -0.4040718674659729, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 3.203403051606362e-07, |
|
"logits/chosen": 19.359331130981445, |
|
"logits/rejected": 18.84432601928711, |
|
"logps/chosen": -432.80950927734375, |
|
"logps/rejected": -495.875, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2121230810880661, |
|
"rewards/margins": 0.2295958250761032, |
|
"rewards/rejected": -0.4417189061641693, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 3.162453755491655e-07, |
|
"logits/chosen": 19.773300170898438, |
|
"logits/rejected": 20.017602920532227, |
|
"logps/chosen": -488.0621032714844, |
|
"logps/rejected": -516.9225463867188, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.22858639061450958, |
|
"rewards/margins": 0.1711055487394333, |
|
"rewards/rejected": -0.3996918797492981, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 3.1213123926756174e-07, |
|
"logits/chosen": 19.31648063659668, |
|
"logits/rejected": 19.08823013305664, |
|
"logps/chosen": -508.760009765625, |
|
"logps/rejected": -547.8890991210938, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.25501328706741333, |
|
"rewards/margins": 0.20005568861961365, |
|
"rewards/rejected": -0.455068975687027, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.0799908913657367e-07, |
|
"logits/chosen": 21.24723243713379, |
|
"logits/rejected": 20.8863468170166, |
|
"logps/chosen": -459.1988830566406, |
|
"logps/rejected": -493.8885803222656, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.18732424080371857, |
|
"rewards/margins": 0.16734528541564941, |
|
"rewards/rejected": -0.3546695113182068, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 3.0385012319974533e-07, |
|
"logits/chosen": 19.451326370239258, |
|
"logits/rejected": 18.73493766784668, |
|
"logps/chosen": -506.89697265625, |
|
"logps/rejected": -547.7899169921875, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.25920993089675903, |
|
"rewards/margins": 0.2783968448638916, |
|
"rewards/rejected": -0.5376068353652954, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.996855443760651e-07, |
|
"logits/chosen": 18.372570037841797, |
|
"logits/rejected": 18.149747848510742, |
|
"logps/chosen": -456.7911682128906, |
|
"logps/rejected": -510.40155029296875, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.29108864068984985, |
|
"rewards/margins": 0.19627229869365692, |
|
"rewards/rejected": -0.4873608648777008, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.955065601112005e-07, |
|
"logits/chosen": 18.089101791381836, |
|
"logits/rejected": 17.99216079711914, |
|
"logps/chosen": -497.9737854003906, |
|
"logps/rejected": -488.06475830078125, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2766265869140625, |
|
"rewards/margins": 0.1618172526359558, |
|
"rewards/rejected": -0.4384438395500183, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.913143820274212e-07, |
|
"logits/chosen": 19.157297134399414, |
|
"logits/rejected": 18.781457901000977, |
|
"logps/chosen": -478.51336669921875, |
|
"logps/rejected": -547.4224853515625, |
|
"loss": 0.6255, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3197309076786041, |
|
"rewards/margins": 0.22106032073497772, |
|
"rewards/rejected": -0.5407912135124207, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.8711022557231015e-07, |
|
"logits/chosen": 18.902210235595703, |
|
"logits/rejected": 18.82394027709961, |
|
"logps/chosen": -529.0880737304688, |
|
"logps/rejected": -508.85565185546875, |
|
"loss": 0.6151, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3066019117832184, |
|
"rewards/margins": 0.16971367597579956, |
|
"rewards/rejected": -0.47631555795669556, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.828953096663662e-07, |
|
"logits/chosen": 19.924898147583008, |
|
"logits/rejected": 19.486276626586914, |
|
"logps/chosen": -437.6498107910156, |
|
"logps/rejected": -507.98040771484375, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21237564086914062, |
|
"rewards/margins": 0.23849359154701233, |
|
"rewards/rejected": -0.4508691728115082, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.786708563496001e-07, |
|
"logits/chosen": 19.466875076293945, |
|
"logits/rejected": 19.788875579833984, |
|
"logps/chosen": -494.97149658203125, |
|
"logps/rejected": -527.1760864257812, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.27766185998916626, |
|
"rewards/margins": 0.17516490817070007, |
|
"rewards/rejected": -0.4528267979621887, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.7443809042722544e-07, |
|
"logits/chosen": 18.834243774414062, |
|
"logits/rejected": 18.642332077026367, |
|
"logps/chosen": -465.34686279296875, |
|
"logps/rejected": -512.1998291015625, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.28242748975753784, |
|
"rewards/margins": 0.32754284143447876, |
|
"rewards/rejected": -0.6099702715873718, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.7019823911454807e-07, |
|
"logits/chosen": 19.293468475341797, |
|
"logits/rejected": 19.021459579467773, |
|
"logps/chosen": -548.0870971679688, |
|
"logps/rejected": -661.7999267578125, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2817661762237549, |
|
"rewards/margins": 0.36686158180236816, |
|
"rewards/rejected": -0.648627758026123, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 2.6595253168115705e-07, |
|
"logits/chosen": 19.84808921813965, |
|
"logits/rejected": 19.46322250366211, |
|
"logps/chosen": -505.37445068359375, |
|
"logps/rejected": -561.1935424804688, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.24887773394584656, |
|
"rewards/margins": 0.29243093729019165, |
|
"rewards/rejected": -0.5413086414337158, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.6170219909451967e-07, |
|
"logits/chosen": 18.073366165161133, |
|
"logits/rejected": 18.1170597076416, |
|
"logps/chosen": -528.2265625, |
|
"logps/rejected": -528.5629272460938, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.33055204153060913, |
|
"rewards/margins": 0.24000290036201477, |
|
"rewards/rejected": -0.5705549120903015, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 2.5744847366308395e-07, |
|
"logits/chosen": 18.11019515991211, |
|
"logits/rejected": 18.144580841064453, |
|
"logps/chosen": -494.4580993652344, |
|
"logps/rejected": -561.71484375, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3098324239253998, |
|
"rewards/margins": 0.20377469062805176, |
|
"rewards/rejected": -0.5136070847511292, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.5319258867899344e-07, |
|
"logits/chosen": 18.397960662841797, |
|
"logits/rejected": 18.59024429321289, |
|
"logps/chosen": -441.19732666015625, |
|
"logps/rejected": -474.4454040527344, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2723812758922577, |
|
"rewards/margins": 0.18869531154632568, |
|
"rewards/rejected": -0.4610765874385834, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 2.4893577806051536e-07, |
|
"logits/chosen": 18.157339096069336, |
|
"logits/rejected": 18.65304946899414, |
|
"logps/chosen": -463.7784118652344, |
|
"logps/rejected": -516.1624145507812, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.26962539553642273, |
|
"rewards/margins": 0.26316118240356445, |
|
"rewards/rejected": -0.5327866077423096, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.4467927599428815e-07, |
|
"logits/chosen": 18.12697982788086, |
|
"logits/rejected": 17.87653923034668, |
|
"logps/chosen": -479.36029052734375, |
|
"logps/rejected": -521.6390380859375, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3562749922275543, |
|
"rewards/margins": 0.23832616209983826, |
|
"rewards/rejected": -0.5946012139320374, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 2.4042431657749115e-07, |
|
"logits/chosen": 18.84893035888672, |
|
"logits/rejected": 18.976377487182617, |
|
"logps/chosen": -420.193359375, |
|
"logps/rejected": -480.762451171875, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.231888085603714, |
|
"rewards/margins": 0.21741366386413574, |
|
"rewards/rejected": -0.44930171966552734, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 2.3617213346003988e-07, |
|
"logits/chosen": 18.333866119384766, |
|
"logits/rejected": 18.267248153686523, |
|
"logps/chosen": -506.20098876953125, |
|
"logps/rejected": -571.8406372070312, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.32090744376182556, |
|
"rewards/margins": 0.24834957718849182, |
|
"rewards/rejected": -0.5692570209503174, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 2.319239594869112e-07, |
|
"logits/chosen": 17.52167510986328, |
|
"logits/rejected": 17.264745712280273, |
|
"logps/chosen": -436.5166015625, |
|
"logps/rejected": -520.7471923828125, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.30473703145980835, |
|
"rewards/margins": 0.257717102766037, |
|
"rewards/rejected": -0.562454104423523, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 2.2768102634070143e-07, |
|
"logits/chosen": 17.672739028930664, |
|
"logits/rejected": 17.970993041992188, |
|
"logps/chosen": -445.01165771484375, |
|
"logps/rejected": -475.51495361328125, |
|
"loss": 0.6056, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2711699604988098, |
|
"rewards/margins": 0.23058536648750305, |
|
"rewards/rejected": -0.5017553567886353, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 2.2344456418452267e-07, |
|
"logits/chosen": 17.519277572631836, |
|
"logits/rejected": 16.88127326965332, |
|
"logps/chosen": -530.1575927734375, |
|
"logps/rejected": -594.2427978515625, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4372480809688568, |
|
"rewards/margins": 0.3837299942970276, |
|
"rewards/rejected": -0.8209781646728516, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 2.1921580130533827e-07, |
|
"logits/chosen": 18.267105102539062, |
|
"logits/rejected": 18.296194076538086, |
|
"logps/chosen": -558.4171142578125, |
|
"logps/rejected": -604.7532958984375, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3732374906539917, |
|
"rewards/margins": 0.3241427540779114, |
|
"rewards/rejected": -0.6973801851272583, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 2.1499596375784279e-07, |
|
"logits/chosen": 17.248226165771484, |
|
"logits/rejected": 17.385725021362305, |
|
"logps/chosen": -491.4832458496094, |
|
"logps/rejected": -536.4549560546875, |
|
"loss": 0.6098, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3684207499027252, |
|
"rewards/margins": 0.2918558120727539, |
|
"rewards/rejected": -0.6602765321731567, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 2.1078627500898936e-07, |
|
"logits/chosen": 17.595806121826172, |
|
"logits/rejected": 17.477294921875, |
|
"logps/chosen": -474.83770751953125, |
|
"logps/rejected": -536.64013671875, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.3259069621562958, |
|
"rewards/margins": 0.24975259602069855, |
|
"rewards/rejected": -0.5756595134735107, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 2.065879555832674e-07, |
|
"logits/chosen": 17.800256729125977, |
|
"logits/rejected": 17.910017013549805, |
|
"logps/chosen": -471.83135986328125, |
|
"logps/rejected": -539.9712524414062, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.32458698749542236, |
|
"rewards/margins": 0.3115597665309906, |
|
"rewards/rejected": -0.6361468434333801, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 2.0240222270883288e-07, |
|
"logits/chosen": 18.058645248413086, |
|
"logits/rejected": 17.72833824157715, |
|
"logps/chosen": -495.13092041015625, |
|
"logps/rejected": -523.317138671875, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.36168938875198364, |
|
"rewards/margins": 0.30222177505493164, |
|
"rewards/rejected": -0.6639112234115601, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.9823028996459483e-07, |
|
"logits/chosen": 16.63167953491211, |
|
"logits/rejected": 17.25162696838379, |
|
"logps/chosen": -548.2149658203125, |
|
"logps/rejected": -542.8499755859375, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3957754969596863, |
|
"rewards/margins": 0.31186193227767944, |
|
"rewards/rejected": -0.7076374292373657, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 1.9407336692835946e-07, |
|
"logits/chosen": 17.020973205566406, |
|
"logits/rejected": 16.73345947265625, |
|
"logps/chosen": -446.87579345703125, |
|
"logps/rejected": -543.0826416015625, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3498944342136383, |
|
"rewards/margins": 0.3392466902732849, |
|
"rewards/rejected": -0.6891411542892456, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1.899326588261348e-07, |
|
"logits/chosen": 17.527660369873047, |
|
"logits/rejected": 16.82056999206543, |
|
"logps/chosen": -457.8648376464844, |
|
"logps/rejected": -546.5458984375, |
|
"loss": 0.5926, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3373507261276245, |
|
"rewards/margins": 0.3978506922721863, |
|
"rewards/rejected": -0.7352014183998108, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 1.8580936618269693e-07, |
|
"logits/chosen": 17.878108978271484, |
|
"logits/rejected": 18.693519592285156, |
|
"logps/chosen": -512.388671875, |
|
"logps/rejected": -550.9382934570312, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3437446653842926, |
|
"rewards/margins": 0.27838242053985596, |
|
"rewards/rejected": -0.6221270561218262, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.8170468447351857e-07, |
|
"logits/chosen": 16.762582778930664, |
|
"logits/rejected": 16.87929916381836, |
|
"logps/chosen": -499.5602111816406, |
|
"logps/rejected": -558.8322143554688, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4218806326389313, |
|
"rewards/margins": 0.30065587162971497, |
|
"rewards/rejected": -0.722536563873291, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 1.7761980377816284e-07, |
|
"logits/chosen": 18.27288818359375, |
|
"logits/rejected": 18.402935028076172, |
|
"logps/chosen": -531.6474609375, |
|
"logps/rejected": -601.9354248046875, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3757680356502533, |
|
"rewards/margins": 0.3616601228713989, |
|
"rewards/rejected": -0.7374281883239746, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.7355590843524053e-07, |
|
"logits/chosen": 16.844575881958008, |
|
"logits/rejected": 16.470760345458984, |
|
"logps/chosen": -515.7340087890625, |
|
"logps/rejected": -567.6885375976562, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.38387516140937805, |
|
"rewards/margins": 0.3353000283241272, |
|
"rewards/rejected": -0.7191751599311829, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.6951417669903228e-07, |
|
"logits/chosen": 17.121362686157227, |
|
"logits/rejected": 17.330198287963867, |
|
"logps/chosen": -500.6908264160156, |
|
"logps/rejected": -558.0948486328125, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.34819692373275757, |
|
"rewards/margins": 0.33231672644615173, |
|
"rewards/rejected": -0.6805136799812317, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 1.6549578039787434e-07, |
|
"logits/chosen": 17.425867080688477, |
|
"logits/rejected": 17.398069381713867, |
|
"logps/chosen": -491.4676208496094, |
|
"logps/rejected": -521.668701171875, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.37772536277770996, |
|
"rewards/margins": 0.28015801310539246, |
|
"rewards/rejected": -0.6578834652900696, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.615018845944081e-07, |
|
"logits/chosen": 18.616252899169922, |
|
"logits/rejected": 18.190723419189453, |
|
"logps/chosen": -510.36358642578125, |
|
"logps/rejected": -567.6229248046875, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.3491878807544708, |
|
"rewards/margins": 0.26081031560897827, |
|
"rewards/rejected": -0.6099982857704163, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.575336472477909e-07, |
|
"logits/chosen": 17.597698211669922, |
|
"logits/rejected": 17.078960418701172, |
|
"logps/chosen": -488.8829650878906, |
|
"logps/rejected": -548.6746826171875, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.35902029275894165, |
|
"rewards/margins": 0.33025866746902466, |
|
"rewards/rejected": -0.6892789602279663, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.5359221887796613e-07, |
|
"logits/chosen": 16.720373153686523, |
|
"logits/rejected": 16.681528091430664, |
|
"logps/chosen": -466.1805114746094, |
|
"logps/rejected": -475.7158203125, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3708762228488922, |
|
"rewards/margins": 0.19428952038288116, |
|
"rewards/rejected": -0.5651656985282898, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.4967874223209033e-07, |
|
"logits/chosen": 17.109895706176758, |
|
"logits/rejected": 17.34455108642578, |
|
"logps/chosen": -473.8462829589844, |
|
"logps/rejected": -489.8412170410156, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3177076280117035, |
|
"rewards/margins": 0.24576309323310852, |
|
"rewards/rejected": -0.563470721244812, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1.4579435195321432e-07, |
|
"logits/chosen": 17.316837310791016, |
|
"logits/rejected": 16.62727928161621, |
|
"logps/chosen": -489.8931579589844, |
|
"logps/rejected": -570.500732421875, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.42431893944740295, |
|
"rewards/margins": 0.4839262366294861, |
|
"rewards/rejected": -0.9082452058792114, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.4194017425131323e-07, |
|
"logits/chosen": 17.513019561767578, |
|
"logits/rejected": 16.158910751342773, |
|
"logps/chosen": -538.9515380859375, |
|
"logps/rejected": -655.079345703125, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.4365549683570862, |
|
"rewards/margins": 0.43610841035842896, |
|
"rewards/rejected": -0.8726633191108704, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.381173265767623e-07, |
|
"logits/chosen": 16.728803634643555, |
|
"logits/rejected": 16.60254669189453, |
|
"logps/chosen": -563.25732421875, |
|
"logps/rejected": -635.5299072265625, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5063729286193848, |
|
"rewards/margins": 0.488800585269928, |
|
"rewards/rejected": -0.995173454284668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 1.343269172963513e-07, |
|
"logits/chosen": 16.771358489990234, |
|
"logits/rejected": 16.57272720336914, |
|
"logps/chosen": -491.09466552734375, |
|
"logps/rejected": -566.2666015625, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.4542843699455261, |
|
"rewards/margins": 0.352291464805603, |
|
"rewards/rejected": -0.8065758943557739, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.3057004537193422e-07, |
|
"logits/chosen": 17.443374633789062, |
|
"logits/rejected": 17.598270416259766, |
|
"logps/chosen": -503.72674560546875, |
|
"logps/rejected": -556.9064331054688, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.41296952962875366, |
|
"rewards/margins": 0.30032414197921753, |
|
"rewards/rejected": -0.7132936716079712, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.268478000418041e-07, |
|
"logits/chosen": 16.66560935974121, |
|
"logits/rejected": 16.443384170532227, |
|
"logps/chosen": -522.2667236328125, |
|
"logps/rejected": -560.718017578125, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.39559242129325867, |
|
"rewards/margins": 0.3799566328525543, |
|
"rewards/rejected": -0.775549054145813, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 1.2316126050488782e-07, |
|
"logits/chosen": 16.437084197998047, |
|
"logits/rejected": 16.211811065673828, |
|
"logps/chosen": -542.4644775390625, |
|
"logps/rejected": -548.2205810546875, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.4820192754268646, |
|
"rewards/margins": 0.30185410380363464, |
|
"rewards/rejected": -0.7838733792304993, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.1951149560785166e-07, |
|
"logits/chosen": 15.954843521118164, |
|
"logits/rejected": 15.96337890625, |
|
"logps/chosen": -590.3001708984375, |
|
"logps/rejected": -672.43505859375, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5148499608039856, |
|
"rewards/margins": 0.5929259061813354, |
|
"rewards/rejected": -1.1077758073806763, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 1.1589956353520833e-07, |
|
"logits/chosen": 17.03178596496582, |
|
"logits/rejected": 17.020648956298828, |
|
"logps/chosen": -431.1114807128906, |
|
"logps/rejected": -483.35211181640625, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.30041372776031494, |
|
"rewards/margins": 0.21424512565135956, |
|
"rewards/rejected": -0.5146588683128357, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.1232651150251504e-07, |
|
"logits/chosen": 15.323884963989258, |
|
"logits/rejected": 15.189486503601074, |
|
"logps/chosen": -494.89739990234375, |
|
"logps/rejected": -535.5572509765625, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.49184292554855347, |
|
"rewards/margins": 0.30568668246269226, |
|
"rewards/rejected": -0.7975295782089233, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.0879337545275164e-07, |
|
"logits/chosen": 15.969990730285645, |
|
"logits/rejected": 15.853551864624023, |
|
"logps/chosen": -559.8504028320312, |
|
"logps/rejected": -637.55810546875, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.49292922019958496, |
|
"rewards/margins": 0.4328843653202057, |
|
"rewards/rejected": -0.9258135557174683, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 1.0530117975596789e-07, |
|
"logits/chosen": 16.58124351501465, |
|
"logits/rejected": 15.693090438842773, |
|
"logps/chosen": -526.1766967773438, |
|
"logps/rejected": -569.5706787109375, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.37696436047554016, |
|
"rewards/margins": 0.4419701099395752, |
|
"rewards/rejected": -0.8189345598220825, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.0185093691228533e-07, |
|
"logits/chosen": 16.440006256103516, |
|
"logits/rejected": 15.588663101196289, |
|
"logps/chosen": -476.3081970214844, |
|
"logps/rejected": -536.4998168945312, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.40554875135421753, |
|
"rewards/margins": 0.4396079182624817, |
|
"rewards/rejected": -0.8451566696166992, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 9.844364725834056e-08, |
|
"logits/chosen": 16.686330795288086, |
|
"logits/rejected": 16.508487701416016, |
|
"logps/chosen": -488.6481018066406, |
|
"logps/rejected": -532.7208251953125, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.3779573440551758, |
|
"rewards/margins": 0.32084184885025024, |
|
"rewards/rejected": -0.6987992525100708, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 9.508029867725578e-08, |
|
"logits/chosen": 15.356477737426758, |
|
"logits/rejected": 15.598932266235352, |
|
"logps/chosen": -435.1556701660156, |
|
"logps/rejected": -488.14666748046875, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3525981307029724, |
|
"rewards/margins": 0.313102662563324, |
|
"rewards/rejected": -0.6657007932662964, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 9.176186631221958e-08, |
|
"logits/chosen": 15.836163520812988, |
|
"logits/rejected": 15.421684265136719, |
|
"logps/chosen": -503.1708984375, |
|
"logps/rejected": -574.23779296875, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5309593081474304, |
|
"rewards/margins": 0.6058937907218933, |
|
"rewards/rejected": -1.1368530988693237, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 8.848931228376136e-08, |
|
"logits/chosen": 15.552096366882324, |
|
"logits/rejected": 15.042068481445312, |
|
"logps/chosen": -465.777099609375, |
|
"logps/rejected": -539.7342529296875, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.42118939757347107, |
|
"rewards/margins": 0.3839268982410431, |
|
"rewards/rejected": -0.8051162958145142, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 8.526358541080172e-08, |
|
"logits/chosen": 16.235004425048828, |
|
"logits/rejected": 16.21014976501465, |
|
"logps/chosen": -497.1258850097656, |
|
"logps/rejected": -547.8870849609375, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.4342379570007324, |
|
"rewards/margins": 0.3557444214820862, |
|
"rewards/rejected": -0.7899823784828186, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 8.208562093555887e-08, |
|
"logits/chosen": 15.568893432617188, |
|
"logits/rejected": 15.181528091430664, |
|
"logps/chosen": -463.81097412109375, |
|
"logps/rejected": -515.5152587890625, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4401473104953766, |
|
"rewards/margins": 0.3010689616203308, |
|
"rewards/rejected": -0.7412161827087402, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 7.895634025239242e-08, |
|
"logits/chosen": 15.460016250610352, |
|
"logits/rejected": 14.932809829711914, |
|
"logps/chosen": -470.60552978515625, |
|
"logps/rejected": -520.3035278320312, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.4876958727836609, |
|
"rewards/margins": 0.33743318915367126, |
|
"rewards/rejected": -0.8251290321350098, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 7.587665064066085e-08, |
|
"logits/chosen": 16.43811798095703, |
|
"logits/rejected": 16.288951873779297, |
|
"logps/chosen": -470.00360107421875, |
|
"logps/rejected": -557.7108764648438, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4131472706794739, |
|
"rewards/margins": 0.3947201371192932, |
|
"rewards/rejected": -0.8078674077987671, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 7.284744500167217e-08, |
|
"logits/chosen": 14.962780952453613, |
|
"logits/rejected": 14.988690376281738, |
|
"logps/chosen": -490.5482482910156, |
|
"logps/rejected": -576.4386596679688, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5098827481269836, |
|
"rewards/margins": 0.4253455102443695, |
|
"rewards/rejected": -0.9352282285690308, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 6.986960159980326e-08, |
|
"logits/chosen": 15.525604248046875, |
|
"logits/rejected": 15.047343254089355, |
|
"logps/chosen": -457.3753967285156, |
|
"logps/rejected": -528.1970825195312, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.48724955320358276, |
|
"rewards/margins": 0.36003851890563965, |
|
"rewards/rejected": -0.8472881317138672, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.694398380786245e-08, |
|
"logits/chosen": 15.537455558776855, |
|
"logits/rejected": 16.031370162963867, |
|
"logps/chosen": -470.35986328125, |
|
"logps/rejected": -515.3436279296875, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.47040343284606934, |
|
"rewards/margins": 0.26535606384277344, |
|
"rewards/rejected": -0.735759437084198, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 6.40714398567701e-08, |
|
"logits/chosen": 15.932138442993164, |
|
"logits/rejected": 16.016658782958984, |
|
"logps/chosen": -534.5318603515625, |
|
"logps/rejected": -534.1517944335938, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4990972578525543, |
|
"rewards/margins": 0.2680392861366272, |
|
"rewards/rejected": -0.7671364545822144, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 6.125280258962872e-08, |
|
"logits/chosen": 15.469932556152344, |
|
"logits/rejected": 15.58923053741455, |
|
"logps/chosen": -482.654541015625, |
|
"logps/rejected": -505.08673095703125, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.46863317489624023, |
|
"rewards/margins": 0.312537282705307, |
|
"rewards/rejected": -0.7811704277992249, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 5.848888922025552e-08, |
|
"logits/chosen": 16.112123489379883, |
|
"logits/rejected": 15.844825744628906, |
|
"logps/chosen": -530.7129516601562, |
|
"logps/rejected": -626.224365234375, |
|
"loss": 0.5729, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.5626592636108398, |
|
"rewards/margins": 0.5355942249298096, |
|
"rewards/rejected": -1.0982534885406494, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 5.57805010962451e-08, |
|
"logits/chosen": 16.61453628540039, |
|
"logits/rejected": 16.19707679748535, |
|
"logps/chosen": -539.4405517578125, |
|
"logps/rejected": -595.6859130859375, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.5616611242294312, |
|
"rewards/margins": 0.3783496618270874, |
|
"rewards/rejected": -0.9400107264518738, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 5.3128423466633624e-08, |
|
"logits/chosen": 15.611248970031738, |
|
"logits/rejected": 15.253950119018555, |
|
"logps/chosen": -500.92755126953125, |
|
"logps/rejected": -547.9854125976562, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.456888347864151, |
|
"rewards/margins": 0.2701273262500763, |
|
"rewards/rejected": -0.7270156145095825, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.053342525422918e-08, |
|
"logits/chosen": 15.895495414733887, |
|
"logits/rejected": 16.411907196044922, |
|
"logps/chosen": -491.34246826171875, |
|
"logps/rejected": -502.4891052246094, |
|
"loss": 0.5779, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.42431122064590454, |
|
"rewards/margins": 0.2600492835044861, |
|
"rewards/rejected": -0.6843605041503906, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 4.7996258832676716e-08, |
|
"logits/chosen": 15.992956161499023, |
|
"logits/rejected": 15.863334655761719, |
|
"logps/chosen": -570.5872192382812, |
|
"logps/rejected": -589.7510375976562, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.5297167897224426, |
|
"rewards/margins": 0.35133522748947144, |
|
"rewards/rejected": -0.8810520172119141, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 4.551765980832059e-08, |
|
"logits/chosen": 15.435505867004395, |
|
"logits/rejected": 15.565594673156738, |
|
"logps/chosen": -519.7587280273438, |
|
"logps/rejected": -555.607421875, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.44829368591308594, |
|
"rewards/margins": 0.4504165053367615, |
|
"rewards/rejected": -0.8987102508544922, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 4.309834680692831e-08, |
|
"logits/chosen": 15.550143241882324, |
|
"logits/rejected": 15.454347610473633, |
|
"logps/chosen": -535.6947021484375, |
|
"logps/rejected": -586.8497314453125, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5995014905929565, |
|
"rewards/margins": 0.37678369879722595, |
|
"rewards/rejected": -0.9762851595878601, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 4.07390212653379e-08, |
|
"logits/chosen": 15.492868423461914, |
|
"logits/rejected": 15.356470108032227, |
|
"logps/chosen": -533.2853393554688, |
|
"logps/rejected": -584.7105102539062, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.5474352836608887, |
|
"rewards/margins": 0.6199637651443481, |
|
"rewards/rejected": -1.1673991680145264, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 3.844036722808899e-08, |
|
"logits/chosen": 16.752445220947266, |
|
"logits/rejected": 16.3040771484375, |
|
"logps/chosen": -484.51416015625, |
|
"logps/rejected": -536.3505249023438, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.43505972623825073, |
|
"rewards/margins": 0.2838330864906311, |
|
"rewards/rejected": -0.7188928127288818, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.620305114909597e-08, |
|
"logits/chosen": 16.092082977294922, |
|
"logits/rejected": 16.506916046142578, |
|
"logps/chosen": -589.302490234375, |
|
"logps/rejected": -597.6064453125, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5278688669204712, |
|
"rewards/margins": 0.34603801369667053, |
|
"rewards/rejected": -0.8739069104194641, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 3.4027721698421466e-08, |
|
"logits/chosen": 14.892633438110352, |
|
"logits/rejected": 15.243593215942383, |
|
"logps/chosen": -518.6754150390625, |
|
"logps/rejected": -537.8123779296875, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5415439605712891, |
|
"rewards/margins": 0.358787477016449, |
|
"rewards/rejected": -0.9003314971923828, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 3.191500957420626e-08, |
|
"logits/chosen": 16.676576614379883, |
|
"logits/rejected": 16.07375144958496, |
|
"logps/chosen": -466.3976135253906, |
|
"logps/rejected": -529.3568115234375, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4896460175514221, |
|
"rewards/margins": 0.3948792517185211, |
|
"rewards/rejected": -0.8845251798629761, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 2.986552731980932e-08, |
|
"logits/chosen": 15.437856674194336, |
|
"logits/rejected": 15.470430374145508, |
|
"logps/chosen": -502.57696533203125, |
|
"logps/rejected": -518.8566284179688, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4669625759124756, |
|
"rewards/margins": 0.27933841943740845, |
|
"rewards/rejected": -0.7463010549545288, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 2.787986914621182e-08, |
|
"logits/chosen": 16.045337677001953, |
|
"logits/rejected": 15.906814575195312, |
|
"logps/chosen": -537.7979125976562, |
|
"logps/rejected": -605.097900390625, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.5049812197685242, |
|
"rewards/margins": 0.3923494517803192, |
|
"rewards/rejected": -0.897330641746521, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 2.5958610759736126e-08, |
|
"logits/chosen": 15.55151081085205, |
|
"logits/rejected": 14.9086332321167, |
|
"logps/chosen": -501.61322021484375, |
|
"logps/rejected": -578.8296508789062, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.47102347016334534, |
|
"rewards/margins": 0.46625399589538574, |
|
"rewards/rejected": -0.9372774958610535, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 2.410230919513023e-08, |
|
"logits/chosen": 14.670611381530762, |
|
"logits/rejected": 13.935040473937988, |
|
"logps/chosen": -508.17877197265625, |
|
"logps/rejected": -615.4990234375, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.4717566967010498, |
|
"rewards/margins": 0.4692964553833008, |
|
"rewards/rejected": -0.9410530924797058, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 2.231150265406512e-08, |
|
"logits/chosen": 14.696261405944824, |
|
"logits/rejected": 14.434527397155762, |
|
"logps/chosen": -389.96209716796875, |
|
"logps/rejected": -480.9751892089844, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.4124878942966461, |
|
"rewards/margins": 0.5330362319946289, |
|
"rewards/rejected": -0.9455240964889526, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.058671034909301e-08, |
|
"logits/chosen": 15.37951374053955, |
|
"logits/rejected": 15.185888290405273, |
|
"logps/chosen": -507.5043029785156, |
|
"logps/rejected": -560.7593994140625, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5982163548469543, |
|
"rewards/margins": 0.356797456741333, |
|
"rewards/rejected": -0.9550137519836426, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 1.892843235311059e-08, |
|
"logits/chosen": 14.205958366394043, |
|
"logits/rejected": 15.277796745300293, |
|
"logps/chosen": -525.5435791015625, |
|
"logps/rejected": -556.5437622070312, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.57078617811203, |
|
"rewards/margins": 0.3275993764400482, |
|
"rewards/rejected": -0.8983856439590454, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 1.733714945437212e-08, |
|
"logits/chosen": 15.41334056854248, |
|
"logits/rejected": 15.21619701385498, |
|
"logps/chosen": -524.0565795898438, |
|
"logps/rejected": -608.4319458007812, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.5062750577926636, |
|
"rewards/margins": 0.621990442276001, |
|
"rewards/rejected": -1.128265619277954, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 1.581332301709304e-08, |
|
"logits/chosen": 14.983938217163086, |
|
"logits/rejected": 14.970956802368164, |
|
"logps/chosen": -502.68621826171875, |
|
"logps/rejected": -581.88720703125, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.522268533706665, |
|
"rewards/margins": 0.4709092974662781, |
|
"rewards/rejected": -0.9931778907775879, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 1.4357394847686027e-08, |
|
"logits/chosen": 14.20927619934082, |
|
"logits/rejected": 14.463226318359375, |
|
"logps/chosen": -503.64678955078125, |
|
"logps/rejected": -510.6846618652344, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5376115441322327, |
|
"rewards/margins": 0.3068149983882904, |
|
"rewards/rejected": -0.8444265127182007, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 1.2969787066666654e-08, |
|
"logits/chosen": 15.562335014343262, |
|
"logits/rejected": 15.038406372070312, |
|
"logps/chosen": -493.4950256347656, |
|
"logps/rejected": -572.0565185546875, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5538553595542908, |
|
"rewards/margins": 0.42482686042785645, |
|
"rewards/rejected": -0.9786823391914368, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 1.1650901986267364e-08, |
|
"logits/chosen": 15.639094352722168, |
|
"logits/rejected": 14.934486389160156, |
|
"logps/chosen": -545.4066162109375, |
|
"logps/rejected": -635.1442260742188, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.5437031984329224, |
|
"rewards/margins": 0.5424902439117432, |
|
"rewards/rejected": -1.086193323135376, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 1.0401121993794032e-08, |
|
"logits/chosen": 14.307469367980957, |
|
"logits/rejected": 13.92272663116455, |
|
"logps/chosen": -485.2925720214844, |
|
"logps/rejected": -516.3497924804688, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.42878469824790955, |
|
"rewards/margins": 0.43366050720214844, |
|
"rewards/rejected": -0.8624452352523804, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 9.220809440759592e-09, |
|
"logits/chosen": 15.5972900390625, |
|
"logits/rejected": 15.571401596069336, |
|
"logps/chosen": -468.67254638671875, |
|
"logps/rejected": -490.158447265625, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4918997883796692, |
|
"rewards/margins": 0.321148544549942, |
|
"rewards/rejected": -0.813048243522644, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 8.1103065378266e-09, |
|
"logits/chosen": 15.903124809265137, |
|
"logits/rejected": 15.235029220581055, |
|
"logps/chosen": -504.0237731933594, |
|
"logps/rejected": -586.244140625, |
|
"loss": 0.5656, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.49678725004196167, |
|
"rewards/margins": 0.39936769008636475, |
|
"rewards/rejected": -0.8961549997329712, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 7.0699352555893825e-09, |
|
"logits/chosen": 14.437850952148438, |
|
"logits/rejected": 14.658166885375977, |
|
"logps/chosen": -552.8507690429688, |
|
"logps/rejected": -595.6864013671875, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5628484487533569, |
|
"rewards/margins": 0.3715595602989197, |
|
"rewards/rejected": -0.9344080090522766, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 6.099997231224452e-09, |
|
"logits/chosen": 14.984025955200195, |
|
"logits/rejected": 14.939895629882812, |
|
"logps/chosen": -483.92315673828125, |
|
"logps/rejected": -557.912841796875, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5138859152793884, |
|
"rewards/margins": 0.3408345878124237, |
|
"rewards/rejected": -0.8547204732894897, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 5.200773681035969e-09, |
|
"logits/chosen": 14.894918441772461, |
|
"logits/rejected": 15.099899291992188, |
|
"logps/chosen": -491.946044921875, |
|
"logps/rejected": -533.989501953125, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.43056002259254456, |
|
"rewards/margins": 0.3058411478996277, |
|
"rewards/rejected": -0.7364012002944946, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 4.372525318922266e-09, |
|
"logits/chosen": 15.696123123168945, |
|
"logits/rejected": 14.719775199890137, |
|
"logps/chosen": -542.4873046875, |
|
"logps/rejected": -633.1751708984375, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.517546534538269, |
|
"rewards/margins": 0.4977279305458069, |
|
"rewards/rejected": -1.0152745246887207, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 3.6154922807863643e-09, |
|
"logits/chosen": 14.244386672973633, |
|
"logits/rejected": 13.889488220214844, |
|
"logps/chosen": -456.33135986328125, |
|
"logps/rejected": -569.2753295898438, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5168919563293457, |
|
"rewards/margins": 0.4235960841178894, |
|
"rewards/rejected": -0.9404880404472351, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.929894054912896e-09, |
|
"logits/chosen": 15.562467575073242, |
|
"logits/rejected": 15.46942138671875, |
|
"logps/chosen": -548.723876953125, |
|
"logps/rejected": -627.305908203125, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4751965403556824, |
|
"rewards/margins": 0.44715824723243713, |
|
"rewards/rejected": -0.9223548173904419, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.3159294183312804e-09, |
|
"logits/chosen": 15.579564094543457, |
|
"logits/rejected": 15.467860221862793, |
|
"logps/chosen": -508.56231689453125, |
|
"logps/rejected": -582.9691162109375, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.48075562715530396, |
|
"rewards/margins": 0.35179024934768677, |
|
"rewards/rejected": -0.8325458765029907, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 1.7737763791840499e-09, |
|
"logits/chosen": 15.496403694152832, |
|
"logits/rejected": 14.966221809387207, |
|
"logps/chosen": -438.64410400390625, |
|
"logps/rejected": -536.9528198242188, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.4091852605342865, |
|
"rewards/margins": 0.39350926876068115, |
|
"rewards/rejected": -0.8026946187019348, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 1.3035921251163263e-09, |
|
"logits/chosen": 15.670194625854492, |
|
"logits/rejected": 15.499285697937012, |
|
"logps/chosen": -510.4190979003906, |
|
"logps/rejected": -574.4082641601562, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.49581804871559143, |
|
"rewards/margins": 0.3804387152194977, |
|
"rewards/rejected": -0.8762567639350891, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 9.055129777021663e-10, |
|
"logits/chosen": 14.768771171569824, |
|
"logits/rejected": 14.854043960571289, |
|
"logps/chosen": -461.849609375, |
|
"logps/rejected": -519.3204345703125, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.42806243896484375, |
|
"rewards/margins": 0.41432294249534607, |
|
"rewards/rejected": -0.842385470867157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 5.796543529205389e-10, |
|
"logits/chosen": 15.0086030960083, |
|
"logits/rejected": 15.746871948242188, |
|
"logps/chosen": -584.0777587890625, |
|
"logps/rejected": -618.7239379882812, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6217775940895081, |
|
"rewards/margins": 0.3591920733451843, |
|
"rewards/rejected": -0.9809697270393372, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 3.261107276925079e-10, |
|
"logits/chosen": 15.121289253234863, |
|
"logits/rejected": 15.262140274047852, |
|
"logps/chosen": -490.37420654296875, |
|
"logps/rejected": -545.4072265625, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.45917099714279175, |
|
"rewards/margins": 0.5055267214775085, |
|
"rewards/rejected": -0.9646978378295898, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 1.4495561248931144e-10, |
|
"logits/chosen": 15.449140548706055, |
|
"logits/rejected": 16.043636322021484, |
|
"logps/chosen": -548.4398803710938, |
|
"logps/rejected": -583.19287109375, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.5652498006820679, |
|
"rewards/margins": 0.3585050702095032, |
|
"rewards/rejected": -0.923754870891571, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 3.6241530019326397e-11, |
|
"logits/chosen": 15.511439323425293, |
|
"logits/rejected": 15.162747383117676, |
|
"logps/chosen": -504.12799072265625, |
|
"logps/rejected": -518.1746215820312, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5089441537857056, |
|
"rewards/margins": 0.29728806018829346, |
|
"rewards/rejected": -0.8062320947647095, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 14.901571273803711, |
|
"logits/rejected": 14.849283218383789, |
|
"logps/chosen": -539.0538330078125, |
|
"logps/rejected": -590.5074462890625, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.6097260117530823, |
|
"rewards/margins": 0.40109339356422424, |
|
"rewards/rejected": -1.0108195543289185, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"step": 2050, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6235497470890603, |
|
"train_runtime": 23319.902, |
|
"train_samples_per_second": 8.476, |
|
"train_steps_per_second": 0.088 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|