{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998691442030882,
  "eval_steps": 500,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.7967724800109863,
      "logits/rejected": -2.805750846862793,
      "logps/chosen": -270.3558654785156,
      "logps/rejected": -243.22396850585938,
      "loss": 0.5,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.0416666666666667e-06,
      "logits/chosen": -2.671762466430664,
      "logits/rejected": -2.713010549545288,
      "logps/chosen": -228.7700958251953,
      "logps/rejected": -213.14617919921875,
      "loss": 0.5,
      "rewards/accuracies": 0.3680555522441864,
      "rewards/chosen": 6.792499334551394e-06,
      "rewards/margins": -1.524862182122888e-05,
      "rewards/rejected": 2.204112388426438e-05,
      "step": 10
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.0833333333333334e-06,
      "logits/chosen": -2.7296969890594482,
      "logits/rejected": -2.7327723503112793,
      "logps/chosen": -242.0876922607422,
      "logps/rejected": -224.0583038330078,
      "loss": 0.5,
      "rewards/accuracies": 0.4781250059604645,
      "rewards/chosen": 0.00022381536837201566,
      "rewards/margins": 7.298699347302318e-05,
      "rewards/rejected": 0.0001508283894509077,
      "step": 20
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.7316231727600098,
      "logits/rejected": -2.7253520488739014,
      "logps/chosen": -253.94223022460938,
      "logps/rejected": -235.99728393554688,
      "loss": 0.4999,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.000914489384740591,
      "rewards/margins": 0.0003135653678327799,
      "rewards/rejected": 0.0006009239004924893,
      "step": 30
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -2.6916933059692383,
      "logits/rejected": -2.690849781036377,
      "logps/chosen": -252.69357299804688,
      "logps/rejected": -234.3971710205078,
      "loss": 0.4998,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.0020142460707575083,
      "rewards/margins": 0.0008797285263426602,
      "rewards/rejected": 0.0011345174862071872,
      "step": 40
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.999731868769027e-06,
      "logits/chosen": -2.6373677253723145,
      "logits/rejected": -2.641324281692505,
      "logps/chosen": -224.9910430908203,
      "logps/rejected": -220.6387939453125,
      "loss": 0.4996,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.003733439836651087,
      "rewards/margins": 0.0014786701649427414,
      "rewards/rejected": 0.0022547696717083454,
      "step": 50
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.9903533134293035e-06,
      "logits/chosen": -2.6569573879241943,
      "logits/rejected": -2.6791322231292725,
      "logps/chosen": -221.05709838867188,
      "logps/rejected": -203.5032501220703,
      "loss": 0.4994,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.005443854723125696,
      "rewards/margins": 0.0024864792358130217,
      "rewards/rejected": 0.0029573754873126745,
      "step": 60
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.967625656594782e-06,
      "logits/chosen": -2.707890748977661,
      "logits/rejected": -2.6958038806915283,
      "logps/chosen": -227.9226531982422,
      "logps/rejected": -209.75619506835938,
      "loss": 0.4992,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": 0.006226530764251947,
      "rewards/margins": 0.0026825761888176203,
      "rewards/rejected": 0.0035439543426036835,
      "step": 70
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.93167072587771e-06,
      "logits/chosen": -2.691779375076294,
      "logits/rejected": -2.6711440086364746,
      "logps/chosen": -218.3474578857422,
      "logps/rejected": -207.0499725341797,
      "loss": 0.499,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": 0.007510344497859478,
      "rewards/margins": 0.003996443003416061,
      "rewards/rejected": 0.0035139017272740602,
      "step": 80
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368549e-06,
      "logits/chosen": -2.6946892738342285,
      "logits/rejected": -2.6706433296203613,
      "logps/chosen": -233.44540405273438,
      "logps/rejected": -225.4489288330078,
      "loss": 0.4988,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.007306075654923916,
      "rewards/margins": 0.004931028466671705,
      "rewards/rejected": 0.002375046955421567,
      "step": 90
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.8209198325401815e-06,
      "logits/chosen": -2.741804838180542,
      "logits/rejected": -2.722029447555542,
      "logps/chosen": -244.57785034179688,
      "logps/rejected": -229.181884765625,
      "loss": 0.4986,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.00877557136118412,
      "rewards/margins": 0.007159523665904999,
      "rewards/rejected": 0.0016160461818799376,
      "step": 100
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.746717530629565e-06,
      "logits/chosen": -2.6642231941223145,
      "logits/rejected": -2.682945966720581,
      "logps/chosen": -227.15109252929688,
      "logps/rejected": -215.261474609375,
      "loss": 0.4986,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.007662790361791849,
      "rewards/margins": 0.007442783564329147,
      "rewards/rejected": 0.00022000684111844748,
      "step": 110
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.660472094042121e-06,
      "logits/chosen": -2.6481804847717285,
      "logits/rejected": -2.6557509899139404,
      "logps/chosen": -243.96139526367188,
      "logps/rejected": -208.84384155273438,
      "loss": 0.4984,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.007970081642270088,
      "rewards/margins": 0.007513949181884527,
      "rewards/rejected": 0.00045613135444000363,
      "step": 120
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.5626458262912745e-06,
      "logits/chosen": -2.607055187225342,
      "logits/rejected": -2.6165382862091064,
      "logps/chosen": -214.7068634033203,
      "logps/rejected": -214.4553680419922,
      "loss": 0.4985,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.007824478670954704,
      "rewards/margins": 0.007803040556609631,
      "rewards/rejected": 2.143829988199286e-05,
      "step": 130
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.453763107901676e-06,
      "logits/chosen": -2.662113666534424,
      "logits/rejected": -2.6899476051330566,
      "logps/chosen": -230.07870483398438,
      "logps/rejected": -218.9852752685547,
      "loss": 0.4981,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.008939255960285664,
      "rewards/margins": 0.00983688049018383,
      "rewards/rejected": -0.000897624995559454,
      "step": 140
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.33440758555951e-06,
      "logits/chosen": -2.6915247440338135,
      "logits/rejected": -2.684671640396118,
      "logps/chosen": -239.7102813720703,
      "logps/rejected": -196.85520935058594,
      "loss": 0.498,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.006214095279574394,
      "rewards/margins": 0.008828171528875828,
      "rewards/rejected": -0.0026140757836401463,
      "step": 150
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.205219043576955e-06,
      "logits/chosen": -2.6351287364959717,
      "logits/rejected": -2.6126668453216553,
      "logps/chosen": -217.2763214111328,
      "logps/rejected": -232.38308715820312,
      "loss": 0.4977,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.005867967382073402,
      "rewards/margins": 0.012363599613308907,
      "rewards/rejected": -0.006495633628219366,
      "step": 160
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.066889974440757e-06,
      "logits/chosen": -2.651954412460327,
      "logits/rejected": -2.6270413398742676,
      "logps/chosen": -230.48202514648438,
      "logps/rejected": -207.2578887939453,
      "loss": 0.4981,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.005082262679934502,
      "rewards/margins": 0.009124400094151497,
      "rewards/rejected": -0.004042136482894421,
      "step": 170
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.92016186682789e-06,
      "logits/chosen": -2.623335838317871,
      "logits/rejected": -2.6311707496643066,
      "logps/chosen": -240.3880157470703,
      "logps/rejected": -239.96383666992188,
      "loss": 0.4978,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": 0.004731752909719944,
      "rewards/margins": 0.013921832665801048,
      "rewards/rejected": -0.009190080687403679,
      "step": 180
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.7658212309857576e-06,
      "logits/chosen": -2.6556308269500732,
      "logits/rejected": -2.635357618331909,
      "logps/chosen": -227.9666748046875,
      "logps/rejected": -235.2104949951172,
      "loss": 0.4976,
      "rewards/accuracies": 0.6343749761581421,
      "rewards/chosen": 0.005550178233534098,
      "rewards/margins": 0.015345620922744274,
      "rewards/rejected": -0.009795443154871464,
      "step": 190
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.604695382782159e-06,
      "logits/chosen": -2.613764524459839,
      "logits/rejected": -2.5981905460357666,
      "logps/chosen": -244.93527221679688,
      "logps/rejected": -219.70816040039062,
      "loss": 0.4975,
      "rewards/accuracies": 0.621874988079071,
      "rewards/chosen": 0.005905141122639179,
      "rewards/margins": 0.01656787097454071,
      "rewards/rejected": -0.010662728920578957,
      "step": 200
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.437648009023905e-06,
      "logits/chosen": -2.6281652450561523,
      "logits/rejected": -2.6158878803253174,
      "logps/chosen": -230.68093872070312,
      "logps/rejected": -229.5253448486328,
      "loss": 0.4974,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": 0.0013032301794737577,
      "rewards/margins": 0.012972685508430004,
      "rewards/rejected": -0.011669456958770752,
      "step": 210
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.265574537815398e-06,
      "logits/chosen": -2.6179089546203613,
      "logits/rejected": -2.600205898284912,
      "logps/chosen": -220.5909881591797,
      "logps/rejected": -219.4180450439453,
      "loss": 0.4975,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.00289526185952127,
      "rewards/margins": 0.01316472701728344,
      "rewards/rejected": -0.0102694658562541,
      "step": 220
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.089397338773569e-06,
      "logits/chosen": -2.5429725646972656,
      "logits/rejected": -2.5193886756896973,
      "logps/chosen": -219.8972930908203,
      "logps/rejected": -224.34597778320312,
      "loss": 0.4974,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.001055281376466155,
      "rewards/margins": 0.015370063483715057,
      "rewards/rejected": -0.014314780943095684,
      "step": 230
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9100607788275547e-06,
      "logits/chosen": -2.5410211086273193,
      "logits/rejected": -2.5228562355041504,
      "logps/chosen": -221.77243041992188,
      "logps/rejected": -222.72091674804688,
      "loss": 0.4972,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.0065743401646614075,
      "rewards/margins": 0.014264288358390331,
      "rewards/rejected": -0.020838629454374313,
      "step": 240
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.72852616010567e-06,
      "logits/chosen": -2.5719101428985596,
      "logits/rejected": -2.5223493576049805,
      "logps/chosen": -226.51950073242188,
      "logps/rejected": -222.7196502685547,
      "loss": 0.4969,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": -0.0035572790075093508,
      "rewards/margins": 0.023138266056776047,
      "rewards/rejected": -0.02669554576277733,
      "step": 250
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.5457665670441937e-06,
      "logits/chosen": -2.6053082942962646,
      "logits/rejected": -2.5733938217163086,
      "logps/chosen": -249.0303955078125,
      "logps/rejected": -240.6870574951172,
      "loss": 0.4962,
      "rewards/accuracies": 0.6156250238418579,
      "rewards/chosen": -0.005832755006849766,
      "rewards/margins": 0.026431718841195107,
      "rewards/rejected": -0.03226447105407715,
      "step": 260
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.3627616503391813e-06,
      "logits/chosen": -2.6097915172576904,
      "logits/rejected": -2.565622329711914,
      "logps/chosen": -272.3103942871094,
      "logps/rejected": -283.4029846191406,
      "loss": 0.4962,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.009318219497799873,
      "rewards/margins": 0.03199902921915054,
      "rewards/rejected": -0.041317250579595566,
      "step": 270
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1804923757009885e-06,
      "logits/chosen": -2.5360941886901855,
      "logits/rejected": -2.502972364425659,
      "logps/chosen": -253.5753173828125,
      "logps/rejected": -242.55581665039062,
      "loss": 0.4965,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.025225955992937088,
      "rewards/margins": 0.025721151381731033,
      "rewards/rejected": -0.05094710737466812,
      "step": 280
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9999357655598894e-06,
      "logits/chosen": -2.5319244861602783,
      "logits/rejected": -2.465296983718872,
      "logps/chosen": -268.68438720703125,
      "logps/rejected": -286.90582275390625,
      "loss": 0.4959,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.03240751475095749,
      "rewards/margins": 0.035424619913101196,
      "rewards/rejected": -0.06783213466405869,
      "step": 290
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089576e-06,
      "logits/chosen": -2.4394478797912598,
      "logits/rejected": -2.4182257652282715,
      "logps/chosen": -272.16973876953125,
      "logps/rejected": -293.65020751953125,
      "loss": 0.4947,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.03661135956645012,
      "rewards/margins": 0.03990130499005318,
      "rewards/rejected": -0.07651267200708389,
      "step": 300
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.647817538357072e-06,
      "logits/chosen": -2.297726631164551,
      "logits/rejected": -2.2101216316223145,
      "logps/chosen": -390.9844665527344,
      "logps/rejected": -431.4691467285156,
      "loss": 0.4919,
      "rewards/accuracies": 0.528124988079071,
      "rewards/chosen": -0.15482836961746216,
      "rewards/margins": 0.07953239977359772,
      "rewards/rejected": -0.23436078429222107,
      "step": 310
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4781433892011132e-06,
      "logits/chosen": -1.8453972339630127,
      "logits/rejected": -1.7317909002304077,
      "logps/chosen": -681.6414184570312,
      "logps/rejected": -987.8673706054688,
      "loss": 0.4842,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.4435897767543793,
      "rewards/margins": 0.3242936134338379,
      "rewards/rejected": -0.7678834199905396,
      "step": 320
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135999e-06,
      "logits/chosen": -1.9067537784576416,
      "logits/rejected": -1.8014500141143799,
      "logps/chosen": -885.9420166015625,
      "logps/rejected": -1086.572509765625,
      "loss": 0.482,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.6387772560119629,
      "rewards/margins": 0.23005250096321106,
      "rewards/rejected": -0.8688297271728516,
      "step": 330
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.1561076868822756e-06,
      "logits/chosen": -2.052692413330078,
      "logits/rejected": -1.9470455646514893,
      "logps/chosen": -739.042236328125,
      "logps/rejected": -992.4461059570312,
      "loss": 0.4809,
      "rewards/accuracies": 0.559374988079071,
      "rewards/chosen": -0.49481409788131714,
      "rewards/margins": 0.2876318097114563,
      "rewards/rejected": -0.7824459075927734,
      "step": 340
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0054723495346484e-06,
      "logits/chosen": -1.9866969585418701,
      "logits/rejected": -1.8636195659637451,
      "logps/chosen": -883.5921630859375,
      "logps/rejected": -1097.7490234375,
      "loss": 0.4832,
      "rewards/accuracies": 0.5406249761581421,
      "rewards/chosen": -0.658557116985321,
      "rewards/margins": 0.2439090460538864,
      "rewards/rejected": -0.9024661779403687,
      "step": 350
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367876e-07,
      "logits/chosen": -2.103574275970459,
      "logits/rejected": -1.9690395593643188,
      "logps/chosen": -854.9332885742188,
      "logps/rejected": -1295.6673583984375,
      "loss": 0.4805,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.6121799945831299,
      "rewards/margins": 0.4702150821685791,
      "rewards/rejected": -1.082395076751709,
      "step": 360
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.289996455765749e-07,
      "logits/chosen": -2.1500675678253174,
      "logits/rejected": -2.044593572616577,
      "logps/chosen": -781.8174438476562,
      "logps/rejected": -1134.3828125,
      "loss": 0.4804,
      "rewards/accuracies": 0.49687498807907104,
      "rewards/chosen": -0.5213514566421509,
      "rewards/margins": 0.39176231622695923,
      "rewards/rejected": -0.9131137132644653,
      "step": 370
    },
    {
      "epoch": 0.8,
      "learning_rate": 6.046442623320145e-07,
      "logits/chosen": -2.1524083614349365,
      "logits/rejected": -2.063347816467285,
      "logps/chosen": -762.1702270507812,
      "logps/rejected": -1244.992431640625,
      "loss": 0.477,
      "rewards/accuracies": 0.6156250238418579,
      "rewards/chosen": -0.5212429761886597,
      "rewards/margins": 0.4973062574863434,
      "rewards/rejected": -1.0185492038726807,
      "step": 380
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-07,
      "logits/chosen": -2.1940252780914307,
      "logits/rejected": -2.106301784515381,
      "logps/chosen": -789.7882690429688,
      "logps/rejected": -1301.745361328125,
      "loss": 0.4771,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.5382066965103149,
      "rewards/margins": 0.5309630632400513,
      "rewards/rejected": -1.0691697597503662,
      "step": 390
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.8702478614051353e-07,
      "logits/chosen": -2.1056671142578125,
      "logits/rejected": -2.0197396278381348,
      "logps/chosen": -846.3111572265625,
      "logps/rejected": -1285.551025390625,
      "loss": 0.478,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -0.6085025072097778,
      "rewards/margins": 0.4548709988594055,
      "rewards/rejected": -1.0633734464645386,
      "step": 400
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.9492720416985004e-07,
      "logits/chosen": -2.086305618286133,
      "logits/rejected": -1.996105432510376,
      "logps/chosen": -932.2883911132812,
      "logps/rejected": -1177.2127685546875,
      "loss": 0.4807,
      "rewards/accuracies": 0.534375011920929,
      "rewards/chosen": -0.6851298213005066,
      "rewards/margins": 0.29829445481300354,
      "rewards/rejected": -0.983424186706543,
      "step": 410
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020857e-07,
      "logits/chosen": -2.1438374519348145,
      "logits/rejected": -2.0642104148864746,
      "logps/chosen": -878.0729370117188,
      "logps/rejected": -1302.420654296875,
      "loss": 0.4798,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.6570709347724915,
      "rewards/margins": 0.4223295748233795,
      "rewards/rejected": -1.0794004201889038,
      "step": 420
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.4662207078575685e-07,
      "logits/chosen": -2.1188504695892334,
      "logits/rejected": -2.0422720909118652,
      "logps/chosen": -986.8173828125,
      "logps/rejected": -1470.625244140625,
      "loss": 0.4786,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.7492085695266724,
      "rewards/margins": 0.5096833109855652,
      "rewards/rejected": -1.2588918209075928,
      "step": 430
    },
    {
      "epoch": 0.92,
      "learning_rate": 9.120948298936422e-08,
      "logits/chosen": -2.160806894302368,
      "logits/rejected": -2.0456414222717285,
      "logps/chosen": -828.1990356445312,
      "logps/rejected": -1279.345703125,
      "loss": 0.4768,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.6060695052146912,
      "rewards/margins": 0.45984458923339844,
      "rewards/rejected": -1.0659140348434448,
      "step": 440
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.870879364444109e-08,
      "logits/chosen": -2.1377763748168945,
      "logits/rejected": -2.0168356895446777,
      "logps/chosen": -823.4695434570312,
      "logps/rejected": -1253.1837158203125,
      "loss": 0.4787,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": -0.591966986656189,
      "rewards/margins": 0.44343581795692444,
      "rewards/rejected": -1.0354026556015015,
      "step": 450
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.93478202307823e-08,
      "logits/chosen": -2.1620395183563232,
      "logits/rejected": -2.011014223098755,
      "logps/chosen": -961.3709106445312,
      "logps/rejected": -1517.798828125,
      "loss": 0.4762,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.7009885907173157,
      "rewards/margins": 0.592383086681366,
      "rewards/rejected": -1.2933716773986816,
      "step": 460
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.283947088983663e-09,
      "logits/chosen": -2.198997735977173,
      "logits/rejected": -2.040200710296631,
      "logps/chosen": -834.0213623046875,
      "logps/rejected": -1428.2811279296875,
      "loss": 0.4748,
      "rewards/accuracies": 0.5718749761581421,
      "rewards/chosen": -0.594894289970398,
      "rewards/margins": 0.6237603425979614,
      "rewards/rejected": -1.2186545133590698,
      "step": 470
    },
    {
      "epoch": 1.0,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 0.38682256204777044,
      "train_runtime": 17349.9683,
      "train_samples_per_second": 3.524,
      "train_steps_per_second": 0.027
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}