|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982859101816935, |
|
"eval_steps": 0, |
|
"global_step": 182, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005485087418580734, |
|
"grad_norm": 12.289390049252361, |
|
"learning_rate": 5.2631578947368416e-08, |
|
"logits/chosen": -0.3854110836982727, |
|
"logits/rejected": -0.38843637704849243, |
|
"logps/chosen": -0.5867404937744141, |
|
"logps/rejected": -0.7349259853363037, |
|
"loss": 2.2106, |
|
"rewards/accuracies": 0.328125, |
|
"rewards/chosen": -1.8373150825500488, |
|
"rewards/margins": -0.37046387791633606, |
|
"rewards/rejected": -1.4668511152267456, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010970174837161468, |
|
"grad_norm": 9.808910529487358, |
|
"learning_rate": 1.0526315789473683e-07, |
|
"logits/chosen": -0.4200110137462616, |
|
"logits/rejected": -0.4337027370929718, |
|
"logps/chosen": -0.5888247489929199, |
|
"logps/rejected": -0.7141146659851074, |
|
"loss": 2.1187, |
|
"rewards/accuracies": 0.3671875, |
|
"rewards/chosen": -1.7852866649627686, |
|
"rewards/margins": -0.31322479248046875, |
|
"rewards/rejected": -1.4720618724822998, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0164552622557422, |
|
"grad_norm": 7.82623444940022, |
|
"learning_rate": 1.5789473684210525e-07, |
|
"logits/chosen": -0.3889790177345276, |
|
"logits/rejected": -0.3634672164916992, |
|
"logps/chosen": -0.6838980317115784, |
|
"logps/rejected": -0.6908231973648071, |
|
"loss": 2.0561, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.727057933807373, |
|
"rewards/margins": -0.01731281727552414, |
|
"rewards/rejected": -1.709745168685913, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.021940349674322936, |
|
"grad_norm": 11.908990882185469, |
|
"learning_rate": 2.1052631578947366e-07, |
|
"logits/chosen": -0.41128796339035034, |
|
"logits/rejected": -0.44201532006263733, |
|
"logps/chosen": -0.6115437150001526, |
|
"logps/rejected": -0.7170974612236023, |
|
"loss": 2.1332, |
|
"rewards/accuracies": 0.4140625, |
|
"rewards/chosen": -1.7927436828613281, |
|
"rewards/margins": -0.2638842463493347, |
|
"rewards/rejected": -1.5288593769073486, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.027425437092903668, |
|
"grad_norm": 9.763139044795885, |
|
"learning_rate": 2.631578947368421e-07, |
|
"logits/chosen": -0.4671156406402588, |
|
"logits/rejected": -0.4450330138206482, |
|
"logps/chosen": -0.5787723064422607, |
|
"logps/rejected": -0.6804812550544739, |
|
"loss": 2.0846, |
|
"rewards/accuracies": 0.3359375, |
|
"rewards/chosen": -1.7012031078338623, |
|
"rewards/margins": -0.2542722821235657, |
|
"rewards/rejected": -1.4469308853149414, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0329105245114844, |
|
"grad_norm": 10.006426324247332, |
|
"learning_rate": 3.157894736842105e-07, |
|
"logits/chosen": -0.3141833543777466, |
|
"logits/rejected": -0.3982672095298767, |
|
"logps/chosen": -0.6082320213317871, |
|
"logps/rejected": -0.7216463088989258, |
|
"loss": 2.1337, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -1.8041157722473145, |
|
"rewards/margins": -0.2835356295108795, |
|
"rewards/rejected": -1.5205800533294678, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03839561193006513, |
|
"grad_norm": 11.019623612251491, |
|
"learning_rate": 3.684210526315789e-07, |
|
"logits/chosen": -0.41403576731681824, |
|
"logits/rejected": -0.4747769236564636, |
|
"logps/chosen": -0.6087530851364136, |
|
"logps/rejected": -0.6581674814224243, |
|
"loss": 2.0016, |
|
"rewards/accuracies": 0.4140625, |
|
"rewards/chosen": -1.6454188823699951, |
|
"rewards/margins": -0.12353596091270447, |
|
"rewards/rejected": -1.5218827724456787, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04388069934864587, |
|
"grad_norm": 13.106807696358263, |
|
"learning_rate": 4.2105263157894733e-07, |
|
"logits/chosen": -0.3825553059577942, |
|
"logits/rejected": -0.4531961679458618, |
|
"logps/chosen": -0.6047242879867554, |
|
"logps/rejected": -0.7030869722366333, |
|
"loss": 2.1026, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -1.7577173709869385, |
|
"rewards/margins": -0.24590645730495453, |
|
"rewards/rejected": -1.5118108987808228, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.049365786767226603, |
|
"grad_norm": 11.770594063909739, |
|
"learning_rate": 4.7368421052631574e-07, |
|
"logits/chosen": -0.3712048828601837, |
|
"logits/rejected": -0.40800797939300537, |
|
"logps/chosen": -0.5091754794120789, |
|
"logps/rejected": -0.7823854684829712, |
|
"loss": 2.425, |
|
"rewards/accuracies": 0.265625, |
|
"rewards/chosen": -1.9559637308120728, |
|
"rewards/margins": -0.683025062084198, |
|
"rewards/rejected": -1.27293860912323, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.054850874185807336, |
|
"grad_norm": 12.669444366856121, |
|
"learning_rate": 5.263157894736842e-07, |
|
"logits/chosen": -0.4165411591529846, |
|
"logits/rejected": -0.3989042639732361, |
|
"logps/chosen": -0.5609541535377502, |
|
"logps/rejected": -0.8160127997398376, |
|
"loss": 2.435, |
|
"rewards/accuracies": 0.296875, |
|
"rewards/chosen": -2.040031909942627, |
|
"rewards/margins": -0.637646496295929, |
|
"rewards/rejected": -1.4023855924606323, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06033596160438807, |
|
"grad_norm": 18.900235847859772, |
|
"learning_rate": 5.789473684210526e-07, |
|
"logits/chosen": -0.39879918098449707, |
|
"logits/rejected": -0.4029804468154907, |
|
"logps/chosen": -0.5685741901397705, |
|
"logps/rejected": -0.7790898084640503, |
|
"loss": 2.3341, |
|
"rewards/accuracies": 0.3359375, |
|
"rewards/chosen": -1.9477243423461914, |
|
"rewards/margins": -0.5262887477874756, |
|
"rewards/rejected": -1.4214354753494263, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0658210490229688, |
|
"grad_norm": 11.34517381440851, |
|
"learning_rate": 6.31578947368421e-07, |
|
"logits/chosen": -0.43950849771499634, |
|
"logits/rejected": -0.4288792312145233, |
|
"logps/chosen": -0.6902109980583191, |
|
"logps/rejected": -0.7512154579162598, |
|
"loss": 2.2512, |
|
"rewards/accuracies": 0.296875, |
|
"rewards/chosen": -1.8780386447906494, |
|
"rewards/margins": -0.15251119434833527, |
|
"rewards/rejected": -1.7255275249481201, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07130613644154954, |
|
"grad_norm": 12.242091991409806, |
|
"learning_rate": 6.842105263157895e-07, |
|
"logits/chosen": -0.3881993591785431, |
|
"logits/rejected": -0.38021671772003174, |
|
"logps/chosen": -0.5799139738082886, |
|
"logps/rejected": -0.714693546295166, |
|
"loss": 2.1644, |
|
"rewards/accuracies": 0.328125, |
|
"rewards/chosen": -1.7867339849472046, |
|
"rewards/margins": -0.3369489908218384, |
|
"rewards/rejected": -1.4497849941253662, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07679122386013026, |
|
"grad_norm": 15.332493592901395, |
|
"learning_rate": 7.368421052631578e-07, |
|
"logits/chosen": -0.4336930215358734, |
|
"logits/rejected": -0.4184849262237549, |
|
"logps/chosen": -0.6871960163116455, |
|
"logps/rejected": -0.8031256794929504, |
|
"loss": 2.3949, |
|
"rewards/accuracies": 0.3203125, |
|
"rewards/chosen": -2.0078141689300537, |
|
"rewards/margins": -0.28982412815093994, |
|
"rewards/rejected": -1.7179901599884033, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.082276311278711, |
|
"grad_norm": 14.281514936865825, |
|
"learning_rate": 7.894736842105263e-07, |
|
"logits/chosen": -0.420330286026001, |
|
"logits/rejected": -0.39623183012008667, |
|
"logps/chosen": -0.6022149324417114, |
|
"logps/rejected": -0.7605262398719788, |
|
"loss": 2.2735, |
|
"rewards/accuracies": 0.328125, |
|
"rewards/chosen": -1.901315450668335, |
|
"rewards/margins": -0.39577823877334595, |
|
"rewards/rejected": -1.5055372714996338, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08776139869729174, |
|
"grad_norm": 17.350845573103634, |
|
"learning_rate": 8.421052631578947e-07, |
|
"logits/chosen": -0.4929282069206238, |
|
"logits/rejected": -0.4550182521343231, |
|
"logps/chosen": -0.5783462524414062, |
|
"logps/rejected": -0.7597732543945312, |
|
"loss": 2.2617, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -1.8994331359863281, |
|
"rewards/margins": -0.45356735587120056, |
|
"rewards/rejected": -1.4458656311035156, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09324648611587247, |
|
"grad_norm": 13.387104362920853, |
|
"learning_rate": 8.947368421052631e-07, |
|
"logits/chosen": -0.4294038414955139, |
|
"logits/rejected": -0.4245000183582306, |
|
"logps/chosen": -0.6331275105476379, |
|
"logps/rejected": -0.7878226041793823, |
|
"loss": 2.3686, |
|
"rewards/accuracies": 0.265625, |
|
"rewards/chosen": -1.9695566892623901, |
|
"rewards/margins": -0.3867378830909729, |
|
"rewards/rejected": -1.582818865776062, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09873157353445321, |
|
"grad_norm": 11.747460293777701, |
|
"learning_rate": 9.473684210526315e-07, |
|
"logits/chosen": -0.43416640162467957, |
|
"logits/rejected": -0.41705217957496643, |
|
"logps/chosen": -0.6964335441589355, |
|
"logps/rejected": -0.7354090213775635, |
|
"loss": 2.0268, |
|
"rewards/accuracies": 0.3984375, |
|
"rewards/chosen": -1.8385226726531982, |
|
"rewards/margins": -0.09743872284889221, |
|
"rewards/rejected": -1.741084098815918, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10421666095303393, |
|
"grad_norm": 16.166401888969105, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.42683446407318115, |
|
"logits/rejected": -0.49105304479599, |
|
"logps/chosen": -0.5967155694961548, |
|
"logps/rejected": -0.6899043917655945, |
|
"loss": 2.0874, |
|
"rewards/accuracies": 0.3046875, |
|
"rewards/chosen": -1.724760890007019, |
|
"rewards/margins": -0.2329719066619873, |
|
"rewards/rejected": -1.4917889833450317, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.10970174837161467, |
|
"grad_norm": 8.619811148509852, |
|
"learning_rate": 9.999071352056673e-07, |
|
"logits/chosen": -0.3747601807117462, |
|
"logits/rejected": -0.41268259286880493, |
|
"logps/chosen": -0.5958091616630554, |
|
"logps/rejected": -0.7304958701133728, |
|
"loss": 2.0832, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -1.8262397050857544, |
|
"rewards/margins": -0.33671680092811584, |
|
"rewards/rejected": -1.489522933959961, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11518683579019541, |
|
"grad_norm": 8.74498909781148, |
|
"learning_rate": 9.996285753181497e-07, |
|
"logits/chosen": -0.4670315086841583, |
|
"logits/rejected": -0.4540305733680725, |
|
"logps/chosen": -0.6016995906829834, |
|
"logps/rejected": -0.7677630186080933, |
|
"loss": 2.1691, |
|
"rewards/accuracies": 0.3046875, |
|
"rewards/chosen": -1.9194074869155884, |
|
"rewards/margins": -0.4151587188243866, |
|
"rewards/rejected": -1.504248857498169, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12067192320877614, |
|
"grad_norm": 12.710357284461692, |
|
"learning_rate": 9.99164423811074e-07, |
|
"logits/chosen": -0.4635859429836273, |
|
"logits/rejected": -0.49371862411499023, |
|
"logps/chosen": -0.9511612057685852, |
|
"logps/rejected": -0.7291851043701172, |
|
"loss": 1.9912, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -1.8229626417160034, |
|
"rewards/margins": 0.5549403429031372, |
|
"rewards/rejected": -2.3779029846191406, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.12615701062735687, |
|
"grad_norm": 7.521146349471263, |
|
"learning_rate": 9.985148530977764e-07, |
|
"logits/chosen": -0.44370928406715393, |
|
"logits/rejected": -0.4425956606864929, |
|
"logps/chosen": -0.6404778361320496, |
|
"logps/rejected": -0.7111139297485352, |
|
"loss": 1.9723, |
|
"rewards/accuracies": 0.3203125, |
|
"rewards/chosen": -1.777784824371338, |
|
"rewards/margins": -0.17659035325050354, |
|
"rewards/rejected": -1.6011945009231567, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1316420980459376, |
|
"grad_norm": 9.114160770658133, |
|
"learning_rate": 9.976801044672607e-07, |
|
"logits/chosen": -0.4481334686279297, |
|
"logits/rejected": -0.45388296246528625, |
|
"logps/chosen": -0.7301231622695923, |
|
"logps/rejected": -0.6901272535324097, |
|
"loss": 1.8305, |
|
"rewards/accuracies": 0.4140625, |
|
"rewards/chosen": -1.7253180742263794, |
|
"rewards/margins": 0.09998967498540878, |
|
"rewards/rejected": -1.825307846069336, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.13712718546451835, |
|
"grad_norm": 16.624964926964527, |
|
"learning_rate": 9.966604879945656e-07, |
|
"logits/chosen": -0.5069385170936584, |
|
"logits/rejected": -0.5200111865997314, |
|
"logps/chosen": -0.721697211265564, |
|
"logps/rejected": -0.7383979558944702, |
|
"loss": 1.9472, |
|
"rewards/accuracies": 0.3828125, |
|
"rewards/chosen": -1.8459948301315308, |
|
"rewards/margins": -0.04175184667110443, |
|
"rewards/rejected": -1.8042429685592651, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14261227288309908, |
|
"grad_norm": 8.144004658411824, |
|
"learning_rate": 9.954563824255877e-07, |
|
"logits/chosen": -0.4636583924293518, |
|
"logits/rejected": -0.4762532711029053, |
|
"logps/chosen": -0.6404789686203003, |
|
"logps/rejected": -0.7015948295593262, |
|
"loss": 1.8902, |
|
"rewards/accuracies": 0.4140625, |
|
"rewards/chosen": -1.7539873123168945, |
|
"rewards/margins": -0.15278980135917664, |
|
"rewards/rejected": -1.601197361946106, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1480973603016798, |
|
"grad_norm": 9.860499037174334, |
|
"learning_rate": 9.94068235036391e-07, |
|
"logits/chosen": -0.4618387818336487, |
|
"logits/rejected": -0.47574925422668457, |
|
"logps/chosen": -0.6812009811401367, |
|
"logps/rejected": -0.7617368698120117, |
|
"loss": 1.9858, |
|
"rewards/accuracies": 0.3046875, |
|
"rewards/chosen": -1.9043422937393188, |
|
"rewards/margins": -0.20133966207504272, |
|
"rewards/rejected": -1.7030025720596313, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15358244772026053, |
|
"grad_norm": 10.039428944668376, |
|
"learning_rate": 9.924965614670628e-07, |
|
"logits/chosen": -0.5124188661575317, |
|
"logits/rejected": -0.4997189939022064, |
|
"logps/chosen": -0.755838930606842, |
|
"logps/rejected": -0.7807177901268005, |
|
"loss": 1.9039, |
|
"rewards/accuracies": 0.3828125, |
|
"rewards/chosen": -1.9517943859100342, |
|
"rewards/margins": -0.06219691038131714, |
|
"rewards/rejected": -1.8895972967147827, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.15906753513884128, |
|
"grad_norm": 10.666772071989447, |
|
"learning_rate": 9.90741945530174e-07, |
|
"logits/chosen": -0.5431128740310669, |
|
"logits/rejected": -0.5210611820220947, |
|
"logps/chosen": -0.9286273121833801, |
|
"logps/rejected": -0.9671891927719116, |
|
"loss": 2.0302, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -2.417973041534424, |
|
"rewards/margins": -0.0964045524597168, |
|
"rewards/rejected": -2.321568250656128, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.164552622557422, |
|
"grad_norm": 8.160421166678184, |
|
"learning_rate": 9.888050389939172e-07, |
|
"logits/chosen": -0.5067495107650757, |
|
"logits/rejected": -0.4774128198623657, |
|
"logps/chosen": -0.7992498874664307, |
|
"logps/rejected": -0.8549879789352417, |
|
"loss": 1.9332, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": -2.13746976852417, |
|
"rewards/margins": -0.13934528827667236, |
|
"rewards/rejected": -1.9981244802474976, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17003770997600273, |
|
"grad_norm": 10.514382125623976, |
|
"learning_rate": 9.866865613400006e-07, |
|
"logits/chosen": -0.5173575282096863, |
|
"logits/rejected": -0.4771508574485779, |
|
"logps/chosen": -0.8561153411865234, |
|
"logps/rejected": -0.9025238156318665, |
|
"loss": 1.9123, |
|
"rewards/accuracies": 0.3671875, |
|
"rewards/chosen": -2.256309747695923, |
|
"rewards/margins": -0.11602123826742172, |
|
"rewards/rejected": -2.1402883529663086, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.17552279739458349, |
|
"grad_norm": 6.797061723595508, |
|
"learning_rate": 9.843872994963912e-07, |
|
"logits/chosen": -0.6115865707397461, |
|
"logits/rejected": -0.5253005027770996, |
|
"logps/chosen": -0.8001683950424194, |
|
"logps/rejected": -0.8365844488143921, |
|
"loss": 1.9177, |
|
"rewards/accuracies": 0.3515625, |
|
"rewards/chosen": -2.091461181640625, |
|
"rewards/margins": -0.09104003012180328, |
|
"rewards/rejected": -2.0004210472106934, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1810078848131642, |
|
"grad_norm": 10.986752360147651, |
|
"learning_rate": 9.819081075450013e-07, |
|
"logits/chosen": -0.5589928030967712, |
|
"logits/rejected": -0.5537349581718445, |
|
"logps/chosen": -0.9822956323623657, |
|
"logps/rejected": -0.8852106332778931, |
|
"loss": 1.7645, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -2.213026523590088, |
|
"rewards/margins": 0.2427126169204712, |
|
"rewards/rejected": -2.4557392597198486, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.18649297223174494, |
|
"grad_norm": 7.9793683352025395, |
|
"learning_rate": 9.792499064044342e-07, |
|
"logits/chosen": -0.6396060585975647, |
|
"logits/rejected": -0.557750940322876, |
|
"logps/chosen": -0.8982308506965637, |
|
"logps/rejected": -0.9113630056381226, |
|
"loss": 1.8363, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -2.2784078121185303, |
|
"rewards/margins": -0.03283056244254112, |
|
"rewards/rejected": -2.245576858520508, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.1919780596503257, |
|
"grad_norm": 6.791520764999855, |
|
"learning_rate": 9.764136834878985e-07, |
|
"logits/chosen": -0.6534283757209778, |
|
"logits/rejected": -0.6032913327217102, |
|
"logps/chosen": -0.8987076282501221, |
|
"logps/rejected": -0.9370582699775696, |
|
"loss": 1.8242, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -2.3426456451416016, |
|
"rewards/margins": -0.095876544713974, |
|
"rewards/rejected": -2.2467689514160156, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.19746314706890641, |
|
"grad_norm": 7.022459099441044, |
|
"learning_rate": 9.734004923364256e-07, |
|
"logits/chosen": -0.6581586003303528, |
|
"logits/rejected": -0.6092681884765625, |
|
"logps/chosen": -0.960416316986084, |
|
"logps/rejected": -0.9933475852012634, |
|
"loss": 1.8998, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -2.4833688735961914, |
|
"rewards/margins": -0.08232799917459488, |
|
"rewards/rejected": -2.401041030883789, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.20294823448748714, |
|
"grad_norm": 7.996309219752066, |
|
"learning_rate": 9.702114522275216e-07, |
|
"logits/chosen": -0.6663313508033752, |
|
"logits/rejected": -0.6023609638214111, |
|
"logps/chosen": -0.9479801058769226, |
|
"logps/rejected": -0.998908281326294, |
|
"loss": 1.8343, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -2.4972708225250244, |
|
"rewards/margins": -0.1273205429315567, |
|
"rewards/rejected": -2.369950294494629, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.20843332190606786, |
|
"grad_norm": 6.699323358307292, |
|
"learning_rate": 9.66847747759402e-07, |
|
"logits/chosen": -0.622665286064148, |
|
"logits/rejected": -0.5349312424659729, |
|
"logps/chosen": -1.041911244392395, |
|
"logps/rejected": -0.9843886494636536, |
|
"loss": 1.6697, |
|
"rewards/accuracies": 0.4921875, |
|
"rewards/chosen": -2.4609715938568115, |
|
"rewards/margins": 0.14380690455436707, |
|
"rewards/rejected": -2.604778289794922, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.21391840932464862, |
|
"grad_norm": 6.682266326866778, |
|
"learning_rate": 9.63310628410961e-07, |
|
"logits/chosen": -0.6338837742805481, |
|
"logits/rejected": -0.5423075556755066, |
|
"logps/chosen": -1.0179288387298584, |
|
"logps/rejected": -0.967695951461792, |
|
"loss": 1.8336, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -2.4192402362823486, |
|
"rewards/margins": 0.1255817860364914, |
|
"rewards/rejected": -2.5448219776153564, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.21940349674322934, |
|
"grad_norm": 8.105951134201701, |
|
"learning_rate": 9.596014080776421e-07, |
|
"logits/chosen": -0.6672332286834717, |
|
"logits/rejected": -0.6281388998031616, |
|
"logps/chosen": -1.0504218339920044, |
|
"logps/rejected": -1.0622175931930542, |
|
"loss": 1.8315, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -2.6555442810058594, |
|
"rewards/margins": -0.029489843174815178, |
|
"rewards/rejected": -2.626054525375366, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22488858416181007, |
|
"grad_norm": 13.378049805845984, |
|
"learning_rate": 9.55721464583379e-07, |
|
"logits/chosen": -0.7849185466766357, |
|
"logits/rejected": -0.6884415745735168, |
|
"logps/chosen": -1.0642319917678833, |
|
"logps/rejected": -1.05228853225708, |
|
"loss": 1.7842, |
|
"rewards/accuracies": 0.4140625, |
|
"rewards/chosen": -2.6307215690612793, |
|
"rewards/margins": 0.02985840104520321, |
|
"rewards/rejected": -2.6605796813964844, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.23037367158039082, |
|
"grad_norm": 9.064118287711297, |
|
"learning_rate": 9.516722391687902e-07, |
|
"logits/chosen": -0.6929774284362793, |
|
"logits/rejected": -0.6579635739326477, |
|
"logps/chosen": -1.1520164012908936, |
|
"logps/rejected": -1.1617083549499512, |
|
"loss": 1.9825, |
|
"rewards/accuracies": 0.3203125, |
|
"rewards/chosen": -2.904270887374878, |
|
"rewards/margins": -0.02423013746738434, |
|
"rewards/rejected": -2.8800406455993652, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.23585875899897155, |
|
"grad_norm": 7.541139303599616, |
|
"learning_rate": 9.474552359558165e-07, |
|
"logits/chosen": -0.7736871242523193, |
|
"logits/rejected": -0.6667463183403015, |
|
"logps/chosen": -1.1246612071990967, |
|
"logps/rejected": -1.06680428981781, |
|
"loss": 1.6144, |
|
"rewards/accuracies": 0.4453125, |
|
"rewards/chosen": -2.667010545730591, |
|
"rewards/margins": 0.1446424126625061, |
|
"rewards/rejected": -2.811652898788452, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.24134384641755227, |
|
"grad_norm": 9.736710769671427, |
|
"learning_rate": 9.430720213890029e-07, |
|
"logits/chosen": -0.7818886041641235, |
|
"logits/rejected": -0.7196276187896729, |
|
"logps/chosen": -1.2841920852661133, |
|
"logps/rejected": -1.2012193202972412, |
|
"loss": 1.648, |
|
"rewards/accuracies": 0.4453125, |
|
"rewards/chosen": -3.0030481815338135, |
|
"rewards/margins": 0.2074318528175354, |
|
"rewards/rejected": -3.210480213165283, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.24682893383613302, |
|
"grad_norm": 21.756241022877973, |
|
"learning_rate": 9.385242236536259e-07, |
|
"logits/chosen": -0.8642858266830444, |
|
"logits/rejected": -0.816374659538269, |
|
"logps/chosen": -1.3722171783447266, |
|
"logps/rejected": -1.34407639503479, |
|
"loss": 1.9336, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -3.3601903915405273, |
|
"rewards/margins": 0.07035252451896667, |
|
"rewards/rejected": -3.4305431842803955, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.25231402125471375, |
|
"grad_norm": 32.5907546315537, |
|
"learning_rate": 9.338135320708911e-07, |
|
"logits/chosen": -0.7350670695304871, |
|
"logits/rejected": -0.6843174695968628, |
|
"logps/chosen": -1.6099109649658203, |
|
"logps/rejected": -1.380704641342163, |
|
"loss": 1.4913, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -3.4517619609832764, |
|
"rewards/margins": 0.5730158686637878, |
|
"rewards/rejected": -4.024777889251709, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2577991086732945, |
|
"grad_norm": 69.74307826075179, |
|
"learning_rate": 9.289416964704185e-07, |
|
"logits/chosen": -0.6261876821517944, |
|
"logits/rejected": -0.5953123569488525, |
|
"logps/chosen": -2.0205883979797363, |
|
"logps/rejected": -1.7966296672821045, |
|
"loss": 1.4505, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": -4.491574287414551, |
|
"rewards/margins": 0.5598966479301453, |
|
"rewards/rejected": -5.05147123336792, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2632841960918752, |
|
"grad_norm": 111.71156009374812, |
|
"learning_rate": 9.239105265402525e-07, |
|
"logits/chosen": -0.6214447021484375, |
|
"logits/rejected": -0.6121379137039185, |
|
"logps/chosen": -4.529265880584717, |
|
"logps/rejected": -4.061288356781006, |
|
"loss": 1.5693, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -10.15322208404541, |
|
"rewards/margins": 1.1699434518814087, |
|
"rewards/rejected": -11.323163032531738, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2687692835104559, |
|
"grad_norm": 106.00540500477615, |
|
"learning_rate": 9.187218911546361e-07, |
|
"logits/chosen": -0.6318798661231995, |
|
"logits/rejected": -0.6299155950546265, |
|
"logps/chosen": -6.402857780456543, |
|
"logps/rejected": -5.515579700469971, |
|
"loss": 1.4994, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.788949966430664, |
|
"rewards/margins": 2.218195676803589, |
|
"rewards/rejected": -16.007144927978516, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.2742543709290367, |
|
"grad_norm": 100.18405633526882, |
|
"learning_rate": 9.133777176798012e-07, |
|
"logits/chosen": -0.5955071449279785, |
|
"logits/rejected": -0.587684690952301, |
|
"logps/chosen": -6.099169731140137, |
|
"logps/rejected": -5.420334339141846, |
|
"loss": 1.686, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -13.550837516784668, |
|
"rewards/margins": 1.6970889568328857, |
|
"rewards/rejected": -15.2479248046875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.27973945834761743, |
|
"grad_norm": 58.20603276880462, |
|
"learning_rate": 9.078799912580303e-07, |
|
"logits/chosen": -0.4950883388519287, |
|
"logits/rejected": -0.47393253445625305, |
|
"logps/chosen": -3.7327165603637695, |
|
"logps/rejected": -2.9755775928497314, |
|
"loss": 1.1265, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -7.438943386077881, |
|
"rewards/margins": 1.892848014831543, |
|
"rewards/rejected": -9.331791877746582, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.28522454576619816, |
|
"grad_norm": 34.976161133474584, |
|
"learning_rate": 9.022307540702576e-07, |
|
"logits/chosen": -0.595772385597229, |
|
"logits/rejected": -0.5804386138916016, |
|
"logps/chosen": -3.406771659851074, |
|
"logps/rejected": -2.477839469909668, |
|
"loss": 1.0881, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -6.1945977210998535, |
|
"rewards/margins": 2.322330951690674, |
|
"rewards/rejected": -8.516929626464844, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.2907096331847789, |
|
"grad_norm": 51.69206884243948, |
|
"learning_rate": 8.964321045774806e-07, |
|
"logits/chosen": -0.5769085884094238, |
|
"logits/rejected": -0.5628898739814758, |
|
"logps/chosen": -3.4867472648620605, |
|
"logps/rejected": -2.7880890369415283, |
|
"loss": 1.2003, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -6.970221996307373, |
|
"rewards/margins": 1.7466471195220947, |
|
"rewards/rejected": -8.716869354248047, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2961947206033596, |
|
"grad_norm": 71.37167688264066, |
|
"learning_rate": 8.904861967412701e-07, |
|
"logits/chosen": -0.6889777183532715, |
|
"logits/rejected": -0.6549051403999329, |
|
"logps/chosen": -2.727174997329712, |
|
"logps/rejected": -2.3221614360809326, |
|
"loss": 1.4033, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": -5.805403709411621, |
|
"rewards/margins": 1.0125339031219482, |
|
"rewards/rejected": -6.817937850952148, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.30167980802194033, |
|
"grad_norm": 47.86988316740339, |
|
"learning_rate": 8.843952392236593e-07, |
|
"logits/chosen": -0.7492246031761169, |
|
"logits/rejected": -0.6207780838012695, |
|
"logps/chosen": -3.1855251789093018, |
|
"logps/rejected": -2.70566987991333, |
|
"loss": 1.3969, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -6.764174461364746, |
|
"rewards/margins": 1.1996381282806396, |
|
"rewards/rejected": -7.963812351226807, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.30716489544052106, |
|
"grad_norm": 78.52357858963444, |
|
"learning_rate": 8.781614945667168e-07, |
|
"logits/chosen": -0.7661877274513245, |
|
"logits/rejected": -0.6237936615943909, |
|
"logps/chosen": -3.6718194484710693, |
|
"logps/rejected": -2.841695547103882, |
|
"loss": 1.2378, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -7.104238033294678, |
|
"rewards/margins": 2.075310230255127, |
|
"rewards/rejected": -9.179548263549805, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.31264998285910184, |
|
"grad_norm": 71.4152133884341, |
|
"learning_rate": 8.717872783521047e-07, |
|
"logits/chosen": -0.800015389919281, |
|
"logits/rejected": -0.7331135272979736, |
|
"logps/chosen": -3.140065908432007, |
|
"logps/rejected": -2.2423806190490723, |
|
"loss": 1.0947, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -5.605951309204102, |
|
"rewards/margins": 2.244213104248047, |
|
"rewards/rejected": -7.850164890289307, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.31813507027768256, |
|
"grad_norm": 42.30260390013795, |
|
"learning_rate": 8.652749583409339e-07, |
|
"logits/chosen": -0.9033212661743164, |
|
"logits/rejected": -0.7837256193161011, |
|
"logps/chosen": -3.608813524246216, |
|
"logps/rejected": -2.8119547367095947, |
|
"loss": 1.0673, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -7.0298871994018555, |
|
"rewards/margins": 1.99214768409729, |
|
"rewards/rejected": -9.02203369140625, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3236201576962633, |
|
"grad_norm": 72.45091367067401, |
|
"learning_rate": 8.586269535942385e-07, |
|
"logits/chosen": -0.9157741069793701, |
|
"logits/rejected": -0.8138267993927002, |
|
"logps/chosen": -4.422084808349609, |
|
"logps/rejected": -3.3933629989624023, |
|
"loss": 1.0082, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -8.483407974243164, |
|
"rewards/margins": 2.5718040466308594, |
|
"rewards/rejected": -11.055212020874023, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.329105245114844, |
|
"grad_norm": 67.2038247219744, |
|
"learning_rate": 8.518457335743924e-07, |
|
"logits/chosen": -1.0231534242630005, |
|
"logits/rejected": -0.8920707106590271, |
|
"logps/chosen": -6.588432312011719, |
|
"logps/rejected": -4.968594551086426, |
|
"loss": 1.2004, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -12.42148494720459, |
|
"rewards/margins": 4.049595832824707, |
|
"rewards/rejected": -16.471080780029297, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.33459033253342474, |
|
"grad_norm": 71.2725286215776, |
|
"learning_rate": 8.449338172278058e-07, |
|
"logits/chosen": -1.0856202840805054, |
|
"logits/rejected": -1.001308798789978, |
|
"logps/chosen": -7.01667594909668, |
|
"logps/rejected": -5.5445075035095215, |
|
"loss": 1.07, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -13.861268043518066, |
|
"rewards/margins": 3.680420398712158, |
|
"rewards/rejected": -17.541690826416016, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.34007541995200546, |
|
"grad_norm": 83.45605345714169, |
|
"learning_rate": 8.378937720492383e-07, |
|
"logits/chosen": -0.9825168251991272, |
|
"logits/rejected": -0.8393011093139648, |
|
"logps/chosen": -6.87870979309082, |
|
"logps/rejected": -5.226398468017578, |
|
"loss": 1.1342, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -13.065997123718262, |
|
"rewards/margins": 4.130776882171631, |
|
"rewards/rejected": -17.196773529052734, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.34556050737058625, |
|
"grad_norm": 47.538952621401116, |
|
"learning_rate": 8.307282131280804e-07, |
|
"logits/chosen": -1.073388695716858, |
|
"logits/rejected": -0.8648728728294373, |
|
"logps/chosen": -5.328536033630371, |
|
"logps/rejected": -3.9907565116882324, |
|
"loss": 0.9218, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -9.976890563964844, |
|
"rewards/margins": 3.344449520111084, |
|
"rewards/rejected": -13.321340560913086, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.35104559478916697, |
|
"grad_norm": 69.97786008191355, |
|
"learning_rate": 8.23439802176954e-07, |
|
"logits/chosen": -1.045760989189148, |
|
"logits/rejected": -0.8985159993171692, |
|
"logps/chosen": -4.391729831695557, |
|
"logps/rejected": -2.979081869125366, |
|
"loss": 1.0545, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -7.447704315185547, |
|
"rewards/margins": 3.531620502471924, |
|
"rewards/rejected": -10.979324340820312, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.3565306822077477, |
|
"grad_norm": 57.99100276656143, |
|
"learning_rate": 8.160312465429952e-07, |
|
"logits/chosen": -1.059841513633728, |
|
"logits/rejected": -0.9158331155776978, |
|
"logps/chosen": -4.3702073097229, |
|
"logps/rejected": -2.9328854084014893, |
|
"loss": 0.8092, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -7.332213401794434, |
|
"rewards/margins": 3.5933048725128174, |
|
"rewards/rejected": -10.925518035888672, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.3620157696263284, |
|
"grad_norm": 32.78666266298242, |
|
"learning_rate": 8.085052982021847e-07, |
|
"logits/chosen": -1.128019094467163, |
|
"logits/rejected": -0.9226801991462708, |
|
"logps/chosen": -3.9989237785339355, |
|
"logps/rejected": -2.929816246032715, |
|
"loss": 1.0525, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -7.324541091918945, |
|
"rewards/margins": 2.6727685928344727, |
|
"rewards/rejected": -9.997309684753418, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.36750085704490915, |
|
"grad_norm": 50.293055849490955, |
|
"learning_rate": 8.008647527371022e-07, |
|
"logits/chosen": -1.4013196229934692, |
|
"logits/rejected": -1.138377070426941, |
|
"logps/chosen": -4.476810932159424, |
|
"logps/rejected": -3.069303512573242, |
|
"loss": 0.8161, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -7.6732587814331055, |
|
"rewards/margins": 3.518767833709717, |
|
"rewards/rejected": -11.19202709197998, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.37298594446348987, |
|
"grad_norm": 50.48347857053289, |
|
"learning_rate": 7.931124482984801e-07, |
|
"logits/chosen": -1.4336833953857422, |
|
"logits/rejected": -1.2769416570663452, |
|
"logps/chosen": -4.982694625854492, |
|
"logps/rejected": -3.5518202781677246, |
|
"loss": 1.0579, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -8.87955093383789, |
|
"rewards/margins": 3.5771865844726562, |
|
"rewards/rejected": -12.45673656463623, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3784710318820706, |
|
"grad_norm": 41.95073141945747, |
|
"learning_rate": 7.85251264550948e-07, |
|
"logits/chosen": -1.5961790084838867, |
|
"logits/rejected": -1.3946001529693604, |
|
"logps/chosen": -5.758039951324463, |
|
"logps/rejected": -4.470717906951904, |
|
"loss": 1.3453, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": -11.176795959472656, |
|
"rewards/margins": 3.218303680419922, |
|
"rewards/rejected": -14.395099639892578, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.3839561193006514, |
|
"grad_norm": 52.202423339982246, |
|
"learning_rate": 7.772841216033532e-07, |
|
"logits/chosen": -1.6466355323791504, |
|
"logits/rejected": -1.4171117544174194, |
|
"logps/chosen": -6.761007308959961, |
|
"logps/rejected": -5.078183650970459, |
|
"loss": 1.2675, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -12.69545841217041, |
|
"rewards/margins": 4.207059860229492, |
|
"rewards/rejected": -16.90251922607422, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3894412067192321, |
|
"grad_norm": 33.978850229748005, |
|
"learning_rate": 7.69213978924061e-07, |
|
"logits/chosen": -1.544925332069397, |
|
"logits/rejected": -1.2573606967926025, |
|
"logps/chosen": -6.336057662963867, |
|
"logps/rejected": -4.741216659545898, |
|
"loss": 1.096, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -11.85304069519043, |
|
"rewards/margins": 3.987103223800659, |
|
"rewards/rejected": -15.840145111083984, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.39492629413781283, |
|
"grad_norm": 96.09934849635745, |
|
"learning_rate": 7.610438342416319e-07, |
|
"logits/chosen": -1.4953880310058594, |
|
"logits/rejected": -1.2785418033599854, |
|
"logps/chosen": -6.89131498336792, |
|
"logps/rejected": -5.383131980895996, |
|
"loss": 1.0171, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -13.457829475402832, |
|
"rewards/margins": 3.7704575061798096, |
|
"rewards/rejected": -17.228288650512695, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.40041138155639355, |
|
"grad_norm": 27.422261234951808, |
|
"learning_rate": 7.527767224312882e-07, |
|
"logits/chosen": -1.322948932647705, |
|
"logits/rejected": -1.1441978216171265, |
|
"logps/chosen": -6.809509754180908, |
|
"logps/rejected": -4.989666938781738, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -12.47416877746582, |
|
"rewards/margins": 4.549604415893555, |
|
"rewards/rejected": -17.023773193359375, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4058964689749743, |
|
"grad_norm": 80.95278759548928, |
|
"learning_rate": 7.444157143875819e-07, |
|
"logits/chosen": -1.216729760169983, |
|
"logits/rejected": -1.057979941368103, |
|
"logps/chosen": -6.466203689575195, |
|
"logps/rejected": -5.076437950134277, |
|
"loss": 0.9105, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -12.691095352172852, |
|
"rewards/margins": 3.474414110183716, |
|
"rewards/rejected": -16.165510177612305, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.411381556393555, |
|
"grad_norm": 72.36852993655451, |
|
"learning_rate": 7.359639158836827e-07, |
|
"logits/chosen": -1.1118669509887695, |
|
"logits/rejected": -1.0416420698165894, |
|
"logps/chosen": -7.078163146972656, |
|
"logps/rejected": -5.48430871963501, |
|
"loss": 1.0358, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.710769653320312, |
|
"rewards/margins": 3.9846386909484863, |
|
"rewards/rejected": -17.695409774780273, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.41686664381213573, |
|
"grad_norm": 66.4908598237263, |
|
"learning_rate": 7.274244664177097e-07, |
|
"logits/chosen": -1.041873812675476, |
|
"logits/rejected": -0.9867510199546814, |
|
"logps/chosen": -6.261934280395508, |
|
"logps/rejected": -4.695401668548584, |
|
"loss": 0.9974, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -11.738503456115723, |
|
"rewards/margins": 3.9163331985473633, |
|
"rewards/rejected": -15.654836654663086, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.4223517312307165, |
|
"grad_norm": 49.259846719809424, |
|
"learning_rate": 7.188005380465364e-07, |
|
"logits/chosen": -1.1777944564819336, |
|
"logits/rejected": -1.0354324579238892, |
|
"logps/chosen": -6.061973571777344, |
|
"logps/rejected": -4.546577453613281, |
|
"loss": 0.8174, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -11.36644458770752, |
|
"rewards/margins": 3.7884879112243652, |
|
"rewards/rejected": -15.154932975769043, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.42783681864929723, |
|
"grad_norm": 29.6260649209974, |
|
"learning_rate": 7.100953342075009e-07, |
|
"logits/chosen": -1.2290102243423462, |
|
"logits/rejected": -1.110871434211731, |
|
"logps/chosen": -5.283913612365723, |
|
"logps/rejected": -3.9515509605407715, |
|
"loss": 0.8695, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -9.878876686096191, |
|
"rewards/margins": 3.33090877532959, |
|
"rewards/rejected": -13.209785461425781, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.43332190606787796, |
|
"grad_norm": 32.05872824883326, |
|
"learning_rate": 7.013120885284598e-07, |
|
"logits/chosen": -1.3086589574813843, |
|
"logits/rejected": -1.1846544742584229, |
|
"logps/chosen": -5.159869194030762, |
|
"logps/rejected": -3.5984580516815186, |
|
"loss": 0.7876, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -8.996145248413086, |
|
"rewards/margins": 3.9035279750823975, |
|
"rewards/rejected": -12.899672508239746, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.4388069934864587, |
|
"grad_norm": 40.42205251951496, |
|
"learning_rate": 6.924540636266272e-07, |
|
"logits/chosen": -1.3288094997406006, |
|
"logits/rejected": -1.2276866436004639, |
|
"logps/chosen": -5.131710529327393, |
|
"logps/rejected": -3.831780433654785, |
|
"loss": 0.9434, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -9.579451560974121, |
|
"rewards/margins": 3.2498245239257812, |
|
"rewards/rejected": -12.829277038574219, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4442920809050394, |
|
"grad_norm": 36.25388918144398, |
|
"learning_rate": 6.83524549896646e-07, |
|
"logits/chosen": -1.163621187210083, |
|
"logits/rejected": -1.1168286800384521, |
|
"logps/chosen": -5.016862392425537, |
|
"logps/rejected": -3.682563066482544, |
|
"loss": 0.8759, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -9.206408500671387, |
|
"rewards/margins": 3.3357465267181396, |
|
"rewards/rejected": -12.542155265808105, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.44977716832362014, |
|
"grad_norm": 25.153488392194294, |
|
"learning_rate": 6.745268642883404e-07, |
|
"logits/chosen": -1.2277235984802246, |
|
"logits/rejected": -1.0471045970916748, |
|
"logps/chosen": -5.926680088043213, |
|
"logps/rejected": -4.389726161956787, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -10.974315643310547, |
|
"rewards/margins": 3.8423848152160645, |
|
"rewards/rejected": -14.816699981689453, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4552622557422009, |
|
"grad_norm": 22.152291434451502, |
|
"learning_rate": 6.654643490746041e-07, |
|
"logits/chosen": -1.2063225507736206, |
|
"logits/rejected": -1.0923081636428833, |
|
"logps/chosen": -6.3293843269348145, |
|
"logps/rejected": -4.906074047088623, |
|
"loss": 0.8406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -12.26518440246582, |
|
"rewards/margins": 3.558277130126953, |
|
"rewards/rejected": -15.823461532592773, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.46074734316078164, |
|
"grad_norm": 29.47429990683989, |
|
"learning_rate": 6.563403706098832e-07, |
|
"logits/chosen": -1.2531236410140991, |
|
"logits/rejected": -1.1536014080047607, |
|
"logps/chosen": -7.200500965118408, |
|
"logps/rejected": -5.6704511642456055, |
|
"loss": 0.844, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.176128387451172, |
|
"rewards/margins": 3.8251240253448486, |
|
"rewards/rejected": -18.001251220703125, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.46623243057936237, |
|
"grad_norm": 39.720494087331325, |
|
"learning_rate": 6.47158318079712e-07, |
|
"logits/chosen": -1.2474052906036377, |
|
"logits/rejected": -1.1939733028411865, |
|
"logps/chosen": -7.995599746704102, |
|
"logps/rejected": -6.105539321899414, |
|
"loss": 0.9392, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -15.263847351074219, |
|
"rewards/margins": 4.725150108337402, |
|
"rewards/rejected": -19.988998413085938, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.4717175179979431, |
|
"grad_norm": 26.901425160757288, |
|
"learning_rate": 6.379216022417695e-07, |
|
"logits/chosen": -1.2418212890625, |
|
"logits/rejected": -1.1997092962265015, |
|
"logps/chosen": -8.617918014526367, |
|
"logps/rejected": -6.57258415222168, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -16.431461334228516, |
|
"rewards/margins": 5.113334655761719, |
|
"rewards/rejected": -21.544795989990234, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.4772026054165238, |
|
"grad_norm": 58.114713536632436, |
|
"learning_rate": 6.286336541589223e-07, |
|
"logits/chosen": -1.2740073204040527, |
|
"logits/rejected": -1.2163861989974976, |
|
"logps/chosen": -8.867392539978027, |
|
"logps/rejected": -7.1280741691589355, |
|
"loss": 0.9602, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.820186614990234, |
|
"rewards/margins": 4.348294734954834, |
|
"rewards/rejected": -22.168481826782227, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.48268769283510454, |
|
"grad_norm": 33.361524717086475, |
|
"learning_rate": 6.192979239247242e-07, |
|
"logits/chosen": -1.1057997941970825, |
|
"logits/rejected": -1.0285227298736572, |
|
"logps/chosen": -8.445158004760742, |
|
"logps/rejected": -6.86185359954834, |
|
"loss": 0.8718, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -17.154632568359375, |
|
"rewards/margins": 3.9582619667053223, |
|
"rewards/rejected": -21.112895965576172, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.48817278025368527, |
|
"grad_norm": 51.84635707806423, |
|
"learning_rate": 6.099178793818478e-07, |
|
"logits/chosen": -1.1163854598999023, |
|
"logits/rejected": -1.0613051652908325, |
|
"logps/chosen": -9.026713371276855, |
|
"logps/rejected": -7.194836616516113, |
|
"loss": 0.8724, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -17.987092971801758, |
|
"rewards/margins": 4.5796895027160645, |
|
"rewards/rejected": -22.566783905029297, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.49365786767226605, |
|
"grad_norm": 35.13623148031408, |
|
"learning_rate": 6.004970048339225e-07, |
|
"logits/chosen": -0.9862219095230103, |
|
"logits/rejected": -0.868794858455658, |
|
"logps/chosen": -7.408356666564941, |
|
"logps/rejected": -5.994558334350586, |
|
"loss": 0.9605, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -14.986395835876465, |
|
"rewards/margins": 3.5344960689544678, |
|
"rewards/rejected": -18.520891189575195, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4991429550908468, |
|
"grad_norm": 30.15074112906336, |
|
"learning_rate": 5.910387997512573e-07, |
|
"logits/chosen": -0.9399983286857605, |
|
"logits/rejected": -0.8564634919166565, |
|
"logps/chosen": -7.057155609130859, |
|
"logps/rejected": -5.3340325355529785, |
|
"loss": 0.7882, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -13.3350830078125, |
|
"rewards/margins": 4.307806968688965, |
|
"rewards/rejected": -17.64288902282715, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5046280425094275, |
|
"grad_norm": 38.88195575984379, |
|
"learning_rate": 5.815467774709313e-07, |
|
"logits/chosen": -0.9454355835914612, |
|
"logits/rejected": -0.9133027791976929, |
|
"logps/chosen": -6.99016809463501, |
|
"logps/rejected": -5.265021324157715, |
|
"loss": 0.8483, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -13.162553787231445, |
|
"rewards/margins": 4.3128662109375, |
|
"rewards/rejected": -17.475419998168945, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5101131299280083, |
|
"grad_norm": 30.158868667054477, |
|
"learning_rate": 5.720244638917323e-07, |
|
"logits/chosen": -0.965910792350769, |
|
"logits/rejected": -0.8706585168838501, |
|
"logps/chosen": -6.497648239135742, |
|
"logps/rejected": -4.859541416168213, |
|
"loss": 0.8819, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -12.148852348327637, |
|
"rewards/margins": 4.095267295837402, |
|
"rewards/rejected": -16.24411964416504, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.515598217346589, |
|
"grad_norm": 53.363507967860116, |
|
"learning_rate": 5.624753961644281e-07, |
|
"logits/chosen": -1.0084278583526611, |
|
"logits/rejected": -0.9723138213157654, |
|
"logps/chosen": -5.623072624206543, |
|
"logps/rejected": -4.460352420806885, |
|
"loss": 1.0174, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -11.150880813598633, |
|
"rewards/margins": 2.9068009853363037, |
|
"rewards/rejected": -14.057682037353516, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5210833047651697, |
|
"grad_norm": 23.23631443491041, |
|
"learning_rate": 5.529031213778614e-07, |
|
"logits/chosen": -1.0280265808105469, |
|
"logits/rejected": -0.9905204772949219, |
|
"logps/chosen": -5.483713150024414, |
|
"logps/rejected": -4.167681694030762, |
|
"loss": 0.98, |
|
"rewards/accuracies": 0.7890625, |
|
"rewards/chosen": -10.419203758239746, |
|
"rewards/margins": 3.290079355239868, |
|
"rewards/rejected": -13.709283828735352, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5265683921837504, |
|
"grad_norm": 37.123060878167124, |
|
"learning_rate": 5.433111952413494e-07, |
|
"logits/chosen": -1.088523507118225, |
|
"logits/rejected": -0.998890221118927, |
|
"logps/chosen": -5.065528869628906, |
|
"logps/rejected": -3.6324613094329834, |
|
"loss": 0.8177, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -9.081153869628906, |
|
"rewards/margins": 3.582667112350464, |
|
"rewards/rejected": -12.663820266723633, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5320534796023312, |
|
"grad_norm": 25.260662494721746, |
|
"learning_rate": 5.33703180763884e-07, |
|
"logits/chosen": -1.0532890558242798, |
|
"logits/rejected": -0.9741649031639099, |
|
"logps/chosen": -5.729362964630127, |
|
"logps/rejected": -4.276680946350098, |
|
"loss": 0.7902, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -10.691701889038086, |
|
"rewards/margins": 3.631704568862915, |
|
"rewards/rejected": -14.323406219482422, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5375385670209119, |
|
"grad_norm": 46.96307372174284, |
|
"learning_rate": 5.240826469306186e-07, |
|
"logits/chosen": -1.0120959281921387, |
|
"logits/rejected": -0.9785177707672119, |
|
"logps/chosen": -6.013980865478516, |
|
"logps/rejected": -4.12684965133667, |
|
"loss": 0.5899, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -10.317124366760254, |
|
"rewards/margins": 4.71782922744751, |
|
"rewards/rejected": -15.034952163696289, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5430236544394926, |
|
"grad_norm": 24.414358778835954, |
|
"learning_rate": 5.144531673771363e-07, |
|
"logits/chosen": -1.002170205116272, |
|
"logits/rejected": -0.9993859529495239, |
|
"logps/chosen": -6.949717044830322, |
|
"logps/rejected": -5.171581745147705, |
|
"loss": 0.8516, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -12.928955078125, |
|
"rewards/margins": 4.445338726043701, |
|
"rewards/rejected": -17.37429428100586, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5485087418580734, |
|
"grad_norm": 25.008431407505398, |
|
"learning_rate": 5.048183190619903e-07, |
|
"logits/chosen": -0.9874565005302429, |
|
"logits/rejected": -0.9811626672744751, |
|
"logps/chosen": -6.921389102935791, |
|
"logps/rejected": -5.277797698974609, |
|
"loss": 0.8378, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -13.19449520111084, |
|
"rewards/margins": 4.108977317810059, |
|
"rewards/rejected": -17.3034725189209, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5539938292766541, |
|
"grad_norm": 27.960086992805927, |
|
"learning_rate": 4.951816809380097e-07, |
|
"logits/chosen": -1.0021039247512817, |
|
"logits/rejected": -0.9522125124931335, |
|
"logps/chosen": -7.083625793457031, |
|
"logps/rejected": -5.62723445892334, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -14.068085670471191, |
|
"rewards/margins": 3.6409800052642822, |
|
"rewards/rejected": -17.709064483642578, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5594789166952349, |
|
"grad_norm": 30.679626728308502, |
|
"learning_rate": 4.855468326228638e-07, |
|
"logits/chosen": -1.0606987476348877, |
|
"logits/rejected": -1.041282296180725, |
|
"logps/chosen": -7.568184852600098, |
|
"logps/rejected": -6.082253456115723, |
|
"loss": 0.8745, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -15.205633163452148, |
|
"rewards/margins": 3.714829206466675, |
|
"rewards/rejected": -18.92046356201172, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5649640041138155, |
|
"grad_norm": 27.494401307007365, |
|
"learning_rate": 4.7591735306938134e-07, |
|
"logits/chosen": -1.0469098091125488, |
|
"logits/rejected": -0.9781535267829895, |
|
"logps/chosen": -7.355569839477539, |
|
"logps/rejected": -6.020066261291504, |
|
"loss": 0.7655, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -15.050165176391602, |
|
"rewards/margins": 3.3387598991394043, |
|
"rewards/rejected": -18.388925552368164, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5704490915323963, |
|
"grad_norm": 37.8050143114576, |
|
"learning_rate": 4.6629681923611603e-07, |
|
"logits/chosen": -1.049713373184204, |
|
"logits/rejected": -1.0141334533691406, |
|
"logps/chosen": -7.472883224487305, |
|
"logps/rejected": -6.059725284576416, |
|
"loss": 0.9818, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -15.149312973022461, |
|
"rewards/margins": 3.5328941345214844, |
|
"rewards/rejected": -18.682207107543945, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.575934178950977, |
|
"grad_norm": 24.845125832684612, |
|
"learning_rate": 4.5668880475865067e-07, |
|
"logits/chosen": -1.0235170125961304, |
|
"logits/rejected": -0.9582427144050598, |
|
"logps/chosen": -7.626412391662598, |
|
"logps/rejected": -6.162938594818115, |
|
"loss": 0.7142, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -15.407346725463867, |
|
"rewards/margins": 3.658684730529785, |
|
"rewards/rejected": -19.066030502319336, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.5814192663695578, |
|
"grad_norm": 24.20853718541258, |
|
"learning_rate": 4.4709687862213864e-07, |
|
"logits/chosen": -0.9750124216079712, |
|
"logits/rejected": -0.9425258636474609, |
|
"logps/chosen": -7.844966411590576, |
|
"logps/rejected": -6.051673412322998, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -15.129182815551758, |
|
"rewards/margins": 4.4832329750061035, |
|
"rewards/rejected": -19.612417221069336, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.5869043537881385, |
|
"grad_norm": 27.32408594646776, |
|
"learning_rate": 4.3752460383557194e-07, |
|
"logits/chosen": -0.9948883056640625, |
|
"logits/rejected": -0.8997665643692017, |
|
"logps/chosen": -7.177610397338867, |
|
"logps/rejected": -5.7914228439331055, |
|
"loss": 0.779, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -14.478557586669922, |
|
"rewards/margins": 3.465468406677246, |
|
"rewards/rejected": -17.94402503967285, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.5923894412067192, |
|
"grad_norm": 25.295159101372597, |
|
"learning_rate": 4.2797553610826797e-07, |
|
"logits/chosen": -0.9283576011657715, |
|
"logits/rejected": -0.8969117403030396, |
|
"logps/chosen": -7.38961935043335, |
|
"logps/rejected": -6.016010284423828, |
|
"loss": 0.8094, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -15.04002571105957, |
|
"rewards/margins": 3.434022903442383, |
|
"rewards/rejected": -18.474048614501953, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.5978745286253, |
|
"grad_norm": 34.809554467100526, |
|
"learning_rate": 4.184532225290686e-07, |
|
"logits/chosen": -0.8853582739830017, |
|
"logits/rejected": -0.8778493404388428, |
|
"logps/chosen": -7.672779560089111, |
|
"logps/rejected": -5.923637390136719, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.809093475341797, |
|
"rewards/margins": 4.372855186462402, |
|
"rewards/rejected": -19.181949615478516, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.6033596160438807, |
|
"grad_norm": 37.595761539961494, |
|
"learning_rate": 4.089612002487428e-07, |
|
"logits/chosen": -0.9878619909286499, |
|
"logits/rejected": -0.9121577739715576, |
|
"logps/chosen": -7.86918830871582, |
|
"logps/rejected": -6.307096481323242, |
|
"loss": 0.9853, |
|
"rewards/accuracies": 0.7734375, |
|
"rewards/chosen": -15.767744064331055, |
|
"rewards/margins": 3.905228614807129, |
|
"rewards/rejected": -19.672971725463867, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6088447034624614, |
|
"grad_norm": 19.136373017418645, |
|
"learning_rate": 3.995029951660776e-07, |
|
"logits/chosen": -0.938258945941925, |
|
"logits/rejected": -0.9154999256134033, |
|
"logps/chosen": -7.287668704986572, |
|
"logps/rejected": -5.683687210083008, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.20921802520752, |
|
"rewards/margins": 4.009955406188965, |
|
"rewards/rejected": -18.21917152404785, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6143297908810421, |
|
"grad_norm": 41.38729707458279, |
|
"learning_rate": 3.9008212061815207e-07, |
|
"logits/chosen": -0.9403737783432007, |
|
"logits/rejected": -0.8944230079650879, |
|
"logps/chosen": -7.414663314819336, |
|
"logps/rejected": -5.728695869445801, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.32174015045166, |
|
"rewards/margins": 4.214918613433838, |
|
"rewards/rejected": -18.536659240722656, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6198148782996229, |
|
"grad_norm": 24.324325222505287, |
|
"learning_rate": 3.8070207607527585e-07, |
|
"logits/chosen": -0.9715641736984253, |
|
"logits/rejected": -0.9244170784950256, |
|
"logps/chosen": -6.609511852264404, |
|
"logps/rejected": -5.317971229553223, |
|
"loss": 1.0423, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -13.294927597045898, |
|
"rewards/margins": 3.228851795196533, |
|
"rewards/rejected": -16.523780822753906, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6252999657182037, |
|
"grad_norm": 25.97873694518042, |
|
"learning_rate": 3.7136634584107783e-07, |
|
"logits/chosen": -1.0553674697875977, |
|
"logits/rejected": -0.9997435808181763, |
|
"logps/chosen": -7.505800247192383, |
|
"logps/rejected": -5.754428863525391, |
|
"loss": 0.7409, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.386072158813477, |
|
"rewards/margins": 4.378428936004639, |
|
"rewards/rejected": -18.764501571655273, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6307850531367843, |
|
"grad_norm": 33.98592167333287, |
|
"learning_rate": 3.6207839775823047e-07, |
|
"logits/chosen": -0.9378336071968079, |
|
"logits/rejected": -0.9261949062347412, |
|
"logps/chosen": -6.874807357788086, |
|
"logps/rejected": -5.083373546600342, |
|
"loss": 0.8674, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -12.708434104919434, |
|
"rewards/margins": 4.478583335876465, |
|
"rewards/rejected": -17.1870174407959, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6362701405553651, |
|
"grad_norm": 27.335955275549072, |
|
"learning_rate": 3.5284168192028805e-07, |
|
"logits/chosen": -0.9258574843406677, |
|
"logits/rejected": -0.8993632793426514, |
|
"logps/chosen": -6.7075514793396, |
|
"logps/rejected": -5.005417346954346, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -12.513543128967285, |
|
"rewards/margins": 4.255335330963135, |
|
"rewards/rejected": -16.768878936767578, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6417552279739458, |
|
"grad_norm": 36.43999913709104, |
|
"learning_rate": 3.4365962939011693e-07, |
|
"logits/chosen": -0.9867472648620605, |
|
"logits/rejected": -0.9383954405784607, |
|
"logps/chosen": -6.8381452560424805, |
|
"logps/rejected": -5.291529178619385, |
|
"loss": 0.9343, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -13.228822708129883, |
|
"rewards/margins": 3.86653995513916, |
|
"rewards/rejected": -17.09536361694336, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6472403153925266, |
|
"grad_norm": 43.410283610352316, |
|
"learning_rate": 3.345356509253958e-07, |
|
"logits/chosen": -0.9349948167800903, |
|
"logits/rejected": -0.8691989183425903, |
|
"logps/chosen": -6.570774555206299, |
|
"logps/rejected": -4.859616279602051, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -12.149040222167969, |
|
"rewards/margins": 4.277895927429199, |
|
"rewards/rejected": -16.42693519592285, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.6527254028111072, |
|
"grad_norm": 30.13014793522752, |
|
"learning_rate": 3.2547313571165967e-07, |
|
"logits/chosen": -0.9361096024513245, |
|
"logits/rejected": -0.9115648865699768, |
|
"logps/chosen": -6.824566841125488, |
|
"logps/rejected": -5.03220272064209, |
|
"loss": 0.7826, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -12.580507278442383, |
|
"rewards/margins": 4.480910778045654, |
|
"rewards/rejected": -17.061420440673828, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.658210490229688, |
|
"grad_norm": 33.220837044211905, |
|
"learning_rate": 3.1647545010335395e-07, |
|
"logits/chosen": -0.9235398173332214, |
|
"logits/rejected": -0.8107198476791382, |
|
"logps/chosen": -6.378120422363281, |
|
"logps/rejected": -4.839158058166504, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -12.097895622253418, |
|
"rewards/margins": 3.847404956817627, |
|
"rewards/rejected": -15.945301055908203, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6636955776482688, |
|
"grad_norm": 50.56573612879948, |
|
"learning_rate": 3.075459363733727e-07, |
|
"logits/chosen": -0.8829526901245117, |
|
"logits/rejected": -0.8535292744636536, |
|
"logps/chosen": -6.172534465789795, |
|
"logps/rejected": -4.939080715179443, |
|
"loss": 0.9172, |
|
"rewards/accuracies": 0.7578125, |
|
"rewards/chosen": -12.347702026367188, |
|
"rewards/margins": 3.0836341381073, |
|
"rewards/rejected": -15.431337356567383, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6691806650668495, |
|
"grad_norm": 31.870637933820483, |
|
"learning_rate": 2.9868791147154025e-07, |
|
"logits/chosen": -0.9092215895652771, |
|
"logits/rejected": -0.8585975170135498, |
|
"logps/chosen": -6.820605278015137, |
|
"logps/rejected": -5.349386215209961, |
|
"loss": 0.868, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -13.373466491699219, |
|
"rewards/margins": 3.678046226501465, |
|
"rewards/rejected": -17.051511764526367, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6746657524854303, |
|
"grad_norm": 28.176132333643206, |
|
"learning_rate": 2.8990466579249917e-07, |
|
"logits/chosen": -0.8528233766555786, |
|
"logits/rejected": -0.7868634462356567, |
|
"logps/chosen": -6.514227867126465, |
|
"logps/rejected": -4.8763251304626465, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -12.190811157226562, |
|
"rewards/margins": 4.094757556915283, |
|
"rewards/rejected": -16.28557014465332, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6801508399040109, |
|
"grad_norm": 38.990501071087174, |
|
"learning_rate": 2.811994619534637e-07, |
|
"logits/chosen": -0.9431190490722656, |
|
"logits/rejected": -0.9019297957420349, |
|
"logps/chosen": -7.381836891174316, |
|
"logps/rejected": -5.60933780670166, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.023344993591309, |
|
"rewards/margins": 4.431247711181641, |
|
"rewards/rejected": -18.454591751098633, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.6856359273225917, |
|
"grad_norm": 23.287789798850373, |
|
"learning_rate": 2.725755335822903e-07, |
|
"logits/chosen": -0.9163570404052734, |
|
"logits/rejected": -0.8643731474876404, |
|
"logps/chosen": -7.203619003295898, |
|
"logps/rejected": -5.303135395050049, |
|
"loss": 0.5385, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -13.25783920288086, |
|
"rewards/margins": 4.7512078285217285, |
|
"rewards/rejected": -18.00904655456543, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6911210147411725, |
|
"grad_norm": 29.532793849548835, |
|
"learning_rate": 2.640360841163174e-07, |
|
"logits/chosen": -0.87614506483078, |
|
"logits/rejected": -0.8524197340011597, |
|
"logps/chosen": -6.783047676086426, |
|
"logps/rejected": -5.092715263366699, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -12.731788635253906, |
|
"rewards/margins": 4.225830078125, |
|
"rewards/rejected": -16.957618713378906, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.6966061021597532, |
|
"grad_norm": 30.505873793824883, |
|
"learning_rate": 2.5558428561241816e-07, |
|
"logits/chosen": -0.947504997253418, |
|
"logits/rejected": -0.8782521486282349, |
|
"logps/chosen": -6.791367530822754, |
|
"logps/rejected": -5.176680564880371, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -12.941701889038086, |
|
"rewards/margins": 4.036717414855957, |
|
"rewards/rejected": -16.97842025756836, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7020911895783339, |
|
"grad_norm": 39.92525272801302, |
|
"learning_rate": 2.472232775687119e-07, |
|
"logits/chosen": -0.8722752332687378, |
|
"logits/rejected": -0.856322705745697, |
|
"logps/chosen": -7.144659042358398, |
|
"logps/rejected": -5.182129859924316, |
|
"loss": 0.7803, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -12.955324172973633, |
|
"rewards/margins": 4.906323432922363, |
|
"rewards/rejected": -17.86164665222168, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7075762769969146, |
|
"grad_norm": 35.65648286534799, |
|
"learning_rate": 2.3895616575836806e-07, |
|
"logits/chosen": -0.8648374676704407, |
|
"logits/rejected": -0.8587543964385986, |
|
"logps/chosen": -7.462764263153076, |
|
"logps/rejected": -5.415197372436523, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.537995338439941, |
|
"rewards/margins": 5.1189165115356445, |
|
"rewards/rejected": -18.656909942626953, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7130613644154954, |
|
"grad_norm": 36.897396094592246, |
|
"learning_rate": 2.3078602107593897e-07, |
|
"logits/chosen": -0.9551251530647278, |
|
"logits/rejected": -0.9301334619522095, |
|
"logps/chosen": -7.187896251678467, |
|
"logps/rejected": -5.676419258117676, |
|
"loss": 0.7432, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -14.191046714782715, |
|
"rewards/margins": 3.7786920070648193, |
|
"rewards/rejected": -17.96973991394043, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7185464518340761, |
|
"grad_norm": 23.791455410978802, |
|
"learning_rate": 2.2271587839664668e-07, |
|
"logits/chosen": -0.8816163539886475, |
|
"logits/rejected": -0.8654621839523315, |
|
"logps/chosen": -7.688798427581787, |
|
"logps/rejected": -5.900554180145264, |
|
"loss": 0.7962, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -14.751386642456055, |
|
"rewards/margins": 4.470608711242676, |
|
"rewards/rejected": -19.221996307373047, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7240315392526568, |
|
"grad_norm": 25.229342149520836, |
|
"learning_rate": 2.1474873544905203e-07, |
|
"logits/chosen": -0.9233815670013428, |
|
"logits/rejected": -0.8769809007644653, |
|
"logps/chosen": -7.945870876312256, |
|
"logps/rejected": -6.068084239959717, |
|
"loss": 0.7871, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -15.170208930969238, |
|
"rewards/margins": 4.694468975067139, |
|
"rewards/rejected": -19.86467933654785, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7295166266712376, |
|
"grad_norm": 24.072357862431296, |
|
"learning_rate": 2.0688755170151994e-07, |
|
"logits/chosen": -0.9093427062034607, |
|
"logits/rejected": -0.8254431486129761, |
|
"logps/chosen": -7.6159138679504395, |
|
"logps/rejected": -6.035447597503662, |
|
"loss": 0.7131, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -15.08862018585205, |
|
"rewards/margins": 3.951165199279785, |
|
"rewards/rejected": -19.039783477783203, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7350017140898183, |
|
"grad_norm": 27.574889812468303, |
|
"learning_rate": 1.991352472628978e-07, |
|
"logits/chosen": -0.9855005741119385, |
|
"logits/rejected": -0.8852315545082092, |
|
"logps/chosen": -8.272013664245605, |
|
"logps/rejected": -6.531748294830322, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -16.329370498657227, |
|
"rewards/margins": 4.350663661956787, |
|
"rewards/rejected": -20.68003273010254, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7404868015083991, |
|
"grad_norm": 38.0684454587543, |
|
"learning_rate": 1.9149470179781529e-07, |
|
"logits/chosen": -0.8634744882583618, |
|
"logits/rejected": -0.8510404825210571, |
|
"logps/chosen": -8.125703811645508, |
|
"logps/rejected": -6.487473011016846, |
|
"loss": 0.9743, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.21868324279785, |
|
"rewards/margins": 4.095577239990234, |
|
"rewards/rejected": -20.314258575439453, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7459718889269797, |
|
"grad_norm": 48.36799803983576, |
|
"learning_rate": 1.8396875345700496e-07, |
|
"logits/chosen": -0.9214343428611755, |
|
"logits/rejected": -0.8962255120277405, |
|
"logps/chosen": -8.190770149230957, |
|
"logps/rejected": -6.314955711364746, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -15.78738784790039, |
|
"rewards/margins": 4.689537048339844, |
|
"rewards/rejected": -20.476924896240234, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.7514569763455605, |
|
"grad_norm": 44.16548053204111, |
|
"learning_rate": 1.76560197823046e-07, |
|
"logits/chosen": -0.919052004814148, |
|
"logits/rejected": -0.8849231600761414, |
|
"logps/chosen": -8.452275276184082, |
|
"logps/rejected": -6.598462104797363, |
|
"loss": 0.7156, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.496156692504883, |
|
"rewards/margins": 4.634530544281006, |
|
"rewards/rejected": -21.130685806274414, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.7569420637641412, |
|
"grad_norm": 21.109729731146004, |
|
"learning_rate": 1.6927178687191952e-07, |
|
"logits/chosen": -0.9427747130393982, |
|
"logits/rejected": -0.8983960151672363, |
|
"logps/chosen": -8.133773803710938, |
|
"logps/rejected": -6.293461322784424, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -15.733654022216797, |
|
"rewards/margins": 4.600779056549072, |
|
"rewards/rejected": -20.334434509277344, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.762427151182722, |
|
"grad_norm": 24.71923168446529, |
|
"learning_rate": 1.6210622795076167e-07, |
|
"logits/chosen": -0.9024847149848938, |
|
"logits/rejected": -0.8320090770721436, |
|
"logps/chosen": -7.866386413574219, |
|
"logps/rejected": -5.999449729919434, |
|
"loss": 0.7399, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -14.998624801635742, |
|
"rewards/margins": 4.6673407554626465, |
|
"rewards/rejected": -19.665966033935547, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.7679122386013028, |
|
"grad_norm": 28.05368903550952, |
|
"learning_rate": 1.5506618277219408e-07, |
|
"logits/chosen": -0.9084888100624084, |
|
"logits/rejected": -0.8153257966041565, |
|
"logps/chosen": -8.255277633666992, |
|
"logps/rejected": -6.376511096954346, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -15.941277503967285, |
|
"rewards/margins": 4.696916580200195, |
|
"rewards/rejected": -20.638193130493164, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7733973260198834, |
|
"grad_norm": 25.708532161588263, |
|
"learning_rate": 1.481542664256075e-07, |
|
"logits/chosen": -0.8618345856666565, |
|
"logits/rejected": -0.7895917892456055, |
|
"logps/chosen": -7.592902660369873, |
|
"logps/rejected": -5.800815582275391, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -14.50204086303711, |
|
"rewards/margins": 4.480217933654785, |
|
"rewards/rejected": -18.982257843017578, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.7788824134384642, |
|
"grad_norm": 24.709393184243222, |
|
"learning_rate": 1.413730464057616e-07, |
|
"logits/chosen": -0.8121160268783569, |
|
"logits/rejected": -0.7455395460128784, |
|
"logps/chosen": -7.604070663452148, |
|
"logps/rejected": -5.874716281890869, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -14.68679141998291, |
|
"rewards/margins": 4.323384761810303, |
|
"rewards/rejected": -19.010177612304688, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.7843675008570449, |
|
"grad_norm": 38.97698904003614, |
|
"learning_rate": 1.3472504165906612e-07, |
|
"logits/chosen": -0.8006829619407654, |
|
"logits/rejected": -0.7394671440124512, |
|
"logps/chosen": -7.116863250732422, |
|
"logps/rejected": -5.581884384155273, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -13.954710960388184, |
|
"rewards/margins": 3.837446928024292, |
|
"rewards/rejected": -17.792160034179688, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.7898525882756257, |
|
"grad_norm": 29.640146576501934, |
|
"learning_rate": 1.2821272164789543e-07, |
|
"logits/chosen": -0.8447168469429016, |
|
"logits/rejected": -0.7728930115699768, |
|
"logps/chosen": -7.501246452331543, |
|
"logps/rejected": -5.655298709869385, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -14.138248443603516, |
|
"rewards/margins": 4.6148681640625, |
|
"rewards/rejected": -18.753116607666016, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7953376756942063, |
|
"grad_norm": 22.947844708118026, |
|
"learning_rate": 1.2183850543328312e-07, |
|
"logits/chosen": -0.8897333741188049, |
|
"logits/rejected": -0.8205296397209167, |
|
"logps/chosen": -7.398487091064453, |
|
"logps/rejected": -5.790860652923584, |
|
"loss": 0.7226, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -14.477151870727539, |
|
"rewards/margins": 4.019064426422119, |
|
"rewards/rejected": -18.4962158203125, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8008227631127871, |
|
"grad_norm": 27.798903132552176, |
|
"learning_rate": 1.1560476077634069e-07, |
|
"logits/chosen": -0.7919908761978149, |
|
"logits/rejected": -0.8394799828529358, |
|
"logps/chosen": -7.839868068695068, |
|
"logps/rejected": -5.743566036224365, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.358914375305176, |
|
"rewards/margins": 5.240755081176758, |
|
"rewards/rejected": -19.59967041015625, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8063078505313679, |
|
"grad_norm": 29.92809246962988, |
|
"learning_rate": 1.0951380325872977e-07, |
|
"logits/chosen": -0.8169230818748474, |
|
"logits/rejected": -0.7812893986701965, |
|
"logps/chosen": -7.359820365905762, |
|
"logps/rejected": -5.6241607666015625, |
|
"loss": 0.7967, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -14.060400009155273, |
|
"rewards/margins": 4.3391499519348145, |
|
"rewards/rejected": -18.39954948425293, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8117929379499486, |
|
"grad_norm": 27.792210529581745, |
|
"learning_rate": 1.0356789542251936e-07, |
|
"logits/chosen": -0.8669033050537109, |
|
"logits/rejected": -0.8605407476425171, |
|
"logps/chosen": -8.198552131652832, |
|
"logps/rejected": -6.282137393951416, |
|
"loss": 0.8025, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.705344200134277, |
|
"rewards/margins": 4.791035175323486, |
|
"rewards/rejected": -20.496379852294922, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8172780253685293, |
|
"grad_norm": 28.277349818001756, |
|
"learning_rate": 9.776924592974256e-08, |
|
"logits/chosen": -0.8328518867492676, |
|
"logits/rejected": -0.8236594796180725, |
|
"logps/chosen": -7.214673042297363, |
|
"logps/rejected": -5.481906414031982, |
|
"loss": 0.7674, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -13.704765319824219, |
|
"rewards/margins": 4.331914901733398, |
|
"rewards/rejected": -18.03668212890625, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.82276311278711, |
|
"grad_norm": 29.007836684575707, |
|
"learning_rate": 9.212000874196952e-08, |
|
"logits/chosen": -0.8581669330596924, |
|
"logits/rejected": -0.8325639367103577, |
|
"logps/chosen": -7.351170539855957, |
|
"logps/rejected": -5.474079608917236, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -13.685198783874512, |
|
"rewards/margins": 4.692727088928223, |
|
"rewards/rejected": -18.377925872802734, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8282482002056908, |
|
"grad_norm": 43.71571457854729, |
|
"learning_rate": 8.662228232019875e-08, |
|
"logits/chosen": -0.8501139879226685, |
|
"logits/rejected": -0.8618481755256653, |
|
"logps/chosen": -7.359824180603027, |
|
"logps/rejected": -5.42210054397583, |
|
"loss": 0.7332, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -13.555251121520996, |
|
"rewards/margins": 4.844309329986572, |
|
"rewards/rejected": -18.399559020996094, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8337332876242715, |
|
"grad_norm": 46.33073864540601, |
|
"learning_rate": 8.127810884536402e-08, |
|
"logits/chosen": -0.853046715259552, |
|
"logits/rejected": -0.8423393964767456, |
|
"logps/chosen": -6.985077857971191, |
|
"logps/rejected": -5.142387390136719, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -12.855968475341797, |
|
"rewards/margins": 4.606726169586182, |
|
"rewards/rejected": -17.462696075439453, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.8392183750428522, |
|
"grad_norm": 42.89209343669169, |
|
"learning_rate": 7.608947345974759e-08, |
|
"logits/chosen": -0.920865535736084, |
|
"logits/rejected": -0.8836889266967773, |
|
"logps/chosen": -7.042182445526123, |
|
"logps/rejected": -5.48915958404541, |
|
"loss": 0.8117, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -13.722898483276367, |
|
"rewards/margins": 3.882556915283203, |
|
"rewards/rejected": -17.605453491210938, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.844703462461433, |
|
"grad_norm": 37.7037024205324, |
|
"learning_rate": 7.105830352958142e-08, |
|
"logits/chosen": -0.9472789764404297, |
|
"logits/rejected": -0.9106646180152893, |
|
"logps/chosen": -7.459850311279297, |
|
"logps/rejected": -5.479578971862793, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -13.698948860168457, |
|
"rewards/margins": 4.950677871704102, |
|
"rewards/rejected": -18.649625778198242, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.8501885498800137, |
|
"grad_norm": 24.0763873064064, |
|
"learning_rate": 6.618646792910893e-08, |
|
"logits/chosen": -0.8774456977844238, |
|
"logits/rejected": -0.7841386795043945, |
|
"logps/chosen": -6.863981246948242, |
|
"logps/rejected": -5.015853404998779, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -12.539634704589844, |
|
"rewards/margins": 4.620318412780762, |
|
"rewards/rejected": -17.15995216369629, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.8556736372985945, |
|
"grad_norm": 32.66084265444344, |
|
"learning_rate": 6.147577634637413e-08, |
|
"logits/chosen": -0.9129813313484192, |
|
"logits/rejected": -0.8859033584594727, |
|
"logps/chosen": -7.296760082244873, |
|
"logps/rejected": -5.64870548248291, |
|
"loss": 0.7727, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -14.121763229370117, |
|
"rewards/margins": 4.120136737823486, |
|
"rewards/rejected": -18.241899490356445, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8611587247171751, |
|
"grad_norm": 23.94997399437051, |
|
"learning_rate": 5.692797861099718e-08, |
|
"logits/chosen": -0.8945199847221375, |
|
"logits/rejected": -0.8570997714996338, |
|
"logps/chosen": -6.75827693939209, |
|
"logps/rejected": -5.06813907623291, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -12.670347213745117, |
|
"rewards/margins": 4.22534704208374, |
|
"rewards/rejected": -16.895692825317383, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.8666438121357559, |
|
"grad_norm": 24.150699682280717, |
|
"learning_rate": 5.25447640441834e-08, |
|
"logits/chosen": -0.954617977142334, |
|
"logits/rejected": -0.8557642102241516, |
|
"logps/chosen": -7.347589492797852, |
|
"logps/rejected": -5.5732340812683105, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -13.933087348937988, |
|
"rewards/margins": 4.435887813568115, |
|
"rewards/rejected": -18.368972778320312, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.8721288995543367, |
|
"grad_norm": 36.75533433702862, |
|
"learning_rate": 4.832776083120982e-08, |
|
"logits/chosen": -0.9205527305603027, |
|
"logits/rejected": -0.8304504752159119, |
|
"logps/chosen": -7.045970439910889, |
|
"logps/rejected": -5.256443023681641, |
|
"loss": 0.6382, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -13.141106605529785, |
|
"rewards/margins": 4.473819732666016, |
|
"rewards/rejected": -17.614925384521484, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.8776139869729174, |
|
"grad_norm": 31.994585110678578, |
|
"learning_rate": 4.427853541662091e-08, |
|
"logits/chosen": -0.9841543436050415, |
|
"logits/rejected": -0.8539234399795532, |
|
"logps/chosen": -7.1274094581604, |
|
"logps/rejected": -5.204132080078125, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.010330200195312, |
|
"rewards/margins": 4.808194160461426, |
|
"rewards/rejected": -17.818523406982422, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8830990743914982, |
|
"grad_norm": 28.795886036839306, |
|
"learning_rate": 4.039859192235778e-08, |
|
"logits/chosen": -0.9625253677368164, |
|
"logits/rejected": -0.9089056849479675, |
|
"logps/chosen": -7.486809253692627, |
|
"logps/rejected": -5.702870845794678, |
|
"loss": 0.8583, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -14.257177352905273, |
|
"rewards/margins": 4.459846496582031, |
|
"rewards/rejected": -18.717023849487305, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.8885841618100788, |
|
"grad_norm": 26.44205968756603, |
|
"learning_rate": 3.668937158903901e-08, |
|
"logits/chosen": -0.9169929027557373, |
|
"logits/rejected": -0.8381502628326416, |
|
"logps/chosen": -7.5406494140625, |
|
"logps/rejected": -5.6013288497924805, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -14.003321647644043, |
|
"rewards/margins": 4.848302364349365, |
|
"rewards/rejected": -18.85162353515625, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.8940692492286596, |
|
"grad_norm": 27.304950853911894, |
|
"learning_rate": 3.3152252240598086e-08, |
|
"logits/chosen": -0.9413248896598816, |
|
"logits/rejected": -0.8493109941482544, |
|
"logps/chosen": -7.453137397766113, |
|
"logps/rejected": -5.501208782196045, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -13.753022193908691, |
|
"rewards/margins": 4.87982177734375, |
|
"rewards/rejected": -18.632844924926758, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.8995543366472403, |
|
"grad_norm": 20.074530093573202, |
|
"learning_rate": 2.978854777247841e-08, |
|
"logits/chosen": -0.9120803475379944, |
|
"logits/rejected": -0.8509462475776672, |
|
"logps/chosen": -7.397423267364502, |
|
"logps/rejected": -5.588208198547363, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.97052001953125, |
|
"rewards/margins": 4.523037433624268, |
|
"rewards/rejected": -18.49355697631836, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.905039424065821, |
|
"grad_norm": 26.09353600189251, |
|
"learning_rate": 2.6599507663574384e-08, |
|
"logits/chosen": -0.952299952507019, |
|
"logits/rejected": -0.8652746677398682, |
|
"logps/chosen": -7.684518814086914, |
|
"logps/rejected": -5.83120584487915, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -14.57801628112793, |
|
"rewards/margins": 4.633281707763672, |
|
"rewards/rejected": -19.21129608154297, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9105245114844018, |
|
"grad_norm": 38.37398443400821, |
|
"learning_rate": 2.358631651210141e-08, |
|
"logits/chosen": -0.8585479259490967, |
|
"logits/rejected": -0.8270218372344971, |
|
"logps/chosen": -7.32467794418335, |
|
"logps/rejected": -5.463205337524414, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -13.658012390136719, |
|
"rewards/margins": 4.653683185577393, |
|
"rewards/rejected": -18.311695098876953, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.9160095989029825, |
|
"grad_norm": 17.666453011965785, |
|
"learning_rate": 2.0750093595565733e-08, |
|
"logits/chosen": -0.8858319520950317, |
|
"logits/rejected": -0.8542614579200745, |
|
"logps/chosen": -7.348204612731934, |
|
"logps/rejected": -5.504924774169922, |
|
"loss": 0.6247, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -13.762311935424805, |
|
"rewards/margins": 4.608198165893555, |
|
"rewards/rejected": -18.37051010131836, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9214946863215633, |
|
"grad_norm": 19.09191014386972, |
|
"learning_rate": 1.8091892454998593e-08, |
|
"logits/chosen": -0.8447603583335876, |
|
"logits/rejected": -0.8495924472808838, |
|
"logps/chosen": -7.054547309875488, |
|
"logps/rejected": -5.083424091339111, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -12.708559036254883, |
|
"rewards/margins": 4.927809238433838, |
|
"rewards/rejected": -17.636367797851562, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.926979773740144, |
|
"grad_norm": 38.37416244304177, |
|
"learning_rate": 1.5612700503608967e-08, |
|
"logits/chosen": -0.95904541015625, |
|
"logits/rejected": -0.874359130859375, |
|
"logps/chosen": -8.061529159545898, |
|
"logps/rejected": -6.1823248863220215, |
|
"loss": 0.79, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -15.455812454223633, |
|
"rewards/margins": 4.698009967803955, |
|
"rewards/rejected": -20.153823852539062, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.9324648611587247, |
|
"grad_norm": 20.52317809637831, |
|
"learning_rate": 1.3313438659999399e-08, |
|
"logits/chosen": -0.8840410113334656, |
|
"logits/rejected": -0.8565166592597961, |
|
"logps/chosen": -7.426989555358887, |
|
"logps/rejected": -5.389143943786621, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -13.472861289978027, |
|
"rewards/margins": 5.094613552093506, |
|
"rewards/rejected": -18.567476272583008, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9379499485773054, |
|
"grad_norm": 28.898112504649927, |
|
"learning_rate": 1.119496100608297e-08, |
|
"logits/chosen": -0.902927815914154, |
|
"logits/rejected": -0.8713952302932739, |
|
"logps/chosen": -7.5428547859191895, |
|
"logps/rejected": -5.551333904266357, |
|
"loss": 0.6478, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -13.878334045410156, |
|
"rewards/margins": 4.978802680969238, |
|
"rewards/rejected": -18.857135772705078, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9434350359958862, |
|
"grad_norm": 30.615659518253594, |
|
"learning_rate": 9.258054469825972e-09, |
|
"logits/chosen": -0.9755229949951172, |
|
"logits/rejected": -0.8522156476974487, |
|
"logps/chosen": -7.569779396057129, |
|
"logps/rejected": -5.832912445068359, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -14.582280158996582, |
|
"rewards/margins": 4.34216833114624, |
|
"rewards/rejected": -18.924448013305664, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.948920123414467, |
|
"grad_norm": 24.650975590320186, |
|
"learning_rate": 7.503438532937168e-09, |
|
"logits/chosen": -0.8953875303268433, |
|
"logits/rejected": -0.8538772463798523, |
|
"logps/chosen": -7.302203178405762, |
|
"logps/rejected": -5.746313095092773, |
|
"loss": 0.8124, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -14.365781784057617, |
|
"rewards/margins": 3.8897247314453125, |
|
"rewards/rejected": -18.255508422851562, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.9544052108330476, |
|
"grad_norm": 22.56008245408754, |
|
"learning_rate": 5.931764963608865e-09, |
|
"logits/chosen": -0.8796699047088623, |
|
"logits/rejected": -0.8065083026885986, |
|
"logps/chosen": -7.606590270996094, |
|
"logps/rejected": -5.541936874389648, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.854841232299805, |
|
"rewards/margins": 5.1616339683532715, |
|
"rewards/rejected": -19.016477584838867, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9598902982516284, |
|
"grad_norm": 27.265075122995274, |
|
"learning_rate": 4.543617574412184e-09, |
|
"logits/chosen": -0.9127550721168518, |
|
"logits/rejected": -0.8750625848770142, |
|
"logps/chosen": -7.915278434753418, |
|
"logps/rejected": -6.025564670562744, |
|
"loss": 0.8845, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -15.063911437988281, |
|
"rewards/margins": 4.724285125732422, |
|
"rewards/rejected": -19.788196563720703, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9653753856702091, |
|
"grad_norm": 28.882962456249174, |
|
"learning_rate": 3.3395120054343086e-09, |
|
"logits/chosen": -0.9111831188201904, |
|
"logits/rejected": -0.8427572250366211, |
|
"logps/chosen": -7.206770420074463, |
|
"logps/rejected": -5.539353847503662, |
|
"loss": 0.7588, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -13.848384857177734, |
|
"rewards/margins": 4.168540000915527, |
|
"rewards/rejected": -18.016923904418945, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.9708604730887899, |
|
"grad_norm": 31.271655213972466, |
|
"learning_rate": 2.3198955327393686e-09, |
|
"logits/chosen": -0.9476001262664795, |
|
"logits/rejected": -0.8903546333312988, |
|
"logps/chosen": -7.501628875732422, |
|
"logps/rejected": -5.69838809967041, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.245970726013184, |
|
"rewards/margins": 4.508101463317871, |
|
"rewards/rejected": -18.754070281982422, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.9763455605073705, |
|
"grad_norm": 27.6286427872623, |
|
"learning_rate": 1.4851469022233997e-09, |
|
"logits/chosen": -0.9050301313400269, |
|
"logits/rejected": -0.8429163694381714, |
|
"logps/chosen": -7.289128303527832, |
|
"logps/rejected": -5.459531784057617, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -13.648829460144043, |
|
"rewards/margins": 4.5739922523498535, |
|
"rewards/rejected": -18.222822189331055, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.9818306479259513, |
|
"grad_norm": 33.536490419312095, |
|
"learning_rate": 8.35576188926046e-10, |
|
"logits/chosen": -0.8602910041809082, |
|
"logits/rejected": -0.8685486912727356, |
|
"logps/chosen": -7.6485795974731445, |
|
"logps/rejected": -5.617213249206543, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -14.043033599853516, |
|
"rewards/margins": 5.078416347503662, |
|
"rewards/rejected": -19.12145233154297, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.9873157353445321, |
|
"grad_norm": 36.03602197080609, |
|
"learning_rate": 3.71424681850141e-10, |
|
"logits/chosen": -0.9029905200004578, |
|
"logits/rejected": -0.8996679186820984, |
|
"logps/chosen": -7.467672348022461, |
|
"logps/rejected": -5.66632080078125, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.165802001953125, |
|
"rewards/margins": 4.503378391265869, |
|
"rewards/rejected": -18.66918182373047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9928008227631128, |
|
"grad_norm": 30.463376086047504, |
|
"learning_rate": 9.286479433257e-11, |
|
"logits/chosen": -0.9386723041534424, |
|
"logits/rejected": -0.8105076551437378, |
|
"logps/chosen": -7.705523490905762, |
|
"logps/rejected": -5.895651817321777, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.739130020141602, |
|
"rewards/margins": 4.524680137634277, |
|
"rewards/rejected": -19.263809204101562, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.9982859101816935, |
|
"grad_norm": 27.85120971616897, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.8981151580810547, |
|
"logits/rejected": -0.8777621388435364, |
|
"logps/chosen": -8.53371524810791, |
|
"logps/rejected": -5.803772449493408, |
|
"loss": 0.3779, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -14.509430885314941, |
|
"rewards/margins": 6.824857234954834, |
|
"rewards/rejected": -21.33428955078125, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.9982859101816935, |
|
"step": 182, |
|
"total_flos": 58779245903872.0, |
|
"train_loss": 1.1193010831599708, |
|
"train_runtime": 13670.339, |
|
"train_samples_per_second": 1.707, |
|
"train_steps_per_second": 0.013 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 182, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 182, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 58779245903872.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|