{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999333733093477, |
|
"eval_steps": 400, |
|
"global_step": 469, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021320541008728097, |
|
"grad_norm": 4.17070478980581, |
|
"learning_rate": 1.0638297872340425e-08, |
|
"logits/chosen": -0.4388880133628845, |
|
"logits/rejected": -0.6813962459564209, |
|
"logps/chosen": -137.1171112060547, |
|
"logps/rejected": -114.13969421386719, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010660270504364048, |
|
"grad_norm": 3.7299717491618436, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/chosen": -0.4889238774776459, |
|
"logits/rejected": -0.6665000319480896, |
|
"logps/chosen": -169.8695068359375, |
|
"logps/rejected": -153.95947265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3671875, |
|
"rewards/chosen": 0.00029664667090401053, |
|
"rewards/margins": -0.00023018479987513274, |
|
"rewards/rejected": 0.0005268314271233976, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021320541008728097, |
|
"grad_norm": 3.95978205732512, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.46806925535202026, |
|
"logits/rejected": -0.6404483318328857, |
|
"logps/chosen": -160.8107147216797, |
|
"logps/rejected": -149.25921630859375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0006372839561663568, |
|
"rewards/margins": 0.0015358469681814313, |
|
"rewards/rejected": -0.0008985629538074136, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03198081151309214, |
|
"grad_norm": 4.070738919050114, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/chosen": -0.5198644399642944, |
|
"logits/rejected": -0.7026724219322205, |
|
"logps/chosen": -148.3934783935547, |
|
"logps/rejected": -137.8568878173828, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.00037692085606977344, |
|
"rewards/margins": 9.87994353636168e-05, |
|
"rewards/rejected": 0.00027812132611870766, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04264108201745619, |
|
"grad_norm": 4.076698141198564, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.5080031156539917, |
|
"logits/rejected": -0.6844709515571594, |
|
"logps/chosen": -163.26565551757812, |
|
"logps/rejected": -144.93130493164062, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.0008511164924129844, |
|
"rewards/margins": 0.0010705896420404315, |
|
"rewards/rejected": -0.00021947314962744713, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05330135252182024, |
|
"grad_norm": 4.091883356232605, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/chosen": -0.45363473892211914, |
|
"logits/rejected": -0.6415150761604309, |
|
"logps/chosen": -160.65203857421875, |
|
"logps/rejected": -139.57582092285156, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0009880407014861703, |
|
"rewards/margins": 0.0012083369074389338, |
|
"rewards/rejected": -0.00022029613319318742, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06396162302618429, |
|
"grad_norm": 4.4267622202574675, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.5177901983261108, |
|
"logits/rejected": -0.6321993470191956, |
|
"logps/chosen": -165.01699829101562, |
|
"logps/rejected": -151.71261596679688, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0023814309388399124, |
|
"rewards/margins": 0.002116392133757472, |
|
"rewards/rejected": 0.0002650389797054231, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07462189353054834, |
|
"grad_norm": 4.269424985466007, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/chosen": -0.4782675802707672, |
|
"logits/rejected": -0.7104529738426208, |
|
"logps/chosen": -163.6421356201172, |
|
"logps/rejected": -143.2295379638672, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004739758092910051, |
|
"rewards/margins": 0.0038230004720389843, |
|
"rewards/rejected": 0.000916757620871067, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08528216403491239, |
|
"grad_norm": 4.2880363073067365, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.5303796529769897, |
|
"logits/rejected": -0.7106837630271912, |
|
"logps/chosen": -174.71463012695312, |
|
"logps/rejected": -153.29507446289062, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.008925501257181168, |
|
"rewards/margins": 0.006593695841729641, |
|
"rewards/rejected": 0.0023318054154515266, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09594243453927644, |
|
"grad_norm": 4.016438849908063, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/chosen": -0.522494375705719, |
|
"logits/rejected": -0.7226734757423401, |
|
"logps/chosen": -165.866455078125, |
|
"logps/rejected": -144.34194946289062, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.010274471715092659, |
|
"rewards/margins": 0.011223495937883854, |
|
"rewards/rejected": -0.0009490237571299076, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10660270504364049, |
|
"grad_norm": 4.3216596095930235, |
|
"learning_rate": 4.999376538968061e-07, |
|
"logits/chosen": -0.5761003494262695, |
|
"logits/rejected": -0.7390087842941284, |
|
"logps/chosen": -161.60655212402344, |
|
"logps/rejected": -144.6966552734375, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.009824760258197784, |
|
"rewards/margins": 0.014007952995598316, |
|
"rewards/rejected": -0.004183194134384394, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11726297554800454, |
|
"grad_norm": 4.305829979355763, |
|
"learning_rate": 4.99556762539107e-07, |
|
"logits/chosen": -0.5275800824165344, |
|
"logits/rejected": -0.7155976891517639, |
|
"logps/chosen": -172.5618133544922, |
|
"logps/rejected": -159.7906494140625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.007245404180139303, |
|
"rewards/margins": 0.016996894031763077, |
|
"rewards/rejected": -0.009751489385962486, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12792324605236857, |
|
"grad_norm": 3.919812332975093, |
|
"learning_rate": 4.988301435819852e-07, |
|
"logits/chosen": -0.528161883354187, |
|
"logits/rejected": -0.7242938280105591, |
|
"logps/chosen": -163.2517547607422, |
|
"logps/rejected": -152.65904235839844, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -4.5745400711894035e-05, |
|
"rewards/margins": 0.017660435289144516, |
|
"rewards/rejected": -0.01770617999136448, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13858351655673262, |
|
"grad_norm": 4.26787115297138, |
|
"learning_rate": 4.977588036590624e-07, |
|
"logits/chosen": -0.6125078797340393, |
|
"logits/rejected": -0.7909122109413147, |
|
"logps/chosen": -157.07858276367188, |
|
"logps/rejected": -142.1239776611328, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.011157763190567493, |
|
"rewards/margins": 0.029583096504211426, |
|
"rewards/rejected": -0.04074086248874664, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14924378706109667, |
|
"grad_norm": 4.32141025222622, |
|
"learning_rate": 4.96344226968867e-07, |
|
"logits/chosen": -0.6417307257652283, |
|
"logits/rejected": -0.8415061235427856, |
|
"logps/chosen": -177.39974975585938, |
|
"logps/rejected": -156.98171997070312, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.018069323152303696, |
|
"rewards/margins": 0.04366481304168701, |
|
"rewards/rejected": -0.061734139919281006, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15990405756546072, |
|
"grad_norm": 4.745633736375277, |
|
"learning_rate": 4.945883732186751e-07, |
|
"logits/chosen": -0.6420779824256897, |
|
"logits/rejected": -0.8456922769546509, |
|
"logps/chosen": -175.96359252929688, |
|
"logps/rejected": -160.39553833007812, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.049303699284791946, |
|
"rewards/margins": 0.04190283641219139, |
|
"rewards/rejected": -0.09120653569698334, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17056432806982477, |
|
"grad_norm": 4.4046157142215705, |
|
"learning_rate": 4.924936749095969e-07, |
|
"logits/chosen": -0.6506496071815491, |
|
"logits/rejected": -0.8331305384635925, |
|
"logps/chosen": -170.9277801513672, |
|
"logps/rejected": -157.8987579345703, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07082077115774155, |
|
"rewards/margins": 0.044193871319293976, |
|
"rewards/rejected": -0.11501463502645493, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18122459857418882, |
|
"grad_norm": 5.024858873122934, |
|
"learning_rate": 4.900630339666717e-07, |
|
"logits/chosen": -0.6046501994132996, |
|
"logits/rejected": -0.879498302936554, |
|
"logps/chosen": -172.4420928955078, |
|
"logps/rejected": -155.1177215576172, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.08710388094186783, |
|
"rewards/margins": 0.05091012641787529, |
|
"rewards/rejected": -0.13801398873329163, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19188486907855287, |
|
"grad_norm": 4.906760943250142, |
|
"learning_rate": 4.872998177186375e-07, |
|
"logits/chosen": -0.6804112195968628, |
|
"logits/rejected": -0.9185736775398254, |
|
"logps/chosen": -173.2130126953125, |
|
"logps/rejected": -157.01849365234375, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.09927495568990707, |
|
"rewards/margins": 0.056527040898799896, |
|
"rewards/rejected": -0.15580201148986816, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20254513958291692, |
|
"grad_norm": 4.854322224106784, |
|
"learning_rate": 4.842078542329463e-07, |
|
"logits/chosen": -0.6420129537582397, |
|
"logits/rejected": -0.8440741300582886, |
|
"logps/chosen": -172.54263305664062, |
|
"logps/rejected": -160.012939453125, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.11956344544887543, |
|
"rewards/margins": 0.0651877298951149, |
|
"rewards/rejected": -0.18475116789340973, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21320541008728097, |
|
"grad_norm": 5.020847639274401, |
|
"learning_rate": 4.807914270124876e-07, |
|
"logits/chosen": -0.6584053635597229, |
|
"logits/rejected": -0.8369486927986145, |
|
"logps/chosen": -158.8271484375, |
|
"logps/rejected": -151.04791259765625, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13495273888111115, |
|
"rewards/margins": 0.06916390359401703, |
|
"rewards/rejected": -0.20411665737628937, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22386568059164502, |
|
"grad_norm": 5.1518931973507875, |
|
"learning_rate": 4.770552690613665e-07, |
|
"logits/chosen": -0.7008846998214722, |
|
"logits/rejected": -0.9158443212509155, |
|
"logps/chosen": -181.6995391845703, |
|
"logps/rejected": -168.43638610839844, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.14559721946716309, |
|
"rewards/margins": 0.08520212024450302, |
|
"rewards/rejected": -0.2307993471622467, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23452595109600907, |
|
"grad_norm": 4.93222468686984, |
|
"learning_rate": 4.730045563279577e-07, |
|
"logits/chosen": -0.7327751517295837, |
|
"logits/rejected": -0.9426084756851196, |
|
"logps/chosen": -184.8527069091797, |
|
"logps/rejected": -169.2633056640625, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.18423308432102203, |
|
"rewards/margins": 0.08043086528778076, |
|
"rewards/rejected": -0.2646639347076416, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24518622160037312, |
|
"grad_norm": 5.321285521863998, |
|
"learning_rate": 4.6864490053432e-07, |
|
"logits/chosen": -0.7645201683044434, |
|
"logits/rejected": -0.9136350750923157, |
|
"logps/chosen": -184.50399780273438, |
|
"logps/rejected": -182.33792114257812, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1797805279493332, |
|
"rewards/margins": 0.10915856063365936, |
|
"rewards/rejected": -0.28893908858299255, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.25584649210473714, |
|
"grad_norm": 5.62424898876036, |
|
"learning_rate": 4.6398234140190413e-07, |
|
"logits/chosen": -0.7312062978744507, |
|
"logits/rejected": -0.9342387318611145, |
|
"logps/chosen": -189.24227905273438, |
|
"logps/rejected": -181.2150115966797, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.22928175330162048, |
|
"rewards/margins": 0.1005432978272438, |
|
"rewards/rejected": -0.3298250436782837, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2665067626091012, |
|
"grad_norm": 5.848008736661893, |
|
"learning_rate": 4.5902333828432416e-07, |
|
"logits/chosen": -0.7402585744857788, |
|
"logits/rejected": -0.9469724893569946, |
|
"logps/chosen": -188.2518768310547, |
|
"logps/rejected": -183.68360900878906, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2475469410419464, |
|
"rewards/margins": 0.15488557517528534, |
|
"rewards/rejected": -0.40243250131607056, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27716703311346524, |
|
"grad_norm": 5.62435510068984, |
|
"learning_rate": 4.537747612187848e-07, |
|
"logits/chosen": -0.6827915906906128, |
|
"logits/rejected": -0.9053131341934204, |
|
"logps/chosen": -176.27835083007812, |
|
"logps/rejected": -177.09768676757812, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2656404376029968, |
|
"rewards/margins": 0.14400802552700043, |
|
"rewards/rejected": -0.40964850783348083, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2878273036178293, |
|
"grad_norm": 5.883733263408107, |
|
"learning_rate": 4.4824388140856194e-07, |
|
"logits/chosen": -0.813726544380188, |
|
"logits/rejected": -0.9863494634628296, |
|
"logps/chosen": -193.75765991210938, |
|
"logps/rejected": -192.6829833984375, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.32872524857521057, |
|
"rewards/margins": 0.16848836839199066, |
|
"rewards/rejected": -0.49721360206604004, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29848757412219334, |
|
"grad_norm": 6.222829798884928, |
|
"learning_rate": 4.4243836114972003e-07, |
|
"logits/chosen": -0.7957421541213989, |
|
"logits/rejected": -0.9675641059875488, |
|
"logps/chosen": -185.958251953125, |
|
"logps/rejected": -190.2810516357422, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.36352983117103577, |
|
"rewards/margins": 0.1679573506116867, |
|
"rewards/rejected": -0.5314871072769165, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3091478446265574, |
|
"grad_norm": 6.026406045285321, |
|
"learning_rate": 4.3636624321602354e-07, |
|
"logits/chosen": -0.7669280171394348, |
|
"logits/rejected": -1.0013420581817627, |
|
"logps/chosen": -199.62496948242188, |
|
"logps/rejected": -198.5312957763672, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.41982731223106384, |
|
"rewards/margins": 0.1919022500514984, |
|
"rewards/rejected": -0.611729621887207, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.31980811513092144, |
|
"grad_norm": 6.938366915650047, |
|
"learning_rate": 4.300359397167469e-07, |
|
"logits/chosen": -0.78579181432724, |
|
"logits/rejected": -1.0266155004501343, |
|
"logps/chosen": -190.5222625732422, |
|
"logps/rejected": -191.94302368164062, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.4288663864135742, |
|
"rewards/margins": 0.1750030219554901, |
|
"rewards/rejected": -0.6038694381713867, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3304683856352855, |
|
"grad_norm": 6.503433628260907, |
|
"learning_rate": 4.2345622044281914e-07, |
|
"logits/chosen": -0.7738896608352661, |
|
"logits/rejected": -0.9923878908157349, |
|
"logps/chosen": -201.4437255859375, |
|
"logps/rejected": -201.36099243164062, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.46533137559890747, |
|
"rewards/margins": 0.18831129372119904, |
|
"rewards/rejected": -0.6536425948143005, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.34112865613964954, |
|
"grad_norm": 6.951278659773283, |
|
"learning_rate": 4.1663620071744896e-07, |
|
"logits/chosen": -0.8082219958305359, |
|
"logits/rejected": -1.0701286792755127, |
|
"logps/chosen": -221.80789184570312, |
|
"logps/rejected": -220.5237274169922, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.5697073340415955, |
|
"rewards/margins": 0.196958988904953, |
|
"rewards/rejected": -0.7666663527488708, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35178892664401357, |
|
"grad_norm": 7.107245594085975, |
|
"learning_rate": 4.0958532876806036e-07, |
|
"logits/chosen": -0.9068414568901062, |
|
"logits/rejected": -1.0665959119796753, |
|
"logps/chosen": -223.1608428955078, |
|
"logps/rejected": -228.6382598876953, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.6051439046859741, |
|
"rewards/margins": 0.22736486792564392, |
|
"rewards/rejected": -0.8325088620185852, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.36244919714837764, |
|
"grad_norm": 7.5558158008023355, |
|
"learning_rate": 4.023133726370341e-07, |
|
"logits/chosen": -0.7768110036849976, |
|
"logits/rejected": -1.023694634437561, |
|
"logps/chosen": -230.20028686523438, |
|
"logps/rejected": -237.296630859375, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6818786859512329, |
|
"rewards/margins": 0.2647910714149475, |
|
"rewards/rejected": -0.9466696977615356, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37310946765274167, |
|
"grad_norm": 7.748401207711855, |
|
"learning_rate": 3.9483040664938844e-07, |
|
"logits/chosen": -0.8651229739189148, |
|
"logits/rejected": -1.1080349683761597, |
|
"logps/chosen": -239.4313201904297, |
|
"logps/rejected": -245.35641479492188, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7178173065185547, |
|
"rewards/margins": 0.29743796586990356, |
|
"rewards/rejected": -1.015255331993103, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.38376973815710574, |
|
"grad_norm": 7.833168702083219, |
|
"learning_rate": 3.8714679745614556e-07, |
|
"logits/chosen": -0.9112879633903503, |
|
"logits/rejected": -1.1001932621002197, |
|
"logps/chosen": -251.1482391357422, |
|
"logps/rejected": -257.7167053222656, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8083968162536621, |
|
"rewards/margins": 0.26524096727371216, |
|
"rewards/rejected": -1.073637843132019, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39443000866146977, |
|
"grad_norm": 7.402036456357543, |
|
"learning_rate": 3.792731896727196e-07, |
|
"logits/chosen": -0.8897370100021362, |
|
"logits/rejected": -1.091963768005371, |
|
"logps/chosen": -246.6190948486328, |
|
"logps/rejected": -268.6842041015625, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8738805651664734, |
|
"rewards/margins": 0.3643074929714203, |
|
"rewards/rejected": -1.2381881475448608, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40509027916583384, |
|
"grad_norm": 7.32634230041485, |
|
"learning_rate": 3.712204911322228e-07, |
|
"logits/chosen": -0.8557780981063843, |
|
"logits/rejected": -1.057023286819458, |
|
"logps/chosen": -217.1138916015625, |
|
"logps/rejected": -232.2842254638672, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7771707773208618, |
|
"rewards/margins": 0.2797245681285858, |
|
"rewards/rejected": -1.05689537525177, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41575054967019787, |
|
"grad_norm": 9.45088347010784, |
|
"learning_rate": 3.629998577741174e-07, |
|
"logits/chosen": -0.8742257952690125, |
|
"logits/rejected": -1.0490225553512573, |
|
"logps/chosen": -240.11489868164062, |
|
"logps/rejected": -265.6509094238281, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8606696128845215, |
|
"rewards/margins": 0.3593491315841675, |
|
"rewards/rejected": -1.2200186252593994, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42641082017456194, |
|
"grad_norm": 8.652861206718594, |
|
"learning_rate": 3.546226781891501e-07, |
|
"logits/chosen": -0.8858518600463867, |
|
"logits/rejected": -1.0868691205978394, |
|
"logps/chosen": -266.2615051269531, |
|
"logps/rejected": -285.27703857421875, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.975814938545227, |
|
"rewards/margins": 0.4038930833339691, |
|
"rewards/rejected": -1.3797080516815186, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43707109067892597, |
|
"grad_norm": 9.648919264403354, |
|
"learning_rate": 3.461005578419791e-07, |
|
"logits/chosen": -0.8321302533149719, |
|
"logits/rejected": -1.0552650690078735, |
|
"logps/chosen": -253.7904815673828, |
|
"logps/rejected": -272.8400573730469, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9785162210464478, |
|
"rewards/margins": 0.3188565969467163, |
|
"rewards/rejected": -1.297372817993164, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44773136118329004, |
|
"grad_norm": 8.305774901520081, |
|
"learning_rate": 3.374453029933509e-07, |
|
"logits/chosen": -0.9058141708374023, |
|
"logits/rejected": -1.0458682775497437, |
|
"logps/chosen": -258.77069091796875, |
|
"logps/rejected": -279.82977294921875, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9745637774467468, |
|
"rewards/margins": 0.3414529263973236, |
|
"rewards/rejected": -1.3160169124603271, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45839163168765407, |
|
"grad_norm": 8.730250055075079, |
|
"learning_rate": 3.286689043441015e-07, |
|
"logits/chosen": -0.8889232873916626, |
|
"logits/rejected": -1.12659752368927, |
|
"logps/chosen": -264.6424255371094, |
|
"logps/rejected": -273.76092529296875, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9881819486618042, |
|
"rewards/margins": 0.31245288252830505, |
|
"rewards/rejected": -1.3006350994110107, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.46905190219201814, |
|
"grad_norm": 9.464259902697126, |
|
"learning_rate": 3.197835204236402e-07, |
|
"logits/chosen": -0.9472643136978149, |
|
"logits/rejected": -1.142138123512268, |
|
"logps/chosen": -279.47662353515625, |
|
"logps/rejected": -311.5118103027344, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.133866548538208, |
|
"rewards/margins": 0.4763459265232086, |
|
"rewards/rejected": -1.6102125644683838, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47971217269638217, |
|
"grad_norm": 9.53110205637003, |
|
"learning_rate": 3.1080146074592877e-07, |
|
"logits/chosen": -0.8609586954116821, |
|
"logits/rejected": -1.1460800170898438, |
|
"logps/chosen": -280.66595458984375, |
|
"logps/rejected": -307.8553771972656, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1233617067337036, |
|
"rewards/margins": 0.49458152055740356, |
|
"rewards/rejected": -1.6179431676864624, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.49037244320074624, |
|
"grad_norm": 10.766670968073823, |
|
"learning_rate": 3.017351687562928e-07, |
|
"logits/chosen": -0.869361400604248, |
|
"logits/rejected": -1.071195125579834, |
|
"logps/chosen": -287.5640869140625, |
|
"logps/rejected": -315.25347900390625, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2507811784744263, |
|
"rewards/margins": 0.4507381319999695, |
|
"rewards/rejected": -1.7015190124511719, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5010327137051103, |
|
"grad_norm": 8.57346401837084, |
|
"learning_rate": 2.925972045926878e-07, |
|
"logits/chosen": -0.9069381952285767, |
|
"logits/rejected": -1.0885123014450073, |
|
"logps/chosen": -276.06878662109375, |
|
"logps/rejected": -302.81072998046875, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1936795711517334, |
|
"rewards/margins": 0.44402870535850525, |
|
"rewards/rejected": -1.6377084255218506, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5116929842094743, |
|
"grad_norm": 8.335769499664682, |
|
"learning_rate": 2.83400227685304e-07, |
|
"logits/chosen": -0.926740288734436, |
|
"logits/rejected": -1.188207983970642, |
|
"logps/chosen": -272.0440979003906, |
|
"logps/rejected": -291.0050964355469, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1271604299545288, |
|
"rewards/margins": 0.37117230892181396, |
|
"rewards/rejected": -1.4983327388763428, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5223532547138383, |
|
"grad_norm": 8.95305553011223, |
|
"learning_rate": 2.7415697921861525e-07, |
|
"logits/chosen": -0.8435291051864624, |
|
"logits/rejected": -1.072458028793335, |
|
"logps/chosen": -263.8363952636719, |
|
"logps/rejected": -289.58270263671875, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0684736967086792, |
|
"rewards/margins": 0.43612685799598694, |
|
"rewards/rejected": -1.5046006441116333, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5330135252182024, |
|
"grad_norm": 10.305199478555215, |
|
"learning_rate": 2.6488026448016686e-07, |
|
"logits/chosen": -0.9254539608955383, |
|
"logits/rejected": -1.1660327911376953, |
|
"logps/chosen": -287.7872009277344, |
|
"logps/rejected": -306.3985290527344, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1574687957763672, |
|
"rewards/margins": 0.37755414843559265, |
|
"rewards/rejected": -1.5350229740142822, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5436737957225665, |
|
"grad_norm": 9.11035884736237, |
|
"learning_rate": 2.5558293512055923e-07, |
|
"logits/chosen": -0.8859409093856812, |
|
"logits/rejected": -1.1229826211929321, |
|
"logps/chosen": -278.84051513671875, |
|
"logps/rejected": -311.79669189453125, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2464487552642822, |
|
"rewards/margins": 0.48425453901290894, |
|
"rewards/rejected": -1.730703353881836, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5543340662269305, |
|
"grad_norm": 9.443455019352353, |
|
"learning_rate": 2.4627787134919946e-07, |
|
"logits/chosen": -0.8607537150382996, |
|
"logits/rejected": -1.067083716392517, |
|
"logps/chosen": -306.5609130859375, |
|
"logps/rejected": -340.9252014160156, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.4955613613128662, |
|
"rewards/margins": 0.5148967504501343, |
|
"rewards/rejected": -2.01045823097229, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5649943367312945, |
|
"grad_norm": 10.020105882711649, |
|
"learning_rate": 2.369779640904909e-07, |
|
"logits/chosen": -0.9872435331344604, |
|
"logits/rejected": -1.1790921688079834, |
|
"logps/chosen": -301.1463928222656, |
|
"logps/rejected": -326.53509521484375, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.46715247631073, |
|
"rewards/margins": 0.45322275161743164, |
|
"rewards/rejected": -1.9203754663467407, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5756546072356586, |
|
"grad_norm": 9.230369920285517, |
|
"learning_rate": 2.2769609712517602e-07, |
|
"logits/chosen": -0.9972273707389832, |
|
"logits/rejected": -1.139904499053955, |
|
"logps/chosen": -310.1788635253906, |
|
"logps/rejected": -328.85455322265625, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3879780769348145, |
|
"rewards/margins": 0.4023415446281433, |
|
"rewards/rejected": -1.7903196811676025, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5863148777400227, |
|
"grad_norm": 9.773551123939216, |
|
"learning_rate": 2.184451292415778e-07, |
|
"logits/chosen": -0.9245126843452454, |
|
"logits/rejected": -1.0917091369628906, |
|
"logps/chosen": -265.5910949707031, |
|
"logps/rejected": -292.25726318359375, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.123450517654419, |
|
"rewards/margins": 0.4249204099178314, |
|
"rewards/rejected": -1.5483709573745728, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5969751482443867, |
|
"grad_norm": 9.944866138311095, |
|
"learning_rate": 2.0923787642146434e-07, |
|
"logits/chosen": -0.8810575604438782, |
|
"logits/rejected": -1.0941672325134277, |
|
"logps/chosen": -280.61279296875, |
|
"logps/rejected": -312.9557800292969, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2670402526855469, |
|
"rewards/margins": 0.519837498664856, |
|
"rewards/rejected": -1.7868778705596924, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6076354187487507, |
|
"grad_norm": 9.880910925618455, |
|
"learning_rate": 2.0008709408521507e-07, |
|
"logits/chosen": -0.9383381009101868, |
|
"logits/rejected": -1.1827994585037231, |
|
"logps/chosen": -295.6000671386719, |
|
"logps/rejected": -324.3331604003906, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2447686195373535, |
|
"rewards/margins": 0.5489395260810852, |
|
"rewards/rejected": -1.793708086013794, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6182956892531148, |
|
"grad_norm": 10.071491320024812, |
|
"learning_rate": 1.9100545942088848e-07, |
|
"logits/chosen": -0.9224274754524231, |
|
"logits/rejected": -1.1538960933685303, |
|
"logps/chosen": -289.017578125, |
|
"logps/rejected": -325.94952392578125, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2537972927093506, |
|
"rewards/margins": 0.5672923922538757, |
|
"rewards/rejected": -1.821089744567871, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6289559597574789, |
|
"grad_norm": 11.845857689113707, |
|
"learning_rate": 1.8200555382166898e-07, |
|
"logits/chosen": -0.9387105107307434, |
|
"logits/rejected": -1.1250282526016235, |
|
"logps/chosen": -318.4964294433594, |
|
"logps/rejected": -338.69696044921875, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5140787363052368, |
|
"rewards/margins": 0.4427851140499115, |
|
"rewards/rejected": -1.9568637609481812, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6396162302618429, |
|
"grad_norm": 10.971903527074975, |
|
"learning_rate": 1.7309984545602528e-07, |
|
"logits/chosen": -0.9286500215530396, |
|
"logits/rejected": -1.1137937307357788, |
|
"logps/chosen": -279.747802734375, |
|
"logps/rejected": -307.8285217285156, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.323687195777893, |
|
"rewards/margins": 0.48056259751319885, |
|
"rewards/rejected": -1.8042497634887695, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6502765007662069, |
|
"grad_norm": 10.964118734413244, |
|
"learning_rate": 1.6430067199472657e-07, |
|
"logits/chosen": -0.9661188125610352, |
|
"logits/rejected": -1.1719661951065063, |
|
"logps/chosen": -294.7871398925781, |
|
"logps/rejected": -329.8990783691406, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3090574741363525, |
|
"rewards/margins": 0.5292733907699585, |
|
"rewards/rejected": -1.838330864906311, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.660936771270571, |
|
"grad_norm": 11.086382549521785, |
|
"learning_rate": 1.5562022351864534e-07, |
|
"logits/chosen": -0.9217275381088257, |
|
"logits/rejected": -1.1163594722747803, |
|
"logps/chosen": -266.56402587890625, |
|
"logps/rejected": -306.4192810058594, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1430429220199585, |
|
"rewards/margins": 0.5940698981285095, |
|
"rewards/rejected": -1.7371127605438232, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6715970417749351, |
|
"grad_norm": 10.957109584007643, |
|
"learning_rate": 1.4707052563102748e-07, |
|
"logits/chosen": -0.8743804097175598, |
|
"logits/rejected": -1.0983814001083374, |
|
"logps/chosen": -285.22607421875, |
|
"logps/rejected": -317.2628173828125, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3059532642364502, |
|
"rewards/margins": 0.5242554545402527, |
|
"rewards/rejected": -1.8302087783813477, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6822573122792991, |
|
"grad_norm": 10.507330109558843, |
|
"learning_rate": 1.386634227976224e-07, |
|
"logits/chosen": -0.9597967863082886, |
|
"logits/rejected": -1.124963402748108, |
|
"logps/chosen": -286.6432189941406, |
|
"logps/rejected": -315.79937744140625, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3352241516113281, |
|
"rewards/margins": 0.4382667541503906, |
|
"rewards/rejected": -1.7734909057617188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6929175827836631, |
|
"grad_norm": 9.804790546339078, |
|
"learning_rate": 1.3041056193775665e-07, |
|
"logits/chosen": -0.888710618019104, |
|
"logits/rejected": -1.0851693153381348, |
|
"logps/chosen": -311.01544189453125, |
|
"logps/rejected": -332.7283020019531, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5570933818817139, |
|
"rewards/margins": 0.4053064286708832, |
|
"rewards/rejected": -1.9623997211456299, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7035778532880271, |
|
"grad_norm": 9.630550808372668, |
|
"learning_rate": 1.2232337628908103e-07, |
|
"logits/chosen": -0.9582077264785767, |
|
"logits/rejected": -1.1537044048309326, |
|
"logps/chosen": -326.71221923828125, |
|
"logps/rejected": -377.6993713378906, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4935967922210693, |
|
"rewards/margins": 0.7231054902076721, |
|
"rewards/rejected": -2.2167022228240967, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7142381237923913, |
|
"grad_norm": 9.172032682717258, |
|
"learning_rate": 1.1441306956834504e-07, |
|
"logits/chosen": -0.9413734674453735, |
|
"logits/rejected": -1.1069329977035522, |
|
"logps/chosen": -306.80218505859375, |
|
"logps/rejected": -357.0929870605469, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4035927057266235, |
|
"rewards/margins": 0.6626663208007812, |
|
"rewards/rejected": -2.0662589073181152, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7248983942967553, |
|
"grad_norm": 10.907598822157487, |
|
"learning_rate": 1.0669060045014214e-07, |
|
"logits/chosen": -1.0222991704940796, |
|
"logits/rejected": -1.228389024734497, |
|
"logps/chosen": -316.627197265625, |
|
"logps/rejected": -357.66229248046875, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4493268728256226, |
|
"rewards/margins": 0.5827343463897705, |
|
"rewards/rejected": -2.0320611000061035, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7355586648011193, |
|
"grad_norm": 10.97300975462713, |
|
"learning_rate": 9.9166667385128e-08, |
|
"logits/chosen": -0.963638186454773, |
|
"logits/rejected": -1.1757190227508545, |
|
"logps/chosen": -304.3102722167969, |
|
"logps/rejected": -354.2998962402344, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4618219137191772, |
|
"rewards/margins": 0.7080960273742676, |
|
"rewards/rejected": -2.1699178218841553, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7462189353054833, |
|
"grad_norm": 9.89897013382996, |
|
"learning_rate": 9.185169377874488e-08, |
|
"logits/chosen": -0.9903243780136108, |
|
"logits/rejected": -1.1469306945800781, |
|
"logps/chosen": -312.1212158203125, |
|
"logps/rejected": -346.9307861328125, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5106861591339111, |
|
"rewards/margins": 0.49892768263816833, |
|
"rewards/rejected": -2.0096137523651123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7568792058098475, |
|
"grad_norm": 10.018680833325265, |
|
"learning_rate": 8.475581355098379e-08, |
|
"logits/chosen": -0.9698395729064941, |
|
"logits/rejected": -1.1572554111480713, |
|
"logps/chosen": -304.4853820800781, |
|
"logps/rejected": -342.16827392578125, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.4320096969604492, |
|
"rewards/margins": 0.5366055965423584, |
|
"rewards/rejected": -1.968615174293518, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7675394763142115, |
|
"grad_norm": 11.03385142626086, |
|
"learning_rate": 7.788885709719033e-08, |
|
"logits/chosen": -0.9215399622917175, |
|
"logits/rejected": -1.1144723892211914, |
|
"logps/chosen": -316.9365234375, |
|
"logps/rejected": -359.6341857910156, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.580185890197754, |
|
"rewards/margins": 0.564557671546936, |
|
"rewards/rejected": -2.1447434425354004, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7781997468185755, |
|
"grad_norm": 9.523737016870674, |
|
"learning_rate": 7.126033766936365e-08, |
|
"logits/chosen": -0.9409270286560059, |
|
"logits/rejected": -1.124208688735962, |
|
"logps/chosen": -311.7746276855469, |
|
"logps/rejected": -355.46343994140625, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.5002214908599854, |
|
"rewards/margins": 0.5499864816665649, |
|
"rewards/rejected": -2.05020809173584, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7888600173229395, |
|
"grad_norm": 11.210638577879926, |
|
"learning_rate": 6.487943819681488e-08, |
|
"logits/chosen": -0.9616110920906067, |
|
"logits/rejected": -1.0974061489105225, |
|
"logps/chosen": -315.260009765625, |
|
"logps/rejected": -357.67059326171875, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.509570837020874, |
|
"rewards/margins": 0.537238597869873, |
|
"rewards/rejected": -2.046809434890747, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7995202878273037, |
|
"grad_norm": 9.781063018210089, |
|
"learning_rate": 5.875499856444358e-08, |
|
"logits/chosen": -0.9564340710639954, |
|
"logits/rejected": -1.1133265495300293, |
|
"logps/chosen": -314.17535400390625, |
|
"logps/rejected": -351.45001220703125, |
|
"loss": 0.5458, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.493622064590454, |
|
"rewards/margins": 0.5427702069282532, |
|
"rewards/rejected": -2.0363922119140625, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8101805583316677, |
|
"grad_norm": 11.983119955061767, |
|
"learning_rate": 5.289550336625731e-08, |
|
"logits/chosen": -1.0206782817840576, |
|
"logits/rejected": -1.2104320526123047, |
|
"logps/chosen": -327.4963684082031, |
|
"logps/rejected": -353.74603271484375, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.506259560585022, |
|
"rewards/margins": 0.49152374267578125, |
|
"rewards/rejected": -1.9977830648422241, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8208408288360317, |
|
"grad_norm": 10.83148544527409, |
|
"learning_rate": 4.730907015109759e-08, |
|
"logits/chosen": -0.9245961308479309, |
|
"logits/rejected": -1.1795787811279297, |
|
"logps/chosen": -309.1303405761719, |
|
"logps/rejected": -346.46051025390625, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.5297610759735107, |
|
"rewards/margins": 0.5533354878425598, |
|
"rewards/rejected": -2.083096742630005, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8315010993403957, |
|
"grad_norm": 9.500539654945461, |
|
"learning_rate": 4.200343817685981e-08, |
|
"logits/chosen": -0.9566155672073364, |
|
"logits/rejected": -1.0963544845581055, |
|
"logps/chosen": -313.0601501464844, |
|
"logps/rejected": -343.36773681640625, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5300524234771729, |
|
"rewards/margins": 0.4933779835700989, |
|
"rewards/rejected": -2.023430347442627, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8421613698447599, |
|
"grad_norm": 9.955855605589283, |
|
"learning_rate": 3.698595768878363e-08, |
|
"logits/chosen": -0.9913743734359741, |
|
"logits/rejected": -1.180884599685669, |
|
"logps/chosen": -311.83636474609375, |
|
"logps/rejected": -356.932373046875, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.429694414138794, |
|
"rewards/margins": 0.6187530755996704, |
|
"rewards/rejected": -2.048447370529175, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8528216403491239, |
|
"grad_norm": 11.149747005186983, |
|
"learning_rate": 3.226357973666888e-08, |
|
"logits/chosen": -1.0238213539123535, |
|
"logits/rejected": -1.1811949014663696, |
|
"logps/chosen": -332.1514587402344, |
|
"logps/rejected": -359.03167724609375, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.6280012130737305, |
|
"rewards/margins": 0.43937546014785767, |
|
"rewards/rejected": -2.0673766136169434, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8528216403491239, |
|
"eval_logits/chosen": -0.9705477356910706, |
|
"eval_logits/rejected": -1.165926456451416, |
|
"eval_logps/chosen": -307.21051025390625, |
|
"eval_logps/rejected": -356.52508544921875, |
|
"eval_loss": 0.5049245953559875, |
|
"eval_rewards/accuracies": 0.7932573556900024, |
|
"eval_rewards/chosen": -1.4455755949020386, |
|
"eval_rewards/margins": 0.6763937473297119, |
|
"eval_rewards/rejected": -2.12196946144104, |
|
"eval_runtime": 11441.6179, |
|
"eval_samples_per_second": 5.247, |
|
"eval_steps_per_second": 1.312, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8634819108534879, |
|
"grad_norm": 9.468787134199466, |
|
"learning_rate": 2.7842846545123505e-08, |
|
"logits/chosen": -0.9555789232254028, |
|
"logits/rejected": -1.1705703735351562, |
|
"logps/chosen": -289.6531677246094, |
|
"logps/rejected": -345.7925720214844, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.3922350406646729, |
|
"rewards/margins": 0.6980171203613281, |
|
"rewards/rejected": -2.090252161026001, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8741421813578519, |
|
"grad_norm": 10.178761020491258, |
|
"learning_rate": 2.372988245018401e-08, |
|
"logits/chosen": -0.9851318597793579, |
|
"logits/rejected": -1.1668522357940674, |
|
"logps/chosen": -316.6786193847656, |
|
"logps/rejected": -362.8905944824219, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.598661184310913, |
|
"rewards/margins": 0.608306884765625, |
|
"rewards/rejected": -2.206967830657959, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.884802451862216, |
|
"grad_norm": 9.329485481095736, |
|
"learning_rate": 1.9930385414865386e-08, |
|
"logits/chosen": -1.0145405530929565, |
|
"logits/rejected": -1.2289698123931885, |
|
"logps/chosen": -336.15087890625, |
|
"logps/rejected": -373.11309814453125, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.558721899986267, |
|
"rewards/margins": 0.6198412775993347, |
|
"rewards/rejected": -2.178563356399536, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8954627223665801, |
|
"grad_norm": 9.690686562397088, |
|
"learning_rate": 1.6449619135393084e-08, |
|
"logits/chosen": -0.9239746928215027, |
|
"logits/rejected": -1.1881077289581299, |
|
"logps/chosen": -296.87200927734375, |
|
"logps/rejected": -329.9718017578125, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.472847580909729, |
|
"rewards/margins": 0.5113754868507385, |
|
"rewards/rejected": -1.9842230081558228, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9061229928709441, |
|
"grad_norm": 10.862769817255897, |
|
"learning_rate": 1.329240574905452e-08, |
|
"logits/chosen": -0.9023639559745789, |
|
"logits/rejected": -1.0890004634857178, |
|
"logps/chosen": -324.7179260253906, |
|
"logps/rejected": -374.7180480957031, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5423232316970825, |
|
"rewards/margins": 0.6671528816223145, |
|
"rewards/rejected": -2.2094759941101074, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9167832633753081, |
|
"grad_norm": 11.35977235393007, |
|
"learning_rate": 1.0463119153770989e-08, |
|
"logits/chosen": -0.9444347620010376, |
|
"logits/rejected": -1.1702197790145874, |
|
"logps/chosen": -298.4215393066406, |
|
"logps/rejected": -328.64215087890625, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4311974048614502, |
|
"rewards/margins": 0.5026859045028687, |
|
"rewards/rejected": -1.9338833093643188, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9274435338796722, |
|
"grad_norm": 10.068213055827782, |
|
"learning_rate": 7.965678948645832e-09, |
|
"logits/chosen": -0.9912747144699097, |
|
"logits/rejected": -1.2084077596664429, |
|
"logps/chosen": -336.46929931640625, |
|
"logps/rejected": -379.56640625, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.6182082891464233, |
|
"rewards/margins": 0.6836891174316406, |
|
"rewards/rejected": -2.3018975257873535, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9381038043840363, |
|
"grad_norm": 12.790282190393167, |
|
"learning_rate": 5.803545003882554e-09, |
|
"logits/chosen": -0.9938758015632629, |
|
"logits/rejected": -1.17817223072052, |
|
"logps/chosen": -326.2915954589844, |
|
"logps/rejected": -371.28631591796875, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5600776672363281, |
|
"rewards/margins": 0.5917671918869019, |
|
"rewards/rejected": -2.1518447399139404, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9487640748884003, |
|
"grad_norm": 9.050016131957404, |
|
"learning_rate": 3.979712667596669e-09, |
|
"logits/chosen": -0.9720270037651062, |
|
"logits/rejected": -1.1488044261932373, |
|
"logps/chosen": -304.312255859375, |
|
"logps/rejected": -351.5962219238281, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.4655094146728516, |
|
"rewards/margins": 0.6790416240692139, |
|
"rewards/rejected": -2.1445512771606445, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9594243453927643, |
|
"grad_norm": 13.159010993827899, |
|
"learning_rate": 2.4967086161600814e-09, |
|
"logits/chosen": -0.994873046875, |
|
"logits/rejected": -1.1672512292861938, |
|
"logps/chosen": -314.894287109375, |
|
"logps/rejected": -354.23223876953125, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5018284320831299, |
|
"rewards/margins": 0.5567340850830078, |
|
"rewards/rejected": -2.0585622787475586, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9700846158971284, |
|
"grad_norm": 9.906738715572994, |
|
"learning_rate": 1.3565873538283757e-09, |
|
"logits/chosen": -0.9630732536315918, |
|
"logits/rejected": -1.1276707649230957, |
|
"logps/chosen": -306.04345703125, |
|
"logps/rejected": -351.21099853515625, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.395446538925171, |
|
"rewards/margins": 0.6138492822647095, |
|
"rewards/rejected": -2.009295701980591, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9807448864014925, |
|
"grad_norm": 10.687835024200046, |
|
"learning_rate": 5.609283664990693e-10, |
|
"logits/chosen": -0.9506285786628723, |
|
"logits/rejected": -1.20163094997406, |
|
"logps/chosen": -323.80657958984375, |
|
"logps/rejected": -370.2672424316406, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.5296146869659424, |
|
"rewards/margins": 0.6610507369041443, |
|
"rewards/rejected": -2.1906654834747314, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9914051569058565, |
|
"grad_norm": 11.797447945184583, |
|
"learning_rate": 1.1083393354488491e-10, |
|
"logits/chosen": -0.9356955289840698, |
|
"logits/rejected": -1.1217402219772339, |
|
"logps/chosen": -326.0872497558594, |
|
"logps/rejected": -382.658203125, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.588428020477295, |
|
"rewards/margins": 0.7401828169822693, |
|
"rewards/rejected": -2.328610897064209, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9999333733093477, |
|
"step": 469, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5891387982409138, |
|
"train_runtime": 37343.5856, |
|
"train_samples_per_second": 1.608, |
|
"train_steps_per_second": 0.013 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 469, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |