|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9986824769433466, |
|
"eval_steps": 100, |
|
"global_step": 379, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 32.10821322863283, |
|
"learning_rate": 1.3157894736842104e-08, |
|
"logits/chosen": -2.219799041748047, |
|
"logits/rejected": -2.229109525680542, |
|
"logps/chosen": -269.856201171875, |
|
"logps/rejected": -192.3697509765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 35.006735982686784, |
|
"learning_rate": 6.578947368421052e-08, |
|
"logits/chosen": -2.276287078857422, |
|
"logits/rejected": -2.2080204486846924, |
|
"logps/chosen": -283.5397644042969, |
|
"logps/rejected": -208.59442138671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": 0.0013411559630185366, |
|
"rewards/margins": 0.00017823810048867017, |
|
"rewards/rejected": 0.0011629178188741207, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 36.439618208906154, |
|
"learning_rate": 1.3157894736842104e-07, |
|
"logits/chosen": -2.3274893760681152, |
|
"logits/rejected": -2.2322466373443604, |
|
"logps/chosen": -286.6865539550781, |
|
"logps/rejected": -206.5416717529297, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00261278566904366, |
|
"rewards/margins": -3.606556128943339e-05, |
|
"rewards/rejected": 0.0026488511357456446, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 34.873528802219326, |
|
"learning_rate": 1.9736842105263157e-07, |
|
"logits/chosen": -2.3293230533599854, |
|
"logits/rejected": -2.2996506690979004, |
|
"logps/chosen": -289.71771240234375, |
|
"logps/rejected": -237.6280059814453, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.017419874668121338, |
|
"rewards/margins": 0.004653422627598047, |
|
"rewards/rejected": 0.012766450643539429, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 31.898185632963774, |
|
"learning_rate": 2.631578947368421e-07, |
|
"logits/chosen": -2.3196263313293457, |
|
"logits/rejected": -2.2592759132385254, |
|
"logps/chosen": -263.24908447265625, |
|
"logps/rejected": -202.05458068847656, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.05536944791674614, |
|
"rewards/margins": 0.01851554773747921, |
|
"rewards/rejected": 0.03685389831662178, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 28.076450357377052, |
|
"learning_rate": 3.2894736842105264e-07, |
|
"logits/chosen": -2.3921802043914795, |
|
"logits/rejected": -2.377410650253296, |
|
"logps/chosen": -260.9800720214844, |
|
"logps/rejected": -214.38821411132812, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.13004128634929657, |
|
"rewards/margins": 0.03864985704421997, |
|
"rewards/rejected": 0.0913914293050766, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 25.341140024963043, |
|
"learning_rate": 3.9473684210526315e-07, |
|
"logits/chosen": -2.477496385574341, |
|
"logits/rejected": -2.506354808807373, |
|
"logps/chosen": -255.5835723876953, |
|
"logps/rejected": -223.4532470703125, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2109464406967163, |
|
"rewards/margins": 0.04907592386007309, |
|
"rewards/rejected": 0.16187050938606262, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 23.40327238126855, |
|
"learning_rate": 4.6052631578947365e-07, |
|
"logits/chosen": -2.6838037967681885, |
|
"logits/rejected": -2.6064510345458984, |
|
"logps/chosen": -272.02825927734375, |
|
"logps/rejected": -207.57565307617188, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3294451832771301, |
|
"rewards/margins": 0.12016526609659195, |
|
"rewards/rejected": 0.20927992463111877, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 21.272616898577162, |
|
"learning_rate": 4.999575626062319e-07, |
|
"logits/chosen": -2.747399091720581, |
|
"logits/rejected": -2.7062249183654785, |
|
"logps/chosen": -249.28085327148438, |
|
"logps/rejected": -202.8720245361328, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3287069499492645, |
|
"rewards/margins": 0.0975252240896225, |
|
"rewards/rejected": 0.23118171095848083, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 18.3841620762551, |
|
"learning_rate": 4.994803073715569e-07, |
|
"logits/chosen": -2.788217067718506, |
|
"logits/rejected": -2.736720561981201, |
|
"logps/chosen": -260.8193359375, |
|
"logps/rejected": -202.5912322998047, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4205988049507141, |
|
"rewards/margins": 0.14394915103912354, |
|
"rewards/rejected": 0.2766496539115906, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 18.656759383093647, |
|
"learning_rate": 4.984737660598186e-07, |
|
"logits/chosen": -2.8456408977508545, |
|
"logits/rejected": -2.7947795391082764, |
|
"logps/chosen": -223.0256805419922, |
|
"logps/rejected": -197.88247680664062, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3571344316005707, |
|
"rewards/margins": 0.09834496676921844, |
|
"rewards/rejected": 0.2587894797325134, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 20.93264303618464, |
|
"learning_rate": 4.969400741032999e-07, |
|
"logits/chosen": -2.8368711471557617, |
|
"logits/rejected": -2.8192451000213623, |
|
"logps/chosen": -234.53515625, |
|
"logps/rejected": -199.7665252685547, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.33271390199661255, |
|
"rewards/margins": 0.14281830191612244, |
|
"rewards/rejected": 0.18989557027816772, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 19.192901766300064, |
|
"learning_rate": 4.948824853131236e-07, |
|
"logits/chosen": -2.8852925300598145, |
|
"logits/rejected": -2.859867572784424, |
|
"logps/chosen": -253.8678741455078, |
|
"logps/rejected": -212.1206512451172, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.3522695302963257, |
|
"rewards/margins": 0.1734958440065384, |
|
"rewards/rejected": 0.1787737011909485, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 19.6177452377838, |
|
"learning_rate": 4.923053649761152e-07, |
|
"logits/chosen": -2.8169569969177246, |
|
"logits/rejected": -2.7849762439727783, |
|
"logps/chosen": -244.50808715820312, |
|
"logps/rejected": -201.7557830810547, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.35878387093544006, |
|
"rewards/margins": 0.19476714730262756, |
|
"rewards/rejected": 0.1640167087316513, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 20.424079652700506, |
|
"learning_rate": 4.892141805936084e-07, |
|
"logits/chosen": -2.789267063140869, |
|
"logits/rejected": -2.73822283744812, |
|
"logps/chosen": -250.8653564453125, |
|
"logps/rejected": -224.6582794189453, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.4033736288547516, |
|
"rewards/margins": 0.23386640846729279, |
|
"rewards/rejected": 0.1695072501897812, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 25.663818502097516, |
|
"learning_rate": 4.856154902818431e-07, |
|
"logits/chosen": -2.8126118183135986, |
|
"logits/rejected": -2.7708253860473633, |
|
"logps/chosen": -229.9053955078125, |
|
"logps/rejected": -206.87258911132812, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2887989580631256, |
|
"rewards/margins": 0.22907009720802307, |
|
"rewards/rejected": 0.05972885340452194, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 24.00962454367728, |
|
"learning_rate": 4.81516928858564e-07, |
|
"logits/chosen": -2.7702012062072754, |
|
"logits/rejected": -2.7202858924865723, |
|
"logps/chosen": -271.06317138671875, |
|
"logps/rejected": -220.15475463867188, |
|
"loss": 0.5964, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.3578353524208069, |
|
"rewards/margins": 0.31964898109436035, |
|
"rewards/rejected": 0.03818630054593086, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 26.200013285180134, |
|
"learning_rate": 4.769271916453385e-07, |
|
"logits/chosen": -2.817336320877075, |
|
"logits/rejected": -2.7950100898742676, |
|
"logps/chosen": -257.9813537597656, |
|
"logps/rejected": -234.77108764648438, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.16794440150260925, |
|
"rewards/margins": 0.3747493624687195, |
|
"rewards/rejected": -0.20680496096611023, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 31.71282371862922, |
|
"learning_rate": 4.7185601601995784e-07, |
|
"logits/chosen": -2.8321759700775146, |
|
"logits/rejected": -2.8417580127716064, |
|
"logps/chosen": -252.80313110351562, |
|
"logps/rejected": -233.3778839111328, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.17818805575370789, |
|
"rewards/margins": 0.4816998839378357, |
|
"rewards/rejected": -0.3035118579864502, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 30.203747907953968, |
|
"learning_rate": 4.6631416075805886e-07, |
|
"logits/chosen": -2.9183051586151123, |
|
"logits/rejected": -2.8942832946777344, |
|
"logps/chosen": -292.6710510253906, |
|
"logps/rejected": -278.3623962402344, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.0014784678351134062, |
|
"rewards/margins": 0.45240503549575806, |
|
"rewards/rejected": -0.4509265422821045, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 35.20356531841964, |
|
"learning_rate": 4.603133832077953e-07, |
|
"logits/chosen": -3.0372838973999023, |
|
"logits/rejected": -3.03391432762146, |
|
"logps/chosen": -264.54412841796875, |
|
"logps/rejected": -271.9236755371094, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.24657364189624786, |
|
"rewards/margins": 0.4146839678287506, |
|
"rewards/rejected": -0.6612575650215149, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.981271743774414, |
|
"eval_logits/rejected": -3.007460117340088, |
|
"eval_logps/chosen": -500.8688049316406, |
|
"eval_logps/rejected": -504.0709228515625, |
|
"eval_loss": 0.6958277821540833, |
|
"eval_rewards/accuracies": 0.53125, |
|
"eval_rewards/chosen": -1.2149137258529663, |
|
"eval_rewards/margins": 0.09968078136444092, |
|
"eval_rewards/rejected": -1.3145945072174072, |
|
"eval_runtime": 97.6692, |
|
"eval_samples_per_second": 20.477, |
|
"eval_steps_per_second": 0.328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 35.37658079116714, |
|
"learning_rate": 4.538664143459818e-07, |
|
"logits/chosen": -3.1525657176971436, |
|
"logits/rejected": -3.170300245285034, |
|
"logps/chosen": -301.29736328125, |
|
"logps/rejected": -306.07171630859375, |
|
"loss": 0.4875, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.40715986490249634, |
|
"rewards/margins": 0.6011335849761963, |
|
"rewards/rejected": -1.0082933902740479, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 44.5542186040465, |
|
"learning_rate": 4.4698693176863316e-07, |
|
"logits/chosen": -3.1490285396575928, |
|
"logits/rejected": -3.167726993560791, |
|
"logps/chosen": -326.18792724609375, |
|
"logps/rejected": -354.35784912109375, |
|
"loss": 0.4645, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6683047413825989, |
|
"rewards/margins": 0.806756854057312, |
|
"rewards/rejected": -1.4750616550445557, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 46.11054779372386, |
|
"learning_rate": 4.396895306731977e-07, |
|
"logits/chosen": -3.2922375202178955, |
|
"logits/rejected": -3.3002562522888184, |
|
"logps/chosen": -316.46331787109375, |
|
"logps/rejected": -344.7227783203125, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5892685651779175, |
|
"rewards/margins": 0.8242443203926086, |
|
"rewards/rejected": -1.413512945175171, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 39.587491646799435, |
|
"learning_rate": 4.319896928940505e-07, |
|
"logits/chosen": -3.4349207878112793, |
|
"logits/rejected": -3.4556515216827393, |
|
"logps/chosen": -430.2960510253906, |
|
"logps/rejected": -467.32061767578125, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.621132254600525, |
|
"rewards/margins": 0.7838995456695557, |
|
"rewards/rejected": -2.405031681060791, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 36.84462226750656, |
|
"learning_rate": 4.2390375405693723e-07, |
|
"logits/chosen": -3.474997043609619, |
|
"logits/rejected": -3.5272536277770996, |
|
"logps/chosen": -415.80438232421875, |
|
"logps/rejected": -468.0596618652344, |
|
"loss": 0.4422, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5106348991394043, |
|
"rewards/margins": 1.0294691324234009, |
|
"rewards/rejected": -2.5401039123535156, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 47.09166107339635, |
|
"learning_rate": 4.1544886892205354e-07, |
|
"logits/chosen": -3.5425872802734375, |
|
"logits/rejected": -3.5473670959472656, |
|
"logps/chosen": -377.5634765625, |
|
"logps/rejected": -425.98016357421875, |
|
"loss": 0.4589, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8434383273124695, |
|
"rewards/margins": 0.9450035095214844, |
|
"rewards/rejected": -1.7884416580200195, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 49.77750455648269, |
|
"learning_rate": 4.0664297498928534e-07, |
|
"logits/chosen": -3.6749653816223145, |
|
"logits/rejected": -3.6941752433776855, |
|
"logps/chosen": -392.3956604003906, |
|
"logps/rejected": -454.9180603027344, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5022857189178467, |
|
"rewards/margins": 1.0955121517181396, |
|
"rewards/rejected": -2.5977978706359863, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 49.72958773280421, |
|
"learning_rate": 3.975047544428254e-07, |
|
"logits/chosen": -3.7381629943847656, |
|
"logits/rejected": -3.768315076828003, |
|
"logps/chosen": -459.23455810546875, |
|
"logps/rejected": -531.2666625976562, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8736200332641602, |
|
"rewards/margins": 1.138641119003296, |
|
"rewards/rejected": -3.012261152267456, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 37.39808184046548, |
|
"learning_rate": 3.880535945158997e-07, |
|
"logits/chosen": -3.7747676372528076, |
|
"logits/rejected": -3.7997519969940186, |
|
"logps/chosen": -367.59576416015625, |
|
"logps/rejected": -449.857177734375, |
|
"loss": 0.4127, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2651011943817139, |
|
"rewards/margins": 1.1386528015136719, |
|
"rewards/rejected": -2.403754234313965, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 47.294132664555036, |
|
"learning_rate": 3.78309546359696e-07, |
|
"logits/chosen": -3.9040164947509766, |
|
"logits/rejected": -3.91438627243042, |
|
"logps/chosen": -410.07159423828125, |
|
"logps/rejected": -498.1920471191406, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4813024997711182, |
|
"rewards/margins": 1.1341646909713745, |
|
"rewards/rejected": -2.6154673099517822, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 40.77943292899045, |
|
"learning_rate": 3.6829328250375227e-07, |
|
"logits/chosen": -3.932398557662964, |
|
"logits/rejected": -4.020986080169678, |
|
"logps/chosen": -427.6490783691406, |
|
"logps/rejected": -508.22930908203125, |
|
"loss": 0.3809, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.581094741821289, |
|
"rewards/margins": 1.3298404216766357, |
|
"rewards/rejected": -2.910935401916504, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 41.2026716936047, |
|
"learning_rate": 3.580260529980584e-07, |
|
"logits/chosen": -4.01393461227417, |
|
"logits/rejected": -4.080017566680908, |
|
"logps/chosen": -387.573974609375, |
|
"logps/rejected": -487.1441345214844, |
|
"loss": 0.3859, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2360130548477173, |
|
"rewards/margins": 1.409085988998413, |
|
"rewards/rejected": -2.64509916305542, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 40.85253910181408, |
|
"learning_rate": 3.475296403299163e-07, |
|
"logits/chosen": -4.092155456542969, |
|
"logits/rejected": -4.100491523742676, |
|
"logps/chosen": -388.4344482421875, |
|
"logps/rejected": -494.84771728515625, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4383682012557983, |
|
"rewards/margins": 1.5076179504394531, |
|
"rewards/rejected": -2.945986032485962, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 43.630879623871635, |
|
"learning_rate": 3.36826313211205e-07, |
|
"logits/chosen": -4.23541784286499, |
|
"logits/rejected": -4.378731727600098, |
|
"logps/chosen": -423.3770446777344, |
|
"logps/rejected": -521.7530517578125, |
|
"loss": 0.3784, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7406959533691406, |
|
"rewards/margins": 1.5629594326019287, |
|
"rewards/rejected": -3.3036551475524902, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 40.910837008073884, |
|
"learning_rate": 3.259387793340943e-07, |
|
"logits/chosen": -4.390842437744141, |
|
"logits/rejected": -4.466560363769531, |
|
"logps/chosen": -482.41748046875, |
|
"logps/rejected": -587.3478393554688, |
|
"loss": 0.3845, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.141714096069336, |
|
"rewards/margins": 1.4235177040100098, |
|
"rewards/rejected": -3.5652313232421875, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 43.846997204513016, |
|
"learning_rate": 3.14890137195437e-07, |
|
"logits/chosen": -4.389448642730713, |
|
"logits/rejected": -4.424112319946289, |
|
"logps/chosen": -481.9625549316406, |
|
"logps/rejected": -567.7194213867188, |
|
"loss": 0.3682, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1195263862609863, |
|
"rewards/margins": 1.2820372581481934, |
|
"rewards/rejected": -3.4015636444091797, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 48.89695189111101, |
|
"learning_rate": 3.0370382709204883e-07, |
|
"logits/chosen": -4.416136264801025, |
|
"logits/rejected": -4.519248962402344, |
|
"logps/chosen": -472.10931396484375, |
|
"logps/rejected": -609.0546264648438, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.9735286235809326, |
|
"rewards/margins": 1.7880016565322876, |
|
"rewards/rejected": -3.7615303993225098, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 49.97288424600573, |
|
"learning_rate": 2.9240358139084013e-07, |
|
"logits/chosen": -4.548556804656982, |
|
"logits/rejected": -4.62185001373291, |
|
"logps/chosen": -421.85052490234375, |
|
"logps/rejected": -534.3924560546875, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.734278917312622, |
|
"rewards/margins": 1.5305709838867188, |
|
"rewards/rejected": -3.264849901199341, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 42.794361716855846, |
|
"learning_rate": 2.810133741793052e-07, |
|
"logits/chosen": -4.521183490753174, |
|
"logits/rejected": -4.65042781829834, |
|
"logps/chosen": -472.19500732421875, |
|
"logps/rejected": -594.0242309570312, |
|
"loss": 0.3875, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.0321810245513916, |
|
"rewards/margins": 1.7259247303009033, |
|
"rewards/rejected": -3.758105516433716, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 43.28984360773978, |
|
"learning_rate": 2.695573704031885e-07, |
|
"logits/chosen": -4.522828102111816, |
|
"logits/rejected": -4.58953332901001, |
|
"logps/chosen": -470.6817321777344, |
|
"logps/rejected": -592.047119140625, |
|
"loss": 0.3733, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8410589694976807, |
|
"rewards/margins": 1.6147441864013672, |
|
"rewards/rejected": -3.4558029174804688, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_logits/chosen": -4.407002925872803, |
|
"eval_logits/rejected": -4.5463128089904785, |
|
"eval_logps/chosen": -804.1498413085938, |
|
"eval_logps/rejected": -826.000732421875, |
|
"eval_loss": 0.9587702751159668, |
|
"eval_rewards/accuracies": 0.52734375, |
|
"eval_rewards/chosen": -4.247724533081055, |
|
"eval_rewards/margins": 0.28616809844970703, |
|
"eval_rewards/rejected": -4.53389310836792, |
|
"eval_runtime": 97.528, |
|
"eval_samples_per_second": 20.507, |
|
"eval_steps_per_second": 0.328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 45.90321806356986, |
|
"learning_rate": 2.580598745992342e-07, |
|
"logits/chosen": -4.592051029205322, |
|
"logits/rejected": -4.729592800140381, |
|
"logps/chosen": -462.078857421875, |
|
"logps/rejected": -578.6028442382812, |
|
"loss": 0.3683, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6925300359725952, |
|
"rewards/margins": 1.7680349349975586, |
|
"rewards/rejected": -3.4605648517608643, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 45.69561704557737, |
|
"learning_rate": 2.465452793317865e-07, |
|
"logits/chosen": -4.680180549621582, |
|
"logits/rejected": -4.785167694091797, |
|
"logps/chosen": -477.2386779785156, |
|
"logps/rejected": -609.8983154296875, |
|
"loss": 0.3534, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8637679815292358, |
|
"rewards/margins": 1.7234246730804443, |
|
"rewards/rejected": -3.587193012237549, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 50.566593421325194, |
|
"learning_rate": 2.3503801344263344e-07, |
|
"logits/chosen": -4.860222339630127, |
|
"logits/rejected": -4.944591045379639, |
|
"logps/chosen": -456.22357177734375, |
|
"logps/rejected": -573.6387939453125, |
|
"loss": 0.3712, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.1773338317871094, |
|
"rewards/margins": 1.6265901327133179, |
|
"rewards/rejected": -3.8039238452911377, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 49.14997910435826, |
|
"learning_rate": 2.2356249022388789e-07, |
|
"logits/chosen": -4.710982322692871, |
|
"logits/rejected": -4.878017425537109, |
|
"logps/chosen": -468.77978515625, |
|
"logps/rejected": -580.1436157226562, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8084490299224854, |
|
"rewards/margins": 1.884009599685669, |
|
"rewards/rejected": -3.6924586296081543, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 48.71640334228914, |
|
"learning_rate": 2.121430556238559e-07, |
|
"logits/chosen": -4.8175554275512695, |
|
"logits/rejected": -4.996689319610596, |
|
"logps/chosen": -471.17132568359375, |
|
"logps/rejected": -604.3876342773438, |
|
"loss": 0.3386, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.008169412612915, |
|
"rewards/margins": 1.8934684991836548, |
|
"rewards/rejected": -3.9016380310058594, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 39.742825593785554, |
|
"learning_rate": 2.0080393659578038e-07, |
|
"logits/chosen": -4.740202903747559, |
|
"logits/rejected": -5.004001617431641, |
|
"logps/chosen": -513.0294189453125, |
|
"logps/rejected": -656.8961181640625, |
|
"loss": 0.3492, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.400604248046875, |
|
"rewards/margins": 2.0355873107910156, |
|
"rewards/rejected": -4.436191558837891, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 50.20217799621439, |
|
"learning_rate": 1.895691896990388e-07, |
|
"logits/chosen": -4.822530269622803, |
|
"logits/rejected": -4.9560699462890625, |
|
"logps/chosen": -474.0023498535156, |
|
"logps/rejected": -579.5379638671875, |
|
"loss": 0.3354, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1355338096618652, |
|
"rewards/margins": 1.5200008153915405, |
|
"rewards/rejected": -3.655534267425537, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 43.0245343621866, |
|
"learning_rate": 1.7846265006183976e-07, |
|
"logits/chosen": -4.6783013343811035, |
|
"logits/rejected": -4.8442277908325195, |
|
"logps/chosen": -524.712158203125, |
|
"logps/rejected": -632.6328125, |
|
"loss": 0.3455, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.490675687789917, |
|
"rewards/margins": 1.6402429342269897, |
|
"rewards/rejected": -4.130918979644775, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 50.41005236551354, |
|
"learning_rate": 1.6750788081369948e-07, |
|
"logits/chosen": -4.760382175445557, |
|
"logits/rejected": -4.9731292724609375, |
|
"logps/chosen": -475.59765625, |
|
"logps/rejected": -638.8780517578125, |
|
"loss": 0.3533, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.0123469829559326, |
|
"rewards/margins": 2.291203737258911, |
|
"rewards/rejected": -4.303550720214844, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 41.2044329360216, |
|
"learning_rate": 1.5672812309497722e-07, |
|
"logits/chosen": -4.802388668060303, |
|
"logits/rejected": -4.928206443786621, |
|
"logps/chosen": -410.943115234375, |
|
"logps/rejected": -523.5797119140625, |
|
"loss": 0.3523, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6920316219329834, |
|
"rewards/margins": 1.4624649286270142, |
|
"rewards/rejected": -3.154496669769287, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 44.69671736661672, |
|
"learning_rate": 1.461462467495284e-07, |
|
"logits/chosen": -4.758819580078125, |
|
"logits/rejected": -4.947306156158447, |
|
"logps/chosen": -495.4164123535156, |
|
"logps/rejected": -593.2689208984375, |
|
"loss": 0.3703, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.218562602996826, |
|
"rewards/margins": 1.439822793006897, |
|
"rewards/rejected": -3.6583850383758545, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 47.815462029236265, |
|
"learning_rate": 1.357847018050843e-07, |
|
"logits/chosen": -4.747325420379639, |
|
"logits/rejected": -4.958649158477783, |
|
"logps/chosen": -592.2781372070312, |
|
"logps/rejected": -758.1558837890625, |
|
"loss": 0.3513, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.605052947998047, |
|
"rewards/margins": 2.2088265419006348, |
|
"rewards/rejected": -4.81387996673584, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 49.07485622430381, |
|
"learning_rate": 1.2566547084429324e-07, |
|
"logits/chosen": -4.789057731628418, |
|
"logits/rejected": -5.023972988128662, |
|
"logps/chosen": -472.7372131347656, |
|
"logps/rejected": -615.0753784179688, |
|
"loss": 0.3591, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2092373371124268, |
|
"rewards/margins": 1.8947007656097412, |
|
"rewards/rejected": -4.103938579559326, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 45.44279309151153, |
|
"learning_rate": 1.1581002236747328e-07, |
|
"logits/chosen": -4.7014241218566895, |
|
"logits/rejected": -4.898508548736572, |
|
"logps/chosen": -470.0, |
|
"logps/rejected": -624.503662109375, |
|
"loss": 0.368, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.254678249359131, |
|
"rewards/margins": 2.110576868057251, |
|
"rewards/rejected": -4.365254878997803, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 53.087469451573526, |
|
"learning_rate": 1.062392652460177e-07, |
|
"logits/chosen": -4.696314811706543, |
|
"logits/rejected": -4.83748197555542, |
|
"logps/chosen": -505.9961853027344, |
|
"logps/rejected": -626.0050048828125, |
|
"loss": 0.3245, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.9974615573883057, |
|
"rewards/margins": 1.828150987625122, |
|
"rewards/rejected": -3.8256123065948486, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 41.5564187525106, |
|
"learning_rate": 9.697350436308427e-08, |
|
"logits/chosen": -4.737555027008057, |
|
"logits/rejected": -4.948160648345947, |
|
"logps/chosen": -507.66607666015625, |
|
"logps/rejected": -594.7119750976562, |
|
"loss": 0.3584, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.3149771690368652, |
|
"rewards/margins": 1.4441829919815063, |
|
"rewards/rejected": -3.759159803390503, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 44.45010163032659, |
|
"learning_rate": 8.803239753567829e-08, |
|
"logits/chosen": -4.868664741516113, |
|
"logits/rejected": -4.975742340087891, |
|
"logps/chosen": -469.70263671875, |
|
"logps/rejected": -589.5235595703125, |
|
"loss": 0.3498, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1795949935913086, |
|
"rewards/margins": 1.7358201742172241, |
|
"rewards/rejected": -3.9154155254364014, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 38.027371906598745, |
|
"learning_rate": 7.943491380952188e-08, |
|
"logits/chosen": -4.954745292663574, |
|
"logits/rejected": -5.065755844116211, |
|
"logps/chosen": -429.4737243652344, |
|
"logps/rejected": -571.6141967773438, |
|
"loss": 0.3285, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.8640539646148682, |
|
"rewards/margins": 1.8129631280899048, |
|
"rewards/rejected": -3.6770172119140625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 47.705363932547336, |
|
"learning_rate": 7.119929321518875e-08, |
|
"logits/chosen": -4.735751152038574, |
|
"logits/rejected": -5.044283866882324, |
|
"logps/chosen": -445.70367431640625, |
|
"logps/rejected": -583.5177612304688, |
|
"loss": 0.3436, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9234256744384766, |
|
"rewards/margins": 1.9092767238616943, |
|
"rewards/rejected": -3.832702159881592, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 52.02157401864257, |
|
"learning_rate": 6.334300807088508e-08, |
|
"logits/chosen": -4.835855007171631, |
|
"logits/rejected": -5.021653652191162, |
|
"logps/chosen": -518.7296142578125, |
|
"logps/rejected": -639.1578369140625, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.530418634414673, |
|
"rewards/margins": 1.8332151174545288, |
|
"rewards/rejected": -4.363633155822754, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_logits/chosen": -4.618500709533691, |
|
"eval_logits/rejected": -4.810915946960449, |
|
"eval_logps/chosen": -895.0606079101562, |
|
"eval_logps/rejected": -917.572265625, |
|
"eval_loss": 1.0593960285186768, |
|
"eval_rewards/accuracies": 0.5625, |
|
"eval_rewards/chosen": -5.156832218170166, |
|
"eval_rewards/margins": 0.2927757203578949, |
|
"eval_rewards/rejected": -5.449607849121094, |
|
"eval_runtime": 97.5523, |
|
"eval_samples_per_second": 20.502, |
|
"eval_steps_per_second": 0.328, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 50.197907161154795, |
|
"learning_rate": 5.588272591397336e-08, |
|
"logits/chosen": -4.786068916320801, |
|
"logits/rejected": -4.9945068359375, |
|
"logps/chosen": -485.31280517578125, |
|
"logps/rejected": -626.9613647460938, |
|
"loss": 0.3457, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.227161169052124, |
|
"rewards/margins": 2.0316004753112793, |
|
"rewards/rejected": -4.258761405944824, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 48.34581690217935, |
|
"learning_rate": 4.8834274139883084e-08, |
|
"logits/chosen": -4.855401039123535, |
|
"logits/rejected": -5.10861873626709, |
|
"logps/chosen": -469.53704833984375, |
|
"logps/rejected": -616.6036376953125, |
|
"loss": 0.3427, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.20617413520813, |
|
"rewards/margins": 1.9504497051239014, |
|
"rewards/rejected": -4.156623840332031, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 43.92355028448363, |
|
"learning_rate": 4.221260642342786e-08, |
|
"logits/chosen": -4.848982810974121, |
|
"logits/rejected": -4.972110271453857, |
|
"logps/chosen": -502.3720703125, |
|
"logps/rejected": -609.923828125, |
|
"loss": 0.3357, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.262599229812622, |
|
"rewards/margins": 1.6350996494293213, |
|
"rewards/rejected": -3.8976986408233643, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 48.906535443168416, |
|
"learning_rate": 3.60317709937693e-08, |
|
"logits/chosen": -4.723662853240967, |
|
"logits/rejected": -4.999013900756836, |
|
"logps/chosen": -511.27691650390625, |
|
"logps/rejected": -656.3660888671875, |
|
"loss": 0.361, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2201476097106934, |
|
"rewards/margins": 2.0478897094726562, |
|
"rewards/rejected": -4.26803731918335, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 38.024316440823085, |
|
"learning_rate": 3.030488083033273e-08, |
|
"logits/chosen": -4.8308892250061035, |
|
"logits/rejected": -5.002086162567139, |
|
"logps/chosen": -500.25579833984375, |
|
"logps/rejected": -703.7189331054688, |
|
"loss": 0.3434, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2706263065338135, |
|
"rewards/margins": 2.5099949836730957, |
|
"rewards/rejected": -4.780620574951172, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 46.39394071999461, |
|
"learning_rate": 2.5044085842905683e-08, |
|
"logits/chosen": -4.880900859832764, |
|
"logits/rejected": -5.102107048034668, |
|
"logps/chosen": -522.2579345703125, |
|
"logps/rejected": -679.2675170898438, |
|
"loss": 0.375, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.569988965988159, |
|
"rewards/margins": 2.0331876277923584, |
|
"rewards/rejected": -4.603176593780518, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 44.94685644229088, |
|
"learning_rate": 2.0260547094942348e-08, |
|
"logits/chosen": -4.779486656188965, |
|
"logits/rejected": -4.989696502685547, |
|
"logps/chosen": -506.99920654296875, |
|
"logps/rejected": -654.0205688476562, |
|
"loss": 0.3649, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.142956495285034, |
|
"rewards/margins": 2.1117589473724365, |
|
"rewards/rejected": -4.254715919494629, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 44.46821814832556, |
|
"learning_rate": 1.5964413124758493e-08, |
|
"logits/chosen": -4.752711296081543, |
|
"logits/rejected": -4.939455986022949, |
|
"logps/chosen": -438.1668395996094, |
|
"logps/rejected": -598.4129028320312, |
|
"loss": 0.3357, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8868913650512695, |
|
"rewards/margins": 1.9845082759857178, |
|
"rewards/rejected": -3.8714001178741455, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 49.902887693125585, |
|
"learning_rate": 1.2164798414854071e-08, |
|
"logits/chosen": -4.861344814300537, |
|
"logits/rejected": -4.92690896987915, |
|
"logps/chosen": -514.7852783203125, |
|
"logps/rejected": -692.4171142578125, |
|
"loss": 0.3343, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.289367437362671, |
|
"rewards/margins": 2.1290056705474854, |
|
"rewards/rejected": -4.418373107910156, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 46.82416143070829, |
|
"learning_rate": 8.869764055041501e-09, |
|
"logits/chosen": -4.845822811126709, |
|
"logits/rejected": -5.040514945983887, |
|
"logps/chosen": -476.73358154296875, |
|
"logps/rejected": -638.4616088867188, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.128450870513916, |
|
"rewards/margins": 1.8921457529067993, |
|
"rewards/rejected": -4.020596504211426, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 45.59470033908034, |
|
"learning_rate": 6.086300640404079e-09, |
|
"logits/chosen": -4.7604827880859375, |
|
"logits/rejected": -4.900928020477295, |
|
"logps/chosen": -530.9966430664062, |
|
"logps/rejected": -647.1216430664062, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.3973419666290283, |
|
"rewards/margins": 1.63766610622406, |
|
"rewards/rejected": -4.035007953643799, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 38.33546651783018, |
|
"learning_rate": 3.82031344036729e-09, |
|
"logits/chosen": -4.72461462020874, |
|
"logits/rejected": -4.975947380065918, |
|
"logps/chosen": -490.10650634765625, |
|
"logps/rejected": -617.1689453125, |
|
"loss": 0.3241, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.2612645626068115, |
|
"rewards/margins": 1.7519699335098267, |
|
"rewards/rejected": -4.013234615325928, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 48.183739221855284, |
|
"learning_rate": 2.0766098703477173e-09, |
|
"logits/chosen": -4.829585552215576, |
|
"logits/rejected": -5.007233619689941, |
|
"logps/chosen": -458.9842834472656, |
|
"logps/rejected": -589.1207275390625, |
|
"loss": 0.3698, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9293123483657837, |
|
"rewards/margins": 1.888193130493164, |
|
"rewards/rejected": -3.8175055980682373, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 47.43892343620843, |
|
"learning_rate": 8.588892925590063e-10, |
|
"logits/chosen": -4.886306285858154, |
|
"logits/rejected": -5.221497535705566, |
|
"logps/chosen": -480.17462158203125, |
|
"logps/rejected": -637.3043823242188, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.0167715549468994, |
|
"rewards/margins": 2.319883346557617, |
|
"rewards/rejected": -4.3366546630859375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 48.471497735173386, |
|
"learning_rate": 1.6973516761317752e-10, |
|
"logits/chosen": -4.912912845611572, |
|
"logits/rejected": -5.170907020568848, |
|
"logps/chosen": -459.01531982421875, |
|
"logps/rejected": -576.1976318359375, |
|
"loss": 0.3505, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.2589094638824463, |
|
"rewards/margins": 1.6588354110717773, |
|
"rewards/rejected": -3.9177448749542236, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 379, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4399322837512537, |
|
"train_runtime": 5833.2156, |
|
"train_samples_per_second": 8.32, |
|
"train_steps_per_second": 0.065 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 379, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|