|
{ |
|
"best_metric": 0.9302791357040405, |
|
"best_model_checkpoint": "./output/dpo_output/10k_students_10k_stack/checkpoint/checkpoint-18000", |
|
"epoch": 0.8335262792313035, |
|
"eval_steps": 1000, |
|
"global_step": 18000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0046307015512850195, |
|
"grad_norm": 5.932480812072754, |
|
"learning_rate": 4.977772632553832e-05, |
|
"logits/chosen": -18.937969207763672, |
|
"logits/rejected": -17.256298065185547, |
|
"logps/chosen": -169.58087158203125, |
|
"logps/rejected": -168.01634216308594, |
|
"loss": 0.7752, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -2.037126302719116, |
|
"rewards/margins": 0.4727743864059448, |
|
"rewards/rejected": -2.5099010467529297, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.009261403102570039, |
|
"grad_norm": 8.801850318908691, |
|
"learning_rate": 4.954850659874971e-05, |
|
"logits/chosen": -17.237207412719727, |
|
"logits/rejected": -15.479244232177734, |
|
"logps/chosen": -199.30752563476562, |
|
"logps/rejected": -186.26181030273438, |
|
"loss": 1.1298, |
|
"rewards/accuracies": 0.49000000953674316, |
|
"rewards/chosen": -4.508936882019043, |
|
"rewards/margins": -0.0808589830994606, |
|
"rewards/rejected": -4.428077697753906, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01389210465385506, |
|
"grad_norm": 102.89826202392578, |
|
"learning_rate": 4.931697152118546e-05, |
|
"logits/chosen": -17.642629623413086, |
|
"logits/rejected": -16.12238121032715, |
|
"logps/chosen": -188.68121337890625, |
|
"logps/rejected": -182.5252685546875, |
|
"loss": 0.8529, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -2.8182811737060547, |
|
"rewards/margins": 0.3854120671749115, |
|
"rewards/rejected": -3.203693151473999, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.018522806205140078, |
|
"grad_norm": 0.0003582279896363616, |
|
"learning_rate": 4.9085436443621215e-05, |
|
"logits/chosen": -16.342151641845703, |
|
"logits/rejected": -14.690173149108887, |
|
"logps/chosen": -197.86817932128906, |
|
"logps/rejected": -199.99160766601562, |
|
"loss": 0.9249, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -4.376795768737793, |
|
"rewards/margins": 0.645234227180481, |
|
"rewards/rejected": -5.022029876708984, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0231535077564251, |
|
"grad_norm": 0.7547457218170166, |
|
"learning_rate": 4.8853901366056956e-05, |
|
"logits/chosen": -14.315742492675781, |
|
"logits/rejected": -13.13943099975586, |
|
"logps/chosen": -226.77879333496094, |
|
"logps/rejected": -212.2596893310547, |
|
"loss": 1.1835, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": -7.502179145812988, |
|
"rewards/margins": 0.5412274599075317, |
|
"rewards/rejected": -8.043407440185547, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02778420930771012, |
|
"grad_norm": 0.09894751757383347, |
|
"learning_rate": 4.862236628849271e-05, |
|
"logits/chosen": -14.297338485717773, |
|
"logits/rejected": -13.270490646362305, |
|
"logps/chosen": -255.91860961914062, |
|
"logps/rejected": -263.9288024902344, |
|
"loss": 1.2753, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -10.779191970825195, |
|
"rewards/margins": 1.2364416122436523, |
|
"rewards/rejected": -12.015632629394531, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03241491085899514, |
|
"grad_norm": 18.43197250366211, |
|
"learning_rate": 4.839083121092846e-05, |
|
"logits/chosen": -14.56347370147705, |
|
"logits/rejected": -12.294930458068848, |
|
"logps/chosen": -234.3055877685547, |
|
"logps/rejected": -230.1753692626953, |
|
"loss": 1.085, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -8.342914581298828, |
|
"rewards/margins": 1.1828445196151733, |
|
"rewards/rejected": -9.525758743286133, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.037045612410280156, |
|
"grad_norm": 147.7353515625, |
|
"learning_rate": 4.8159296133364206e-05, |
|
"logits/chosen": -14.960326194763184, |
|
"logits/rejected": -12.486715316772461, |
|
"logps/chosen": -243.6907196044922, |
|
"logps/rejected": -258.2897033691406, |
|
"loss": 0.8159, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -8.589630126953125, |
|
"rewards/margins": 2.1720709800720215, |
|
"rewards/rejected": -10.761700630187988, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.041676313961565174, |
|
"grad_norm": 40.75476837158203, |
|
"learning_rate": 4.792776105579996e-05, |
|
"logits/chosen": -14.261970520019531, |
|
"logits/rejected": -13.42474365234375, |
|
"logps/chosen": -236.05096435546875, |
|
"logps/rejected": -245.3695831298828, |
|
"loss": 1.3291, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -9.054373741149902, |
|
"rewards/margins": 1.2040209770202637, |
|
"rewards/rejected": -10.258395195007324, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0463070155128502, |
|
"grad_norm": 41.665733337402344, |
|
"learning_rate": 4.76962259782357e-05, |
|
"logits/chosen": -13.747593879699707, |
|
"logits/rejected": -11.4396390914917, |
|
"logps/chosen": -267.20654296875, |
|
"logps/rejected": -263.12127685546875, |
|
"loss": 1.1523, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -12.121853828430176, |
|
"rewards/margins": 1.9070380926132202, |
|
"rewards/rejected": -14.028891563415527, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0463070155128502, |
|
"eval_logits/chosen": -15.348400115966797, |
|
"eval_logits/rejected": -12.8839750289917, |
|
"eval_logps/chosen": -278.6545715332031, |
|
"eval_logps/rejected": -285.7479248046875, |
|
"eval_loss": 1.159505009651184, |
|
"eval_rewards/accuracies": 0.6658333539962769, |
|
"eval_rewards/chosen": -12.499542236328125, |
|
"eval_rewards/margins": 2.093540906906128, |
|
"eval_rewards/rejected": -14.593082427978516, |
|
"eval_runtime": 595.521, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 4.03, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05093771706413522, |
|
"grad_norm": 27.11232566833496, |
|
"learning_rate": 4.746469090067146e-05, |
|
"logits/chosen": -15.117622375488281, |
|
"logits/rejected": -13.129312515258789, |
|
"logps/chosen": -235.89825439453125, |
|
"logps/rejected": -241.9706573486328, |
|
"loss": 0.9035, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -9.738895416259766, |
|
"rewards/margins": 1.9915401935577393, |
|
"rewards/rejected": -11.730435371398926, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.05556841861542024, |
|
"grad_norm": 14.75066089630127, |
|
"learning_rate": 4.7247047927761054e-05, |
|
"logits/chosen": -13.134005546569824, |
|
"logits/rejected": -11.821113586425781, |
|
"logps/chosen": -537.9695434570312, |
|
"logps/rejected": -567.16259765625, |
|
"loss": 3.6509, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -38.636722564697266, |
|
"rewards/margins": 2.336888551712036, |
|
"rewards/rejected": -40.97361373901367, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.060199120166705256, |
|
"grad_norm": 2.5833189487457275, |
|
"learning_rate": 4.701551285019681e-05, |
|
"logits/chosen": -14.083927154541016, |
|
"logits/rejected": -11.76788330078125, |
|
"logps/chosen": -228.8092041015625, |
|
"logps/rejected": -229.73764038085938, |
|
"loss": 0.9777, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -8.000514030456543, |
|
"rewards/margins": 1.5612505674362183, |
|
"rewards/rejected": -9.56176471710205, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.06482982171799027, |
|
"grad_norm": 55.1929817199707, |
|
"learning_rate": 4.6783977772632556e-05, |
|
"logits/chosen": -13.045205116271973, |
|
"logits/rejected": -11.083745002746582, |
|
"logps/chosen": -280.2500915527344, |
|
"logps/rejected": -279.9684753417969, |
|
"loss": 1.1287, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.219869613647461, |
|
"rewards/margins": 2.250887393951416, |
|
"rewards/rejected": -14.470755577087402, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.06946052326927529, |
|
"grad_norm": 106.15294647216797, |
|
"learning_rate": 4.6552442695068304e-05, |
|
"logits/chosen": -14.938093185424805, |
|
"logits/rejected": -12.27778434753418, |
|
"logps/chosen": -229.48614501953125, |
|
"logps/rejected": -224.2559356689453, |
|
"loss": 0.8994, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -8.494085311889648, |
|
"rewards/margins": 1.5291900634765625, |
|
"rewards/rejected": -10.023276329040527, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07409122482056031, |
|
"grad_norm": 0.6163883209228516, |
|
"learning_rate": 4.632090761750406e-05, |
|
"logits/chosen": -11.895711898803711, |
|
"logits/rejected": -11.187520980834961, |
|
"logps/chosen": -250.82479858398438, |
|
"logps/rejected": -271.1986083984375, |
|
"loss": 1.1029, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -10.7871675491333, |
|
"rewards/margins": 1.8207966089248657, |
|
"rewards/rejected": -12.607963562011719, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07872192637184533, |
|
"grad_norm": 4.759420394897461, |
|
"learning_rate": 4.60893725399398e-05, |
|
"logits/chosen": -14.557934761047363, |
|
"logits/rejected": -12.280036926269531, |
|
"logps/chosen": -226.98574829101562, |
|
"logps/rejected": -264.96875, |
|
"loss": 0.9261, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -8.311798095703125, |
|
"rewards/margins": 2.297745943069458, |
|
"rewards/rejected": -10.609543800354004, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.08335262792313035, |
|
"grad_norm": 112.9684066772461, |
|
"learning_rate": 4.5857837462375555e-05, |
|
"logits/chosen": -12.77873420715332, |
|
"logits/rejected": -10.884748458862305, |
|
"logps/chosen": -258.1977233886719, |
|
"logps/rejected": -291.1019287109375, |
|
"loss": 1.0586, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -11.546738624572754, |
|
"rewards/margins": 1.946559190750122, |
|
"rewards/rejected": -13.493298530578613, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.08798332947441537, |
|
"grad_norm": 185.68812561035156, |
|
"learning_rate": 4.56263023848113e-05, |
|
"logits/chosen": -10.224660873413086, |
|
"logits/rejected": -8.805120468139648, |
|
"logps/chosen": -291.38720703125, |
|
"logps/rejected": -307.789306640625, |
|
"loss": 0.8702, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -13.615660667419434, |
|
"rewards/margins": 2.516305923461914, |
|
"rewards/rejected": -16.13196563720703, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.0926140310257004, |
|
"grad_norm": 1.0021706819534302, |
|
"learning_rate": 4.539476730724705e-05, |
|
"logits/chosen": -12.920204162597656, |
|
"logits/rejected": -10.876238822937012, |
|
"logps/chosen": -260.2502746582031, |
|
"logps/rejected": -249.31326293945312, |
|
"loss": 1.0427, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -10.521868705749512, |
|
"rewards/margins": 1.398219347000122, |
|
"rewards/rejected": -11.920088768005371, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0926140310257004, |
|
"eval_logits/chosen": -14.9287748336792, |
|
"eval_logits/rejected": -12.413740158081055, |
|
"eval_logps/chosen": -250.66151428222656, |
|
"eval_logps/rejected": -258.962890625, |
|
"eval_loss": 0.9348099827766418, |
|
"eval_rewards/accuracies": 0.6762499809265137, |
|
"eval_rewards/chosen": -9.700236320495605, |
|
"eval_rewards/margins": 2.2143468856811523, |
|
"eval_rewards/rejected": -11.914582252502441, |
|
"eval_runtime": 595.1862, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 4.032, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09724473257698542, |
|
"grad_norm": 92.90179443359375, |
|
"learning_rate": 4.51632322296828e-05, |
|
"logits/chosen": -13.820252418518066, |
|
"logits/rejected": -11.60437297821045, |
|
"logps/chosen": -263.9429931640625, |
|
"logps/rejected": -266.4606018066406, |
|
"loss": 1.1237, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -11.721684455871582, |
|
"rewards/margins": 2.347269058227539, |
|
"rewards/rejected": -14.068955421447754, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.10187543412827044, |
|
"grad_norm": 1.9892809391021729, |
|
"learning_rate": 4.4931697152118546e-05, |
|
"logits/chosen": -16.49983787536621, |
|
"logits/rejected": -13.618443489074707, |
|
"logps/chosen": -261.19989013671875, |
|
"logps/rejected": -265.1740417480469, |
|
"loss": 1.0921, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -8.718969345092773, |
|
"rewards/margins": 1.7608253955841064, |
|
"rewards/rejected": -10.4797945022583, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.10650613567955546, |
|
"grad_norm": 2.8146913051605225, |
|
"learning_rate": 4.4700162074554294e-05, |
|
"logits/chosen": -14.866826057434082, |
|
"logits/rejected": -12.879820823669434, |
|
"logps/chosen": -237.8057861328125, |
|
"logps/rejected": -248.6677703857422, |
|
"loss": 0.8993, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -9.813733100891113, |
|
"rewards/margins": 1.9094059467315674, |
|
"rewards/rejected": -11.723139762878418, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.11113683723084047, |
|
"grad_norm": 7.9283857345581055, |
|
"learning_rate": 4.446862699699005e-05, |
|
"logits/chosen": -15.892507553100586, |
|
"logits/rejected": -12.79761028289795, |
|
"logps/chosen": -240.3441925048828, |
|
"logps/rejected": -241.5165557861328, |
|
"loss": 0.7475, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -8.371927261352539, |
|
"rewards/margins": 2.462991714477539, |
|
"rewards/rejected": -10.834918975830078, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.11576753878212549, |
|
"grad_norm": 17.73735237121582, |
|
"learning_rate": 4.4237091919425796e-05, |
|
"logits/chosen": -14.584887504577637, |
|
"logits/rejected": -12.091426849365234, |
|
"logps/chosen": -258.0081787109375, |
|
"logps/rejected": -260.34161376953125, |
|
"loss": 1.1756, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -10.273959159851074, |
|
"rewards/margins": 1.984671950340271, |
|
"rewards/rejected": -12.258630752563477, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12039824033341051, |
|
"grad_norm": 133.74703979492188, |
|
"learning_rate": 4.4005556841861544e-05, |
|
"logits/chosen": -13.465319633483887, |
|
"logits/rejected": -10.94394302368164, |
|
"logps/chosen": -266.9393310546875, |
|
"logps/rejected": -272.83087158203125, |
|
"loss": 1.1628, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -10.949825286865234, |
|
"rewards/margins": 1.9796525239944458, |
|
"rewards/rejected": -12.92947769165039, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.12502894188469554, |
|
"grad_norm": 3.2051329612731934, |
|
"learning_rate": 4.377402176429729e-05, |
|
"logits/chosen": -13.998117446899414, |
|
"logits/rejected": -11.195394515991211, |
|
"logps/chosen": -265.32000732421875, |
|
"logps/rejected": -262.2739562988281, |
|
"loss": 0.98, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -10.569400787353516, |
|
"rewards/margins": 2.5353689193725586, |
|
"rewards/rejected": -13.10477066040039, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.12965964343598055, |
|
"grad_norm": 82.38189697265625, |
|
"learning_rate": 4.354248668673304e-05, |
|
"logits/chosen": -14.051268577575684, |
|
"logits/rejected": -11.348038673400879, |
|
"logps/chosen": -271.5502014160156, |
|
"logps/rejected": -274.1068115234375, |
|
"loss": 1.3392, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -12.65804672241211, |
|
"rewards/margins": 2.2346200942993164, |
|
"rewards/rejected": -14.892668724060059, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.13429034498726558, |
|
"grad_norm": 0.8718699812889099, |
|
"learning_rate": 4.3310951609168794e-05, |
|
"logits/chosen": -13.869842529296875, |
|
"logits/rejected": -10.826061248779297, |
|
"logps/chosen": -257.593994140625, |
|
"logps/rejected": -272.55975341796875, |
|
"loss": 0.9159, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -10.715877532958984, |
|
"rewards/margins": 2.9527039527893066, |
|
"rewards/rejected": -13.668583869934082, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.13892104653855059, |
|
"grad_norm": 0.9077057242393494, |
|
"learning_rate": 4.3079416531604535e-05, |
|
"logits/chosen": -12.1543550491333, |
|
"logits/rejected": -10.162296295166016, |
|
"logps/chosen": -303.01409912109375, |
|
"logps/rejected": -313.34844970703125, |
|
"loss": 1.2139, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -13.634842872619629, |
|
"rewards/margins": 2.48551082611084, |
|
"rewards/rejected": -16.12035369873047, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13892104653855059, |
|
"eval_logits/chosen": -13.600151062011719, |
|
"eval_logits/rejected": -11.181111335754395, |
|
"eval_logps/chosen": -264.4333801269531, |
|
"eval_logps/rejected": -276.6205749511719, |
|
"eval_loss": 1.0894662141799927, |
|
"eval_rewards/accuracies": 0.6833333373069763, |
|
"eval_rewards/chosen": -11.077425956726074, |
|
"eval_rewards/margins": 2.6029253005981445, |
|
"eval_rewards/rejected": -13.680350303649902, |
|
"eval_runtime": 595.3502, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14355174808983562, |
|
"grad_norm": 118.16087341308594, |
|
"learning_rate": 4.284788145404029e-05, |
|
"logits/chosen": -12.824311256408691, |
|
"logits/rejected": -10.598331451416016, |
|
"logps/chosen": -287.16229248046875, |
|
"logps/rejected": -296.9625244140625, |
|
"loss": 1.3294, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -13.0645751953125, |
|
"rewards/margins": 2.751075029373169, |
|
"rewards/rejected": -15.815651893615723, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.14818244964112062, |
|
"grad_norm": 131.24530029296875, |
|
"learning_rate": 4.261634637647604e-05, |
|
"logits/chosen": -12.044851303100586, |
|
"logits/rejected": -10.322354316711426, |
|
"logps/chosen": -263.2828674316406, |
|
"logps/rejected": -278.4976806640625, |
|
"loss": 0.8734, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -13.411654472351074, |
|
"rewards/margins": 2.831801652908325, |
|
"rewards/rejected": -16.24345588684082, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.15281315119240566, |
|
"grad_norm": 0.012756047770380974, |
|
"learning_rate": 4.2384811298911786e-05, |
|
"logits/chosen": -13.847241401672363, |
|
"logits/rejected": -10.280381202697754, |
|
"logps/chosen": -289.5423889160156, |
|
"logps/rejected": -274.0093994140625, |
|
"loss": 1.238, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -11.928001403808594, |
|
"rewards/margins": 2.2357709407806396, |
|
"rewards/rejected": -14.163771629333496, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.15744385274369066, |
|
"grad_norm": 74.41487884521484, |
|
"learning_rate": 4.215327622134754e-05, |
|
"logits/chosen": -13.10720443725586, |
|
"logits/rejected": -10.8968505859375, |
|
"logps/chosen": -238.0747528076172, |
|
"logps/rejected": -262.4646301269531, |
|
"loss": 0.8596, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -10.469843864440918, |
|
"rewards/margins": 2.7977781295776367, |
|
"rewards/rejected": -13.267622947692871, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1620745542949757, |
|
"grad_norm": 45.10124206542969, |
|
"learning_rate": 4.192174114378328e-05, |
|
"logits/chosen": -13.744721412658691, |
|
"logits/rejected": -11.368577003479004, |
|
"logps/chosen": -264.2241516113281, |
|
"logps/rejected": -258.3453063964844, |
|
"loss": 1.384, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -10.90272331237793, |
|
"rewards/margins": 1.433777928352356, |
|
"rewards/rejected": -12.33650016784668, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1667052558462607, |
|
"grad_norm": 0.9729955196380615, |
|
"learning_rate": 4.1690206066219036e-05, |
|
"logits/chosen": -13.980158805847168, |
|
"logits/rejected": -11.490269660949707, |
|
"logps/chosen": -263.657958984375, |
|
"logps/rejected": -263.7289733886719, |
|
"loss": 0.8817, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -11.24023723602295, |
|
"rewards/margins": 2.3780357837677, |
|
"rewards/rejected": -13.61827278137207, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.17133595739754573, |
|
"grad_norm": 104.15093231201172, |
|
"learning_rate": 4.1458670988654784e-05, |
|
"logits/chosen": -16.399433135986328, |
|
"logits/rejected": -13.435510635375977, |
|
"logps/chosen": -229.3163299560547, |
|
"logps/rejected": -235.4849853515625, |
|
"loss": 1.0346, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -9.024942398071289, |
|
"rewards/margins": 2.8823766708374023, |
|
"rewards/rejected": -11.907319068908691, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.17596665894883073, |
|
"grad_norm": 0.09079485386610031, |
|
"learning_rate": 4.122713591109053e-05, |
|
"logits/chosen": -14.557435035705566, |
|
"logits/rejected": -12.38566780090332, |
|
"logps/chosen": -252.38616943359375, |
|
"logps/rejected": -257.95721435546875, |
|
"loss": 1.2015, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -10.700638771057129, |
|
"rewards/margins": 2.269381523132324, |
|
"rewards/rejected": -12.970019340515137, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.18059736050011577, |
|
"grad_norm": 1.7486906051635742, |
|
"learning_rate": 4.099560083352628e-05, |
|
"logits/chosen": -16.514432907104492, |
|
"logits/rejected": -13.728776931762695, |
|
"logps/chosen": -254.0413055419922, |
|
"logps/rejected": -259.3268127441406, |
|
"loss": 0.902, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -8.835321426391602, |
|
"rewards/margins": 2.317606210708618, |
|
"rewards/rejected": -11.15292739868164, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.1852280620514008, |
|
"grad_norm": 84.05282592773438, |
|
"learning_rate": 4.076406575596203e-05, |
|
"logits/chosen": -16.174575805664062, |
|
"logits/rejected": -12.256491661071777, |
|
"logps/chosen": -261.49627685546875, |
|
"logps/rejected": -262.43243408203125, |
|
"loss": 0.9042, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -10.012048721313477, |
|
"rewards/margins": 3.254835844039917, |
|
"rewards/rejected": -13.266884803771973, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1852280620514008, |
|
"eval_logits/chosen": -13.876004219055176, |
|
"eval_logits/rejected": -11.615591049194336, |
|
"eval_logps/chosen": -272.5106506347656, |
|
"eval_logps/rejected": -284.0415344238281, |
|
"eval_loss": 1.0893065929412842, |
|
"eval_rewards/accuracies": 0.6891666650772095, |
|
"eval_rewards/chosen": -11.885148048400879, |
|
"eval_rewards/margins": 2.5372982025146484, |
|
"eval_rewards/rejected": -14.422446250915527, |
|
"eval_runtime": 595.328, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1898587636026858, |
|
"grad_norm": 31.714935302734375, |
|
"learning_rate": 4.053253067839778e-05, |
|
"logits/chosen": -14.158173561096191, |
|
"logits/rejected": -11.79055404663086, |
|
"logps/chosen": -261.6609191894531, |
|
"logps/rejected": -266.7125244140625, |
|
"loss": 1.0238, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -10.325104713439941, |
|
"rewards/margins": 2.4658026695251465, |
|
"rewards/rejected": -12.790907859802246, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.19448946515397084, |
|
"grad_norm": 0.1167730987071991, |
|
"learning_rate": 4.030099560083353e-05, |
|
"logits/chosen": -13.809327125549316, |
|
"logits/rejected": -11.6063871383667, |
|
"logps/chosen": -293.43170166015625, |
|
"logps/rejected": -306.2814636230469, |
|
"loss": 1.0791, |
|
"rewards/accuracies": 0.7699999809265137, |
|
"rewards/chosen": -13.137182235717773, |
|
"rewards/margins": 3.19193172454834, |
|
"rewards/rejected": -16.329113006591797, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.19912016670525584, |
|
"grad_norm": 108.64598846435547, |
|
"learning_rate": 4.006946052326928e-05, |
|
"logits/chosen": -13.153284072875977, |
|
"logits/rejected": -11.617776870727539, |
|
"logps/chosen": -275.723876953125, |
|
"logps/rejected": -291.53851318359375, |
|
"loss": 1.2784, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -11.406493186950684, |
|
"rewards/margins": 1.6062767505645752, |
|
"rewards/rejected": -13.012768745422363, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.20375086825654087, |
|
"grad_norm": 20.68520164489746, |
|
"learning_rate": 3.9837925445705025e-05, |
|
"logits/chosen": -11.738310813903809, |
|
"logits/rejected": -10.779892921447754, |
|
"logps/chosen": -269.1992492675781, |
|
"logps/rejected": -300.6120300292969, |
|
"loss": 0.964, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -12.769682884216309, |
|
"rewards/margins": 2.6736278533935547, |
|
"rewards/rejected": -15.443307876586914, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.20838156980782588, |
|
"grad_norm": 2.7133309841156006, |
|
"learning_rate": 3.960639036814078e-05, |
|
"logits/chosen": -12.766013145446777, |
|
"logits/rejected": -10.480879783630371, |
|
"logps/chosen": -344.195068359375, |
|
"logps/rejected": -360.9930725097656, |
|
"loss": 1.1572, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -18.676206588745117, |
|
"rewards/margins": 2.426331043243408, |
|
"rewards/rejected": -21.1025390625, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2130122713591109, |
|
"grad_norm": 116.57975769042969, |
|
"learning_rate": 3.937485529057652e-05, |
|
"logits/chosen": -13.390982627868652, |
|
"logits/rejected": -11.077901840209961, |
|
"logps/chosen": -304.98016357421875, |
|
"logps/rejected": -315.5147399902344, |
|
"loss": 0.8711, |
|
"rewards/accuracies": 0.7900000214576721, |
|
"rewards/chosen": -13.933846473693848, |
|
"rewards/margins": 3.604663133621216, |
|
"rewards/rejected": -17.538511276245117, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.21764297291039592, |
|
"grad_norm": 0.0504024475812912, |
|
"learning_rate": 3.9143320213012276e-05, |
|
"logits/chosen": -13.784337997436523, |
|
"logits/rejected": -11.406414985656738, |
|
"logps/chosen": -289.4490661621094, |
|
"logps/rejected": -298.804443359375, |
|
"loss": 1.2294, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -13.892451286315918, |
|
"rewards/margins": 3.4186654090881348, |
|
"rewards/rejected": -17.31111717224121, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.22227367446168095, |
|
"grad_norm": 99.90382385253906, |
|
"learning_rate": 3.891178513544802e-05, |
|
"logits/chosen": -14.787217140197754, |
|
"logits/rejected": -13.069499969482422, |
|
"logps/chosen": -251.93524169921875, |
|
"logps/rejected": -260.898681640625, |
|
"loss": 1.4711, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -11.366158485412598, |
|
"rewards/margins": 1.8550819158554077, |
|
"rewards/rejected": -13.221240043640137, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.22690437601296595, |
|
"grad_norm": 46.121463775634766, |
|
"learning_rate": 3.868025005788377e-05, |
|
"logits/chosen": -16.073060989379883, |
|
"logits/rejected": -13.704066276550293, |
|
"logps/chosen": -268.179443359375, |
|
"logps/rejected": -277.36944580078125, |
|
"loss": 0.8299, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -10.643136978149414, |
|
"rewards/margins": 3.1942529678344727, |
|
"rewards/rejected": -13.837389945983887, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.23153507756425099, |
|
"grad_norm": 215.08323669433594, |
|
"learning_rate": 3.844871498031952e-05, |
|
"logits/chosen": -14.96874713897705, |
|
"logits/rejected": -12.921075820922852, |
|
"logps/chosen": -268.0001525878906, |
|
"logps/rejected": -285.048583984375, |
|
"loss": 1.134, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -12.303441047668457, |
|
"rewards/margins": 2.4722583293914795, |
|
"rewards/rejected": -14.7756986618042, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.23153507756425099, |
|
"eval_logits/chosen": -15.224173545837402, |
|
"eval_logits/rejected": -12.963865280151367, |
|
"eval_logps/chosen": -270.43438720703125, |
|
"eval_logps/rejected": -280.1809387207031, |
|
"eval_loss": 1.1040797233581543, |
|
"eval_rewards/accuracies": 0.6566666960716248, |
|
"eval_rewards/chosen": -11.67752742767334, |
|
"eval_rewards/margins": 2.3588619232177734, |
|
"eval_rewards/rejected": -14.036388397216797, |
|
"eval_runtime": 595.5539, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 4.03, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.23616577911553602, |
|
"grad_norm": 4.563509355648421e-05, |
|
"learning_rate": 3.821717990275527e-05, |
|
"logits/chosen": -15.249272346496582, |
|
"logits/rejected": -13.242805480957031, |
|
"logps/chosen": -243.7661590576172, |
|
"logps/rejected": -253.2401580810547, |
|
"loss": 1.1956, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -9.967913627624512, |
|
"rewards/margins": 1.7604706287384033, |
|
"rewards/rejected": -11.728384017944336, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.24079648066682102, |
|
"grad_norm": 5.94889497756958, |
|
"learning_rate": 3.798564482519102e-05, |
|
"logits/chosen": -15.72038459777832, |
|
"logits/rejected": -12.727486610412598, |
|
"logps/chosen": -264.8755187988281, |
|
"logps/rejected": -278.22100830078125, |
|
"loss": 0.9918, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -11.164973258972168, |
|
"rewards/margins": 2.757608652114868, |
|
"rewards/rejected": -13.922581672668457, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.24542718221810605, |
|
"grad_norm": 58.313072204589844, |
|
"learning_rate": 3.775410974762676e-05, |
|
"logits/chosen": -15.393084526062012, |
|
"logits/rejected": -12.0960111618042, |
|
"logps/chosen": -262.96484375, |
|
"logps/rejected": -269.2939453125, |
|
"loss": 1.0712, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.024227142333984, |
|
"rewards/margins": 2.7153475284576416, |
|
"rewards/rejected": -13.739574432373047, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.2500578837693911, |
|
"grad_norm": 48.545108795166016, |
|
"learning_rate": 3.752257467006252e-05, |
|
"logits/chosen": -11.685456275939941, |
|
"logits/rejected": -10.465229034423828, |
|
"logps/chosen": -265.57440185546875, |
|
"logps/rejected": -289.9385986328125, |
|
"loss": 0.9366, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -13.04489517211914, |
|
"rewards/margins": 2.876924991607666, |
|
"rewards/rejected": -15.921817779541016, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.25468858532067606, |
|
"grad_norm": 149.3887481689453, |
|
"learning_rate": 3.7291039592498265e-05, |
|
"logits/chosen": -13.974499702453613, |
|
"logits/rejected": -12.667045593261719, |
|
"logps/chosen": -295.39923095703125, |
|
"logps/rejected": -316.9970397949219, |
|
"loss": 1.3305, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -14.188154220581055, |
|
"rewards/margins": 2.394373655319214, |
|
"rewards/rejected": -16.5825252532959, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.2593192868719611, |
|
"grad_norm": 98.0199966430664, |
|
"learning_rate": 3.705950451493401e-05, |
|
"logits/chosen": -14.317160606384277, |
|
"logits/rejected": -12.36131477355957, |
|
"logps/chosen": -283.5146179199219, |
|
"logps/rejected": -290.797119140625, |
|
"loss": 1.2772, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -13.607783317565918, |
|
"rewards/margins": 1.6482733488082886, |
|
"rewards/rejected": -15.25605583190918, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.26394998842324613, |
|
"grad_norm": 150.75015258789062, |
|
"learning_rate": 3.682796943736976e-05, |
|
"logits/chosen": -15.891270637512207, |
|
"logits/rejected": -13.70472526550293, |
|
"logps/chosen": -277.7356872558594, |
|
"logps/rejected": -277.91168212890625, |
|
"loss": 1.1193, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -10.948705673217773, |
|
"rewards/margins": 2.005187749862671, |
|
"rewards/rejected": -12.953892707824707, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.26858068997453116, |
|
"grad_norm": 113.30935668945312, |
|
"learning_rate": 3.6596434359805515e-05, |
|
"logits/chosen": -14.558771133422852, |
|
"logits/rejected": -12.814410209655762, |
|
"logps/chosen": -282.6718444824219, |
|
"logps/rejected": -283.1080017089844, |
|
"loss": 1.442, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": -13.232661247253418, |
|
"rewards/margins": 1.6263720989227295, |
|
"rewards/rejected": -14.859031677246094, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.27321139152581614, |
|
"grad_norm": 165.05665588378906, |
|
"learning_rate": 3.636489928224126e-05, |
|
"logits/chosen": -13.373592376708984, |
|
"logits/rejected": -12.674919128417969, |
|
"logps/chosen": -270.6869201660156, |
|
"logps/rejected": -289.0032958984375, |
|
"loss": 1.3157, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -13.579002380371094, |
|
"rewards/margins": 1.3477805852890015, |
|
"rewards/rejected": -14.926780700683594, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.27784209307710117, |
|
"grad_norm": 28.589683532714844, |
|
"learning_rate": 3.613336420467701e-05, |
|
"logits/chosen": -14.952668190002441, |
|
"logits/rejected": -13.118420600891113, |
|
"logps/chosen": -267.6083679199219, |
|
"logps/rejected": -269.6361389160156, |
|
"loss": 0.9529, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": -11.644050598144531, |
|
"rewards/margins": 1.825527310371399, |
|
"rewards/rejected": -13.46957778930664, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.27784209307710117, |
|
"eval_logits/chosen": -16.652177810668945, |
|
"eval_logits/rejected": -14.431635856628418, |
|
"eval_logps/chosen": -268.42169189453125, |
|
"eval_logps/rejected": -274.84332275390625, |
|
"eval_loss": 0.9690461158752441, |
|
"eval_rewards/accuracies": 0.6704166531562805, |
|
"eval_rewards/chosen": -11.476255416870117, |
|
"eval_rewards/margins": 2.0263733863830566, |
|
"eval_rewards/rejected": -13.502629280090332, |
|
"eval_runtime": 595.3843, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2824727946283862, |
|
"grad_norm": 5.406942844390869, |
|
"learning_rate": 3.590182912711276e-05, |
|
"logits/chosen": -16.971216201782227, |
|
"logits/rejected": -14.750784873962402, |
|
"logps/chosen": -265.4952392578125, |
|
"logps/rejected": -268.4944152832031, |
|
"loss": 0.7441, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -12.107220649719238, |
|
"rewards/margins": 2.3485963344573975, |
|
"rewards/rejected": -14.455816268920898, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.28710349617967124, |
|
"grad_norm": 4.571298599243164, |
|
"learning_rate": 3.567029404954851e-05, |
|
"logits/chosen": -15.671191215515137, |
|
"logits/rejected": -14.07499885559082, |
|
"logps/chosen": -264.6024475097656, |
|
"logps/rejected": -272.0396423339844, |
|
"loss": 0.8241, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -9.934189796447754, |
|
"rewards/margins": 2.5992603302001953, |
|
"rewards/rejected": -12.53345012664795, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.2917341977309562, |
|
"grad_norm": 153.76095581054688, |
|
"learning_rate": 3.543875897198426e-05, |
|
"logits/chosen": -16.08559226989746, |
|
"logits/rejected": -13.792272567749023, |
|
"logps/chosen": -255.55722045898438, |
|
"logps/rejected": -261.3247985839844, |
|
"loss": 0.8513, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -9.566289901733398, |
|
"rewards/margins": 2.1109235286712646, |
|
"rewards/rejected": -11.677214622497559, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.29636489928224125, |
|
"grad_norm": 2.1439866031869315e-05, |
|
"learning_rate": 3.520722389442e-05, |
|
"logits/chosen": -14.347354888916016, |
|
"logits/rejected": -12.357396125793457, |
|
"logps/chosen": -293.4591064453125, |
|
"logps/rejected": -306.54376220703125, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.8199999928474426, |
|
"rewards/chosen": -13.621291160583496, |
|
"rewards/margins": 4.165319919586182, |
|
"rewards/rejected": -17.786611557006836, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.3009956008335263, |
|
"grad_norm": 0.0006867141928523779, |
|
"learning_rate": 3.497568881685576e-05, |
|
"logits/chosen": -13.660686492919922, |
|
"logits/rejected": -12.209990501403809, |
|
"logps/chosen": -298.58905029296875, |
|
"logps/rejected": -312.8013000488281, |
|
"loss": 1.2808, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -14.760621070861816, |
|
"rewards/margins": 2.044278860092163, |
|
"rewards/rejected": -16.804899215698242, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3056263023848113, |
|
"grad_norm": 0.06333109736442566, |
|
"learning_rate": 3.4744153739291505e-05, |
|
"logits/chosen": -15.21036148071289, |
|
"logits/rejected": -12.713698387145996, |
|
"logps/chosen": -328.4114990234375, |
|
"logps/rejected": -326.3374328613281, |
|
"loss": 1.0761, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -14.99398422241211, |
|
"rewards/margins": 3.227440118789673, |
|
"rewards/rejected": -18.221426010131836, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.31025700393609634, |
|
"grad_norm": 13.536566734313965, |
|
"learning_rate": 3.451261866172725e-05, |
|
"logits/chosen": -14.803274154663086, |
|
"logits/rejected": -12.432111740112305, |
|
"logps/chosen": -308.0420837402344, |
|
"logps/rejected": -308.2493591308594, |
|
"loss": 1.6725, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -15.077004432678223, |
|
"rewards/margins": 1.685246467590332, |
|
"rewards/rejected": -16.762248992919922, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.3148877054873813, |
|
"grad_norm": 98.5750503540039, |
|
"learning_rate": 3.428108358416301e-05, |
|
"logits/chosen": -16.611717224121094, |
|
"logits/rejected": -14.931644439697266, |
|
"logps/chosen": -242.56851196289062, |
|
"logps/rejected": -245.1646728515625, |
|
"loss": 1.2702, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -9.661548614501953, |
|
"rewards/margins": 1.7692126035690308, |
|
"rewards/rejected": -11.430761337280273, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.31951840703866635, |
|
"grad_norm": 8.07032299041748, |
|
"learning_rate": 3.404954850659875e-05, |
|
"logits/chosen": -15.000127792358398, |
|
"logits/rejected": -12.639293670654297, |
|
"logps/chosen": -314.791259765625, |
|
"logps/rejected": -336.9305725097656, |
|
"loss": 1.106, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -15.519781112670898, |
|
"rewards/margins": 3.476163387298584, |
|
"rewards/rejected": -18.995946884155273, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.3241491085899514, |
|
"grad_norm": 18.31216049194336, |
|
"learning_rate": 3.38180134290345e-05, |
|
"logits/chosen": -15.47805404663086, |
|
"logits/rejected": -13.119900703430176, |
|
"logps/chosen": -250.51080322265625, |
|
"logps/rejected": -276.98236083984375, |
|
"loss": 0.8653, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -11.675602912902832, |
|
"rewards/margins": 3.392669916152954, |
|
"rewards/rejected": -15.068273544311523, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3241491085899514, |
|
"eval_logits/chosen": -16.167076110839844, |
|
"eval_logits/rejected": -13.826526641845703, |
|
"eval_logps/chosen": -262.20208740234375, |
|
"eval_logps/rejected": -274.491455078125, |
|
"eval_loss": 1.0383297204971313, |
|
"eval_rewards/accuracies": 0.6933333277702332, |
|
"eval_rewards/chosen": -10.854294776916504, |
|
"eval_rewards/margins": 2.6131441593170166, |
|
"eval_rewards/rejected": -13.467439651489258, |
|
"eval_runtime": 595.5855, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 4.03, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3287798101412364, |
|
"grad_norm": 8.22741985321045, |
|
"learning_rate": 3.358647835147025e-05, |
|
"logits/chosen": -16.461559295654297, |
|
"logits/rejected": -13.514617919921875, |
|
"logps/chosen": -277.9877014160156, |
|
"logps/rejected": -275.4588928222656, |
|
"loss": 1.0087, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -11.538105010986328, |
|
"rewards/margins": 2.5826308727264404, |
|
"rewards/rejected": -14.120736122131348, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.3334105116925214, |
|
"grad_norm": 53.40534210205078, |
|
"learning_rate": 3.3354943273906e-05, |
|
"logits/chosen": -15.52739143371582, |
|
"logits/rejected": -13.096809387207031, |
|
"logps/chosen": -263.75750732421875, |
|
"logps/rejected": -282.41973876953125, |
|
"loss": 0.9069, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -11.735355377197266, |
|
"rewards/margins": 3.6761889457702637, |
|
"rewards/rejected": -15.411545753479004, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.3380412132438064, |
|
"grad_norm": 3.0102253731456585e-05, |
|
"learning_rate": 3.3123408196341746e-05, |
|
"logits/chosen": -15.369466781616211, |
|
"logits/rejected": -13.38189697265625, |
|
"logps/chosen": -285.2251281738281, |
|
"logps/rejected": -295.077392578125, |
|
"loss": 1.3243, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -13.272722244262695, |
|
"rewards/margins": 1.980672001838684, |
|
"rewards/rejected": -15.253395080566406, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.34267191479509146, |
|
"grad_norm": 28.470182418823242, |
|
"learning_rate": 3.2891873118777494e-05, |
|
"logits/chosen": -15.281991958618164, |
|
"logits/rejected": -13.268410682678223, |
|
"logps/chosen": -269.4937744140625, |
|
"logps/rejected": -277.6639709472656, |
|
"loss": 0.974, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -11.336699485778809, |
|
"rewards/margins": 2.3826072216033936, |
|
"rewards/rejected": -13.719305038452148, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.3473026163463765, |
|
"grad_norm": 0.040093112736940384, |
|
"learning_rate": 3.266033804121324e-05, |
|
"logits/chosen": -15.32481575012207, |
|
"logits/rejected": -13.589777946472168, |
|
"logps/chosen": -289.1875305175781, |
|
"logps/rejected": -314.6361389160156, |
|
"loss": 1.0543, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -12.599404335021973, |
|
"rewards/margins": 3.6570980548858643, |
|
"rewards/rejected": -16.256502151489258, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.35193331789766147, |
|
"grad_norm": 117.9237289428711, |
|
"learning_rate": 3.2428802963649e-05, |
|
"logits/chosen": -13.89665412902832, |
|
"logits/rejected": -12.795659065246582, |
|
"logps/chosen": -273.2151794433594, |
|
"logps/rejected": -273.5435485839844, |
|
"loss": 1.6158, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -12.329755783081055, |
|
"rewards/margins": 1.4938496351242065, |
|
"rewards/rejected": -13.823604583740234, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.3565640194489465, |
|
"grad_norm": 107.03690338134766, |
|
"learning_rate": 3.2197267886084745e-05, |
|
"logits/chosen": -13.768420219421387, |
|
"logits/rejected": -12.064899444580078, |
|
"logps/chosen": -288.6653747558594, |
|
"logps/rejected": -299.8011779785156, |
|
"loss": 0.9657, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -13.14587116241455, |
|
"rewards/margins": 2.6114940643310547, |
|
"rewards/rejected": -15.757366180419922, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.36119472100023153, |
|
"grad_norm": 13.562236785888672, |
|
"learning_rate": 3.196573280852049e-05, |
|
"logits/chosen": -14.771617889404297, |
|
"logits/rejected": -13.586108207702637, |
|
"logps/chosen": -243.8267822265625, |
|
"logps/rejected": -270.68585205078125, |
|
"loss": 1.0483, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -10.424483299255371, |
|
"rewards/margins": 2.4393911361694336, |
|
"rewards/rejected": -12.863875389099121, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.36582542255151657, |
|
"grad_norm": 32.51217269897461, |
|
"learning_rate": 3.173419773095624e-05, |
|
"logits/chosen": -16.06950569152832, |
|
"logits/rejected": -14.431178092956543, |
|
"logps/chosen": -274.8634033203125, |
|
"logps/rejected": -273.82330322265625, |
|
"loss": 1.4736, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -11.861929893493652, |
|
"rewards/margins": 1.6777317523956299, |
|
"rewards/rejected": -13.539660453796387, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.3704561241028016, |
|
"grad_norm": 3.5673348903656006, |
|
"learning_rate": 3.150266265339199e-05, |
|
"logits/chosen": -16.012493133544922, |
|
"logits/rejected": -13.561750411987305, |
|
"logps/chosen": -296.7126770019531, |
|
"logps/rejected": -307.0336608886719, |
|
"loss": 0.7154, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -11.784126281738281, |
|
"rewards/margins": 3.737394094467163, |
|
"rewards/rejected": -15.521519660949707, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3704561241028016, |
|
"eval_logits/chosen": -15.530220031738281, |
|
"eval_logits/rejected": -13.350693702697754, |
|
"eval_logps/chosen": -271.8744812011719, |
|
"eval_logps/rejected": -283.94451904296875, |
|
"eval_loss": 1.0585798025131226, |
|
"eval_rewards/accuracies": 0.6854166388511658, |
|
"eval_rewards/chosen": -11.821531295776367, |
|
"eval_rewards/margins": 2.59121036529541, |
|
"eval_rewards/rejected": -14.412742614746094, |
|
"eval_runtime": 595.2814, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 4.032, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.3750868256540866, |
|
"grad_norm": 3.183262825012207, |
|
"learning_rate": 3.127112757582774e-05, |
|
"logits/chosen": -16.365947723388672, |
|
"logits/rejected": -13.736886024475098, |
|
"logps/chosen": -274.3581237792969, |
|
"logps/rejected": -268.3993225097656, |
|
"loss": 1.0855, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -11.214922904968262, |
|
"rewards/margins": 2.280700445175171, |
|
"rewards/rejected": -13.495623588562012, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.3797175272053716, |
|
"grad_norm": 131.38169860839844, |
|
"learning_rate": 3.1039592498263484e-05, |
|
"logits/chosen": -15.709159851074219, |
|
"logits/rejected": -12.860796928405762, |
|
"logps/chosen": -279.14532470703125, |
|
"logps/rejected": -292.4508361816406, |
|
"loss": 1.0698, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -12.19998550415039, |
|
"rewards/margins": 3.0295968055725098, |
|
"rewards/rejected": -15.229582786560059, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.38434822875665664, |
|
"grad_norm": 5.40769624710083, |
|
"learning_rate": 3.080805742069924e-05, |
|
"logits/chosen": -15.032764434814453, |
|
"logits/rejected": -12.852176666259766, |
|
"logps/chosen": -275.3847351074219, |
|
"logps/rejected": -290.7295227050781, |
|
"loss": 0.8273, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.424884796142578, |
|
"rewards/margins": 3.084066390991211, |
|
"rewards/rejected": -15.508951187133789, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.3889789303079417, |
|
"grad_norm": 68.88822937011719, |
|
"learning_rate": 3.0576522343134986e-05, |
|
"logits/chosen": -14.54586410522461, |
|
"logits/rejected": -11.940185546875, |
|
"logps/chosen": -265.1244201660156, |
|
"logps/rejected": -278.01776123046875, |
|
"loss": 1.115, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -11.760342597961426, |
|
"rewards/margins": 2.771430253982544, |
|
"rewards/rejected": -14.531773567199707, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.39360963185922665, |
|
"grad_norm": 0.03446720167994499, |
|
"learning_rate": 3.0344987265570734e-05, |
|
"logits/chosen": -15.201501846313477, |
|
"logits/rejected": -12.549232482910156, |
|
"logps/chosen": -283.1265869140625, |
|
"logps/rejected": -287.8479309082031, |
|
"loss": 1.0616, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -12.81374740600586, |
|
"rewards/margins": 2.7665340900421143, |
|
"rewards/rejected": -15.580282211303711, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3982403334105117, |
|
"grad_norm": 161.14781188964844, |
|
"learning_rate": 3.0113452188006485e-05, |
|
"logits/chosen": -15.855225563049316, |
|
"logits/rejected": -13.235614776611328, |
|
"logps/chosen": -269.371337890625, |
|
"logps/rejected": -254.53370666503906, |
|
"loss": 1.5923, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -11.234640121459961, |
|
"rewards/margins": 2.052427291870117, |
|
"rewards/rejected": -13.287066459655762, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.4028710349617967, |
|
"grad_norm": 0.12227249890565872, |
|
"learning_rate": 2.988191711044223e-05, |
|
"logits/chosen": -13.875014305114746, |
|
"logits/rejected": -11.776965141296387, |
|
"logps/chosen": -294.89019775390625, |
|
"logps/rejected": -304.0113220214844, |
|
"loss": 1.0747, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -13.667197227478027, |
|
"rewards/margins": 2.635390043258667, |
|
"rewards/rejected": -16.302587509155273, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.40750173651308175, |
|
"grad_norm": 18.97279930114746, |
|
"learning_rate": 2.965038203287798e-05, |
|
"logits/chosen": -13.133578300476074, |
|
"logits/rejected": -11.885417938232422, |
|
"logps/chosen": -343.5578918457031, |
|
"logps/rejected": -364.3144836425781, |
|
"loss": 1.1518, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -18.133079528808594, |
|
"rewards/margins": 2.30412220954895, |
|
"rewards/rejected": -20.43720245361328, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.4121324380643667, |
|
"grad_norm": 1.0884398221969604, |
|
"learning_rate": 2.9418846955313732e-05, |
|
"logits/chosen": -12.621952056884766, |
|
"logits/rejected": -10.378700256347656, |
|
"logps/chosen": -284.67498779296875, |
|
"logps/rejected": -298.4495849609375, |
|
"loss": 0.9243, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -13.914322853088379, |
|
"rewards/margins": 2.796579360961914, |
|
"rewards/rejected": -16.710901260375977, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.41676313961565176, |
|
"grad_norm": 83.71758270263672, |
|
"learning_rate": 2.918731187774948e-05, |
|
"logits/chosen": -13.866020202636719, |
|
"logits/rejected": -11.602230072021484, |
|
"logps/chosen": -294.79998779296875, |
|
"logps/rejected": -295.6817626953125, |
|
"loss": 1.2985, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -13.578290939331055, |
|
"rewards/margins": 2.169534206390381, |
|
"rewards/rejected": -15.747827529907227, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.41676313961565176, |
|
"eval_logits/chosen": -14.788281440734863, |
|
"eval_logits/rejected": -12.487042427062988, |
|
"eval_logps/chosen": -283.36846923828125, |
|
"eval_logps/rejected": -294.3752136230469, |
|
"eval_loss": 1.0318500995635986, |
|
"eval_rewards/accuracies": 0.6791666746139526, |
|
"eval_rewards/chosen": -12.970930099487305, |
|
"eval_rewards/margins": 2.4848814010620117, |
|
"eval_rewards/rejected": -15.455812454223633, |
|
"eval_runtime": 595.2306, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 4.032, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.4213938411669368, |
|
"grad_norm": 0.0032111050095409155, |
|
"learning_rate": 2.895577680018523e-05, |
|
"logits/chosen": -12.746328353881836, |
|
"logits/rejected": -11.675999641418457, |
|
"logps/chosen": -278.3330993652344, |
|
"logps/rejected": -295.795166015625, |
|
"loss": 1.163, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -13.291616439819336, |
|
"rewards/margins": 1.9494922161102295, |
|
"rewards/rejected": -15.241106986999512, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.4260245427182218, |
|
"grad_norm": 6.636563777923584, |
|
"learning_rate": 2.8724241722620976e-05, |
|
"logits/chosen": -13.147544860839844, |
|
"logits/rejected": -10.731348037719727, |
|
"logps/chosen": -280.5777282714844, |
|
"logps/rejected": -294.8570556640625, |
|
"loss": 0.9186, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -13.029483795166016, |
|
"rewards/margins": 2.9215362071990967, |
|
"rewards/rejected": -15.951019287109375, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.43065524426950685, |
|
"grad_norm": 40.27287673950195, |
|
"learning_rate": 2.8492706645056727e-05, |
|
"logits/chosen": -14.783395767211914, |
|
"logits/rejected": -12.629622459411621, |
|
"logps/chosen": -288.9443054199219, |
|
"logps/rejected": -288.2258605957031, |
|
"loss": 1.5318, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -12.149921417236328, |
|
"rewards/margins": 1.6422406435012817, |
|
"rewards/rejected": -13.792163848876953, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.43528594582079183, |
|
"grad_norm": 19.758710861206055, |
|
"learning_rate": 2.8261171567492478e-05, |
|
"logits/chosen": -15.180052757263184, |
|
"logits/rejected": -12.836910247802734, |
|
"logps/chosen": -277.3511657714844, |
|
"logps/rejected": -276.3822326660156, |
|
"loss": 0.8826, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.489418983459473, |
|
"rewards/margins": 2.5768609046936035, |
|
"rewards/rejected": -14.066280364990234, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.43991664737207686, |
|
"grad_norm": 78.22869110107422, |
|
"learning_rate": 2.8029636489928223e-05, |
|
"logits/chosen": -14.848343849182129, |
|
"logits/rejected": -12.628809928894043, |
|
"logps/chosen": -265.226318359375, |
|
"logps/rejected": -287.7568054199219, |
|
"loss": 0.8833, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.058309555053711, |
|
"rewards/margins": 3.1272084712982178, |
|
"rewards/rejected": -15.185519218444824, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.4445473489233619, |
|
"grad_norm": 11.517510414123535, |
|
"learning_rate": 2.7798101412363974e-05, |
|
"logits/chosen": -14.113899230957031, |
|
"logits/rejected": -13.288124084472656, |
|
"logps/chosen": -267.1159362792969, |
|
"logps/rejected": -288.9775695800781, |
|
"loss": 1.2618, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -13.201350212097168, |
|
"rewards/margins": 2.0197267532348633, |
|
"rewards/rejected": -15.221077919006348, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.44917805047464693, |
|
"grad_norm": 0.0021336909849196672, |
|
"learning_rate": 2.7566566334799725e-05, |
|
"logits/chosen": -14.831360816955566, |
|
"logits/rejected": -12.230374336242676, |
|
"logps/chosen": -287.16168212890625, |
|
"logps/rejected": -288.4497375488281, |
|
"loss": 0.9279, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.792760848999023, |
|
"rewards/margins": 2.720005989074707, |
|
"rewards/rejected": -15.512765884399414, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.4538087520259319, |
|
"grad_norm": 107.86576080322266, |
|
"learning_rate": 2.7335031257235473e-05, |
|
"logits/chosen": -14.760610580444336, |
|
"logits/rejected": -12.295888900756836, |
|
"logps/chosen": -286.934814453125, |
|
"logps/rejected": -295.39581298828125, |
|
"loss": 0.9565, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.014842987060547, |
|
"rewards/margins": 3.125756025314331, |
|
"rewards/rejected": -15.140597343444824, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.45843945357721694, |
|
"grad_norm": 33.82047653198242, |
|
"learning_rate": 2.7103496179671224e-05, |
|
"logits/chosen": -15.7147798538208, |
|
"logits/rejected": -13.075119972229004, |
|
"logps/chosen": -277.96746826171875, |
|
"logps/rejected": -290.1826477050781, |
|
"loss": 0.7664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.504081726074219, |
|
"rewards/margins": 3.1116833686828613, |
|
"rewards/rejected": -14.615765571594238, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.46307015512850197, |
|
"grad_norm": 36.2800407409668, |
|
"learning_rate": 2.687196110210697e-05, |
|
"logits/chosen": -15.890725135803223, |
|
"logits/rejected": -14.088423728942871, |
|
"logps/chosen": -265.2686462402344, |
|
"logps/rejected": -274.26129150390625, |
|
"loss": 1.115, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -11.545877456665039, |
|
"rewards/margins": 2.241971015930176, |
|
"rewards/rejected": -13.787849426269531, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.46307015512850197, |
|
"eval_logits/chosen": -16.213720321655273, |
|
"eval_logits/rejected": -13.893430709838867, |
|
"eval_logps/chosen": -266.6280517578125, |
|
"eval_logps/rejected": -277.2411193847656, |
|
"eval_loss": 0.9608184695243835, |
|
"eval_rewards/accuracies": 0.6929166913032532, |
|
"eval_rewards/chosen": -11.296893119812012, |
|
"eval_rewards/margins": 2.4455130100250244, |
|
"eval_rewards/rejected": -13.742403984069824, |
|
"eval_runtime": 595.4169, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.467700856679787, |
|
"grad_norm": 19.66727066040039, |
|
"learning_rate": 2.664042602454272e-05, |
|
"logits/chosen": -15.256206512451172, |
|
"logits/rejected": -13.022037506103516, |
|
"logps/chosen": -262.89752197265625, |
|
"logps/rejected": -279.8008117675781, |
|
"loss": 1.2362, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -12.055992126464844, |
|
"rewards/margins": 2.9693291187286377, |
|
"rewards/rejected": -15.025321960449219, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.47233155823107204, |
|
"grad_norm": 117.48945617675781, |
|
"learning_rate": 2.640889094697847e-05, |
|
"logits/chosen": -15.561003684997559, |
|
"logits/rejected": -13.633736610412598, |
|
"logps/chosen": -261.46624755859375, |
|
"logps/rejected": -266.1573181152344, |
|
"loss": 1.265, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.366207122802734, |
|
"rewards/margins": 2.0377037525177, |
|
"rewards/rejected": -14.403912544250488, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.476962259782357, |
|
"grad_norm": 94.04891204833984, |
|
"learning_rate": 2.6177355869414215e-05, |
|
"logits/chosen": -14.99539566040039, |
|
"logits/rejected": -13.923880577087402, |
|
"logps/chosen": -283.6234436035156, |
|
"logps/rejected": -304.1859130859375, |
|
"loss": 1.0018, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -11.769340515136719, |
|
"rewards/margins": 2.5206191539764404, |
|
"rewards/rejected": -14.289960861206055, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.48159296133364204, |
|
"grad_norm": 170.051513671875, |
|
"learning_rate": 2.5945820791849967e-05, |
|
"logits/chosen": -14.865379333496094, |
|
"logits/rejected": -13.41491413116455, |
|
"logps/chosen": -291.048828125, |
|
"logps/rejected": -303.6175842285156, |
|
"loss": 1.5362, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -13.860764503479004, |
|
"rewards/margins": 2.151341676712036, |
|
"rewards/rejected": -16.012107849121094, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.4862236628849271, |
|
"grad_norm": 185.730712890625, |
|
"learning_rate": 2.5714285714285714e-05, |
|
"logits/chosen": -16.53899383544922, |
|
"logits/rejected": -14.344609260559082, |
|
"logps/chosen": -253.43275451660156, |
|
"logps/rejected": -259.32000732421875, |
|
"loss": 0.8832, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -10.720047950744629, |
|
"rewards/margins": 2.231088876724243, |
|
"rewards/rejected": -12.95113754272461, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.4908543644362121, |
|
"grad_norm": 49.539283752441406, |
|
"learning_rate": 2.5482750636721466e-05, |
|
"logits/chosen": -15.784833908081055, |
|
"logits/rejected": -13.199982643127441, |
|
"logps/chosen": -258.3516845703125, |
|
"logps/rejected": -272.4159851074219, |
|
"loss": 0.882, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -10.969993591308594, |
|
"rewards/margins": 3.263814687728882, |
|
"rewards/rejected": -14.233808517456055, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.4954850659874971, |
|
"grad_norm": 39.54684829711914, |
|
"learning_rate": 2.5251215559157217e-05, |
|
"logits/chosen": -12.431325912475586, |
|
"logits/rejected": -10.826537132263184, |
|
"logps/chosen": -282.0684814453125, |
|
"logps/rejected": -307.7406921386719, |
|
"loss": 0.834, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -13.165081977844238, |
|
"rewards/margins": 3.1749138832092285, |
|
"rewards/rejected": -16.339994430541992, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.5001157675387822, |
|
"grad_norm": 56.96760940551758, |
|
"learning_rate": 2.501968048159296e-05, |
|
"logits/chosen": -11.492157936096191, |
|
"logits/rejected": -10.02319049835205, |
|
"logps/chosen": -316.1195068359375, |
|
"logps/rejected": -327.64093017578125, |
|
"loss": 1.2565, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -16.742006301879883, |
|
"rewards/margins": 2.2133688926696777, |
|
"rewards/rejected": -18.95537567138672, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.5047464690900672, |
|
"grad_norm": 18.938976287841797, |
|
"learning_rate": 2.4788145404028713e-05, |
|
"logits/chosen": -13.359079360961914, |
|
"logits/rejected": -11.073596954345703, |
|
"logps/chosen": -300.6905212402344, |
|
"logps/rejected": -313.9961853027344, |
|
"loss": 0.9312, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -15.300127029418945, |
|
"rewards/margins": 3.4490301609039307, |
|
"rewards/rejected": -18.749156951904297, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.5093771706413521, |
|
"grad_norm": 114.1439208984375, |
|
"learning_rate": 2.455661032646446e-05, |
|
"logits/chosen": -12.819533348083496, |
|
"logits/rejected": -11.759612083435059, |
|
"logps/chosen": -281.001708984375, |
|
"logps/rejected": -290.2948303222656, |
|
"loss": 1.247, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -12.776802062988281, |
|
"rewards/margins": 2.128657817840576, |
|
"rewards/rejected": -14.9054594039917, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.5093771706413521, |
|
"eval_logits/chosen": -14.590534210205078, |
|
"eval_logits/rejected": -12.480382919311523, |
|
"eval_logps/chosen": -255.2227020263672, |
|
"eval_logps/rejected": -267.7187194824219, |
|
"eval_loss": 0.9643799066543579, |
|
"eval_rewards/accuracies": 0.6891666650772095, |
|
"eval_rewards/chosen": -10.15635871887207, |
|
"eval_rewards/margins": 2.6338088512420654, |
|
"eval_rewards/rejected": -12.790166854858398, |
|
"eval_runtime": 595.3576, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.5140078721926372, |
|
"grad_norm": 8.612520217895508, |
|
"learning_rate": 2.4325075248900208e-05, |
|
"logits/chosen": -14.783145904541016, |
|
"logits/rejected": -12.533705711364746, |
|
"logps/chosen": -256.94061279296875, |
|
"logps/rejected": -258.1162414550781, |
|
"loss": 1.0966, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -9.744736671447754, |
|
"rewards/margins": 2.184854030609131, |
|
"rewards/rejected": -11.92959213256836, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.5186385737439222, |
|
"grad_norm": 0.05390896648168564, |
|
"learning_rate": 2.409354017133596e-05, |
|
"logits/chosen": -13.70276165008545, |
|
"logits/rejected": -11.86025619506836, |
|
"logps/chosen": -299.79901123046875, |
|
"logps/rejected": -312.7564392089844, |
|
"loss": 1.143, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -14.02008056640625, |
|
"rewards/margins": 3.1246557235717773, |
|
"rewards/rejected": -17.144737243652344, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.5232692752952072, |
|
"grad_norm": 9.781183242797852, |
|
"learning_rate": 2.3862005093771707e-05, |
|
"logits/chosen": -13.585820198059082, |
|
"logits/rejected": -11.637076377868652, |
|
"logps/chosen": -319.637451171875, |
|
"logps/rejected": -331.197509765625, |
|
"loss": 1.268, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -16.022397994995117, |
|
"rewards/margins": 2.7055411338806152, |
|
"rewards/rejected": -18.72793960571289, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.5278999768464923, |
|
"grad_norm": 0.5222776532173157, |
|
"learning_rate": 2.3630470016207455e-05, |
|
"logits/chosen": -15.22104263305664, |
|
"logits/rejected": -13.836353302001953, |
|
"logps/chosen": -299.341552734375, |
|
"logps/rejected": -308.09967041015625, |
|
"loss": 1.5426, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -14.346240043640137, |
|
"rewards/margins": 2.032839775085449, |
|
"rewards/rejected": -16.37908172607422, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.5325306783977772, |
|
"grad_norm": 8.653938293457031, |
|
"learning_rate": 2.3398934938643206e-05, |
|
"logits/chosen": -15.450213432312012, |
|
"logits/rejected": -13.667362213134766, |
|
"logps/chosen": -278.2622375488281, |
|
"logps/rejected": -309.5611572265625, |
|
"loss": 1.1489, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -12.77297592163086, |
|
"rewards/margins": 3.425985813140869, |
|
"rewards/rejected": -16.19896125793457, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.5371613799490623, |
|
"grad_norm": 0.0004895396414212883, |
|
"learning_rate": 2.3167399861078954e-05, |
|
"logits/chosen": -13.779914855957031, |
|
"logits/rejected": -12.196557998657227, |
|
"logps/chosen": -301.4092102050781, |
|
"logps/rejected": -328.33807373046875, |
|
"loss": 0.9485, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -15.266792297363281, |
|
"rewards/margins": 3.053185224533081, |
|
"rewards/rejected": -18.319976806640625, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.5417920815003473, |
|
"grad_norm": 9.483084678649902, |
|
"learning_rate": 2.2935864783514705e-05, |
|
"logits/chosen": -13.6648588180542, |
|
"logits/rejected": -12.20712661743164, |
|
"logps/chosen": -316.0234069824219, |
|
"logps/rejected": -321.4068298339844, |
|
"loss": 1.0788, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -15.333993911743164, |
|
"rewards/margins": 3.2509260177612305, |
|
"rewards/rejected": -18.584922790527344, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.5464227830516323, |
|
"grad_norm": 0.00022494388394989073, |
|
"learning_rate": 2.2704329705950453e-05, |
|
"logits/chosen": -13.692609786987305, |
|
"logits/rejected": -11.759315490722656, |
|
"logps/chosen": -280.59149169921875, |
|
"logps/rejected": -304.33343505859375, |
|
"loss": 1.2103, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -13.981989860534668, |
|
"rewards/margins": 3.3563461303710938, |
|
"rewards/rejected": -17.338335037231445, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.5510534846029174, |
|
"grad_norm": 209.61117553710938, |
|
"learning_rate": 2.24727946283862e-05, |
|
"logits/chosen": -13.98275375366211, |
|
"logits/rejected": -12.51055908203125, |
|
"logps/chosen": -289.4585266113281, |
|
"logps/rejected": -311.2178955078125, |
|
"loss": 0.8208, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -13.634072303771973, |
|
"rewards/margins": 2.5867316722869873, |
|
"rewards/rejected": -16.220806121826172, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.5556841861542023, |
|
"grad_norm": 8.679315567016602, |
|
"learning_rate": 2.224125955082195e-05, |
|
"logits/chosen": -15.948921203613281, |
|
"logits/rejected": -13.961400985717773, |
|
"logps/chosen": -239.7647247314453, |
|
"logps/rejected": -265.7496032714844, |
|
"loss": 0.8552, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -9.785449028015137, |
|
"rewards/margins": 3.496326208114624, |
|
"rewards/rejected": -13.281774520874023, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.5556841861542023, |
|
"eval_logits/chosen": -16.915624618530273, |
|
"eval_logits/rejected": -14.572998046875, |
|
"eval_logps/chosen": -249.591064453125, |
|
"eval_logps/rejected": -262.7091064453125, |
|
"eval_loss": 0.9432744383811951, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -9.593189239501953, |
|
"eval_rewards/margins": 2.6960136890411377, |
|
"eval_rewards/rejected": -12.289203643798828, |
|
"eval_runtime": 595.3295, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.5603148877054874, |
|
"grad_norm": 0.0012331042671576142, |
|
"learning_rate": 2.20097244732577e-05, |
|
"logits/chosen": -16.262012481689453, |
|
"logits/rejected": -13.913456916809082, |
|
"logps/chosen": -255.302734375, |
|
"logps/rejected": -258.9942626953125, |
|
"loss": 0.735, |
|
"rewards/accuracies": 0.7599999904632568, |
|
"rewards/chosen": -10.057559967041016, |
|
"rewards/margins": 2.793912410736084, |
|
"rewards/rejected": -12.851473808288574, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.5649455892567724, |
|
"grad_norm": 2.753217631834559e-05, |
|
"learning_rate": 2.1778189395693448e-05, |
|
"logits/chosen": -14.808769226074219, |
|
"logits/rejected": -13.047616958618164, |
|
"logps/chosen": -283.38848876953125, |
|
"logps/rejected": -289.1595764160156, |
|
"loss": 0.9373, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -12.686135292053223, |
|
"rewards/margins": 2.9538357257843018, |
|
"rewards/rejected": -15.639970779418945, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.5695762908080574, |
|
"grad_norm": 104.78581237792969, |
|
"learning_rate": 2.1546654318129196e-05, |
|
"logits/chosen": -16.803150177001953, |
|
"logits/rejected": -13.628451347351074, |
|
"logps/chosen": -308.3251037597656, |
|
"logps/rejected": -310.8792419433594, |
|
"loss": 1.1489, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -13.29080581665039, |
|
"rewards/margins": 3.165923833847046, |
|
"rewards/rejected": -16.45673179626465, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.5742069923593425, |
|
"grad_norm": 150.4987030029297, |
|
"learning_rate": 2.1315119240564947e-05, |
|
"logits/chosen": -17.27256965637207, |
|
"logits/rejected": -15.5678129196167, |
|
"logps/chosen": -275.7103271484375, |
|
"logps/rejected": -295.6195983886719, |
|
"loss": 1.068, |
|
"rewards/accuracies": 0.6600000262260437, |
|
"rewards/chosen": -12.30087661743164, |
|
"rewards/margins": 2.3663229942321777, |
|
"rewards/rejected": -14.667201042175293, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.5788376939106274, |
|
"grad_norm": 133.8286590576172, |
|
"learning_rate": 2.1083584163000698e-05, |
|
"logits/chosen": -16.757888793945312, |
|
"logits/rejected": -14.104558944702148, |
|
"logps/chosen": -300.20166015625, |
|
"logps/rejected": -296.3144836425781, |
|
"loss": 1.1399, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -13.082332611083984, |
|
"rewards/margins": 2.939077377319336, |
|
"rewards/rejected": -16.021408081054688, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.5834683954619124, |
|
"grad_norm": 0.008064119145274162, |
|
"learning_rate": 2.0852049085436446e-05, |
|
"logits/chosen": -16.91384506225586, |
|
"logits/rejected": -14.279358863830566, |
|
"logps/chosen": -287.9043273925781, |
|
"logps/rejected": -298.12799072265625, |
|
"loss": 1.1943, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -11.913217544555664, |
|
"rewards/margins": 3.4037926197052, |
|
"rewards/rejected": -15.317008972167969, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.5880990970131975, |
|
"grad_norm": 111.3694076538086, |
|
"learning_rate": 2.0620514007872194e-05, |
|
"logits/chosen": -15.357534408569336, |
|
"logits/rejected": -13.22323226928711, |
|
"logps/chosen": -286.35333251953125, |
|
"logps/rejected": -292.608642578125, |
|
"loss": 0.8879, |
|
"rewards/accuracies": 0.7599999904632568, |
|
"rewards/chosen": -13.575251579284668, |
|
"rewards/margins": 3.244809627532959, |
|
"rewards/rejected": -16.8200626373291, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.5927297985644825, |
|
"grad_norm": 0.017219742760062218, |
|
"learning_rate": 2.0388978930307942e-05, |
|
"logits/chosen": -17.543432235717773, |
|
"logits/rejected": -14.663466453552246, |
|
"logps/chosen": -289.28436279296875, |
|
"logps/rejected": -293.8553771972656, |
|
"loss": 1.1143, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -12.925048828125, |
|
"rewards/margins": 3.078773260116577, |
|
"rewards/rejected": -16.003822326660156, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.5973605001157676, |
|
"grad_norm": 2.288099765777588, |
|
"learning_rate": 2.015744385274369e-05, |
|
"logits/chosen": -16.486202239990234, |
|
"logits/rejected": -13.849590301513672, |
|
"logps/chosen": -280.4773864746094, |
|
"logps/rejected": -303.1964111328125, |
|
"loss": 0.9143, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -11.8477144241333, |
|
"rewards/margins": 3.704777717590332, |
|
"rewards/rejected": -15.55249309539795, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.6019912016670526, |
|
"grad_norm": 0.00016069311823230237, |
|
"learning_rate": 1.992590877517944e-05, |
|
"logits/chosen": -15.689356803894043, |
|
"logits/rejected": -13.36341667175293, |
|
"logps/chosen": -289.8409729003906, |
|
"logps/rejected": -305.40447998046875, |
|
"loss": 1.0207, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -13.258853912353516, |
|
"rewards/margins": 3.5280921459198, |
|
"rewards/rejected": -16.786945343017578, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.6019912016670526, |
|
"eval_logits/chosen": -14.765463829040527, |
|
"eval_logits/rejected": -12.787005424499512, |
|
"eval_logps/chosen": -291.5428466796875, |
|
"eval_logps/rejected": -308.170654296875, |
|
"eval_loss": 1.0707193613052368, |
|
"eval_rewards/accuracies": 0.6933333277702332, |
|
"eval_rewards/chosen": -13.788372039794922, |
|
"eval_rewards/margins": 3.046985626220703, |
|
"eval_rewards/rejected": -16.835355758666992, |
|
"eval_runtime": 595.1873, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 4.032, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.6066219032183375, |
|
"grad_norm": 0.046535275876522064, |
|
"learning_rate": 1.969437369761519e-05, |
|
"logits/chosen": -15.378928184509277, |
|
"logits/rejected": -12.789362907409668, |
|
"logps/chosen": -309.1998291015625, |
|
"logps/rejected": -323.6488037109375, |
|
"loss": 0.9008, |
|
"rewards/accuracies": 0.7699999809265137, |
|
"rewards/chosen": -13.331801414489746, |
|
"rewards/margins": 3.8759450912475586, |
|
"rewards/rejected": -17.207748413085938, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.6112526047696226, |
|
"grad_norm": 1.3174071311950684, |
|
"learning_rate": 1.946283862005094e-05, |
|
"logits/chosen": -14.426813125610352, |
|
"logits/rejected": -12.497895240783691, |
|
"logps/chosen": -283.8373718261719, |
|
"logps/rejected": -290.7317199707031, |
|
"loss": 1.187, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -12.359267234802246, |
|
"rewards/margins": 3.016118049621582, |
|
"rewards/rejected": -15.375384330749512, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.6158833063209076, |
|
"grad_norm": 100.655517578125, |
|
"learning_rate": 1.9231303542486688e-05, |
|
"logits/chosen": -15.149957656860352, |
|
"logits/rejected": -14.034040451049805, |
|
"logps/chosen": -258.75225830078125, |
|
"logps/rejected": -281.8547668457031, |
|
"loss": 1.4425, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -10.893335342407227, |
|
"rewards/margins": 2.794825553894043, |
|
"rewards/rejected": -13.688159942626953, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.6205140078721927, |
|
"grad_norm": 15.705586433410645, |
|
"learning_rate": 1.899976846492244e-05, |
|
"logits/chosen": -15.395785331726074, |
|
"logits/rejected": -13.503198623657227, |
|
"logps/chosen": -251.60552978515625, |
|
"logps/rejected": -264.2630920410156, |
|
"loss": 1.0821, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -9.937586784362793, |
|
"rewards/margins": 2.0965304374694824, |
|
"rewards/rejected": -12.034117698669434, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.6251447094234777, |
|
"grad_norm": 69.60533905029297, |
|
"learning_rate": 1.8768233387358187e-05, |
|
"logits/chosen": -16.102800369262695, |
|
"logits/rejected": -13.624198913574219, |
|
"logps/chosen": -270.32696533203125, |
|
"logps/rejected": -277.168701171875, |
|
"loss": 1.3105, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -10.82644271850586, |
|
"rewards/margins": 2.253209352493286, |
|
"rewards/rejected": -13.079649925231934, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.6297754109747626, |
|
"grad_norm": 1.8112232282874174e-05, |
|
"learning_rate": 1.8536698309793935e-05, |
|
"logits/chosen": -15.864636421203613, |
|
"logits/rejected": -13.777695655822754, |
|
"logps/chosen": -267.03643798828125, |
|
"logps/rejected": -290.5374755859375, |
|
"loss": 0.8846, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -11.370699882507324, |
|
"rewards/margins": 2.5179872512817383, |
|
"rewards/rejected": -13.888686180114746, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.6344061125260477, |
|
"grad_norm": 3.8680782318115234, |
|
"learning_rate": 1.8305163232229682e-05, |
|
"logits/chosen": -14.683868408203125, |
|
"logits/rejected": -13.11482048034668, |
|
"logps/chosen": -297.38006591796875, |
|
"logps/rejected": -313.8203125, |
|
"loss": 1.0431, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -14.36437702178955, |
|
"rewards/margins": 2.6509974002838135, |
|
"rewards/rejected": -17.01537322998047, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.6390368140773327, |
|
"grad_norm": 8.920862197875977, |
|
"learning_rate": 1.8073628154665434e-05, |
|
"logits/chosen": -14.917624473571777, |
|
"logits/rejected": -13.442770957946777, |
|
"logps/chosen": -280.73907470703125, |
|
"logps/rejected": -293.82025146484375, |
|
"loss": 1.152, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.922651290893555, |
|
"rewards/margins": 2.584465265274048, |
|
"rewards/rejected": -15.507116317749023, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.6436675156286177, |
|
"grad_norm": 0.016613028943538666, |
|
"learning_rate": 1.784209307710118e-05, |
|
"logits/chosen": -14.548389434814453, |
|
"logits/rejected": -12.599615097045898, |
|
"logps/chosen": -251.54917907714844, |
|
"logps/rejected": -274.4599914550781, |
|
"loss": 1.1302, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -11.875404357910156, |
|
"rewards/margins": 2.626995801925659, |
|
"rewards/rejected": -14.502399444580078, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.6482982171799028, |
|
"grad_norm": 1.4019454717636108, |
|
"learning_rate": 1.761055799953693e-05, |
|
"logits/chosen": -15.579767227172852, |
|
"logits/rejected": -13.13071060180664, |
|
"logps/chosen": -302.3855895996094, |
|
"logps/rejected": -302.752197265625, |
|
"loss": 1.1515, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -12.90218734741211, |
|
"rewards/margins": 2.74299693107605, |
|
"rewards/rejected": -15.645181655883789, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.6482982171799028, |
|
"eval_logits/chosen": -14.666524887084961, |
|
"eval_logits/rejected": -12.669315338134766, |
|
"eval_logps/chosen": -285.93170166015625, |
|
"eval_logps/rejected": -303.45745849609375, |
|
"eval_loss": 0.9831567406654358, |
|
"eval_rewards/accuracies": 0.7083333134651184, |
|
"eval_rewards/chosen": -13.227254867553711, |
|
"eval_rewards/margins": 3.13678240776062, |
|
"eval_rewards/rejected": -16.36404037475586, |
|
"eval_runtime": 595.2283, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 4.032, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.6529289187311877, |
|
"grad_norm": 0.014233123511075974, |
|
"learning_rate": 1.737902292197268e-05, |
|
"logits/chosen": -14.30229377746582, |
|
"logits/rejected": -12.721465110778809, |
|
"logps/chosen": -298.66680908203125, |
|
"logps/rejected": -312.6944274902344, |
|
"loss": 1.4831, |
|
"rewards/accuracies": 0.6200000047683716, |
|
"rewards/chosen": -14.223825454711914, |
|
"rewards/margins": 1.9816741943359375, |
|
"rewards/rejected": -16.20549964904785, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.6575596202824728, |
|
"grad_norm": 13.379115104675293, |
|
"learning_rate": 1.714748784440843e-05, |
|
"logits/chosen": -12.73404312133789, |
|
"logits/rejected": -12.431059837341309, |
|
"logps/chosen": -303.9568786621094, |
|
"logps/rejected": -315.571533203125, |
|
"loss": 1.5352, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -15.876431465148926, |
|
"rewards/margins": 1.6310337781906128, |
|
"rewards/rejected": -17.507463455200195, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.6621903218337578, |
|
"grad_norm": 161.47584533691406, |
|
"learning_rate": 1.691595276684418e-05, |
|
"logits/chosen": -15.389359474182129, |
|
"logits/rejected": -12.315173149108887, |
|
"logps/chosen": -290.67950439453125, |
|
"logps/rejected": -285.4556579589844, |
|
"loss": 0.9067, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -12.797494888305664, |
|
"rewards/margins": 3.7715113162994385, |
|
"rewards/rejected": -16.569007873535156, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.6668210233850428, |
|
"grad_norm": 46.53482437133789, |
|
"learning_rate": 1.6684417689279927e-05, |
|
"logits/chosen": -14.485678672790527, |
|
"logits/rejected": -12.454192161560059, |
|
"logps/chosen": -311.9996032714844, |
|
"logps/rejected": -307.30157470703125, |
|
"loss": 1.1361, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -14.843450546264648, |
|
"rewards/margins": 2.6856822967529297, |
|
"rewards/rejected": -17.529132843017578, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.6714517249363279, |
|
"grad_norm": 26.74111557006836, |
|
"learning_rate": 1.6452882611715675e-05, |
|
"logits/chosen": -15.238399505615234, |
|
"logits/rejected": -13.172194480895996, |
|
"logps/chosen": -277.8358459472656, |
|
"logps/rejected": -284.4881286621094, |
|
"loss": 0.929, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -12.725804328918457, |
|
"rewards/margins": 2.9910831451416016, |
|
"rewards/rejected": -15.71688461303711, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.6760824264876129, |
|
"grad_norm": 11.716771125793457, |
|
"learning_rate": 1.6221347534151423e-05, |
|
"logits/chosen": -14.936090469360352, |
|
"logits/rejected": -13.450495719909668, |
|
"logps/chosen": -270.56854248046875, |
|
"logps/rejected": -291.61248779296875, |
|
"loss": 1.0162, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -12.403809547424316, |
|
"rewards/margins": 2.691650390625, |
|
"rewards/rejected": -15.09546184539795, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.6807131280388979, |
|
"grad_norm": 36.412960052490234, |
|
"learning_rate": 1.5989812456587174e-05, |
|
"logits/chosen": -14.691692352294922, |
|
"logits/rejected": -12.6322021484375, |
|
"logps/chosen": -292.3916931152344, |
|
"logps/rejected": -296.074951171875, |
|
"loss": 1.5685, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -14.284167289733887, |
|
"rewards/margins": 2.3777682781219482, |
|
"rewards/rejected": -16.66193389892578, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.6853438295901829, |
|
"grad_norm": 24.627042770385742, |
|
"learning_rate": 1.5758277379022922e-05, |
|
"logits/chosen": -13.739795684814453, |
|
"logits/rejected": -12.348119735717773, |
|
"logps/chosen": -292.4388732910156, |
|
"logps/rejected": -313.0835876464844, |
|
"loss": 0.9656, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -15.461133003234863, |
|
"rewards/margins": 3.123561143875122, |
|
"rewards/rejected": -18.584693908691406, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.6899745311414679, |
|
"grad_norm": 99.29113006591797, |
|
"learning_rate": 1.552674230145867e-05, |
|
"logits/chosen": -14.539809226989746, |
|
"logits/rejected": -12.484415054321289, |
|
"logps/chosen": -308.2098083496094, |
|
"logps/rejected": -323.6732482910156, |
|
"loss": 1.0316, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -15.086298942565918, |
|
"rewards/margins": 3.069261074066162, |
|
"rewards/rejected": -18.155559539794922, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.694605232692753, |
|
"grad_norm": 7.5167412757873535, |
|
"learning_rate": 1.529520722389442e-05, |
|
"logits/chosen": -13.50758171081543, |
|
"logits/rejected": -12.50390625, |
|
"logps/chosen": -277.1807861328125, |
|
"logps/rejected": -310.3955993652344, |
|
"loss": 1.0758, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -14.185407638549805, |
|
"rewards/margins": 3.0528149604797363, |
|
"rewards/rejected": -17.238222122192383, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.694605232692753, |
|
"eval_logits/chosen": -13.796860694885254, |
|
"eval_logits/rejected": -11.833579063415527, |
|
"eval_logps/chosen": -290.43701171875, |
|
"eval_logps/rejected": -308.2549743652344, |
|
"eval_loss": 0.9895668625831604, |
|
"eval_rewards/accuracies": 0.7108333110809326, |
|
"eval_rewards/chosen": -13.677786827087402, |
|
"eval_rewards/margins": 3.1660029888153076, |
|
"eval_rewards/rejected": -16.84379005432129, |
|
"eval_runtime": 595.1302, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 4.033, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.699235934244038, |
|
"grad_norm": 1.053455114364624, |
|
"learning_rate": 1.506367214633017e-05, |
|
"logits/chosen": -12.791362762451172, |
|
"logits/rejected": -10.7622709274292, |
|
"logps/chosen": -290.3780212402344, |
|
"logps/rejected": -300.4528503417969, |
|
"loss": 0.8474, |
|
"rewards/accuracies": 0.7699999809265137, |
|
"rewards/chosen": -14.130489349365234, |
|
"rewards/margins": 3.402808427810669, |
|
"rewards/rejected": -17.533300399780273, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.7038666357953229, |
|
"grad_norm": 87.40723419189453, |
|
"learning_rate": 1.4834452419541562e-05, |
|
"logits/chosen": -13.96704387664795, |
|
"logits/rejected": -11.846227645874023, |
|
"logps/chosen": -267.1881103515625, |
|
"logps/rejected": -279.21661376953125, |
|
"loss": 0.9192, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.996103286743164, |
|
"rewards/margins": 2.6929244995117188, |
|
"rewards/rejected": -15.689026832580566, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.708497337346608, |
|
"grad_norm": 0.009607501327991486, |
|
"learning_rate": 1.460291734197731e-05, |
|
"logits/chosen": -14.558998107910156, |
|
"logits/rejected": -12.206707954406738, |
|
"logps/chosen": -270.8651428222656, |
|
"logps/rejected": -293.0597839355469, |
|
"loss": 1.0426, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -12.181621551513672, |
|
"rewards/margins": 3.268308162689209, |
|
"rewards/rejected": -15.449930191040039, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.713128038897893, |
|
"grad_norm": 2.457125186920166, |
|
"learning_rate": 1.4371382264413058e-05, |
|
"logits/chosen": -15.273738861083984, |
|
"logits/rejected": -11.646648406982422, |
|
"logps/chosen": -288.58837890625, |
|
"logps/rejected": -276.04229736328125, |
|
"loss": 0.979, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -12.105132102966309, |
|
"rewards/margins": 3.0485777854919434, |
|
"rewards/rejected": -15.153708457946777, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.7177587404491781, |
|
"grad_norm": 0.34003034234046936, |
|
"learning_rate": 1.4139847186848809e-05, |
|
"logits/chosen": -16.237367630004883, |
|
"logits/rejected": -13.76130485534668, |
|
"logps/chosen": -256.4635925292969, |
|
"logps/rejected": -282.35308837890625, |
|
"loss": 0.9507, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -10.752126693725586, |
|
"rewards/margins": 3.626908540725708, |
|
"rewards/rejected": -14.379035949707031, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.7223894420004631, |
|
"grad_norm": 146.7932891845703, |
|
"learning_rate": 1.3908312109284558e-05, |
|
"logits/chosen": -15.454626083374023, |
|
"logits/rejected": -13.428391456604004, |
|
"logps/chosen": -283.355224609375, |
|
"logps/rejected": -297.4376525878906, |
|
"loss": 0.9933, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -11.3836030960083, |
|
"rewards/margins": 2.890033483505249, |
|
"rewards/rejected": -14.273636817932129, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.727020143551748, |
|
"grad_norm": 141.23631286621094, |
|
"learning_rate": 1.3676777031720306e-05, |
|
"logits/chosen": -14.704964637756348, |
|
"logits/rejected": -12.575312614440918, |
|
"logps/chosen": -286.65362548828125, |
|
"logps/rejected": -310.803955078125, |
|
"loss": 1.2049, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -11.945639610290527, |
|
"rewards/margins": 4.382092475891113, |
|
"rewards/rejected": -16.32773208618164, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.7316508451030331, |
|
"grad_norm": 0.2799323499202728, |
|
"learning_rate": 1.3445241954156054e-05, |
|
"logits/chosen": -12.737774848937988, |
|
"logits/rejected": -11.21628189086914, |
|
"logps/chosen": -298.6518249511719, |
|
"logps/rejected": -310.916259765625, |
|
"loss": 1.5277, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -15.038265228271484, |
|
"rewards/margins": 2.875054359436035, |
|
"rewards/rejected": -17.913318634033203, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.7362815466543181, |
|
"grad_norm": 1.3903080224990845, |
|
"learning_rate": 1.3213706876591805e-05, |
|
"logits/chosen": -13.068157196044922, |
|
"logits/rejected": -11.648136138916016, |
|
"logps/chosen": -295.270263671875, |
|
"logps/rejected": -309.232666015625, |
|
"loss": 1.4665, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.108922958374023, |
|
"rewards/margins": 2.2027931213378906, |
|
"rewards/rejected": -16.311716079711914, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.7409122482056032, |
|
"grad_norm": 141.10264587402344, |
|
"learning_rate": 1.2982171799027553e-05, |
|
"logits/chosen": -12.949649810791016, |
|
"logits/rejected": -11.205181121826172, |
|
"logps/chosen": -260.01531982421875, |
|
"logps/rejected": -281.1571960449219, |
|
"loss": 0.8967, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -11.738259315490723, |
|
"rewards/margins": 3.2035210132598877, |
|
"rewards/rejected": -14.941780090332031, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.7409122482056032, |
|
"eval_logits/chosen": -13.536864280700684, |
|
"eval_logits/rejected": -11.524176597595215, |
|
"eval_logps/chosen": -281.8379821777344, |
|
"eval_logps/rejected": -299.4613342285156, |
|
"eval_loss": 0.9621976613998413, |
|
"eval_rewards/accuracies": 0.7149999737739563, |
|
"eval_rewards/chosen": -12.81788444519043, |
|
"eval_rewards/margins": 3.146540880203247, |
|
"eval_rewards/rejected": -15.964425086975098, |
|
"eval_runtime": 595.3564, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 4.031, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.7455429497568882, |
|
"grad_norm": 190.99884033203125, |
|
"learning_rate": 1.2750636721463303e-05, |
|
"logits/chosen": -13.84753131866455, |
|
"logits/rejected": -12.161564826965332, |
|
"logps/chosen": -280.1208801269531, |
|
"logps/rejected": -314.58917236328125, |
|
"loss": 1.0943, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -12.789907455444336, |
|
"rewards/margins": 3.241086006164551, |
|
"rewards/rejected": -16.03099250793457, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.7501736513081732, |
|
"grad_norm": 0.0009654845925979316, |
|
"learning_rate": 1.251910164389905e-05, |
|
"logits/chosen": -13.52328872680664, |
|
"logits/rejected": -11.637651443481445, |
|
"logps/chosen": -293.39483642578125, |
|
"logps/rejected": -323.0391540527344, |
|
"loss": 1.0122, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -13.000173568725586, |
|
"rewards/margins": 3.734714984893799, |
|
"rewards/rejected": -16.734888076782227, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.7548043528594582, |
|
"grad_norm": 53.88007354736328, |
|
"learning_rate": 1.22875665663348e-05, |
|
"logits/chosen": -13.761452674865723, |
|
"logits/rejected": -11.556824684143066, |
|
"logps/chosen": -272.66436767578125, |
|
"logps/rejected": -294.9356384277344, |
|
"loss": 0.8265, |
|
"rewards/accuracies": 0.7799999713897705, |
|
"rewards/chosen": -11.812010765075684, |
|
"rewards/margins": 2.9586479663848877, |
|
"rewards/rejected": -14.770659446716309, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.7594350544107432, |
|
"grad_norm": 26.267419815063477, |
|
"learning_rate": 1.205603148877055e-05, |
|
"logits/chosen": -12.850613594055176, |
|
"logits/rejected": -11.345057487487793, |
|
"logps/chosen": -296.42498779296875, |
|
"logps/rejected": -312.7100830078125, |
|
"loss": 0.8868, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -12.809453010559082, |
|
"rewards/margins": 2.7720766067504883, |
|
"rewards/rejected": -15.581528663635254, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.7640657559620282, |
|
"grad_norm": 0.1327919363975525, |
|
"learning_rate": 1.1824496411206299e-05, |
|
"logits/chosen": -12.737493515014648, |
|
"logits/rejected": -10.811483383178711, |
|
"logps/chosen": -271.80743408203125, |
|
"logps/rejected": -286.36383056640625, |
|
"loss": 1.2946, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -12.860858917236328, |
|
"rewards/margins": 2.731450080871582, |
|
"rewards/rejected": -15.592310905456543, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.7686964575133133, |
|
"grad_norm": 0.0014511727495118976, |
|
"learning_rate": 1.1592961333642047e-05, |
|
"logits/chosen": -12.535152435302734, |
|
"logits/rejected": -10.770435333251953, |
|
"logps/chosen": -276.2295227050781, |
|
"logps/rejected": -292.5387878417969, |
|
"loss": 1.0457, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -12.783775329589844, |
|
"rewards/margins": 2.7502074241638184, |
|
"rewards/rejected": -15.533984184265137, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.7733271590645983, |
|
"grad_norm": 17.507179260253906, |
|
"learning_rate": 1.1361426256077796e-05, |
|
"logits/chosen": -12.542750358581543, |
|
"logits/rejected": -10.764151573181152, |
|
"logps/chosen": -294.1999206542969, |
|
"logps/rejected": -315.76861572265625, |
|
"loss": 1.0815, |
|
"rewards/accuracies": 0.7200000286102295, |
|
"rewards/chosen": -14.347511291503906, |
|
"rewards/margins": 3.8440487384796143, |
|
"rewards/rejected": -18.191560745239258, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.7779578606158833, |
|
"grad_norm": 127.55777740478516, |
|
"learning_rate": 1.1129891178513544e-05, |
|
"logits/chosen": -14.010462760925293, |
|
"logits/rejected": -11.466479301452637, |
|
"logps/chosen": -294.719482421875, |
|
"logps/rejected": -292.8918151855469, |
|
"loss": 1.1056, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -12.861717224121094, |
|
"rewards/margins": 2.829714298248291, |
|
"rewards/rejected": -15.69143295288086, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.7825885621671683, |
|
"grad_norm": 4.169695854187012, |
|
"learning_rate": 1.0898356100949295e-05, |
|
"logits/chosen": -13.056097030639648, |
|
"logits/rejected": -10.822772979736328, |
|
"logps/chosen": -294.1612548828125, |
|
"logps/rejected": -305.1460876464844, |
|
"loss": 0.8364, |
|
"rewards/accuracies": 0.7799999713897705, |
|
"rewards/chosen": -13.012012481689453, |
|
"rewards/margins": 3.6792736053466797, |
|
"rewards/rejected": -16.691287994384766, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.7872192637184533, |
|
"grad_norm": 1.3353691101074219, |
|
"learning_rate": 1.0666821023385043e-05, |
|
"logits/chosen": -12.639263153076172, |
|
"logits/rejected": -10.551265716552734, |
|
"logps/chosen": -285.9519958496094, |
|
"logps/rejected": -300.0907897949219, |
|
"loss": 1.0922, |
|
"rewards/accuracies": 0.7300000190734863, |
|
"rewards/chosen": -14.422396659851074, |
|
"rewards/margins": 2.6487817764282227, |
|
"rewards/rejected": -17.071178436279297, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.7872192637184533, |
|
"eval_logits/chosen": -12.934601783752441, |
|
"eval_logits/rejected": -10.735593795776367, |
|
"eval_logps/chosen": -287.7341613769531, |
|
"eval_logps/rejected": -306.9658203125, |
|
"eval_loss": 0.9625710248947144, |
|
"eval_rewards/accuracies": 0.7195833325386047, |
|
"eval_rewards/chosen": -13.407501220703125, |
|
"eval_rewards/margins": 3.3073694705963135, |
|
"eval_rewards/rejected": -16.714872360229492, |
|
"eval_runtime": 595.2721, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 4.032, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.7918499652697384, |
|
"grad_norm": 238.36265563964844, |
|
"learning_rate": 1.0435285945820793e-05, |
|
"logits/chosen": -12.879168510437012, |
|
"logits/rejected": -10.35470962524414, |
|
"logps/chosen": -287.34698486328125, |
|
"logps/rejected": -294.0375061035156, |
|
"loss": 1.0777, |
|
"rewards/accuracies": 0.6700000166893005, |
|
"rewards/chosen": -13.080986022949219, |
|
"rewards/margins": 3.038571834564209, |
|
"rewards/rejected": -16.119556427001953, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.7964806668210234, |
|
"grad_norm": 115.40522766113281, |
|
"learning_rate": 1.020375086825654e-05, |
|
"logits/chosen": -13.390498161315918, |
|
"logits/rejected": -11.11551284790039, |
|
"logps/chosen": -284.6138000488281, |
|
"logps/rejected": -290.37554931640625, |
|
"loss": 1.0877, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.631682395935059, |
|
"rewards/margins": 2.2628602981567383, |
|
"rewards/rejected": -14.894542694091797, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.8011113683723085, |
|
"grad_norm": 81.7811279296875, |
|
"learning_rate": 9.97221579069229e-06, |
|
"logits/chosen": -13.257206916809082, |
|
"logits/rejected": -11.7109375, |
|
"logps/chosen": -298.948974609375, |
|
"logps/rejected": -325.81036376953125, |
|
"loss": 1.3241, |
|
"rewards/accuracies": 0.6399999856948853, |
|
"rewards/chosen": -13.646310806274414, |
|
"rewards/margins": 2.978910207748413, |
|
"rewards/rejected": -16.625221252441406, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.8057420699235934, |
|
"grad_norm": 142.9304656982422, |
|
"learning_rate": 9.742996063903683e-06, |
|
"logits/chosen": -13.058794021606445, |
|
"logits/rejected": -11.029099464416504, |
|
"logps/chosen": -276.3821105957031, |
|
"logps/rejected": -288.647705078125, |
|
"loss": 1.0781, |
|
"rewards/accuracies": 0.6800000071525574, |
|
"rewards/chosen": -13.544109344482422, |
|
"rewards/margins": 2.761138677597046, |
|
"rewards/rejected": -16.305246353149414, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.8103727714748784, |
|
"grad_norm": 10.479734420776367, |
|
"learning_rate": 9.51146098633943e-06, |
|
"logits/chosen": -12.74901008605957, |
|
"logits/rejected": -11.03323745727539, |
|
"logps/chosen": -290.25555419921875, |
|
"logps/rejected": -309.6465759277344, |
|
"loss": 0.9795, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -13.511792182922363, |
|
"rewards/margins": 3.147340774536133, |
|
"rewards/rejected": -16.659133911132812, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.8150034730261635, |
|
"grad_norm": 0.0002637170546222478, |
|
"learning_rate": 9.282241259550822e-06, |
|
"logits/chosen": -11.844225883483887, |
|
"logits/rejected": -9.50997543334961, |
|
"logps/chosen": -286.7944030761719, |
|
"logps/rejected": -299.2738342285156, |
|
"loss": 0.9225, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -14.926375389099121, |
|
"rewards/margins": 3.188018798828125, |
|
"rewards/rejected": -18.114395141601562, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.8196341745774485, |
|
"grad_norm": 9.602815628051758, |
|
"learning_rate": 9.050706181986571e-06, |
|
"logits/chosen": -13.951495170593262, |
|
"logits/rejected": -11.048531532287598, |
|
"logps/chosen": -290.7369384765625, |
|
"logps/rejected": -305.9258117675781, |
|
"loss": 1.1613, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -13.608471870422363, |
|
"rewards/margins": 3.499152183532715, |
|
"rewards/rejected": -17.107624053955078, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.8242648761287334, |
|
"grad_norm": 99.48814392089844, |
|
"learning_rate": 8.819171104422321e-06, |
|
"logits/chosen": -13.188122749328613, |
|
"logits/rejected": -11.652185440063477, |
|
"logps/chosen": -272.55535888671875, |
|
"logps/rejected": -287.4886779785156, |
|
"loss": 0.9058, |
|
"rewards/accuracies": 0.6899999976158142, |
|
"rewards/chosen": -11.741266250610352, |
|
"rewards/margins": 2.858311176300049, |
|
"rewards/rejected": -14.599577903747559, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.8288955776800185, |
|
"grad_norm": 0.6386672854423523, |
|
"learning_rate": 8.587636026858069e-06, |
|
"logits/chosen": -13.960208892822266, |
|
"logits/rejected": -12.15221881866455, |
|
"logps/chosen": -278.147705078125, |
|
"logps/rejected": -297.4937744140625, |
|
"loss": 0.9024, |
|
"rewards/accuracies": 0.7099999785423279, |
|
"rewards/chosen": -12.239116668701172, |
|
"rewards/margins": 2.993659019470215, |
|
"rewards/rejected": -15.232775688171387, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.8335262792313035, |
|
"grad_norm": 25.236881256103516, |
|
"learning_rate": 8.356100949293818e-06, |
|
"logits/chosen": -14.14141845703125, |
|
"logits/rejected": -11.77079963684082, |
|
"logps/chosen": -274.72637939453125, |
|
"logps/rejected": -297.6695556640625, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.7400000095367432, |
|
"rewards/chosen": -12.932685852050781, |
|
"rewards/margins": 3.8792471885681152, |
|
"rewards/rejected": -16.811933517456055, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.8335262792313035, |
|
"eval_logits/chosen": -14.136133193969727, |
|
"eval_logits/rejected": -11.753191947937012, |
|
"eval_logps/chosen": -275.5329284667969, |
|
"eval_logps/rejected": -294.1294250488281, |
|
"eval_loss": 0.9302791357040405, |
|
"eval_rewards/accuracies": 0.7266666889190674, |
|
"eval_rewards/chosen": -12.187378883361816, |
|
"eval_rewards/margins": 3.2438580989837646, |
|
"eval_rewards/rejected": -15.43123722076416, |
|
"eval_runtime": 595.0535, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 4.033, |
|
"step": 18000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 21595, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|