|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 17.954430379746835, |
|
"eval_steps": 100, |
|
"global_step": 3546, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005063291139240506, |
|
"grad_norm": 520145.53108452284, |
|
"learning_rate": 1.4084507042253521e-09, |
|
"logits/chosen": -16.270591735839844, |
|
"logits/rejected": -16.343984603881836, |
|
"logps/chosen": -186.17276000976562, |
|
"logps/rejected": -175.8095703125, |
|
"loss": 122464.3125, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05063291139240506, |
|
"grad_norm": 501181.05233525805, |
|
"learning_rate": 1.408450704225352e-08, |
|
"logits/chosen": -17.194263458251953, |
|
"logits/rejected": -17.04476547241211, |
|
"logps/chosen": -220.64031982421875, |
|
"logps/rejected": -220.79531860351562, |
|
"loss": 124716.2917, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 1.5937095554363623e-07, |
|
"rewards/margins": 1.5358187738456763e-05, |
|
"rewards/rejected": -1.5198814253380988e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.10126582278481013, |
|
"grad_norm": 537058.8643033113, |
|
"learning_rate": 2.816901408450704e-08, |
|
"logits/chosen": -16.468345642089844, |
|
"logits/rejected": -16.397050857543945, |
|
"logps/chosen": -238.16464233398438, |
|
"logps/rejected": -234.13320922851562, |
|
"loss": 125132.075, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -7.482715773221571e-06, |
|
"rewards/margins": 1.4337347238324583e-05, |
|
"rewards/rejected": -2.1820069378009066e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1518987341772152, |
|
"grad_norm": 457257.68659374124, |
|
"learning_rate": 4.2253521126760564e-08, |
|
"logits/chosen": -16.952747344970703, |
|
"logits/rejected": -16.70650863647461, |
|
"logps/chosen": -242.9259490966797, |
|
"logps/rejected": -242.9457244873047, |
|
"loss": 124660.25, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -3.21022052958142e-05, |
|
"rewards/margins": 3.5706521885003895e-05, |
|
"rewards/rejected": -6.780872354283929e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20253164556962025, |
|
"grad_norm": 520967.9129238899, |
|
"learning_rate": 5.633802816901408e-08, |
|
"logits/chosen": -16.920284271240234, |
|
"logits/rejected": -16.8529052734375, |
|
"logps/chosen": -243.7992706298828, |
|
"logps/rejected": -244.38906860351562, |
|
"loss": 124148.0625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0001235240779351443, |
|
"rewards/margins": 8.850651647662744e-05, |
|
"rewards/rejected": -0.00021203060168772936, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 722258.4292859514, |
|
"learning_rate": 7.042253521126761e-08, |
|
"logits/chosen": -16.24307632446289, |
|
"logits/rejected": -16.294937133789062, |
|
"logps/chosen": -238.68148803710938, |
|
"logps/rejected": -240.46337890625, |
|
"loss": 125272.85, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00024854010553099215, |
|
"rewards/margins": -7.368279329966754e-05, |
|
"rewards/rejected": -0.00017485734133515507, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3037974683544304, |
|
"grad_norm": 521725.51159479923, |
|
"learning_rate": 8.450704225352113e-08, |
|
"logits/chosen": -16.547048568725586, |
|
"logits/rejected": -16.562244415283203, |
|
"logps/chosen": -234.24453735351562, |
|
"logps/rejected": -236.03823852539062, |
|
"loss": 123692.1, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00036723288940265775, |
|
"rewards/margins": 6.122588274592999e-06, |
|
"rewards/rejected": -0.00037335552042350173, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35443037974683544, |
|
"grad_norm": 446768.20251500694, |
|
"learning_rate": 9.859154929577463e-08, |
|
"logits/chosen": -16.514156341552734, |
|
"logits/rejected": -16.41303062438965, |
|
"logps/chosen": -240.8957061767578, |
|
"logps/rejected": -235.2915496826172, |
|
"loss": 125937.8, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.00028800699510611594, |
|
"rewards/margins": 0.00021416530944406986, |
|
"rewards/rejected": -0.0005021723336540163, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4050632911392405, |
|
"grad_norm": 463557.5011981856, |
|
"learning_rate": 1.1267605633802817e-07, |
|
"logits/chosen": -16.711376190185547, |
|
"logits/rejected": -16.489612579345703, |
|
"logps/chosen": -243.5523681640625, |
|
"logps/rejected": -228.8307342529297, |
|
"loss": 125818.525, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0005035396316088736, |
|
"rewards/margins": 6.90509841660969e-05, |
|
"rewards/rejected": -0.0005725906230509281, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45569620253164556, |
|
"grad_norm": 465137.87035599066, |
|
"learning_rate": 1.2676056338028167e-07, |
|
"logits/chosen": -17.326900482177734, |
|
"logits/rejected": -17.396936416625977, |
|
"logps/chosen": -240.1623077392578, |
|
"logps/rejected": -234.27578735351562, |
|
"loss": 123894.4, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0008088796166703105, |
|
"rewards/margins": -0.00010466824460308999, |
|
"rewards/rejected": -0.0007042114739306271, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 505006.4054603859, |
|
"learning_rate": 1.4084507042253522e-07, |
|
"logits/chosen": -16.5346736907959, |
|
"logits/rejected": -16.46234893798828, |
|
"logps/chosen": -238.9674530029297, |
|
"logps/rejected": -235.36239624023438, |
|
"loss": 126640.2125, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0006280581001192331, |
|
"rewards/margins": 2.4443055735900998e-05, |
|
"rewards/rejected": -0.0006525011267513037, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5569620253164557, |
|
"grad_norm": 475489.46555727004, |
|
"learning_rate": 1.549295774647887e-07, |
|
"logits/chosen": -16.67499351501465, |
|
"logits/rejected": -16.584075927734375, |
|
"logps/chosen": -240.5388946533203, |
|
"logps/rejected": -239.03366088867188, |
|
"loss": 122706.3, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0006213908782228827, |
|
"rewards/margins": 0.00010261077841278166, |
|
"rewards/rejected": -0.0007240016711875796, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6075949367088608, |
|
"grad_norm": 492764.07090207015, |
|
"learning_rate": 1.6901408450704225e-07, |
|
"logits/chosen": -16.746532440185547, |
|
"logits/rejected": -16.617717742919922, |
|
"logps/chosen": -227.05398559570312, |
|
"logps/rejected": -225.60214233398438, |
|
"loss": 126588.925, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0008889889577403665, |
|
"rewards/margins": 2.5076475139940158e-05, |
|
"rewards/rejected": -0.0009140653419308364, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6582278481012658, |
|
"grad_norm": 511084.4558498889, |
|
"learning_rate": 1.8309859154929577e-07, |
|
"logits/chosen": -16.747934341430664, |
|
"logits/rejected": -16.733430862426758, |
|
"logps/chosen": -240.7227325439453, |
|
"logps/rejected": -240.2967529296875, |
|
"loss": 125175.5125, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0010187395382672548, |
|
"rewards/margins": 0.002705145161598921, |
|
"rewards/rejected": -0.003723885165527463, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7088607594936709, |
|
"grad_norm": 540454.6644647518, |
|
"learning_rate": 1.9718309859154927e-07, |
|
"logits/chosen": -16.1859073638916, |
|
"logits/rejected": -16.264835357666016, |
|
"logps/chosen": -231.37173461914062, |
|
"logps/rejected": -227.0606689453125, |
|
"loss": 126058.6375, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0008466474828310311, |
|
"rewards/margins": -4.7403918870259076e-05, |
|
"rewards/rejected": -0.0007992436294443905, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 503077.16971538117, |
|
"learning_rate": 2.112676056338028e-07, |
|
"logits/chosen": -17.280269622802734, |
|
"logits/rejected": -17.093780517578125, |
|
"logps/chosen": -238.0977325439453, |
|
"logps/rejected": -238.93212890625, |
|
"loss": 126646.125, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0008783842204138637, |
|
"rewards/margins": 0.00026031016022898257, |
|
"rewards/rejected": -0.0011386943515390158, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.810126582278481, |
|
"grad_norm": 541715.9624559938, |
|
"learning_rate": 2.2535211267605633e-07, |
|
"logits/chosen": -16.782550811767578, |
|
"logits/rejected": -16.79593276977539, |
|
"logps/chosen": -250.48593139648438, |
|
"logps/rejected": -249.44924926757812, |
|
"loss": 124718.425, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0012445250758901238, |
|
"rewards/margins": 5.686017539119348e-05, |
|
"rewards/rejected": -0.0013013852294534445, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8607594936708861, |
|
"grad_norm": 548905.0358445289, |
|
"learning_rate": 2.394366197183098e-07, |
|
"logits/chosen": -17.04167938232422, |
|
"logits/rejected": -16.985572814941406, |
|
"logps/chosen": -255.06942749023438, |
|
"logps/rejected": -260.38128662109375, |
|
"loss": 125650.7625, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0021144188940525055, |
|
"rewards/margins": 0.001583110773935914, |
|
"rewards/rejected": -0.003697529900819063, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9113924050632911, |
|
"grad_norm": 561549.4959644328, |
|
"learning_rate": 2.5352112676056334e-07, |
|
"logits/chosen": -16.703407287597656, |
|
"logits/rejected": -16.487037658691406, |
|
"logps/chosen": -232.529052734375, |
|
"logps/rejected": -228.3297576904297, |
|
"loss": 127298.1375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.002397818025201559, |
|
"rewards/margins": 0.0011578220874071121, |
|
"rewards/rejected": -0.003555640112608671, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9620253164556962, |
|
"grad_norm": 565071.0053763993, |
|
"learning_rate": 2.6760563380281686e-07, |
|
"logits/chosen": -16.11090660095215, |
|
"logits/rejected": -16.053157806396484, |
|
"logps/chosen": -239.39205932617188, |
|
"logps/rejected": -235.435791015625, |
|
"loss": 127009.225, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0010291519574820995, |
|
"rewards/margins": 0.00019036220328416675, |
|
"rewards/rejected": -0.0012195140589028597, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0126582278481013, |
|
"grad_norm": 497332.98430491646, |
|
"learning_rate": 2.8169014084507043e-07, |
|
"logits/chosen": -16.127140045166016, |
|
"logits/rejected": -15.988116264343262, |
|
"logps/chosen": -225.9070587158203, |
|
"logps/rejected": -227.90145874023438, |
|
"loss": 126358.875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0012379485415294766, |
|
"rewards/margins": 0.0006675361073575914, |
|
"rewards/rejected": -0.001905484707094729, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0632911392405062, |
|
"grad_norm": 935063.760892245, |
|
"learning_rate": 2.957746478873239e-07, |
|
"logits/chosen": -16.701793670654297, |
|
"logits/rejected": -16.669902801513672, |
|
"logps/chosen": -230.3677520751953, |
|
"logps/rejected": -229.03921508789062, |
|
"loss": 124250.775, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0006614397279918194, |
|
"rewards/margins": 0.003238010685890913, |
|
"rewards/rejected": -0.003899450646713376, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1139240506329113, |
|
"grad_norm": 517399.2020129059, |
|
"learning_rate": 3.098591549295774e-07, |
|
"logits/chosen": -16.413972854614258, |
|
"logits/rejected": -16.371458053588867, |
|
"logps/chosen": -247.8984832763672, |
|
"logps/rejected": -249.5322723388672, |
|
"loss": 124993.7375, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.0012706981506198645, |
|
"rewards/margins": 0.003060612827539444, |
|
"rewards/rejected": -0.004331310745328665, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1645569620253164, |
|
"grad_norm": 499036.7717944408, |
|
"learning_rate": 3.23943661971831e-07, |
|
"logits/chosen": -15.908624649047852, |
|
"logits/rejected": -15.847338676452637, |
|
"logps/chosen": -236.7013397216797, |
|
"logps/rejected": -239.3136749267578, |
|
"loss": 122842.5, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0006655483739450574, |
|
"rewards/margins": 0.0032406128011643887, |
|
"rewards/rejected": -0.0039061610586941242, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2151898734177216, |
|
"grad_norm": 540681.7856619481, |
|
"learning_rate": 3.380281690140845e-07, |
|
"logits/chosen": -16.052249908447266, |
|
"logits/rejected": -15.99653148651123, |
|
"logps/chosen": -229.74832153320312, |
|
"logps/rejected": -230.9803009033203, |
|
"loss": 124587.3625, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0007962372037582099, |
|
"rewards/margins": 0.0025483998470008373, |
|
"rewards/rejected": -0.003344637108966708, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2658227848101267, |
|
"grad_norm": 1023950.8355601664, |
|
"learning_rate": 3.52112676056338e-07, |
|
"logits/chosen": -15.299288749694824, |
|
"logits/rejected": -15.215815544128418, |
|
"logps/chosen": -231.2301788330078, |
|
"logps/rejected": -232.03359985351562, |
|
"loss": 121822.4, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -7.512583579227794e-06, |
|
"rewards/margins": 0.003883513854816556, |
|
"rewards/rejected": -0.0038910270668566227, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3164556962025316, |
|
"grad_norm": 620253.8184950812, |
|
"learning_rate": 3.6619718309859155e-07, |
|
"logits/chosen": -16.167770385742188, |
|
"logits/rejected": -15.915590286254883, |
|
"logps/chosen": -238.9904327392578, |
|
"logps/rejected": -239.73953247070312, |
|
"loss": 123388.8625, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.00017356239550281316, |
|
"rewards/margins": 0.0050824107602238655, |
|
"rewards/rejected": -0.005255972500890493, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3670886075949367, |
|
"grad_norm": 575104.3218096169, |
|
"learning_rate": 3.8028169014084507e-07, |
|
"logits/chosen": -15.480558395385742, |
|
"logits/rejected": -15.386639595031738, |
|
"logps/chosen": -241.60879516601562, |
|
"logps/rejected": -250.003173828125, |
|
"loss": 123555.7, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.001139859901741147, |
|
"rewards/margins": 0.005077657289803028, |
|
"rewards/rejected": -0.0062175169587135315, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4177215189873418, |
|
"grad_norm": 601224.4433091934, |
|
"learning_rate": 3.9436619718309853e-07, |
|
"logits/chosen": -15.266016960144043, |
|
"logits/rejected": -15.313554763793945, |
|
"logps/chosen": -230.73397827148438, |
|
"logps/rejected": -237.3317108154297, |
|
"loss": 125556.675, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.0007209269679151475, |
|
"rewards/margins": 0.00534270191565156, |
|
"rewards/rejected": -0.004621774889528751, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4683544303797469, |
|
"grad_norm": 751936.3077706753, |
|
"learning_rate": 4.084507042253521e-07, |
|
"logits/chosen": -14.600263595581055, |
|
"logits/rejected": -14.538311958312988, |
|
"logps/chosen": -224.1177520751953, |
|
"logps/rejected": -226.97879028320312, |
|
"loss": 123584.675, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.0011863496620208025, |
|
"rewards/margins": 0.007649322040379047, |
|
"rewards/rejected": -0.006462973542511463, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.518987341772152, |
|
"grad_norm": 575660.5828565176, |
|
"learning_rate": 4.225352112676056e-07, |
|
"logits/chosen": -14.935551643371582, |
|
"logits/rejected": -15.062429428100586, |
|
"logps/chosen": -235.7123565673828, |
|
"logps/rejected": -245.36181640625, |
|
"loss": 122562.1375, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0014863747637718916, |
|
"rewards/margins": 0.0057060932740569115, |
|
"rewards/rejected": -0.0042197187431156635, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5696202531645569, |
|
"grad_norm": 619514.1083852616, |
|
"learning_rate": 4.366197183098591e-07, |
|
"logits/chosen": -14.678690910339355, |
|
"logits/rejected": -14.617218017578125, |
|
"logps/chosen": -229.6386260986328, |
|
"logps/rejected": -234.1474151611328, |
|
"loss": 123630.225, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0006864996394142509, |
|
"rewards/margins": 0.004933560267090797, |
|
"rewards/rejected": -0.004247060976922512, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.620253164556962, |
|
"grad_norm": 738538.1512211321, |
|
"learning_rate": 4.5070422535211266e-07, |
|
"logits/chosen": -14.131611824035645, |
|
"logits/rejected": -14.156657218933105, |
|
"logps/chosen": -241.20156860351562, |
|
"logps/rejected": -248.2321319580078, |
|
"loss": 124158.6, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0009155808947980404, |
|
"rewards/margins": 0.006913213524967432, |
|
"rewards/rejected": -0.007828795351088047, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.6708860759493671, |
|
"grad_norm": 688317.7143989427, |
|
"learning_rate": 4.647887323943662e-07, |
|
"logits/chosen": -13.791796684265137, |
|
"logits/rejected": -13.970884323120117, |
|
"logps/chosen": -228.53079223632812, |
|
"logps/rejected": -235.5008087158203, |
|
"loss": 123378.175, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0017698236042633653, |
|
"rewards/margins": 0.006004182621836662, |
|
"rewards/rejected": -0.004234359599649906, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.721518987341772, |
|
"grad_norm": 693314.5034252935, |
|
"learning_rate": 4.788732394366196e-07, |
|
"logits/chosen": -13.555567741394043, |
|
"logits/rejected": -13.32630729675293, |
|
"logps/chosen": -227.0249481201172, |
|
"logps/rejected": -232.2772216796875, |
|
"loss": 122521.475, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.001143553527072072, |
|
"rewards/margins": 0.009070896543562412, |
|
"rewards/rejected": -0.00792734231799841, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.7721518987341773, |
|
"grad_norm": 758709.6120906892, |
|
"learning_rate": 4.929577464788733e-07, |
|
"logits/chosen": -13.520563125610352, |
|
"logits/rejected": -13.633130073547363, |
|
"logps/chosen": -234.7182159423828, |
|
"logps/rejected": -248.12890625, |
|
"loss": 121557.575, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.00047356385039165616, |
|
"rewards/margins": 0.00813873577862978, |
|
"rewards/rejected": -0.008612299337983131, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.8227848101265822, |
|
"grad_norm": 689974.393201542, |
|
"learning_rate": 4.992165465371357e-07, |
|
"logits/chosen": -12.841153144836426, |
|
"logits/rejected": -12.86094856262207, |
|
"logps/chosen": -232.314697265625, |
|
"logps/rejected": -232.64297485351562, |
|
"loss": 121436.65, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0036194869317114353, |
|
"rewards/margins": 0.009506477043032646, |
|
"rewards/rejected": -0.005886988714337349, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.8734177215189873, |
|
"grad_norm": 883375.543329047, |
|
"learning_rate": 4.976496396114071e-07, |
|
"logits/chosen": -12.77904224395752, |
|
"logits/rejected": -12.76900577545166, |
|
"logps/chosen": -239.8730010986328, |
|
"logps/rejected": -251.4569549560547, |
|
"loss": 122456.925, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0006393647054210305, |
|
"rewards/margins": 0.008665768429636955, |
|
"rewards/rejected": -0.009305133484303951, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.9240506329113924, |
|
"grad_norm": 797554.0864386982, |
|
"learning_rate": 4.960827326856785e-07, |
|
"logits/chosen": -13.028135299682617, |
|
"logits/rejected": -13.148831367492676, |
|
"logps/chosen": -237.040771484375, |
|
"logps/rejected": -244.45181274414062, |
|
"loss": 124907.725, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0025544934906065464, |
|
"rewards/margins": 0.008132859133183956, |
|
"rewards/rejected": -0.005578366108238697, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.9746835443037973, |
|
"grad_norm": 793120.1180084129, |
|
"learning_rate": 4.945158257599498e-07, |
|
"logits/chosen": -12.312803268432617, |
|
"logits/rejected": -12.135167121887207, |
|
"logps/chosen": -235.60360717773438, |
|
"logps/rejected": -242.9219207763672, |
|
"loss": 121583.8, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.003660207614302635, |
|
"rewards/margins": 0.011001082137227058, |
|
"rewards/rejected": -0.007340874522924423, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.0253164556962027, |
|
"grad_norm": 767339.6192091529, |
|
"learning_rate": 4.929489188342212e-07, |
|
"logits/chosen": -12.052891731262207, |
|
"logits/rejected": -11.94625473022461, |
|
"logps/chosen": -225.0377197265625, |
|
"logps/rejected": -243.81039428710938, |
|
"loss": 119737.85, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.006956162396818399, |
|
"rewards/margins": 0.01727995090186596, |
|
"rewards/rejected": -0.010323788039386272, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0759493670886076, |
|
"grad_norm": 936793.207320047, |
|
"learning_rate": 4.913820119084926e-07, |
|
"logits/chosen": -11.38767147064209, |
|
"logits/rejected": -11.339715957641602, |
|
"logps/chosen": -219.8796844482422, |
|
"logps/rejected": -252.80581665039062, |
|
"loss": 114021.05, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.008199459873139858, |
|
"rewards/margins": 0.031510110944509506, |
|
"rewards/rejected": -0.023310650140047073, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.1265822784810124, |
|
"grad_norm": 1035986.8564166825, |
|
"learning_rate": 4.89815104982764e-07, |
|
"logits/chosen": -10.819408416748047, |
|
"logits/rejected": -10.774351119995117, |
|
"logps/chosen": -231.78854370117188, |
|
"logps/rejected": -260.20355224609375, |
|
"loss": 116051.6, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.0056950985454022884, |
|
"rewards/margins": 0.027868490666151047, |
|
"rewards/rejected": -0.02217339165508747, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.1772151898734178, |
|
"grad_norm": 1036991.7861177241, |
|
"learning_rate": 4.882481980570354e-07, |
|
"logits/chosen": -10.84526252746582, |
|
"logits/rejected": -10.708145141601562, |
|
"logps/chosen": -221.5430908203125, |
|
"logps/rejected": -257.36114501953125, |
|
"loss": 113501.175, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.005057200789451599, |
|
"rewards/margins": 0.038923002779483795, |
|
"rewards/rejected": -0.033865805715322495, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.2278481012658227, |
|
"grad_norm": 1227488.243303788, |
|
"learning_rate": 4.866812911313068e-07, |
|
"logits/chosen": -10.5010986328125, |
|
"logits/rejected": -10.63232135772705, |
|
"logps/chosen": -233.42373657226562, |
|
"logps/rejected": -276.0982666015625, |
|
"loss": 112100.4, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.004779786802828312, |
|
"rewards/margins": 0.040522992610931396, |
|
"rewards/rejected": -0.03574320673942566, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.278481012658228, |
|
"grad_norm": 1079397.6974786038, |
|
"learning_rate": 4.851143842055782e-07, |
|
"logits/chosen": -10.104026794433594, |
|
"logits/rejected": -10.142271995544434, |
|
"logps/chosen": -216.66940307617188, |
|
"logps/rejected": -258.98858642578125, |
|
"loss": 112483.4, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0053299954161047935, |
|
"rewards/margins": 0.03484385460615158, |
|
"rewards/rejected": -0.029513856396079063, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.329113924050633, |
|
"grad_norm": 1367054.8438774655, |
|
"learning_rate": 4.835474772798496e-07, |
|
"logits/chosen": -10.148681640625, |
|
"logits/rejected": -10.183786392211914, |
|
"logps/chosen": -233.730224609375, |
|
"logps/rejected": -278.64349365234375, |
|
"loss": 111561.6625, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.011530257761478424, |
|
"rewards/margins": 0.04578756168484688, |
|
"rewards/rejected": -0.034257303923368454, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.379746835443038, |
|
"grad_norm": 1298484.9349088285, |
|
"learning_rate": 4.819805703541209e-07, |
|
"logits/chosen": -10.018949508666992, |
|
"logits/rejected": -10.097805976867676, |
|
"logps/chosen": -224.6026153564453, |
|
"logps/rejected": -270.0591735839844, |
|
"loss": 112710.1875, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.011801879853010178, |
|
"rewards/margins": 0.040784891694784164, |
|
"rewards/rejected": -0.028983011841773987, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.430379746835443, |
|
"grad_norm": 1428524.6930006845, |
|
"learning_rate": 4.804136634283923e-07, |
|
"logits/chosen": -9.595979690551758, |
|
"logits/rejected": -9.634994506835938, |
|
"logps/chosen": -265.3009338378906, |
|
"logps/rejected": -315.98541259765625, |
|
"loss": 110031.3, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0027348275762051344, |
|
"rewards/margins": 0.05151837319135666, |
|
"rewards/rejected": -0.048783544450998306, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.481012658227848, |
|
"grad_norm": 1467649.8441612076, |
|
"learning_rate": 4.788467565026637e-07, |
|
"logits/chosen": -8.871723175048828, |
|
"logits/rejected": -8.764354705810547, |
|
"logps/chosen": -203.2312774658203, |
|
"logps/rejected": -241.612548828125, |
|
"loss": 110534.325, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.01302252896130085, |
|
"rewards/margins": 0.03907207027077675, |
|
"rewards/rejected": -0.02604953944683075, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.5316455696202533, |
|
"grad_norm": 1382959.9591988046, |
|
"learning_rate": 4.772798495769351e-07, |
|
"logits/chosen": -8.468270301818848, |
|
"logits/rejected": -8.384966850280762, |
|
"logps/chosen": -226.46237182617188, |
|
"logps/rejected": -269.6461181640625, |
|
"loss": 110480.175, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.015191495418548584, |
|
"rewards/margins": 0.0456535741686821, |
|
"rewards/rejected": -0.030462080612778664, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.5822784810126582, |
|
"grad_norm": 1369494.2190603705, |
|
"learning_rate": 4.757129426512065e-07, |
|
"logits/chosen": -8.634099006652832, |
|
"logits/rejected": -8.640868186950684, |
|
"logps/chosen": -232.20022583007812, |
|
"logps/rejected": -304.80352783203125, |
|
"loss": 109921.975, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.013228721916675568, |
|
"rewards/margins": 0.07378505170345306, |
|
"rewards/rejected": -0.060556329786777496, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.632911392405063, |
|
"grad_norm": 1750255.0550240122, |
|
"learning_rate": 4.741460357254779e-07, |
|
"logits/chosen": -7.8379316329956055, |
|
"logits/rejected": -7.4784440994262695, |
|
"logps/chosen": -213.3401641845703, |
|
"logps/rejected": -258.43743896484375, |
|
"loss": 111730.3875, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.016342563554644585, |
|
"rewards/margins": 0.048144370317459106, |
|
"rewards/rejected": -0.03180180490016937, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.6835443037974684, |
|
"grad_norm": 1447093.2174814222, |
|
"learning_rate": 4.7257912879974927e-07, |
|
"logits/chosen": -8.354089736938477, |
|
"logits/rejected": -7.889782905578613, |
|
"logps/chosen": -225.5243682861328, |
|
"logps/rejected": -276.7877502441406, |
|
"loss": 109226.9625, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.013667022809386253, |
|
"rewards/margins": 0.05627403408288956, |
|
"rewards/rejected": -0.042607005685567856, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.7341772151898733, |
|
"grad_norm": 1477083.7533012358, |
|
"learning_rate": 4.710122218740207e-07, |
|
"logits/chosen": -7.921019077301025, |
|
"logits/rejected": -7.979846000671387, |
|
"logps/chosen": -237.23715209960938, |
|
"logps/rejected": -285.4289855957031, |
|
"loss": 109592.125, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.014952963218092918, |
|
"rewards/margins": 0.055934417992830276, |
|
"rewards/rejected": -0.040981464087963104, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.7848101265822782, |
|
"grad_norm": 1486366.6324330876, |
|
"learning_rate": 4.6944531494829204e-07, |
|
"logits/chosen": -7.12634801864624, |
|
"logits/rejected": -7.396058082580566, |
|
"logps/chosen": -226.1304168701172, |
|
"logps/rejected": -276.8672790527344, |
|
"loss": 108245.925, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.00994019117206335, |
|
"rewards/margins": 0.0537477545440197, |
|
"rewards/rejected": -0.04380756989121437, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.8354430379746836, |
|
"grad_norm": 1560304.698196799, |
|
"learning_rate": 4.6787840802256345e-07, |
|
"logits/chosen": -7.268878936767578, |
|
"logits/rejected": -7.414219856262207, |
|
"logps/chosen": -215.24661254882812, |
|
"logps/rejected": -276.79437255859375, |
|
"loss": 110187.5125, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.016926631331443787, |
|
"rewards/margins": 0.05572710186243057, |
|
"rewards/rejected": -0.03880046680569649, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.8860759493670884, |
|
"grad_norm": 1647695.8714812996, |
|
"learning_rate": 4.663115010968348e-07, |
|
"logits/chosen": -8.584083557128906, |
|
"logits/rejected": -8.43793773651123, |
|
"logps/chosen": -239.3496856689453, |
|
"logps/rejected": -301.948974609375, |
|
"loss": 108493.15, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.007640582975000143, |
|
"rewards/margins": 0.06335236132144928, |
|
"rewards/rejected": -0.0557117760181427, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.9367088607594938, |
|
"grad_norm": 1523200.3846012072, |
|
"learning_rate": 4.647445941711062e-07, |
|
"logits/chosen": -8.875934600830078, |
|
"logits/rejected": -8.860316276550293, |
|
"logps/chosen": -234.2982635498047, |
|
"logps/rejected": -293.39727783203125, |
|
"loss": 107204.65, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0077833631075918674, |
|
"rewards/margins": 0.061719853430986404, |
|
"rewards/rejected": -0.05393648147583008, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.9873417721518987, |
|
"grad_norm": 1605115.356703113, |
|
"learning_rate": 4.631776872453776e-07, |
|
"logits/chosen": -8.788633346557617, |
|
"logits/rejected": -8.637460708618164, |
|
"logps/chosen": -257.7025146484375, |
|
"logps/rejected": -303.82147216796875, |
|
"loss": 108959.225, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0057020229287445545, |
|
"rewards/margins": 0.053022872656583786, |
|
"rewards/rejected": -0.04732084274291992, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.037974683544304, |
|
"grad_norm": 1435515.2852262415, |
|
"learning_rate": 4.61610780319649e-07, |
|
"logits/chosen": -7.956998348236084, |
|
"logits/rejected": -7.496169090270996, |
|
"logps/chosen": -219.92410278320312, |
|
"logps/rejected": -310.20123291015625, |
|
"loss": 95986.4875, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.016220271587371826, |
|
"rewards/margins": 0.09167212247848511, |
|
"rewards/rejected": -0.07545184344053268, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.088607594936709, |
|
"grad_norm": 1646011.901841717, |
|
"learning_rate": 4.6004387339392035e-07, |
|
"logits/chosen": -7.747580051422119, |
|
"logits/rejected": -7.5227952003479, |
|
"logps/chosen": -217.8295440673828, |
|
"logps/rejected": -343.4312438964844, |
|
"loss": 91538.925, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.02667585015296936, |
|
"rewards/margins": 0.12547221779823303, |
|
"rewards/rejected": -0.09879636764526367, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.1392405063291138, |
|
"grad_norm": 1631989.4144731541, |
|
"learning_rate": 4.5847696646819176e-07, |
|
"logits/chosen": -6.8127121925354, |
|
"logits/rejected": -6.8090972900390625, |
|
"logps/chosen": -209.46859741210938, |
|
"logps/rejected": -332.0594482421875, |
|
"loss": 92242.9, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.026208167895674706, |
|
"rewards/margins": 0.12268342822790146, |
|
"rewards/rejected": -0.0964752584695816, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.189873417721519, |
|
"grad_norm": 1627589.9925143481, |
|
"learning_rate": 4.569100595424631e-07, |
|
"logits/chosen": -6.631221771240234, |
|
"logits/rejected": -6.502354621887207, |
|
"logps/chosen": -211.57974243164062, |
|
"logps/rejected": -333.447265625, |
|
"loss": 89921.25, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.022689208388328552, |
|
"rewards/margins": 0.12395058572292328, |
|
"rewards/rejected": -0.10126137733459473, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.240506329113924, |
|
"grad_norm": 1780107.5787213328, |
|
"learning_rate": 4.5534315261673453e-07, |
|
"logits/chosen": -7.868208885192871, |
|
"logits/rejected": -7.755393981933594, |
|
"logps/chosen": -209.3970184326172, |
|
"logps/rejected": -341.9508056640625, |
|
"loss": 89608.1875, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.027028566226363182, |
|
"rewards/margins": 0.133165642619133, |
|
"rewards/rejected": -0.10613708198070526, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.291139240506329, |
|
"grad_norm": 1730512.4518714033, |
|
"learning_rate": 4.5377624569100595e-07, |
|
"logits/chosen": -7.359053134918213, |
|
"logits/rejected": -7.324367523193359, |
|
"logps/chosen": -193.1954803466797, |
|
"logps/rejected": -309.5513610839844, |
|
"loss": 93257.225, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028996175155043602, |
|
"rewards/margins": 0.11760006099939346, |
|
"rewards/rejected": -0.08860386908054352, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.3417721518987342, |
|
"grad_norm": 1692816.769511115, |
|
"learning_rate": 4.5220933876527736e-07, |
|
"logits/chosen": -8.043203353881836, |
|
"logits/rejected": -8.003018379211426, |
|
"logps/chosen": -211.73648071289062, |
|
"logps/rejected": -336.10455322265625, |
|
"loss": 88400.4688, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.024640550836920738, |
|
"rewards/margins": 0.12655004858970642, |
|
"rewards/rejected": -0.10190950334072113, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.392405063291139, |
|
"grad_norm": 1906377.7496358757, |
|
"learning_rate": 4.506424318395487e-07, |
|
"logits/chosen": -7.25619649887085, |
|
"logits/rejected": -7.37869119644165, |
|
"logps/chosen": -197.8258819580078, |
|
"logps/rejected": -324.2138671875, |
|
"loss": 89983.5688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.026263948529958725, |
|
"rewards/margins": 0.12702925503253937, |
|
"rewards/rejected": -0.10076530277729034, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.4430379746835444, |
|
"grad_norm": 1785643.0594316572, |
|
"learning_rate": 4.4907552491382013e-07, |
|
"logits/chosen": -6.798577785491943, |
|
"logits/rejected": -6.7768073081970215, |
|
"logps/chosen": -208.5835723876953, |
|
"logps/rejected": -323.3017883300781, |
|
"loss": 89767.5, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.025741413235664368, |
|
"rewards/margins": 0.1167701929807663, |
|
"rewards/rejected": -0.09102877229452133, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.4936708860759493, |
|
"grad_norm": 2393957.296937455, |
|
"learning_rate": 4.475086179880915e-07, |
|
"logits/chosen": -6.352355480194092, |
|
"logits/rejected": -6.526197910308838, |
|
"logps/chosen": -187.56597900390625, |
|
"logps/rejected": -306.5972595214844, |
|
"loss": 89036.6875, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.024061182513833046, |
|
"rewards/margins": 0.11990946531295776, |
|
"rewards/rejected": -0.09584827721118927, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.5443037974683547, |
|
"grad_norm": 1811486.2204670438, |
|
"learning_rate": 4.459417110623629e-07, |
|
"logits/chosen": -5.7466630935668945, |
|
"logits/rejected": -5.797163486480713, |
|
"logps/chosen": -212.6585235595703, |
|
"logps/rejected": -364.36199951171875, |
|
"loss": 88031.3, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.027677077800035477, |
|
"rewards/margins": 0.14764061570167542, |
|
"rewards/rejected": -0.11996352672576904, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.5949367088607596, |
|
"grad_norm": 1724684.5755440604, |
|
"learning_rate": 4.4437480413663426e-07, |
|
"logits/chosen": -5.412962436676025, |
|
"logits/rejected": -5.541121959686279, |
|
"logps/chosen": -202.39065551757812, |
|
"logps/rejected": -333.0758056640625, |
|
"loss": 86956.675, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0247800350189209, |
|
"rewards/margins": 0.12825721502304077, |
|
"rewards/rejected": -0.10347716510295868, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.6455696202531644, |
|
"grad_norm": 1933271.7611355048, |
|
"learning_rate": 4.4280789721090567e-07, |
|
"logits/chosen": -5.053005218505859, |
|
"logits/rejected": -4.886711597442627, |
|
"logps/chosen": -199.10885620117188, |
|
"logps/rejected": -317.7257385253906, |
|
"loss": 86655.0125, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.02152046002447605, |
|
"rewards/margins": 0.11774978786706924, |
|
"rewards/rejected": -0.09622932970523834, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.6962025316455698, |
|
"grad_norm": 2267463.489494214, |
|
"learning_rate": 4.4124099028517703e-07, |
|
"logits/chosen": -6.616279602050781, |
|
"logits/rejected": -6.9615797996521, |
|
"logps/chosen": -200.58961486816406, |
|
"logps/rejected": -351.6376953125, |
|
"loss": 86181.3938, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.032253801822662354, |
|
"rewards/margins": 0.14937567710876465, |
|
"rewards/rejected": -0.1171218603849411, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.7468354430379747, |
|
"grad_norm": 1734288.0953653858, |
|
"learning_rate": 4.3967408335944844e-07, |
|
"logits/chosen": -5.873335361480713, |
|
"logits/rejected": -5.689335823059082, |
|
"logps/chosen": -217.43637084960938, |
|
"logps/rejected": -350.2752990722656, |
|
"loss": 86780.825, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.031159091740846634, |
|
"rewards/margins": 0.13692796230316162, |
|
"rewards/rejected": -0.10576887428760529, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.7974683544303796, |
|
"grad_norm": 1741715.9901586007, |
|
"learning_rate": 4.381071764337198e-07, |
|
"logits/chosen": -7.123785972595215, |
|
"logits/rejected": -7.188807487487793, |
|
"logps/chosen": -207.00045776367188, |
|
"logps/rejected": -336.5976867675781, |
|
"loss": 86139.5625, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.03052128478884697, |
|
"rewards/margins": 0.13043463230133057, |
|
"rewards/rejected": -0.0999133437871933, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.848101265822785, |
|
"grad_norm": 1879351.8394690978, |
|
"learning_rate": 4.365402695079912e-07, |
|
"logits/chosen": -7.820990085601807, |
|
"logits/rejected": -7.7128729820251465, |
|
"logps/chosen": -213.57388305664062, |
|
"logps/rejected": -362.5634460449219, |
|
"loss": 87478.3625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.03660900145769119, |
|
"rewards/margins": 0.1480773240327835, |
|
"rewards/rejected": -0.11146833002567291, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.8987341772151898, |
|
"grad_norm": 1968713.4204386624, |
|
"learning_rate": 4.349733625822626e-07, |
|
"logits/chosen": -7.314540863037109, |
|
"logits/rejected": -7.363668918609619, |
|
"logps/chosen": -213.6930694580078, |
|
"logps/rejected": -367.44073486328125, |
|
"loss": 86825.5813, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.026752913370728493, |
|
"rewards/margins": 0.15061405301094055, |
|
"rewards/rejected": -0.1238611489534378, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.9493670886075947, |
|
"grad_norm": 2163439.406665409, |
|
"learning_rate": 4.33406455656534e-07, |
|
"logits/chosen": -7.67099666595459, |
|
"logits/rejected": -7.536408424377441, |
|
"logps/chosen": -213.9747772216797, |
|
"logps/rejected": -344.7560119628906, |
|
"loss": 86913.0375, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.029844319447875023, |
|
"rewards/margins": 0.12930825352668762, |
|
"rewards/rejected": -0.09946390986442566, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1866234.1823014135, |
|
"learning_rate": 4.3183954873080535e-07, |
|
"logits/chosen": -7.922532081604004, |
|
"logits/rejected": -7.692726135253906, |
|
"logps/chosen": -211.41653442382812, |
|
"logps/rejected": -349.7116394042969, |
|
"loss": 86592.8938, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.027728911489248276, |
|
"rewards/margins": 0.1435452550649643, |
|
"rewards/rejected": -0.11581633985042572, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.050632911392405, |
|
"grad_norm": 1782853.8797277175, |
|
"learning_rate": 4.3027264180507676e-07, |
|
"logits/chosen": -8.29829216003418, |
|
"logits/rejected": -8.205643653869629, |
|
"logps/chosen": -178.8797149658203, |
|
"logps/rejected": -378.06121826171875, |
|
"loss": 69143.425, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.05098045617341995, |
|
"rewards/margins": 0.1993386447429657, |
|
"rewards/rejected": -0.14835818111896515, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.10126582278481, |
|
"grad_norm": 1719472.9461235409, |
|
"learning_rate": 4.287057348793481e-07, |
|
"logits/chosen": -7.558290958404541, |
|
"logits/rejected": -7.646592617034912, |
|
"logps/chosen": -186.36911010742188, |
|
"logps/rejected": -386.6961975097656, |
|
"loss": 67634.3375, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.04189852252602577, |
|
"rewards/margins": 0.19968575239181519, |
|
"rewards/rejected": -0.1577872335910797, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.151898734177215, |
|
"grad_norm": 1571399.8942716653, |
|
"learning_rate": 4.2713882795361953e-07, |
|
"logits/chosen": -7.811161994934082, |
|
"logits/rejected": -7.783130645751953, |
|
"logps/chosen": -181.81602478027344, |
|
"logps/rejected": -402.1683654785156, |
|
"loss": 66806.9187, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.049001529812812805, |
|
"rewards/margins": 0.21849961578845978, |
|
"rewards/rejected": -0.16949808597564697, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.2025316455696204, |
|
"grad_norm": 1992030.3917670588, |
|
"learning_rate": 4.255719210278909e-07, |
|
"logits/chosen": -7.349759101867676, |
|
"logits/rejected": -7.380797386169434, |
|
"logps/chosen": -175.21702575683594, |
|
"logps/rejected": -396.2167053222656, |
|
"loss": 67021.875, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.05283821374177933, |
|
"rewards/margins": 0.22190704941749573, |
|
"rewards/rejected": -0.169068843126297, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.253164556962025, |
|
"grad_norm": 1859879.670487208, |
|
"learning_rate": 4.2400501410216235e-07, |
|
"logits/chosen": -7.482248783111572, |
|
"logits/rejected": -7.252910614013672, |
|
"logps/chosen": -187.070556640625, |
|
"logps/rejected": -401.1556701660156, |
|
"loss": 68463.9, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.05697192624211311, |
|
"rewards/margins": 0.21645841002464294, |
|
"rewards/rejected": -0.15948647260665894, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.30379746835443, |
|
"grad_norm": 1688181.1410657803, |
|
"learning_rate": 4.224381071764337e-07, |
|
"logits/chosen": -5.693742275238037, |
|
"logits/rejected": -5.435591697692871, |
|
"logps/chosen": -198.21900939941406, |
|
"logps/rejected": -398.49981689453125, |
|
"loss": 67266.2, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.04546400159597397, |
|
"rewards/margins": 0.20465342700481415, |
|
"rewards/rejected": -0.15918943285942078, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.3544303797468356, |
|
"grad_norm": 1750431.6432656392, |
|
"learning_rate": 4.208712002507051e-07, |
|
"logits/chosen": -8.664016723632812, |
|
"logits/rejected": -8.082508087158203, |
|
"logps/chosen": -178.05966186523438, |
|
"logps/rejected": -402.77093505859375, |
|
"loss": 65760.2625, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.056066203862428665, |
|
"rewards/margins": 0.22950176894664764, |
|
"rewards/rejected": -0.17343556880950928, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.405063291139241, |
|
"grad_norm": 1904336.610304837, |
|
"learning_rate": 4.193042933249765e-07, |
|
"logits/chosen": -5.778517723083496, |
|
"logits/rejected": -5.432709693908691, |
|
"logps/chosen": -176.563720703125, |
|
"logps/rejected": -379.2276916503906, |
|
"loss": 67058.1125, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.05091014504432678, |
|
"rewards/margins": 0.2058809995651245, |
|
"rewards/rejected": -0.15497085452079773, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.455696202531645, |
|
"grad_norm": 1779397.1811982268, |
|
"learning_rate": 4.177373863992479e-07, |
|
"logits/chosen": -6.937778472900391, |
|
"logits/rejected": -6.611588954925537, |
|
"logps/chosen": -180.23001098632812, |
|
"logps/rejected": -400.9800720214844, |
|
"loss": 67019.0875, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.05085798352956772, |
|
"rewards/margins": 0.2235671728849411, |
|
"rewards/rejected": -0.17270918190479279, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.506329113924051, |
|
"grad_norm": 1755630.994265544, |
|
"learning_rate": 4.1617047947351925e-07, |
|
"logits/chosen": -6.663479804992676, |
|
"logits/rejected": -6.144991397857666, |
|
"logps/chosen": -189.93707275390625, |
|
"logps/rejected": -383.9622802734375, |
|
"loss": 66060.8813, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.053109876811504364, |
|
"rewards/margins": 0.20497091114521027, |
|
"rewards/rejected": -0.1518610268831253, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.556962025316456, |
|
"grad_norm": 1729683.010514938, |
|
"learning_rate": 4.1460357254779067e-07, |
|
"logits/chosen": -7.10635232925415, |
|
"logits/rejected": -7.227837562561035, |
|
"logps/chosen": -184.3021240234375, |
|
"logps/rejected": -391.59930419921875, |
|
"loss": 67231.6313, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.050502438098192215, |
|
"rewards/margins": 0.20674797892570496, |
|
"rewards/rejected": -0.15624557435512543, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.6075949367088604, |
|
"grad_norm": 1921064.671845176, |
|
"learning_rate": 4.13036665622062e-07, |
|
"logits/chosen": -7.409733772277832, |
|
"logits/rejected": -7.2668256759643555, |
|
"logps/chosen": -184.89645385742188, |
|
"logps/rejected": -395.2364501953125, |
|
"loss": 67370.1875, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.047733135521411896, |
|
"rewards/margins": 0.2108074128627777, |
|
"rewards/rejected": -0.1630742847919464, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.658227848101266, |
|
"grad_norm": 1780170.6356310213, |
|
"learning_rate": 4.1146975869633344e-07, |
|
"logits/chosen": -8.294339179992676, |
|
"logits/rejected": -8.312765121459961, |
|
"logps/chosen": -185.74949645996094, |
|
"logps/rejected": -405.0606689453125, |
|
"loss": 64484.2438, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.05801473185420036, |
|
"rewards/margins": 0.21365991234779358, |
|
"rewards/rejected": -0.15564517676830292, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.708860759493671, |
|
"grad_norm": 1755118.627079852, |
|
"learning_rate": 4.099028517706048e-07, |
|
"logits/chosen": -8.692441940307617, |
|
"logits/rejected": -8.729148864746094, |
|
"logps/chosen": -177.8703155517578, |
|
"logps/rejected": -410.15179443359375, |
|
"loss": 65960.6812, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.061922211199998856, |
|
"rewards/margins": 0.2333444058895111, |
|
"rewards/rejected": -0.17142215371131897, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.759493670886076, |
|
"grad_norm": 1801666.0452341542, |
|
"learning_rate": 4.083359448448762e-07, |
|
"logits/chosen": -8.838138580322266, |
|
"logits/rejected": -8.679426193237305, |
|
"logps/chosen": -160.35488891601562, |
|
"logps/rejected": -387.3427429199219, |
|
"loss": 65957.3, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.061734091490507126, |
|
"rewards/margins": 0.2303626835346222, |
|
"rewards/rejected": -0.16862855851650238, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.810126582278481, |
|
"grad_norm": 1823914.1164093877, |
|
"learning_rate": 4.0676903791914757e-07, |
|
"logits/chosen": -8.039133071899414, |
|
"logits/rejected": -8.235550880432129, |
|
"logps/chosen": -181.90818786621094, |
|
"logps/rejected": -390.46075439453125, |
|
"loss": 65100.0437, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.05453425645828247, |
|
"rewards/margins": 0.20622405409812927, |
|
"rewards/rejected": -0.1516897976398468, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.860759493670886, |
|
"grad_norm": 2552504.752187401, |
|
"learning_rate": 4.05202130993419e-07, |
|
"logits/chosen": -8.228861808776855, |
|
"logits/rejected": -8.044200897216797, |
|
"logps/chosen": -175.62306213378906, |
|
"logps/rejected": -387.7801818847656, |
|
"loss": 65251.5563, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.05643890053033829, |
|
"rewards/margins": 0.2162017822265625, |
|
"rewards/rejected": -0.15976287424564362, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.911392405063291, |
|
"grad_norm": 2112562.829549655, |
|
"learning_rate": 4.0363522406769034e-07, |
|
"logits/chosen": -8.678482055664062, |
|
"logits/rejected": -8.680012702941895, |
|
"logps/chosen": -180.9581298828125, |
|
"logps/rejected": -402.48944091796875, |
|
"loss": 65731.7188, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.05988938361406326, |
|
"rewards/margins": 0.22270476818084717, |
|
"rewards/rejected": -0.1628153920173645, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.962025316455696, |
|
"grad_norm": 1800725.2761679955, |
|
"learning_rate": 4.0206831714196175e-07, |
|
"logits/chosen": -9.068916320800781, |
|
"logits/rejected": -8.908533096313477, |
|
"logps/chosen": -191.30018615722656, |
|
"logps/rejected": -433.2850036621094, |
|
"loss": 64987.5125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.0664498582482338, |
|
"rewards/margins": 0.24509286880493164, |
|
"rewards/rejected": -0.17864301800727844, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.012658227848101, |
|
"grad_norm": 1442340.8531233447, |
|
"learning_rate": 4.005014102162331e-07, |
|
"logits/chosen": -7.928460121154785, |
|
"logits/rejected": -7.941502571105957, |
|
"logps/chosen": -175.59664916992188, |
|
"logps/rejected": -406.7601623535156, |
|
"loss": 62010.275, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.06751301139593124, |
|
"rewards/margins": 0.23539571464061737, |
|
"rewards/rejected": -0.16788268089294434, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.063291139240507, |
|
"grad_norm": 1557498.8859861568, |
|
"learning_rate": 3.989345032905045e-07, |
|
"logits/chosen": -7.7452850341796875, |
|
"logits/rejected": -8.02453899383545, |
|
"logps/chosen": -154.46292114257812, |
|
"logps/rejected": -469.1910095214844, |
|
"loss": 49347.1687, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08384937047958374, |
|
"rewards/margins": 0.31221631169319153, |
|
"rewards/rejected": -0.2283669412136078, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.113924050632911, |
|
"grad_norm": 1581238.5613807905, |
|
"learning_rate": 3.973675963647759e-07, |
|
"logits/chosen": -7.881131649017334, |
|
"logits/rejected": -7.651412010192871, |
|
"logps/chosen": -169.71153259277344, |
|
"logps/rejected": -476.58477783203125, |
|
"loss": 49390.7562, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.08512581884860992, |
|
"rewards/margins": 0.3120972514152527, |
|
"rewards/rejected": -0.22697141766548157, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.1645569620253164, |
|
"grad_norm": 1497324.3970905554, |
|
"learning_rate": 3.958006894390473e-07, |
|
"logits/chosen": -6.736274719238281, |
|
"logits/rejected": -6.750421047210693, |
|
"logps/chosen": -151.04129028320312, |
|
"logps/rejected": -459.47808837890625, |
|
"loss": 49656.7812, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.07378469407558441, |
|
"rewards/margins": 0.3127291798591614, |
|
"rewards/rejected": -0.23894445598125458, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.215189873417722, |
|
"grad_norm": 1898671.7222835466, |
|
"learning_rate": 3.942337825133187e-07, |
|
"logits/chosen": -7.030360221862793, |
|
"logits/rejected": -6.9101104736328125, |
|
"logps/chosen": -168.35183715820312, |
|
"logps/rejected": -469.60235595703125, |
|
"loss": 49247.5312, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08571706712245941, |
|
"rewards/margins": 0.3044472634792328, |
|
"rewards/rejected": -0.21873018145561218, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.265822784810126, |
|
"grad_norm": 1859831.3291458376, |
|
"learning_rate": 3.926668755875901e-07, |
|
"logits/chosen": -6.842263698577881, |
|
"logits/rejected": -6.943556308746338, |
|
"logps/chosen": -153.25328063964844, |
|
"logps/rejected": -473.513427734375, |
|
"loss": 51145.4938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08420612663030624, |
|
"rewards/margins": 0.3194884657859802, |
|
"rewards/rejected": -0.235282301902771, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.3164556962025316, |
|
"grad_norm": 1855378.6614461695, |
|
"learning_rate": 3.910999686618615e-07, |
|
"logits/chosen": -7.331165313720703, |
|
"logits/rejected": -7.468164920806885, |
|
"logps/chosen": -162.1797637939453, |
|
"logps/rejected": -474.08074951171875, |
|
"loss": 50799.1687, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0886077731847763, |
|
"rewards/margins": 0.31340503692626953, |
|
"rewards/rejected": -0.22479727864265442, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.367088607594937, |
|
"grad_norm": 1600231.8694471747, |
|
"learning_rate": 3.895330617361329e-07, |
|
"logits/chosen": -7.2842841148376465, |
|
"logits/rejected": -7.146345615386963, |
|
"logps/chosen": -140.54055786132812, |
|
"logps/rejected": -446.4241638183594, |
|
"loss": 49384.9875, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.08458932489156723, |
|
"rewards/margins": 0.3061215877532959, |
|
"rewards/rejected": -0.22153222560882568, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.417721518987342, |
|
"grad_norm": 1820648.707460815, |
|
"learning_rate": 3.8796615481040425e-07, |
|
"logits/chosen": -7.4867706298828125, |
|
"logits/rejected": -7.318013668060303, |
|
"logps/chosen": -162.54937744140625, |
|
"logps/rejected": -469.13433837890625, |
|
"loss": 48744.0469, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.0876765102148056, |
|
"rewards/margins": 0.31078898906707764, |
|
"rewards/rejected": -0.22311246395111084, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.468354430379747, |
|
"grad_norm": 1629981.2772913359, |
|
"learning_rate": 3.8639924788467566e-07, |
|
"logits/chosen": -8.141877174377441, |
|
"logits/rejected": -7.992497444152832, |
|
"logps/chosen": -151.8604736328125, |
|
"logps/rejected": -496.25201416015625, |
|
"loss": 46868.6719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09172078222036362, |
|
"rewards/margins": 0.3495192527770996, |
|
"rewards/rejected": -0.257798433303833, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.518987341772152, |
|
"grad_norm": 1843259.5793917184, |
|
"learning_rate": 3.84832340958947e-07, |
|
"logits/chosen": -7.577700614929199, |
|
"logits/rejected": -7.340989589691162, |
|
"logps/chosen": -152.68710327148438, |
|
"logps/rejected": -466.3287048339844, |
|
"loss": 48765.2375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08904045075178146, |
|
"rewards/margins": 0.31981557607650757, |
|
"rewards/rejected": -0.2307751476764679, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.569620253164557, |
|
"grad_norm": 1848670.003471961, |
|
"learning_rate": 3.8326543403321843e-07, |
|
"logits/chosen": -5.992789268493652, |
|
"logits/rejected": -5.831528663635254, |
|
"logps/chosen": -131.7107696533203, |
|
"logps/rejected": -433.0040588378906, |
|
"loss": 48441.2188, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.08974520117044449, |
|
"rewards/margins": 0.2995590269565582, |
|
"rewards/rejected": -0.20981380343437195, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.620253164556962, |
|
"grad_norm": 1834994.3527284127, |
|
"learning_rate": 3.816985271074898e-07, |
|
"logits/chosen": -6.8782501220703125, |
|
"logits/rejected": -7.123211860656738, |
|
"logps/chosen": -143.1776885986328, |
|
"logps/rejected": -439.9363708496094, |
|
"loss": 50301.1625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.0787430927157402, |
|
"rewards/margins": 0.29441121220588684, |
|
"rewards/rejected": -0.21566812694072723, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.670886075949367, |
|
"grad_norm": 2055858.9168272892, |
|
"learning_rate": 3.801316201817612e-07, |
|
"logits/chosen": -7.6317338943481445, |
|
"logits/rejected": -7.619107723236084, |
|
"logps/chosen": -152.3334503173828, |
|
"logps/rejected": -453.30120849609375, |
|
"loss": 49359.2312, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.0867711529135704, |
|
"rewards/margins": 0.2968466281890869, |
|
"rewards/rejected": -0.2100754976272583, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.7215189873417724, |
|
"grad_norm": 1760917.726879333, |
|
"learning_rate": 3.7856471325603256e-07, |
|
"logits/chosen": -6.669379234313965, |
|
"logits/rejected": -6.568717002868652, |
|
"logps/chosen": -152.34774780273438, |
|
"logps/rejected": -439.8075256347656, |
|
"loss": 48808.2812, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08005286753177643, |
|
"rewards/margins": 0.28860196471214294, |
|
"rewards/rejected": -0.20854909718036652, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.772151898734177, |
|
"grad_norm": 1793917.574084858, |
|
"learning_rate": 3.76997806330304e-07, |
|
"logits/chosen": -7.020206451416016, |
|
"logits/rejected": -6.4513840675354, |
|
"logps/chosen": -126.99436950683594, |
|
"logps/rejected": -429.0069274902344, |
|
"loss": 48991.9938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08981131762266159, |
|
"rewards/margins": 0.3046417832374573, |
|
"rewards/rejected": -0.21483047306537628, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.822784810126582, |
|
"grad_norm": 1856995.4726512374, |
|
"learning_rate": 3.7543089940457533e-07, |
|
"logits/chosen": -7.1540846824646, |
|
"logits/rejected": -7.103608131408691, |
|
"logps/chosen": -150.0362548828125, |
|
"logps/rejected": -459.3680114746094, |
|
"loss": 45240.3094, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.08858338743448257, |
|
"rewards/margins": 0.3066866397857666, |
|
"rewards/rejected": -0.21810325980186462, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.8734177215189876, |
|
"grad_norm": 2252812.5376150296, |
|
"learning_rate": 3.7386399247884675e-07, |
|
"logits/chosen": -6.23285436630249, |
|
"logits/rejected": -5.795694351196289, |
|
"logps/chosen": -145.6466827392578, |
|
"logps/rejected": -485.41229248046875, |
|
"loss": 46892.1625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09205026924610138, |
|
"rewards/margins": 0.34098342061042786, |
|
"rewards/rejected": -0.24893316626548767, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.924050632911392, |
|
"grad_norm": 1669143.1623524264, |
|
"learning_rate": 3.722970855531181e-07, |
|
"logits/chosen": -7.314904689788818, |
|
"logits/rejected": -7.455816745758057, |
|
"logps/chosen": -133.58151245117188, |
|
"logps/rejected": -482.9154357910156, |
|
"loss": 46493.0938, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.09256922453641891, |
|
"rewards/margins": 0.34824666380882263, |
|
"rewards/rejected": -0.2556774616241455, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.974683544303797, |
|
"grad_norm": 1914279.6891733713, |
|
"learning_rate": 3.707301786273895e-07, |
|
"logits/chosen": -6.429854393005371, |
|
"logits/rejected": -5.985020160675049, |
|
"logps/chosen": -142.39651489257812, |
|
"logps/rejected": -442.7286682128906, |
|
"loss": 47640.0813, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.08776311576366425, |
|
"rewards/margins": 0.30018630623817444, |
|
"rewards/rejected": -0.2124231606721878, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.025316455696203, |
|
"grad_norm": 1287103.6124582873, |
|
"learning_rate": 3.691632717016609e-07, |
|
"logits/chosen": -6.58931827545166, |
|
"logits/rejected": -6.494097709655762, |
|
"logps/chosen": -136.68003845214844, |
|
"logps/rejected": -493.61822509765625, |
|
"loss": 41587.3125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.10335598886013031, |
|
"rewards/margins": 0.36172229051589966, |
|
"rewards/rejected": -0.25836625695228577, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.075949367088608, |
|
"grad_norm": 1654691.3160849167, |
|
"learning_rate": 3.675963647759323e-07, |
|
"logits/chosen": -5.342609882354736, |
|
"logits/rejected": -5.393660545349121, |
|
"logps/chosen": -116.93675231933594, |
|
"logps/rejected": -476.22833251953125, |
|
"loss": 38118.9437, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.10985767841339111, |
|
"rewards/margins": 0.3632175922393799, |
|
"rewards/rejected": -0.25335997343063354, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.1265822784810124, |
|
"grad_norm": 1390108.9081190277, |
|
"learning_rate": 3.6602945785020365e-07, |
|
"logits/chosen": -5.185478687286377, |
|
"logits/rejected": -4.843894958496094, |
|
"logps/chosen": -128.81143188476562, |
|
"logps/rejected": -519.8304443359375, |
|
"loss": 36511.2875, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1154375821352005, |
|
"rewards/margins": 0.3926604092121124, |
|
"rewards/rejected": -0.27722278237342834, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.177215189873418, |
|
"grad_norm": 1502780.5568957475, |
|
"learning_rate": 3.644625509244751e-07, |
|
"logits/chosen": -4.163270473480225, |
|
"logits/rejected": -3.8083653450012207, |
|
"logps/chosen": -120.57966613769531, |
|
"logps/rejected": -497.63226318359375, |
|
"loss": 37966.2937, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11406160891056061, |
|
"rewards/margins": 0.37608999013900757, |
|
"rewards/rejected": -0.2620283365249634, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.227848101265823, |
|
"grad_norm": 1846607.9980803088, |
|
"learning_rate": 3.6289564399874647e-07, |
|
"logits/chosen": -4.317009925842285, |
|
"logits/rejected": -4.062619209289551, |
|
"logps/chosen": -112.0468521118164, |
|
"logps/rejected": -490.73974609375, |
|
"loss": 36750.4688, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.11853437125682831, |
|
"rewards/margins": 0.37694281339645386, |
|
"rewards/rejected": -0.25840842723846436, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.2784810126582276, |
|
"grad_norm": 1432477.9223833755, |
|
"learning_rate": 3.613287370730179e-07, |
|
"logits/chosen": -4.580340385437012, |
|
"logits/rejected": -4.493284225463867, |
|
"logps/chosen": -123.97422790527344, |
|
"logps/rejected": -509.47076416015625, |
|
"loss": 37540.4875, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.11854572594165802, |
|
"rewards/margins": 0.38835546374320984, |
|
"rewards/rejected": -0.2698097229003906, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.329113924050633, |
|
"grad_norm": 1551602.6793086384, |
|
"learning_rate": 3.5976183014728924e-07, |
|
"logits/chosen": -3.541313886642456, |
|
"logits/rejected": -3.6754157543182373, |
|
"logps/chosen": -120.3751220703125, |
|
"logps/rejected": -483.46221923828125, |
|
"loss": 35927.6062, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.10841184854507446, |
|
"rewards/margins": 0.3652178645133972, |
|
"rewards/rejected": -0.25680604577064514, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.379746835443038, |
|
"grad_norm": 1628016.050343189, |
|
"learning_rate": 3.5819492322156066e-07, |
|
"logits/chosen": -3.570946216583252, |
|
"logits/rejected": -3.6950716972351074, |
|
"logps/chosen": -134.7080535888672, |
|
"logps/rejected": -500.80108642578125, |
|
"loss": 36467.1375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1038375124335289, |
|
"rewards/margins": 0.36301389336586, |
|
"rewards/rejected": -0.2591763734817505, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.430379746835443, |
|
"grad_norm": 1416336.114974791, |
|
"learning_rate": 3.56628016295832e-07, |
|
"logits/chosen": -2.9958808422088623, |
|
"logits/rejected": -3.158600330352783, |
|
"logps/chosen": -120.319580078125, |
|
"logps/rejected": -493.46075439453125, |
|
"loss": 35704.05, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.11720545589923859, |
|
"rewards/margins": 0.3729427754878998, |
|
"rewards/rejected": -0.2557373046875, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.481012658227848, |
|
"grad_norm": 1429276.465119334, |
|
"learning_rate": 3.5506110937010343e-07, |
|
"logits/chosen": -5.23915958404541, |
|
"logits/rejected": -5.513189792633057, |
|
"logps/chosen": -106.6229476928711, |
|
"logps/rejected": -512.9346923828125, |
|
"loss": 37476.4688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1187194362282753, |
|
"rewards/margins": 0.4039131700992584, |
|
"rewards/rejected": -0.2851937413215637, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.531645569620253, |
|
"grad_norm": 1838991.6289765981, |
|
"learning_rate": 3.534942024443748e-07, |
|
"logits/chosen": -3.1320407390594482, |
|
"logits/rejected": -3.531493663787842, |
|
"logps/chosen": -114.69315338134766, |
|
"logps/rejected": -521.70458984375, |
|
"loss": 37236.3688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12156815826892853, |
|
"rewards/margins": 0.39552414417266846, |
|
"rewards/rejected": -0.2739560008049011, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.582278481012658, |
|
"grad_norm": 1965294.5428377022, |
|
"learning_rate": 3.519272955186462e-07, |
|
"logits/chosen": -3.1404528617858887, |
|
"logits/rejected": -3.159364938735962, |
|
"logps/chosen": -108.1359634399414, |
|
"logps/rejected": -441.573486328125, |
|
"loss": 35760.8688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.10433737188577652, |
|
"rewards/margins": 0.3334364593029022, |
|
"rewards/rejected": -0.2290991097688675, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.632911392405063, |
|
"grad_norm": 1744782.725381992, |
|
"learning_rate": 3.5036038859291756e-07, |
|
"logits/chosen": -5.149240970611572, |
|
"logits/rejected": -4.872938632965088, |
|
"logps/chosen": -110.17635345458984, |
|
"logps/rejected": -462.6591796875, |
|
"loss": 38854.3313, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.10797703266143799, |
|
"rewards/margins": 0.35402077436447144, |
|
"rewards/rejected": -0.24604372680187225, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.6835443037974684, |
|
"grad_norm": 1449584.094036676, |
|
"learning_rate": 3.4879348166718897e-07, |
|
"logits/chosen": -5.302030086517334, |
|
"logits/rejected": -5.005532264709473, |
|
"logps/chosen": -114.39412689208984, |
|
"logps/rejected": -497.2879943847656, |
|
"loss": 37031.9281, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.11388063430786133, |
|
"rewards/margins": 0.38410684466362, |
|
"rewards/rejected": -0.27022621035575867, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.734177215189874, |
|
"grad_norm": 1655726.3529691189, |
|
"learning_rate": 3.4722657474146033e-07, |
|
"logits/chosen": -5.846579074859619, |
|
"logits/rejected": -5.164810657501221, |
|
"logps/chosen": -122.16035461425781, |
|
"logps/rejected": -490.97503662109375, |
|
"loss": 35881.3438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11242518573999405, |
|
"rewards/margins": 0.3698340058326721, |
|
"rewards/rejected": -0.2574087679386139, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.784810126582278, |
|
"grad_norm": 1473850.8586688952, |
|
"learning_rate": 3.4565966781573174e-07, |
|
"logits/chosen": -6.604684352874756, |
|
"logits/rejected": -6.540472984313965, |
|
"logps/chosen": -141.56655883789062, |
|
"logps/rejected": -504.536865234375, |
|
"loss": 35791.1937, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.11088699102401733, |
|
"rewards/margins": 0.36103492975234985, |
|
"rewards/rejected": -0.2501479685306549, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.8354430379746836, |
|
"grad_norm": 1716575.4855753484, |
|
"learning_rate": 3.440927608900031e-07, |
|
"logits/chosen": -5.3845696449279785, |
|
"logits/rejected": -5.094508647918701, |
|
"logps/chosen": -126.5009536743164, |
|
"logps/rejected": -501.36407470703125, |
|
"loss": 36855.7281, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.12023582309484482, |
|
"rewards/margins": 0.3794700503349304, |
|
"rewards/rejected": -0.2592342793941498, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.886075949367089, |
|
"grad_norm": 1860603.9086510486, |
|
"learning_rate": 3.425258539642745e-07, |
|
"logits/chosen": -5.825100898742676, |
|
"logits/rejected": -5.165715217590332, |
|
"logps/chosen": -123.0651626586914, |
|
"logps/rejected": -519.5916748046875, |
|
"loss": 37158.7969, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11998645961284637, |
|
"rewards/margins": 0.40252119302749634, |
|
"rewards/rejected": -0.28253474831581116, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.936708860759493, |
|
"grad_norm": 1781429.39957367, |
|
"learning_rate": 3.4095894703854587e-07, |
|
"logits/chosen": -5.593798637390137, |
|
"logits/rejected": -5.400781631469727, |
|
"logps/chosen": -122.57585144042969, |
|
"logps/rejected": -500.21844482421875, |
|
"loss": 36281.8938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11947381496429443, |
|
"rewards/margins": 0.377518892288208, |
|
"rewards/rejected": -0.25804510712623596, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 6.987341772151899, |
|
"grad_norm": 1883344.192547866, |
|
"learning_rate": 3.393920401128173e-07, |
|
"logits/chosen": -5.272061347961426, |
|
"logits/rejected": -5.000374794006348, |
|
"logps/chosen": -109.66764831542969, |
|
"logps/rejected": -471.388916015625, |
|
"loss": 37081.4062, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.11030924320220947, |
|
"rewards/margins": 0.36379513144493103, |
|
"rewards/rejected": -0.25348588824272156, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.037974683544304, |
|
"grad_norm": 1158283.9951295503, |
|
"learning_rate": 3.3782513318708864e-07, |
|
"logits/chosen": -4.4635396003723145, |
|
"logits/rejected": -4.055373668670654, |
|
"logps/chosen": -126.25242614746094, |
|
"logps/rejected": -513.0021362304688, |
|
"loss": 32182.2562, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.12364669889211655, |
|
"rewards/margins": 0.39015716314315796, |
|
"rewards/rejected": -0.2665104568004608, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 7.0886075949367084, |
|
"grad_norm": 1635336.0000705447, |
|
"learning_rate": 3.3625822626136005e-07, |
|
"logits/chosen": -3.2711379528045654, |
|
"logits/rejected": -2.849708080291748, |
|
"logps/chosen": -120.3502426147461, |
|
"logps/rejected": -554.61669921875, |
|
"loss": 28154.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1400633156299591, |
|
"rewards/margins": 0.4437219500541687, |
|
"rewards/rejected": -0.3036586344242096, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.139240506329114, |
|
"grad_norm": 1478880.6175367055, |
|
"learning_rate": 3.346913193356314e-07, |
|
"logits/chosen": -1.498684048652649, |
|
"logits/rejected": -1.5719478130340576, |
|
"logps/chosen": -97.41731262207031, |
|
"logps/rejected": -528.29833984375, |
|
"loss": 30443.8531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13250485062599182, |
|
"rewards/margins": 0.4276755452156067, |
|
"rewards/rejected": -0.29517072439193726, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 7.189873417721519, |
|
"grad_norm": 1190966.9261622827, |
|
"learning_rate": 3.331244124099029e-07, |
|
"logits/chosen": -3.576815366744995, |
|
"logits/rejected": -3.1508662700653076, |
|
"logps/chosen": -92.4610595703125, |
|
"logps/rejected": -499.2225646972656, |
|
"loss": 30200.7656, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1342589408159256, |
|
"rewards/margins": 0.40714582800865173, |
|
"rewards/rejected": -0.2728869318962097, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 7.2405063291139244, |
|
"grad_norm": 1654460.4321586012, |
|
"learning_rate": 3.3155750548417424e-07, |
|
"logits/chosen": -3.6517982482910156, |
|
"logits/rejected": -2.912386894226074, |
|
"logps/chosen": -113.77073669433594, |
|
"logps/rejected": -548.2919921875, |
|
"loss": 29291.1719, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.13462531566619873, |
|
"rewards/margins": 0.435891717672348, |
|
"rewards/rejected": -0.3012663722038269, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 7.291139240506329, |
|
"grad_norm": 1547048.8074025025, |
|
"learning_rate": 3.2999059855844565e-07, |
|
"logits/chosen": -4.762998580932617, |
|
"logits/rejected": -4.417517185211182, |
|
"logps/chosen": -103.59019470214844, |
|
"logps/rejected": -516.0870361328125, |
|
"loss": 30597.95, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1253672093153, |
|
"rewards/margins": 0.4090943932533264, |
|
"rewards/rejected": -0.28372713923454285, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.341772151898734, |
|
"grad_norm": 1083334.846955902, |
|
"learning_rate": 3.28423691632717e-07, |
|
"logits/chosen": -4.341902732849121, |
|
"logits/rejected": -3.4809889793395996, |
|
"logps/chosen": -105.1113052368164, |
|
"logps/rejected": -537.7858276367188, |
|
"loss": 28933.9125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.139817476272583, |
|
"rewards/margins": 0.4371423125267029, |
|
"rewards/rejected": -0.2973248362541199, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.3924050632911396, |
|
"grad_norm": 1583721.4157786674, |
|
"learning_rate": 3.268567847069884e-07, |
|
"logits/chosen": -5.8856353759765625, |
|
"logits/rejected": -5.3746867179870605, |
|
"logps/chosen": -94.76522827148438, |
|
"logps/rejected": -525.3110961914062, |
|
"loss": 29575.7844, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.13582661747932434, |
|
"rewards/margins": 0.4354213774204254, |
|
"rewards/rejected": -0.29959478974342346, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.443037974683544, |
|
"grad_norm": 1391896.6733071958, |
|
"learning_rate": 3.252898777812598e-07, |
|
"logits/chosen": -3.2749342918395996, |
|
"logits/rejected": -3.6061177253723145, |
|
"logps/chosen": -99.21089172363281, |
|
"logps/rejected": -534.4422607421875, |
|
"loss": 29207.5719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1312985122203827, |
|
"rewards/margins": 0.433136522769928, |
|
"rewards/rejected": -0.3018379807472229, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.493670886075949, |
|
"grad_norm": 1294960.5242478126, |
|
"learning_rate": 3.237229708555312e-07, |
|
"logits/chosen": -2.985567808151245, |
|
"logits/rejected": -1.8726612329483032, |
|
"logps/chosen": -112.32755279541016, |
|
"logps/rejected": -509.37286376953125, |
|
"loss": 29187.1594, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1289207637310028, |
|
"rewards/margins": 0.4079267978668213, |
|
"rewards/rejected": -0.27900606393814087, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.544303797468355, |
|
"grad_norm": 1193173.6877739348, |
|
"learning_rate": 3.2215606392980255e-07, |
|
"logits/chosen": -2.0656161308288574, |
|
"logits/rejected": -2.3443799018859863, |
|
"logps/chosen": -97.64754486083984, |
|
"logps/rejected": -511.40576171875, |
|
"loss": 29322.4313, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.13589712977409363, |
|
"rewards/margins": 0.413860946893692, |
|
"rewards/rejected": -0.2779638171195984, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.594936708860759, |
|
"grad_norm": 1279108.0637389964, |
|
"learning_rate": 3.2058915700407396e-07, |
|
"logits/chosen": -3.5005557537078857, |
|
"logits/rejected": -3.4204413890838623, |
|
"logps/chosen": -107.39742279052734, |
|
"logps/rejected": -530.2638549804688, |
|
"loss": 27542.3625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.13995657861232758, |
|
"rewards/margins": 0.42647701501846313, |
|
"rewards/rejected": -0.28652042150497437, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.6455696202531644, |
|
"grad_norm": 2707102.044355496, |
|
"learning_rate": 3.190222500783453e-07, |
|
"logits/chosen": -4.715664863586426, |
|
"logits/rejected": -4.245431900024414, |
|
"logps/chosen": -101.01532745361328, |
|
"logps/rejected": -561.7377319335938, |
|
"loss": 29571.3625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14493677020072937, |
|
"rewards/margins": 0.4646069407463074, |
|
"rewards/rejected": -0.3196701109409332, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 7.69620253164557, |
|
"grad_norm": 1346703.2802720347, |
|
"learning_rate": 3.1745534315261674e-07, |
|
"logits/chosen": -2.4094414710998535, |
|
"logits/rejected": -2.316082715988159, |
|
"logps/chosen": -90.64556121826172, |
|
"logps/rejected": -524.6895751953125, |
|
"loss": 29962.2875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1430484652519226, |
|
"rewards/margins": 0.4339544177055359, |
|
"rewards/rejected": -0.2909059524536133, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 7.746835443037975, |
|
"grad_norm": 1570681.8076612286, |
|
"learning_rate": 3.158884362268881e-07, |
|
"logits/chosen": -1.977839708328247, |
|
"logits/rejected": -1.748456597328186, |
|
"logps/chosen": -95.17073822021484, |
|
"logps/rejected": -536.3465576171875, |
|
"loss": 29005.075, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.13247540593147278, |
|
"rewards/margins": 0.44195109605789185, |
|
"rewards/rejected": -0.3094756603240967, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 7.7974683544303796, |
|
"grad_norm": 1321655.562082779, |
|
"learning_rate": 3.143215293011595e-07, |
|
"logits/chosen": -5.75424861907959, |
|
"logits/rejected": -5.283251762390137, |
|
"logps/chosen": -109.5367202758789, |
|
"logps/rejected": -538.626220703125, |
|
"loss": 29057.1688, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.14621947705745697, |
|
"rewards/margins": 0.43537068367004395, |
|
"rewards/rejected": -0.2891511619091034, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.848101265822785, |
|
"grad_norm": 1360253.1191038797, |
|
"learning_rate": 3.1275462237543087e-07, |
|
"logits/chosen": -3.4590229988098145, |
|
"logits/rejected": -3.5962212085723877, |
|
"logps/chosen": -114.27938079833984, |
|
"logps/rejected": -566.5555419921875, |
|
"loss": 29716.3094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1347774863243103, |
|
"rewards/margins": 0.44886675477027893, |
|
"rewards/rejected": -0.314089298248291, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.89873417721519, |
|
"grad_norm": 1269167.0621019504, |
|
"learning_rate": 3.111877154497023e-07, |
|
"logits/chosen": -1.0884647369384766, |
|
"logits/rejected": -0.7194244265556335, |
|
"logps/chosen": -89.07111358642578, |
|
"logps/rejected": -494.15789794921875, |
|
"loss": 29335.9875, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1284293383359909, |
|
"rewards/margins": 0.4071559011936188, |
|
"rewards/rejected": -0.2787265181541443, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.949367088607595, |
|
"grad_norm": 1453875.4579149496, |
|
"learning_rate": 3.0962080852397364e-07, |
|
"logits/chosen": -2.750883102416992, |
|
"logits/rejected": -3.123683452606201, |
|
"logps/chosen": -98.0600357055664, |
|
"logps/rejected": -508.206298828125, |
|
"loss": 29392.4875, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.13056252896785736, |
|
"rewards/margins": 0.4083867073059082, |
|
"rewards/rejected": -0.2778242230415344, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1764041.9454831716, |
|
"learning_rate": 3.0805390159824505e-07, |
|
"logits/chosen": -3.7020182609558105, |
|
"logits/rejected": -2.8675622940063477, |
|
"logps/chosen": -112.20640563964844, |
|
"logps/rejected": -527.1363525390625, |
|
"loss": 30214.225, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1370132714509964, |
|
"rewards/margins": 0.42148295044898987, |
|
"rewards/rejected": -0.2844696640968323, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 8.050632911392405, |
|
"grad_norm": 1502727.0577222395, |
|
"learning_rate": 3.064869946725164e-07, |
|
"logits/chosen": -2.0656542778015137, |
|
"logits/rejected": -1.5985521078109741, |
|
"logps/chosen": -84.60444641113281, |
|
"logps/rejected": -520.1857299804688, |
|
"loss": 24723.275, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1492975652217865, |
|
"rewards/margins": 0.4404692053794861, |
|
"rewards/rejected": -0.2911716103553772, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 8.10126582278481, |
|
"grad_norm": 838369.9468876831, |
|
"learning_rate": 3.049200877467878e-07, |
|
"logits/chosen": -1.758178949356079, |
|
"logits/rejected": -0.7727742791175842, |
|
"logps/chosen": -83.45867919921875, |
|
"logps/rejected": -530.3883666992188, |
|
"loss": 25817.0203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14538443088531494, |
|
"rewards/margins": 0.45367687940597534, |
|
"rewards/rejected": -0.3082924485206604, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.151898734177216, |
|
"grad_norm": 1012852.54550217, |
|
"learning_rate": 3.0335318082105923e-07, |
|
"logits/chosen": -2.217496156692505, |
|
"logits/rejected": -2.0143866539001465, |
|
"logps/chosen": -100.38580322265625, |
|
"logps/rejected": -549.8438720703125, |
|
"loss": 25090.8891, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.13634233176708221, |
|
"rewards/margins": 0.44348135590553284, |
|
"rewards/rejected": -0.30713900923728943, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 8.20253164556962, |
|
"grad_norm": 1056784.1797241461, |
|
"learning_rate": 3.0178627389533064e-07, |
|
"logits/chosen": -1.1953948736190796, |
|
"logits/rejected": -0.2751680910587311, |
|
"logps/chosen": -89.64523315429688, |
|
"logps/rejected": -510.4059143066406, |
|
"loss": 24456.725, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.14029642939567566, |
|
"rewards/margins": 0.4281511902809143, |
|
"rewards/rejected": -0.28785476088523865, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 8.253164556962025, |
|
"grad_norm": 1147595.1251004518, |
|
"learning_rate": 3.00219366969602e-07, |
|
"logits/chosen": -2.550518035888672, |
|
"logits/rejected": -2.5027434825897217, |
|
"logps/chosen": -76.6513442993164, |
|
"logps/rejected": -524.4201049804688, |
|
"loss": 23486.5594, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.15493164956569672, |
|
"rewards/margins": 0.44891220331192017, |
|
"rewards/rejected": -0.29398053884506226, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 8.30379746835443, |
|
"grad_norm": 1390175.0732444616, |
|
"learning_rate": 2.986524600438734e-07, |
|
"logits/chosen": -0.059876419603824615, |
|
"logits/rejected": 0.00422248849645257, |
|
"logps/chosen": -74.77996063232422, |
|
"logps/rejected": -544.7862548828125, |
|
"loss": 24176.6094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.151381716132164, |
|
"rewards/margins": 0.4694734215736389, |
|
"rewards/rejected": -0.3180916905403137, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.354430379746836, |
|
"grad_norm": 1846159.1203677754, |
|
"learning_rate": 2.970855531181448e-07, |
|
"logits/chosen": -3.206434726715088, |
|
"logits/rejected": -2.6545357704162598, |
|
"logps/chosen": -79.13458251953125, |
|
"logps/rejected": -529.1912841796875, |
|
"loss": 25560.5344, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.14862783253192902, |
|
"rewards/margins": 0.4489147663116455, |
|
"rewards/rejected": -0.3002868890762329, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 8.405063291139241, |
|
"grad_norm": 1294602.7153889702, |
|
"learning_rate": 2.955186461924162e-07, |
|
"logits/chosen": -1.0581172704696655, |
|
"logits/rejected": -0.6744507551193237, |
|
"logps/chosen": -78.69017028808594, |
|
"logps/rejected": -526.4840087890625, |
|
"loss": 25549.9125, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.14595063030719757, |
|
"rewards/margins": 0.44837069511413574, |
|
"rewards/rejected": -0.302420049905777, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 8.455696202531646, |
|
"grad_norm": 1653521.5239311927, |
|
"learning_rate": 2.9395173926668755e-07, |
|
"logits/chosen": -0.9036309123039246, |
|
"logits/rejected": -0.16554176807403564, |
|
"logps/chosen": -83.71012878417969, |
|
"logps/rejected": -525.7719116210938, |
|
"loss": 25089.5516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14826878905296326, |
|
"rewards/margins": 0.4438709616661072, |
|
"rewards/rejected": -0.2956022024154663, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 8.50632911392405, |
|
"grad_norm": 1371497.4089594388, |
|
"learning_rate": 2.9238483234095896e-07, |
|
"logits/chosen": -1.423182725906372, |
|
"logits/rejected": -1.0717556476593018, |
|
"logps/chosen": -89.4638671875, |
|
"logps/rejected": -577.1199340820312, |
|
"loss": 24558.0953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15898647904396057, |
|
"rewards/margins": 0.48913446068763733, |
|
"rewards/rejected": -0.330147922039032, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.556962025316455, |
|
"grad_norm": 1476867.0955964676, |
|
"learning_rate": 2.908179254152303e-07, |
|
"logits/chosen": -3.2004425525665283, |
|
"logits/rejected": -2.7161200046539307, |
|
"logps/chosen": -86.7264633178711, |
|
"logps/rejected": -543.3889770507812, |
|
"loss": 26642.4781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1485292911529541, |
|
"rewards/margins": 0.4551934599876404, |
|
"rewards/rejected": -0.3066641688346863, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 8.60759493670886, |
|
"grad_norm": 1134090.4892000444, |
|
"learning_rate": 2.8925101848950173e-07, |
|
"logits/chosen": -0.274528443813324, |
|
"logits/rejected": 0.4862538278102875, |
|
"logps/chosen": -79.16570281982422, |
|
"logps/rejected": -513.53173828125, |
|
"loss": 23741.9938, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.15034614503383636, |
|
"rewards/margins": 0.43597039580345154, |
|
"rewards/rejected": -0.28562426567077637, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.658227848101266, |
|
"grad_norm": 1314089.2981008843, |
|
"learning_rate": 2.876841115637731e-07, |
|
"logits/chosen": 0.6013806462287903, |
|
"logits/rejected": 1.2335985898971558, |
|
"logps/chosen": -90.46197509765625, |
|
"logps/rejected": -551.8345947265625, |
|
"loss": 24216.4281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1541350781917572, |
|
"rewards/margins": 0.47102633118629456, |
|
"rewards/rejected": -0.3168913424015045, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 8.708860759493671, |
|
"grad_norm": 1622019.967143891, |
|
"learning_rate": 2.861172046380445e-07, |
|
"logits/chosen": 0.2407432496547699, |
|
"logits/rejected": 0.4264713227748871, |
|
"logps/chosen": -93.0431900024414, |
|
"logps/rejected": -564.0677490234375, |
|
"loss": 23649.3016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.147947758436203, |
|
"rewards/margins": 0.4662678837776184, |
|
"rewards/rejected": -0.3183201253414154, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 8.759493670886076, |
|
"grad_norm": 1520791.345848389, |
|
"learning_rate": 2.8455029771231586e-07, |
|
"logits/chosen": 0.6626393795013428, |
|
"logits/rejected": 0.7864507436752319, |
|
"logps/chosen": -94.95128631591797, |
|
"logps/rejected": -540.1358642578125, |
|
"loss": 25224.3125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.14551883935928345, |
|
"rewards/margins": 0.4529417157173157, |
|
"rewards/rejected": -0.3074227571487427, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 8.810126582278482, |
|
"grad_norm": 1625465.2135884068, |
|
"learning_rate": 2.8298339078658727e-07, |
|
"logits/chosen": -0.07786345481872559, |
|
"logits/rejected": -0.031427524983882904, |
|
"logps/chosen": -90.72882843017578, |
|
"logps/rejected": -539.1676025390625, |
|
"loss": 24133.7531, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.15023007988929749, |
|
"rewards/margins": 0.4491490423679352, |
|
"rewards/rejected": -0.2989189624786377, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 8.860759493670885, |
|
"grad_norm": 1330490.8036484018, |
|
"learning_rate": 2.8141648386085863e-07, |
|
"logits/chosen": 0.1896178424358368, |
|
"logits/rejected": 1.3701179027557373, |
|
"logps/chosen": -78.11041259765625, |
|
"logps/rejected": -545.9954833984375, |
|
"loss": 24713.5375, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.15004639327526093, |
|
"rewards/margins": 0.4731353223323822, |
|
"rewards/rejected": -0.32308894395828247, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.91139240506329, |
|
"grad_norm": 1240332.5244059283, |
|
"learning_rate": 2.7984957693513004e-07, |
|
"logits/chosen": 0.09949211776256561, |
|
"logits/rejected": 0.6086061596870422, |
|
"logps/chosen": -84.04310607910156, |
|
"logps/rejected": -550.8171997070312, |
|
"loss": 24452.55, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.14817103743553162, |
|
"rewards/margins": 0.47146469354629517, |
|
"rewards/rejected": -0.32329362630844116, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 8.962025316455696, |
|
"grad_norm": 1279998.0524960216, |
|
"learning_rate": 2.782826700094014e-07, |
|
"logits/chosen": -1.9250777959823608, |
|
"logits/rejected": -1.7448539733886719, |
|
"logps/chosen": -92.84037780761719, |
|
"logps/rejected": -539.1063232421875, |
|
"loss": 25664.2531, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1440330594778061, |
|
"rewards/margins": 0.45180240273475647, |
|
"rewards/rejected": -0.3077693581581116, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 9.012658227848101, |
|
"grad_norm": 1042157.0097295721, |
|
"learning_rate": 2.767157630836728e-07, |
|
"logits/chosen": -2.344456911087036, |
|
"logits/rejected": -2.174999713897705, |
|
"logps/chosen": -74.14456939697266, |
|
"logps/rejected": -549.884033203125, |
|
"loss": 22791.725, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.16015887260437012, |
|
"rewards/margins": 0.47513628005981445, |
|
"rewards/rejected": -0.31497737765312195, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 9.063291139240507, |
|
"grad_norm": 1604328.8989550385, |
|
"learning_rate": 2.751488561579442e-07, |
|
"logits/chosen": -0.4028230607509613, |
|
"logits/rejected": -0.017443586140871048, |
|
"logps/chosen": -78.17924499511719, |
|
"logps/rejected": -555.5220947265625, |
|
"loss": 21934.7781, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.161887988448143, |
|
"rewards/margins": 0.47605371475219727, |
|
"rewards/rejected": -0.3141656517982483, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 9.113924050632912, |
|
"grad_norm": 930218.7877013405, |
|
"learning_rate": 2.7358194923221564e-07, |
|
"logits/chosen": -0.10258030891418457, |
|
"logits/rejected": -0.2491408884525299, |
|
"logps/chosen": -67.35882568359375, |
|
"logps/rejected": -562.8963623046875, |
|
"loss": 20609.7047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1559842973947525, |
|
"rewards/margins": 0.4920543134212494, |
|
"rewards/rejected": -0.33607012033462524, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.164556962025316, |
|
"grad_norm": 1965412.9139898522, |
|
"learning_rate": 2.72015042306487e-07, |
|
"logits/chosen": 0.5992544889450073, |
|
"logits/rejected": 0.6971222162246704, |
|
"logps/chosen": -68.12413024902344, |
|
"logps/rejected": -546.7501220703125, |
|
"loss": 21574.0656, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.16274484992027283, |
|
"rewards/margins": 0.475511372089386, |
|
"rewards/rejected": -0.31276652216911316, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 9.215189873417721, |
|
"grad_norm": 1012215.1362345209, |
|
"learning_rate": 2.704481353807584e-07, |
|
"logits/chosen": -0.252922922372818, |
|
"logits/rejected": 0.7370151281356812, |
|
"logps/chosen": -68.61247253417969, |
|
"logps/rejected": -545.773193359375, |
|
"loss": 21584.0, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.15732263028621674, |
|
"rewards/margins": 0.47610074281692505, |
|
"rewards/rejected": -0.3187780976295471, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 9.265822784810126, |
|
"grad_norm": 1317328.2635211374, |
|
"learning_rate": 2.6888122845502977e-07, |
|
"logits/chosen": -0.5902656316757202, |
|
"logits/rejected": -0.200765460729599, |
|
"logps/chosen": -72.17051696777344, |
|
"logps/rejected": -560.718994140625, |
|
"loss": 20662.6562, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.16373535990715027, |
|
"rewards/margins": 0.49004659056663513, |
|
"rewards/rejected": -0.32631123065948486, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 9.316455696202532, |
|
"grad_norm": 1202220.669797323, |
|
"learning_rate": 2.673143215293012e-07, |
|
"logits/chosen": -0.9152681231498718, |
|
"logits/rejected": -0.46515974402427673, |
|
"logps/chosen": -71.53898620605469, |
|
"logps/rejected": -545.0053100585938, |
|
"loss": 22147.6375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1599002182483673, |
|
"rewards/margins": 0.47435054183006287, |
|
"rewards/rejected": -0.31445032358169556, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 9.367088607594937, |
|
"grad_norm": 858793.4443150639, |
|
"learning_rate": 2.6574741460357254e-07, |
|
"logits/chosen": 0.8187123537063599, |
|
"logits/rejected": 0.9660876393318176, |
|
"logps/chosen": -68.53959655761719, |
|
"logps/rejected": -533.693603515625, |
|
"loss": 22383.2656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15871909260749817, |
|
"rewards/margins": 0.46780315041542053, |
|
"rewards/rejected": -0.30908405780792236, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 9.417721518987342, |
|
"grad_norm": 753710.4553891663, |
|
"learning_rate": 2.6418050767784395e-07, |
|
"logits/chosen": 0.07855646312236786, |
|
"logits/rejected": -0.0003270745219197124, |
|
"logps/chosen": -71.92098236083984, |
|
"logps/rejected": -532.4739990234375, |
|
"loss": 22731.7687, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.16191932559013367, |
|
"rewards/margins": 0.4635027348995209, |
|
"rewards/rejected": -0.3015834391117096, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 9.468354430379748, |
|
"grad_norm": 1208088.8106737435, |
|
"learning_rate": 2.626136007521153e-07, |
|
"logits/chosen": -0.23646318912506104, |
|
"logits/rejected": 0.0054475306533277035, |
|
"logps/chosen": -66.38209533691406, |
|
"logps/rejected": -541.2474365234375, |
|
"loss": 22257.4375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.15835285186767578, |
|
"rewards/margins": 0.47471290826797485, |
|
"rewards/rejected": -0.3163600265979767, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 9.518987341772151, |
|
"grad_norm": 1301078.6439378709, |
|
"learning_rate": 2.610466938263867e-07, |
|
"logits/chosen": -1.2212382555007935, |
|
"logits/rejected": -1.2270792722702026, |
|
"logps/chosen": -69.9106674194336, |
|
"logps/rejected": -537.7271728515625, |
|
"loss": 22528.825, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.15471485257148743, |
|
"rewards/margins": 0.46409493684768677, |
|
"rewards/rejected": -0.30938002467155457, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 9.569620253164556, |
|
"grad_norm": 1146807.5987679055, |
|
"learning_rate": 2.594797869006581e-07, |
|
"logits/chosen": -1.618896484375, |
|
"logits/rejected": -1.3599251508712769, |
|
"logps/chosen": -77.14048767089844, |
|
"logps/rejected": -519.0086059570312, |
|
"loss": 20937.9, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1560250073671341, |
|
"rewards/margins": 0.44421762228012085, |
|
"rewards/rejected": -0.28819265961647034, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 9.620253164556962, |
|
"grad_norm": 1143412.3516794874, |
|
"learning_rate": 2.579128799749295e-07, |
|
"logits/chosen": -0.6647695302963257, |
|
"logits/rejected": -0.6680254936218262, |
|
"logps/chosen": -85.31086730957031, |
|
"logps/rejected": -573.4449462890625, |
|
"loss": 21446.8719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16069479286670685, |
|
"rewards/margins": 0.486908495426178, |
|
"rewards/rejected": -0.32621368765830994, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.670886075949367, |
|
"grad_norm": 874554.4726819041, |
|
"learning_rate": 2.5634597304920085e-07, |
|
"logits/chosen": -2.4332644939422607, |
|
"logits/rejected": -2.143573522567749, |
|
"logps/chosen": -73.66841125488281, |
|
"logps/rejected": -567.8841552734375, |
|
"loss": 21540.7203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1681254804134369, |
|
"rewards/margins": 0.49868589639663696, |
|
"rewards/rejected": -0.3305602967739105, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 9.721518987341772, |
|
"grad_norm": 1796698.8005837006, |
|
"learning_rate": 2.5477906612347227e-07, |
|
"logits/chosen": 1.2071720361709595, |
|
"logits/rejected": 1.811336874961853, |
|
"logps/chosen": -68.67604064941406, |
|
"logps/rejected": -531.2750244140625, |
|
"loss": 22819.1078, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1492142677307129, |
|
"rewards/margins": 0.4690275192260742, |
|
"rewards/rejected": -0.31981322169303894, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 9.772151898734178, |
|
"grad_norm": 1652289.4059097564, |
|
"learning_rate": 2.532121591977436e-07, |
|
"logits/chosen": -0.47033196687698364, |
|
"logits/rejected": -0.13743743300437927, |
|
"logps/chosen": -58.46977996826172, |
|
"logps/rejected": -548.3218383789062, |
|
"loss": 22147.9906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16477254033088684, |
|
"rewards/margins": 0.4882374703884125, |
|
"rewards/rejected": -0.32346493005752563, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 9.822784810126583, |
|
"grad_norm": 1031570.3956932048, |
|
"learning_rate": 2.5164525227201504e-07, |
|
"logits/chosen": -1.3281480073928833, |
|
"logits/rejected": -0.6028780937194824, |
|
"logps/chosen": -71.20520782470703, |
|
"logps/rejected": -560.7177124023438, |
|
"loss": 21547.1453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16829116642475128, |
|
"rewards/margins": 0.4920671880245209, |
|
"rewards/rejected": -0.3237760066986084, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 9.873417721518987, |
|
"grad_norm": 997159.4818661372, |
|
"learning_rate": 2.500783453462864e-07, |
|
"logits/chosen": 0.0865519791841507, |
|
"logits/rejected": 1.0491398572921753, |
|
"logps/chosen": -66.77009582519531, |
|
"logps/rejected": -538.1752319335938, |
|
"loss": 21311.2047, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.15671603381633759, |
|
"rewards/margins": 0.4763658046722412, |
|
"rewards/rejected": -0.3196497857570648, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.924050632911392, |
|
"grad_norm": 2765789.1484618983, |
|
"learning_rate": 2.485114384205578e-07, |
|
"logits/chosen": 0.05377687141299248, |
|
"logits/rejected": 0.6552912592887878, |
|
"logps/chosen": -67.99398803710938, |
|
"logps/rejected": -554.9031982421875, |
|
"loss": 20360.5656, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.16012230515480042, |
|
"rewards/margins": 0.48966652154922485, |
|
"rewards/rejected": -0.3295442461967468, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 9.974683544303797, |
|
"grad_norm": 778456.3899893347, |
|
"learning_rate": 2.4694453149482917e-07, |
|
"logits/chosen": -1.8621749877929688, |
|
"logits/rejected": -0.9629243612289429, |
|
"logps/chosen": -76.34040832519531, |
|
"logps/rejected": -570.4073486328125, |
|
"loss": 20853.2188, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.16522939503192902, |
|
"rewards/margins": 0.4955335259437561, |
|
"rewards/rejected": -0.3303041160106659, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 10.025316455696203, |
|
"grad_norm": 1813632.2248899846, |
|
"learning_rate": 2.453776245691006e-07, |
|
"logits/chosen": -1.164282202720642, |
|
"logits/rejected": -1.4965863227844238, |
|
"logps/chosen": -64.31637573242188, |
|
"logps/rejected": -555.818115234375, |
|
"loss": 20145.1469, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17363281548023224, |
|
"rewards/margins": 0.4858935475349426, |
|
"rewards/rejected": -0.3122607469558716, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 10.075949367088608, |
|
"grad_norm": 1332924.3073966086, |
|
"learning_rate": 2.4381071764337194e-07, |
|
"logits/chosen": -0.629298746585846, |
|
"logits/rejected": -0.301331102848053, |
|
"logps/chosen": -63.670082092285156, |
|
"logps/rejected": -531.6769409179688, |
|
"loss": 19644.3969, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.17206737399101257, |
|
"rewards/margins": 0.46868830919265747, |
|
"rewards/rejected": -0.29662084579467773, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 10.126582278481013, |
|
"grad_norm": 1851357.970280298, |
|
"learning_rate": 2.4224381071764335e-07, |
|
"logits/chosen": -0.845658004283905, |
|
"logits/rejected": -0.27886706590652466, |
|
"logps/chosen": -64.26731872558594, |
|
"logps/rejected": -551.6077270507812, |
|
"loss": 19949.7859, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16876797378063202, |
|
"rewards/margins": 0.4869278073310852, |
|
"rewards/rejected": -0.318159818649292, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.177215189873417, |
|
"grad_norm": 711674.3296077562, |
|
"learning_rate": 2.4067690379191476e-07, |
|
"logits/chosen": -0.5739536285400391, |
|
"logits/rejected": -0.18802312016487122, |
|
"logps/chosen": -68.64383697509766, |
|
"logps/rejected": -557.96484375, |
|
"loss": 18812.7141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16457512974739075, |
|
"rewards/margins": 0.48935467004776, |
|
"rewards/rejected": -0.3247795104980469, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 10.227848101265822, |
|
"grad_norm": 1174456.0466990366, |
|
"learning_rate": 2.391099968661861e-07, |
|
"logits/chosen": -1.7381559610366821, |
|
"logits/rejected": -0.10386524349451065, |
|
"logps/chosen": -62.054725646972656, |
|
"logps/rejected": -570.2944946289062, |
|
"loss": 19933.8734, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17416052520275116, |
|
"rewards/margins": 0.515259861946106, |
|
"rewards/rejected": -0.3410993218421936, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 10.278481012658228, |
|
"grad_norm": 1096601.381122318, |
|
"learning_rate": 2.375430899404575e-07, |
|
"logits/chosen": -0.8541361093521118, |
|
"logits/rejected": -0.3781866133213043, |
|
"logps/chosen": -56.479164123535156, |
|
"logps/rejected": -554.1986694335938, |
|
"loss": 19863.0656, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17042097449302673, |
|
"rewards/margins": 0.4970123767852783, |
|
"rewards/rejected": -0.3265914022922516, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 10.329113924050633, |
|
"grad_norm": 840837.174069809, |
|
"learning_rate": 2.3597618301472892e-07, |
|
"logits/chosen": -1.694748878479004, |
|
"logits/rejected": -1.400233268737793, |
|
"logps/chosen": -75.36106872558594, |
|
"logps/rejected": -590.9649047851562, |
|
"loss": 19475.8531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1772707998752594, |
|
"rewards/margins": 0.5156514644622803, |
|
"rewards/rejected": -0.33838069438934326, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 10.379746835443038, |
|
"grad_norm": 803470.3932805886, |
|
"learning_rate": 2.344092760890003e-07, |
|
"logits/chosen": -1.5708272457122803, |
|
"logits/rejected": -1.7595367431640625, |
|
"logps/chosen": -67.97745513916016, |
|
"logps/rejected": -574.4043579101562, |
|
"loss": 20348.9719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1695125252008438, |
|
"rewards/margins": 0.5006899237632751, |
|
"rewards/rejected": -0.33117741346359253, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 10.430379746835444, |
|
"grad_norm": 1004663.9694340345, |
|
"learning_rate": 2.328423691632717e-07, |
|
"logits/chosen": -1.962457299232483, |
|
"logits/rejected": -1.3877923488616943, |
|
"logps/chosen": -68.37916564941406, |
|
"logps/rejected": -552.3621826171875, |
|
"loss": 19908.175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17361022531986237, |
|
"rewards/margins": 0.4852239489555359, |
|
"rewards/rejected": -0.31161370873451233, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 10.481012658227849, |
|
"grad_norm": 907359.5128728322, |
|
"learning_rate": 2.3127546223754308e-07, |
|
"logits/chosen": -0.9135034680366516, |
|
"logits/rejected": -0.6688288450241089, |
|
"logps/chosen": -66.34752655029297, |
|
"logps/rejected": -564.5549926757812, |
|
"loss": 19321.1625, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.17101381719112396, |
|
"rewards/margins": 0.49828824400901794, |
|
"rewards/rejected": -0.3272744417190552, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 10.531645569620252, |
|
"grad_norm": 846691.3768602766, |
|
"learning_rate": 2.2970855531181446e-07, |
|
"logits/chosen": -0.16942422091960907, |
|
"logits/rejected": 0.07732643932104111, |
|
"logps/chosen": -70.53272247314453, |
|
"logps/rejected": -561.4410400390625, |
|
"loss": 20015.3156, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.16945675015449524, |
|
"rewards/margins": 0.4873596131801605, |
|
"rewards/rejected": -0.31790289282798767, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 10.582278481012658, |
|
"grad_norm": 1071818.197184184, |
|
"learning_rate": 2.2814164838608585e-07, |
|
"logits/chosen": -3.7376797199249268, |
|
"logits/rejected": -3.6469883918762207, |
|
"logps/chosen": -74.2595443725586, |
|
"logps/rejected": -581.4434814453125, |
|
"loss": 19872.15, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17747794091701508, |
|
"rewards/margins": 0.5011757612228394, |
|
"rewards/rejected": -0.3236978054046631, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 10.632911392405063, |
|
"grad_norm": 1004256.8801454039, |
|
"learning_rate": 2.2657474146035723e-07, |
|
"logits/chosen": -3.6712310314178467, |
|
"logits/rejected": -2.93229603767395, |
|
"logps/chosen": -71.2375259399414, |
|
"logps/rejected": -567.6956787109375, |
|
"loss": 19287.7531, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17674970626831055, |
|
"rewards/margins": 0.4991089403629303, |
|
"rewards/rejected": -0.322359174489975, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 10.683544303797468, |
|
"grad_norm": 1106280.1198113484, |
|
"learning_rate": 2.2500783453462862e-07, |
|
"logits/chosen": -0.8213077783584595, |
|
"logits/rejected": 0.14719510078430176, |
|
"logps/chosen": -63.996498107910156, |
|
"logps/rejected": -572.0599975585938, |
|
"loss": 19310.0187, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17046719789505005, |
|
"rewards/margins": 0.5157285928726196, |
|
"rewards/rejected": -0.34526145458221436, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 10.734177215189874, |
|
"grad_norm": 1464811.168383667, |
|
"learning_rate": 2.234409276089e-07, |
|
"logits/chosen": -0.11996922641992569, |
|
"logits/rejected": 0.22597141563892365, |
|
"logps/chosen": -76.11662292480469, |
|
"logps/rejected": -563.3325805664062, |
|
"loss": 19843.4688, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.16797539591789246, |
|
"rewards/margins": 0.4910767078399658, |
|
"rewards/rejected": -0.32310131192207336, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 10.784810126582279, |
|
"grad_norm": 1138818.9289973595, |
|
"learning_rate": 2.218740206831714e-07, |
|
"logits/chosen": -1.5511647462844849, |
|
"logits/rejected": -0.5638203620910645, |
|
"logps/chosen": -54.634178161621094, |
|
"logps/rejected": -540.9115600585938, |
|
"loss": 19217.1797, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16898050904273987, |
|
"rewards/margins": 0.4923567771911621, |
|
"rewards/rejected": -0.32337623834609985, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 10.835443037974684, |
|
"grad_norm": 927204.123761474, |
|
"learning_rate": 2.203071137574428e-07, |
|
"logits/chosen": -0.11952924728393555, |
|
"logits/rejected": 0.11829443275928497, |
|
"logps/chosen": -68.7413101196289, |
|
"logps/rejected": -549.9212036132812, |
|
"loss": 19664.7969, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16766998171806335, |
|
"rewards/margins": 0.4846928119659424, |
|
"rewards/rejected": -0.31702274084091187, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.886075949367088, |
|
"grad_norm": 987326.8653252205, |
|
"learning_rate": 2.187402068317142e-07, |
|
"logits/chosen": -0.7956012487411499, |
|
"logits/rejected": -0.13277845084667206, |
|
"logps/chosen": -66.15580749511719, |
|
"logps/rejected": -539.9743041992188, |
|
"loss": 19319.2359, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.16284069418907166, |
|
"rewards/margins": 0.47170519828796387, |
|
"rewards/rejected": -0.3088645040988922, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 10.936708860759493, |
|
"grad_norm": 1490559.10150877, |
|
"learning_rate": 2.1717329990598557e-07, |
|
"logits/chosen": 0.23329691588878632, |
|
"logits/rejected": 0.3798617720603943, |
|
"logps/chosen": -65.20997619628906, |
|
"logps/rejected": -566.1799926757812, |
|
"loss": 18358.2687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15990014374256134, |
|
"rewards/margins": 0.502142608165741, |
|
"rewards/rejected": -0.34224241971969604, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 10.987341772151899, |
|
"grad_norm": 910221.5561234908, |
|
"learning_rate": 2.1560639298025696e-07, |
|
"logits/chosen": -1.0350775718688965, |
|
"logits/rejected": -0.4896017909049988, |
|
"logps/chosen": -80.80205535888672, |
|
"logps/rejected": -605.629150390625, |
|
"loss": 19122.5234, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17700453102588654, |
|
"rewards/margins": 0.5280236601829529, |
|
"rewards/rejected": -0.35101914405822754, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 11.037974683544304, |
|
"grad_norm": 852169.287356795, |
|
"learning_rate": 2.1403948605452835e-07, |
|
"logits/chosen": -1.0383515357971191, |
|
"logits/rejected": 0.3044077157974243, |
|
"logps/chosen": -60.7518196105957, |
|
"logps/rejected": -550.4581909179688, |
|
"loss": 18261.975, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.16871869564056396, |
|
"rewards/margins": 0.49391689896583557, |
|
"rewards/rejected": -0.32519814372062683, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 11.08860759493671, |
|
"grad_norm": 850664.061578799, |
|
"learning_rate": 2.1247257912879973e-07, |
|
"logits/chosen": -0.5247487425804138, |
|
"logits/rejected": -0.718704342842102, |
|
"logps/chosen": -48.23347473144531, |
|
"logps/rejected": -571.79296875, |
|
"loss": 17780.6719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17942146956920624, |
|
"rewards/margins": 0.5196394920349121, |
|
"rewards/rejected": -0.34021803736686707, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 11.139240506329115, |
|
"grad_norm": 795813.8223153341, |
|
"learning_rate": 2.1090567220307112e-07, |
|
"logits/chosen": 0.2913626730442047, |
|
"logits/rejected": 0.3964959681034088, |
|
"logps/chosen": -57.057777404785156, |
|
"logps/rejected": -553.8439331054688, |
|
"loss": 19198.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1739949882030487, |
|
"rewards/margins": 0.49791765213012695, |
|
"rewards/rejected": -0.32392266392707825, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.189873417721518, |
|
"grad_norm": 1113023.3688515616, |
|
"learning_rate": 2.093387652773425e-07, |
|
"logits/chosen": 1.5053379535675049, |
|
"logits/rejected": 2.2073726654052734, |
|
"logps/chosen": -52.245140075683594, |
|
"logps/rejected": -549.0379028320312, |
|
"loss": 18112.9031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1701221615076065, |
|
"rewards/margins": 0.49869513511657715, |
|
"rewards/rejected": -0.32857298851013184, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 11.240506329113924, |
|
"grad_norm": 1112437.2131689412, |
|
"learning_rate": 2.077718583516139e-07, |
|
"logits/chosen": -0.7113906741142273, |
|
"logits/rejected": -0.593052089214325, |
|
"logps/chosen": -56.02216720581055, |
|
"logps/rejected": -588.62255859375, |
|
"loss": 18765.7359, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18194417655467987, |
|
"rewards/margins": 0.529647707939148, |
|
"rewards/rejected": -0.3477035462856293, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 11.291139240506329, |
|
"grad_norm": 735799.2580717172, |
|
"learning_rate": 2.0620495142588527e-07, |
|
"logits/chosen": -0.9520748257637024, |
|
"logits/rejected": -0.6387659907341003, |
|
"logps/chosen": -58.523109436035156, |
|
"logps/rejected": -582.5303344726562, |
|
"loss": 17604.2656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17585232853889465, |
|
"rewards/margins": 0.522950291633606, |
|
"rewards/rejected": -0.3470980226993561, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 11.341772151898734, |
|
"grad_norm": 716407.5247360148, |
|
"learning_rate": 2.0463804450015669e-07, |
|
"logits/chosen": 1.4925919771194458, |
|
"logits/rejected": 1.6499805450439453, |
|
"logps/chosen": -63.138038635253906, |
|
"logps/rejected": -546.4395751953125, |
|
"loss": 18588.6406, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1618063747882843, |
|
"rewards/margins": 0.48370370268821716, |
|
"rewards/rejected": -0.3218972980976105, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 11.39240506329114, |
|
"grad_norm": 598500.3265676593, |
|
"learning_rate": 2.0307113757442807e-07, |
|
"logits/chosen": 0.6475615501403809, |
|
"logits/rejected": 1.338098406791687, |
|
"logps/chosen": -58.75787353515625, |
|
"logps/rejected": -563.3907470703125, |
|
"loss": 18119.6031, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17143133282661438, |
|
"rewards/margins": 0.5086871981620789, |
|
"rewards/rejected": -0.3372558653354645, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 11.443037974683545, |
|
"grad_norm": 1221314.1531539639, |
|
"learning_rate": 2.0150423064869946e-07, |
|
"logits/chosen": -0.327157199382782, |
|
"logits/rejected": 0.03896377235651016, |
|
"logps/chosen": -58.68574905395508, |
|
"logps/rejected": -558.2637329101562, |
|
"loss": 17534.2281, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17224976420402527, |
|
"rewards/margins": 0.49942049384117126, |
|
"rewards/rejected": -0.327170729637146, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 11.49367088607595, |
|
"grad_norm": 456316.6263000263, |
|
"learning_rate": 1.9993732372297084e-07, |
|
"logits/chosen": -0.07340321689844131, |
|
"logits/rejected": 0.9581168293952942, |
|
"logps/chosen": -56.39067459106445, |
|
"logps/rejected": -567.6375732421875, |
|
"loss": 17502.8781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1778116524219513, |
|
"rewards/margins": 0.5131680965423584, |
|
"rewards/rejected": -0.3353564143180847, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 11.544303797468354, |
|
"grad_norm": 711686.0768962563, |
|
"learning_rate": 1.9837041679724223e-07, |
|
"logits/chosen": -0.8106869459152222, |
|
"logits/rejected": -0.6330159902572632, |
|
"logps/chosen": -61.687591552734375, |
|
"logps/rejected": -573.0241088867188, |
|
"loss": 17796.2391, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18241460621356964, |
|
"rewards/margins": 0.5145494937896729, |
|
"rewards/rejected": -0.3321349024772644, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 11.594936708860759, |
|
"grad_norm": 1355769.5974116765, |
|
"learning_rate": 1.9680350987151361e-07, |
|
"logits/chosen": 2.7271580696105957, |
|
"logits/rejected": 3.408385753631592, |
|
"logps/chosen": -53.9175910949707, |
|
"logps/rejected": -532.6714477539062, |
|
"loss": 18442.0969, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.16783255338668823, |
|
"rewards/margins": 0.4785786271095276, |
|
"rewards/rejected": -0.31074607372283936, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 11.645569620253164, |
|
"grad_norm": 1885360.6056858273, |
|
"learning_rate": 1.95236602945785e-07, |
|
"logits/chosen": -0.4679819941520691, |
|
"logits/rejected": 0.16113388538360596, |
|
"logps/chosen": -63.9486198425293, |
|
"logps/rejected": -550.3961181640625, |
|
"loss": 17411.3969, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.17148110270500183, |
|
"rewards/margins": 0.4901048243045807, |
|
"rewards/rejected": -0.3186236619949341, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 11.69620253164557, |
|
"grad_norm": 758901.4037823884, |
|
"learning_rate": 1.9366969602005639e-07, |
|
"logits/chosen": 0.85181725025177, |
|
"logits/rejected": 1.3077051639556885, |
|
"logps/chosen": -73.22114562988281, |
|
"logps/rejected": -575.5013427734375, |
|
"loss": 17968.0844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1745305359363556, |
|
"rewards/margins": 0.5058612823486328, |
|
"rewards/rejected": -0.33133071660995483, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 11.746835443037975, |
|
"grad_norm": 520118.42882549425, |
|
"learning_rate": 1.9210278909432777e-07, |
|
"logits/chosen": -0.6327224969863892, |
|
"logits/rejected": 0.7259325385093689, |
|
"logps/chosen": -60.48676681518555, |
|
"logps/rejected": -574.37939453125, |
|
"loss": 18215.2938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18099671602249146, |
|
"rewards/margins": 0.5182011127471924, |
|
"rewards/rejected": -0.33720433712005615, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 11.79746835443038, |
|
"grad_norm": 743117.6330674689, |
|
"learning_rate": 1.9053588216859918e-07, |
|
"logits/chosen": 1.2280547618865967, |
|
"logits/rejected": 1.3038314580917358, |
|
"logps/chosen": -59.2470817565918, |
|
"logps/rejected": -559.13916015625, |
|
"loss": 17567.2906, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17356745898723602, |
|
"rewards/margins": 0.49933862686157227, |
|
"rewards/rejected": -0.32577118277549744, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 11.848101265822784, |
|
"grad_norm": 730673.5249396141, |
|
"learning_rate": 1.8896897524287057e-07, |
|
"logits/chosen": 1.2314859628677368, |
|
"logits/rejected": 1.3703396320343018, |
|
"logps/chosen": -58.14827346801758, |
|
"logps/rejected": -552.53759765625, |
|
"loss": 17758.8719, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1748059093952179, |
|
"rewards/margins": 0.4981175363063812, |
|
"rewards/rejected": -0.3233116567134857, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 11.89873417721519, |
|
"grad_norm": 597117.4885736415, |
|
"learning_rate": 1.8740206831714195e-07, |
|
"logits/chosen": -0.7092142105102539, |
|
"logits/rejected": -0.0756240040063858, |
|
"logps/chosen": -62.97068405151367, |
|
"logps/rejected": -567.6489868164062, |
|
"loss": 18044.8, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17830543220043182, |
|
"rewards/margins": 0.5064790844917297, |
|
"rewards/rejected": -0.3281736969947815, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 11.949367088607595, |
|
"grad_norm": 687586.0618323467, |
|
"learning_rate": 1.8583516139141334e-07, |
|
"logits/chosen": -1.2183369398117065, |
|
"logits/rejected": -1.056317925453186, |
|
"logps/chosen": -65.71519470214844, |
|
"logps/rejected": -578.7620239257812, |
|
"loss": 18082.8625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18341727554798126, |
|
"rewards/margins": 0.5148480534553528, |
|
"rewards/rejected": -0.33143073320388794, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 748926.1941504646, |
|
"learning_rate": 1.8426825446568473e-07, |
|
"logits/chosen": -0.35043638944625854, |
|
"logits/rejected": -1.1868419647216797, |
|
"logps/chosen": -59.269996643066406, |
|
"logps/rejected": -581.2828369140625, |
|
"loss": 17352.5563, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16442957520484924, |
|
"rewards/margins": 0.5158518552780151, |
|
"rewards/rejected": -0.3514222800731659, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 12.050632911392405, |
|
"grad_norm": 924736.9233899026, |
|
"learning_rate": 1.827013475399561e-07, |
|
"logits/chosen": 0.09384210407733917, |
|
"logits/rejected": 0.38976824283599854, |
|
"logps/chosen": -60.1981315612793, |
|
"logps/rejected": -569.2012329101562, |
|
"loss": 16551.6906, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1783694624900818, |
|
"rewards/margins": 0.5092591047286987, |
|
"rewards/rejected": -0.33088964223861694, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 12.10126582278481, |
|
"grad_norm": 453683.3343967912, |
|
"learning_rate": 1.811344406142275e-07, |
|
"logits/chosen": -0.1967567503452301, |
|
"logits/rejected": 0.26000285148620605, |
|
"logps/chosen": -51.80207443237305, |
|
"logps/rejected": -586.1417846679688, |
|
"loss": 16650.6516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19160635769367218, |
|
"rewards/margins": 0.5359978079795837, |
|
"rewards/rejected": -0.34439152479171753, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 12.151898734177216, |
|
"grad_norm": 760637.6347084254, |
|
"learning_rate": 1.7956753368849888e-07, |
|
"logits/chosen": -2.4950621128082275, |
|
"logits/rejected": -1.7182337045669556, |
|
"logps/chosen": -54.441162109375, |
|
"logps/rejected": -569.5804443359375, |
|
"loss": 16525.3187, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1771778166294098, |
|
"rewards/margins": 0.5123227834701538, |
|
"rewards/rejected": -0.335144966840744, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.20253164556962, |
|
"grad_norm": 760695.8247001156, |
|
"learning_rate": 1.7800062676277027e-07, |
|
"logits/chosen": 2.4408202171325684, |
|
"logits/rejected": 1.941209077835083, |
|
"logps/chosen": -50.47087097167969, |
|
"logps/rejected": -550.1649169921875, |
|
"loss": 16281.4594, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1683485209941864, |
|
"rewards/margins": 0.5019410848617554, |
|
"rewards/rejected": -0.33359256386756897, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 12.253164556962025, |
|
"grad_norm": 501646.8806860111, |
|
"learning_rate": 1.7643371983704165e-07, |
|
"logits/chosen": -1.7683095932006836, |
|
"logits/rejected": -1.838817834854126, |
|
"logps/chosen": -53.41362762451172, |
|
"logps/rejected": -574.3419799804688, |
|
"loss": 16772.675, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18047122657299042, |
|
"rewards/margins": 0.5231555700302124, |
|
"rewards/rejected": -0.342684268951416, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 12.30379746835443, |
|
"grad_norm": 705638.6344046313, |
|
"learning_rate": 1.7486681291131307e-07, |
|
"logits/chosen": 0.6870694756507874, |
|
"logits/rejected": 0.9879606366157532, |
|
"logps/chosen": -60.645713806152344, |
|
"logps/rejected": -565.5677490234375, |
|
"loss": 16990.1125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.180276021361351, |
|
"rewards/margins": 0.5076194405555725, |
|
"rewards/rejected": -0.3273434340953827, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 12.354430379746836, |
|
"grad_norm": 583239.6869039454, |
|
"learning_rate": 1.7329990598558445e-07, |
|
"logits/chosen": -0.015002572908997536, |
|
"logits/rejected": 0.6669713258743286, |
|
"logps/chosen": -59.69384765625, |
|
"logps/rejected": -595.3045654296875, |
|
"loss": 16570.7625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19047938287258148, |
|
"rewards/margins": 0.5352143049240112, |
|
"rewards/rejected": -0.34473496675491333, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 12.405063291139241, |
|
"grad_norm": 717458.0522613698, |
|
"learning_rate": 1.7173299905985584e-07, |
|
"logits/chosen": -1.5561044216156006, |
|
"logits/rejected": -1.511528730392456, |
|
"logps/chosen": -48.24024200439453, |
|
"logps/rejected": -585.71484375, |
|
"loss": 16296.25, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18336063623428345, |
|
"rewards/margins": 0.5371404886245728, |
|
"rewards/rejected": -0.3537798523902893, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 12.455696202531646, |
|
"grad_norm": 1561201.446100151, |
|
"learning_rate": 1.7016609213412722e-07, |
|
"logits/chosen": -0.5445646047592163, |
|
"logits/rejected": 0.5015290379524231, |
|
"logps/chosen": -57.12273025512695, |
|
"logps/rejected": -596.54248046875, |
|
"loss": 17012.2562, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1852089911699295, |
|
"rewards/margins": 0.5424550771713257, |
|
"rewards/rejected": -0.35724616050720215, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 12.50632911392405, |
|
"grad_norm": 576931.8180998629, |
|
"learning_rate": 1.685991852083986e-07, |
|
"logits/chosen": 0.7103387713432312, |
|
"logits/rejected": 0.5729061365127563, |
|
"logps/chosen": -45.429290771484375, |
|
"logps/rejected": -540.9015502929688, |
|
"loss": 17545.0859, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17228493094444275, |
|
"rewards/margins": 0.49700021743774414, |
|
"rewards/rejected": -0.3247153162956238, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 12.556962025316455, |
|
"grad_norm": 790199.4841189157, |
|
"learning_rate": 1.6703227828267e-07, |
|
"logits/chosen": 0.757542610168457, |
|
"logits/rejected": 1.3497235774993896, |
|
"logps/chosen": -60.74102020263672, |
|
"logps/rejected": -570.23583984375, |
|
"loss": 17645.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17620857059955597, |
|
"rewards/margins": 0.5084448456764221, |
|
"rewards/rejected": -0.33223623037338257, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 12.60759493670886, |
|
"grad_norm": 1168730.408088866, |
|
"learning_rate": 1.6546537135694138e-07, |
|
"logits/chosen": 1.1095263957977295, |
|
"logits/rejected": 1.6450704336166382, |
|
"logps/chosen": -55.1762580871582, |
|
"logps/rejected": -562.0362548828125, |
|
"loss": 17481.3469, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1728857308626175, |
|
"rewards/margins": 0.5043104887008667, |
|
"rewards/rejected": -0.3314247727394104, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 12.658227848101266, |
|
"grad_norm": 492108.78941813926, |
|
"learning_rate": 1.6389846443121277e-07, |
|
"logits/chosen": 0.4340684413909912, |
|
"logits/rejected": 0.34048348665237427, |
|
"logps/chosen": -56.212928771972656, |
|
"logps/rejected": -578.192138671875, |
|
"loss": 16462.5594, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17624449729919434, |
|
"rewards/margins": 0.5216260552406311, |
|
"rewards/rejected": -0.3453815281391144, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.708860759493671, |
|
"grad_norm": 513189.7522025148, |
|
"learning_rate": 1.6233155750548415e-07, |
|
"logits/chosen": -0.21513333916664124, |
|
"logits/rejected": -0.05444493144750595, |
|
"logps/chosen": -60.96831512451172, |
|
"logps/rejected": -583.4918823242188, |
|
"loss": 16903.7125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1871432662010193, |
|
"rewards/margins": 0.5204809904098511, |
|
"rewards/rejected": -0.3333377242088318, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 12.759493670886076, |
|
"grad_norm": 527855.7040773877, |
|
"learning_rate": 1.6076465057975556e-07, |
|
"logits/chosen": -1.166076421737671, |
|
"logits/rejected": -0.5938941240310669, |
|
"logps/chosen": -66.41789245605469, |
|
"logps/rejected": -565.521728515625, |
|
"loss": 16873.3, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.18609380722045898, |
|
"rewards/margins": 0.5067971348762512, |
|
"rewards/rejected": -0.32070332765579224, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 12.810126582278482, |
|
"grad_norm": 454333.8693268159, |
|
"learning_rate": 1.5919774365402695e-07, |
|
"logits/chosen": -3.2188408374786377, |
|
"logits/rejected": -2.827929735183716, |
|
"logps/chosen": -64.64167785644531, |
|
"logps/rejected": -578.556396484375, |
|
"loss": 17413.3594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1842392235994339, |
|
"rewards/margins": 0.5160521268844604, |
|
"rewards/rejected": -0.33181288838386536, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 12.860759493670885, |
|
"grad_norm": 613283.375359761, |
|
"learning_rate": 1.5763083672829833e-07, |
|
"logits/chosen": -2.0415351390838623, |
|
"logits/rejected": -1.1543810367584229, |
|
"logps/chosen": -56.55009841918945, |
|
"logps/rejected": -565.3232421875, |
|
"loss": 16952.7828, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17874039709568024, |
|
"rewards/margins": 0.5064669847488403, |
|
"rewards/rejected": -0.3277265429496765, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 12.91139240506329, |
|
"grad_norm": 973991.6151861927, |
|
"learning_rate": 1.5606392980256972e-07, |
|
"logits/chosen": -1.9052120447158813, |
|
"logits/rejected": -1.2125427722930908, |
|
"logps/chosen": -56.37163162231445, |
|
"logps/rejected": -575.3190307617188, |
|
"loss": 17272.6656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18349668383598328, |
|
"rewards/margins": 0.5194507837295532, |
|
"rewards/rejected": -0.33595409989356995, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 12.962025316455696, |
|
"grad_norm": 1049016.1677939103, |
|
"learning_rate": 1.544970228768411e-07, |
|
"logits/chosen": -0.479561984539032, |
|
"logits/rejected": -0.6837025284767151, |
|
"logps/chosen": -56.96269989013672, |
|
"logps/rejected": -579.6213989257812, |
|
"loss": 17023.0859, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1867980808019638, |
|
"rewards/margins": 0.5234028100967407, |
|
"rewards/rejected": -0.3366047739982605, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 13.012658227848101, |
|
"grad_norm": 335161.21326055715, |
|
"learning_rate": 1.529301159511125e-07, |
|
"logits/chosen": 0.09210095554590225, |
|
"logits/rejected": 0.2885093688964844, |
|
"logps/chosen": -52.608367919921875, |
|
"logps/rejected": -558.9227294921875, |
|
"loss": 15959.725, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1778368204832077, |
|
"rewards/margins": 0.5030940175056458, |
|
"rewards/rejected": -0.32525718212127686, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 13.063291139240507, |
|
"grad_norm": 771775.1017807113, |
|
"learning_rate": 1.5136320902538388e-07, |
|
"logits/chosen": -1.3265520334243774, |
|
"logits/rejected": -0.9296306371688843, |
|
"logps/chosen": -62.875038146972656, |
|
"logps/rejected": -560.3228759765625, |
|
"loss": 15567.6344, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18662917613983154, |
|
"rewards/margins": 0.49883994460105896, |
|
"rewards/rejected": -0.31221073865890503, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 13.113924050632912, |
|
"grad_norm": 446168.3148918395, |
|
"learning_rate": 1.4979630209965526e-07, |
|
"logits/chosen": -0.11115183681249619, |
|
"logits/rejected": 0.8431870341300964, |
|
"logps/chosen": -46.82927703857422, |
|
"logps/rejected": -552.5628051757812, |
|
"loss": 16255.3438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17291709780693054, |
|
"rewards/margins": 0.5057471990585327, |
|
"rewards/rejected": -0.3328301012516022, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 13.164556962025316, |
|
"grad_norm": 586122.4453174556, |
|
"learning_rate": 1.4822939517392665e-07, |
|
"logits/chosen": -0.757349967956543, |
|
"logits/rejected": 0.037270687520504, |
|
"logps/chosen": -55.21142578125, |
|
"logps/rejected": -557.4276123046875, |
|
"loss": 16720.8172, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1850253939628601, |
|
"rewards/margins": 0.5111584663391113, |
|
"rewards/rejected": -0.32613304257392883, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 13.215189873417721, |
|
"grad_norm": 420628.2693101698, |
|
"learning_rate": 1.4666248824819803e-07, |
|
"logits/chosen": -0.11379202455282211, |
|
"logits/rejected": -0.11788152158260345, |
|
"logps/chosen": -49.00257110595703, |
|
"logps/rejected": -576.3326416015625, |
|
"loss": 16306.0688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18590961396694183, |
|
"rewards/margins": 0.5257736444473267, |
|
"rewards/rejected": -0.33986401557922363, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 13.265822784810126, |
|
"grad_norm": 436219.2086299041, |
|
"learning_rate": 1.4509558132246945e-07, |
|
"logits/chosen": -0.7918820977210999, |
|
"logits/rejected": -0.14419230818748474, |
|
"logps/chosen": -56.56486892700195, |
|
"logps/rejected": -584.7669677734375, |
|
"loss": 16369.2719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18918678164482117, |
|
"rewards/margins": 0.5305701494216919, |
|
"rewards/rejected": -0.3413834273815155, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 13.316455696202532, |
|
"grad_norm": 596793.3073449759, |
|
"learning_rate": 1.4352867439674083e-07, |
|
"logits/chosen": 1.9564087390899658, |
|
"logits/rejected": 2.246692180633545, |
|
"logps/chosen": -51.851722717285156, |
|
"logps/rejected": -548.3530883789062, |
|
"loss": 16796.1063, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18290123343467712, |
|
"rewards/margins": 0.4980129599571228, |
|
"rewards/rejected": -0.3151116371154785, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 13.367088607594937, |
|
"grad_norm": 474733.1664905385, |
|
"learning_rate": 1.4196176747101222e-07, |
|
"logits/chosen": 0.530455470085144, |
|
"logits/rejected": 0.14751790463924408, |
|
"logps/chosen": -48.55830001831055, |
|
"logps/rejected": -558.3150024414062, |
|
"loss": 16144.2906, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17938682436943054, |
|
"rewards/margins": 0.5066471695899963, |
|
"rewards/rejected": -0.3272603154182434, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 13.417721518987342, |
|
"grad_norm": 1649837.8712191964, |
|
"learning_rate": 1.403948605452836e-07, |
|
"logits/chosen": -0.03671743720769882, |
|
"logits/rejected": 0.7579118013381958, |
|
"logps/chosen": -42.065242767333984, |
|
"logps/rejected": -554.230224609375, |
|
"loss": 16118.8047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18058671057224274, |
|
"rewards/margins": 0.5129930377006531, |
|
"rewards/rejected": -0.3324064016342163, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 13.468354430379748, |
|
"grad_norm": 594890.10809389, |
|
"learning_rate": 1.38827953619555e-07, |
|
"logits/chosen": 0.288557231426239, |
|
"logits/rejected": 0.2958771288394928, |
|
"logps/chosen": -52.33495330810547, |
|
"logps/rejected": -561.2686157226562, |
|
"loss": 15733.7453, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1808079034090042, |
|
"rewards/margins": 0.5136345028877258, |
|
"rewards/rejected": -0.3328266143798828, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 13.518987341772151, |
|
"grad_norm": 467820.0894028926, |
|
"learning_rate": 1.3726104669382637e-07, |
|
"logits/chosen": -0.39889806509017944, |
|
"logits/rejected": 0.02098376676440239, |
|
"logps/chosen": -53.63391876220703, |
|
"logps/rejected": -556.4556884765625, |
|
"loss": 15584.0406, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18383657932281494, |
|
"rewards/margins": 0.5030336976051331, |
|
"rewards/rejected": -0.3191971182823181, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 13.569620253164556, |
|
"grad_norm": 349641.6736805019, |
|
"learning_rate": 1.3569413976809776e-07, |
|
"logits/chosen": -1.0416258573532104, |
|
"logits/rejected": -0.687407374382019, |
|
"logps/chosen": -40.50030517578125, |
|
"logps/rejected": -560.5548706054688, |
|
"loss": 15275.5312, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18312379717826843, |
|
"rewards/margins": 0.5221952199935913, |
|
"rewards/rejected": -0.33907145261764526, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 13.620253164556962, |
|
"grad_norm": 769040.8085386351, |
|
"learning_rate": 1.3412723284236915e-07, |
|
"logits/chosen": 1.7483727931976318, |
|
"logits/rejected": 2.3238413333892822, |
|
"logps/chosen": -49.73235321044922, |
|
"logps/rejected": -559.8514404296875, |
|
"loss": 16850.175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18260039389133453, |
|
"rewards/margins": 0.5106431245803833, |
|
"rewards/rejected": -0.3280427157878876, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 13.670886075949367, |
|
"grad_norm": 459226.17158416886, |
|
"learning_rate": 1.3256032591664053e-07, |
|
"logits/chosen": -0.2809019684791565, |
|
"logits/rejected": 0.43121522665023804, |
|
"logps/chosen": -58.69781494140625, |
|
"logps/rejected": -588.9169921875, |
|
"loss": 15404.6109, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19193768501281738, |
|
"rewards/margins": 0.5343278646469116, |
|
"rewards/rejected": -0.34239014983177185, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 13.721518987341772, |
|
"grad_norm": 339517.3364374988, |
|
"learning_rate": 1.3099341899091192e-07, |
|
"logits/chosen": 0.3717317283153534, |
|
"logits/rejected": 0.5634896159172058, |
|
"logps/chosen": -60.52980422973633, |
|
"logps/rejected": -555.2349243164062, |
|
"loss": 15341.8219, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.17079493403434753, |
|
"rewards/margins": 0.489946186542511, |
|
"rewards/rejected": -0.31915122270584106, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 13.772151898734178, |
|
"grad_norm": 1157921.1375110236, |
|
"learning_rate": 1.2942651206518333e-07, |
|
"logits/chosen": -1.758825659751892, |
|
"logits/rejected": -1.0223956108093262, |
|
"logps/chosen": -48.61360549926758, |
|
"logps/rejected": -562.5768432617188, |
|
"loss": 16196.7625, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.180302232503891, |
|
"rewards/margins": 0.5197224020957947, |
|
"rewards/rejected": -0.3394201397895813, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 13.822784810126583, |
|
"grad_norm": 434777.104877517, |
|
"learning_rate": 1.2785960513945471e-07, |
|
"logits/chosen": -0.3282082676887512, |
|
"logits/rejected": 0.4013535976409912, |
|
"logps/chosen": -50.629215240478516, |
|
"logps/rejected": -582.4617309570312, |
|
"loss": 15710.8641, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18200094997882843, |
|
"rewards/margins": 0.5299168825149536, |
|
"rewards/rejected": -0.3479159474372864, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 13.873417721518987, |
|
"grad_norm": 677123.1021845904, |
|
"learning_rate": 1.262926982137261e-07, |
|
"logits/chosen": -0.9533359408378601, |
|
"logits/rejected": -0.11374642699956894, |
|
"logps/chosen": -50.710845947265625, |
|
"logps/rejected": -568.776611328125, |
|
"loss": 16490.0469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18456825613975525, |
|
"rewards/margins": 0.5208636522293091, |
|
"rewards/rejected": -0.3362954258918762, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 13.924050632911392, |
|
"grad_norm": 608241.5399016802, |
|
"learning_rate": 1.2472579128799749e-07, |
|
"logits/chosen": -0.009487760253250599, |
|
"logits/rejected": 0.5674014091491699, |
|
"logps/chosen": -47.34721755981445, |
|
"logps/rejected": -558.3707275390625, |
|
"loss": 16114.125, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18486423790454865, |
|
"rewards/margins": 0.5096093416213989, |
|
"rewards/rejected": -0.3247450888156891, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 13.974683544303797, |
|
"grad_norm": 510265.43069577636, |
|
"learning_rate": 1.2315888436226887e-07, |
|
"logits/chosen": -1.1760886907577515, |
|
"logits/rejected": -0.8848980665206909, |
|
"logps/chosen": -50.471961975097656, |
|
"logps/rejected": -569.0016479492188, |
|
"loss": 15240.5234, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1889052391052246, |
|
"rewards/margins": 0.5153056383132935, |
|
"rewards/rejected": -0.3264002799987793, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 14.025316455696203, |
|
"grad_norm": 454762.9481647176, |
|
"learning_rate": 1.2159197743654026e-07, |
|
"logits/chosen": 2.4223504066467285, |
|
"logits/rejected": 3.487738847732544, |
|
"logps/chosen": -44.93278503417969, |
|
"logps/rejected": -561.3870849609375, |
|
"loss": 16557.4125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1806286722421646, |
|
"rewards/margins": 0.5195534229278564, |
|
"rewards/rejected": -0.33892473578453064, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 14.075949367088608, |
|
"grad_norm": 487680.4985531969, |
|
"learning_rate": 1.2002507051081164e-07, |
|
"logits/chosen": 1.9585473537445068, |
|
"logits/rejected": 2.446890354156494, |
|
"logps/chosen": -39.52117919921875, |
|
"logps/rejected": -561.9512939453125, |
|
"loss": 15203.5906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1834731251001358, |
|
"rewards/margins": 0.5265246629714966, |
|
"rewards/rejected": -0.343051552772522, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 14.126582278481013, |
|
"grad_norm": 335633.29006652284, |
|
"learning_rate": 1.1845816358508304e-07, |
|
"logits/chosen": -0.2361418753862381, |
|
"logits/rejected": 0.4229121804237366, |
|
"logps/chosen": -56.944580078125, |
|
"logps/rejected": -581.4995727539062, |
|
"loss": 14980.4906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18962779641151428, |
|
"rewards/margins": 0.5268105268478394, |
|
"rewards/rejected": -0.3371827304363251, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 14.177215189873417, |
|
"grad_norm": 433336.52566667914, |
|
"learning_rate": 1.1689125665935443e-07, |
|
"logits/chosen": -0.8853734135627747, |
|
"logits/rejected": 0.24162235856056213, |
|
"logps/chosen": -49.96304702758789, |
|
"logps/rejected": -587.9956665039062, |
|
"loss": 15952.2594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1883043497800827, |
|
"rewards/margins": 0.5334208607673645, |
|
"rewards/rejected": -0.345116525888443, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.227848101265822, |
|
"grad_norm": 352832.2810093542, |
|
"learning_rate": 1.1532434973362581e-07, |
|
"logits/chosen": -0.9270970225334167, |
|
"logits/rejected": -0.8106321096420288, |
|
"logps/chosen": -50.61150360107422, |
|
"logps/rejected": -579.4258422851562, |
|
"loss": 15482.3031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18148374557495117, |
|
"rewards/margins": 0.5241626501083374, |
|
"rewards/rejected": -0.34267887473106384, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 14.278481012658228, |
|
"grad_norm": 518734.4787371263, |
|
"learning_rate": 1.137574428078972e-07, |
|
"logits/chosen": 2.115744113922119, |
|
"logits/rejected": 2.9750027656555176, |
|
"logps/chosen": -41.601097106933594, |
|
"logps/rejected": -573.6159057617188, |
|
"loss": 15787.4719, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18657180666923523, |
|
"rewards/margins": 0.5376033186912537, |
|
"rewards/rejected": -0.35103151202201843, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 14.329113924050633, |
|
"grad_norm": 637771.2756103254, |
|
"learning_rate": 1.1219053588216858e-07, |
|
"logits/chosen": -0.09557388722896576, |
|
"logits/rejected": -0.5708149671554565, |
|
"logps/chosen": -44.071807861328125, |
|
"logps/rejected": -585.6417236328125, |
|
"loss": 15660.4813, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18580812215805054, |
|
"rewards/margins": 0.5347784757614136, |
|
"rewards/rejected": -0.34897032380104065, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 14.379746835443038, |
|
"grad_norm": 469592.5817335632, |
|
"learning_rate": 1.1062362895643998e-07, |
|
"logits/chosen": 0.14405778050422668, |
|
"logits/rejected": 0.6720622181892395, |
|
"logps/chosen": -45.77620315551758, |
|
"logps/rejected": -562.7021484375, |
|
"loss": 15265.0797, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18696969747543335, |
|
"rewards/margins": 0.5200961828231812, |
|
"rewards/rejected": -0.3331265151500702, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 14.430379746835444, |
|
"grad_norm": 381405.89470487926, |
|
"learning_rate": 1.0905672203071137e-07, |
|
"logits/chosen": -0.46474942564964294, |
|
"logits/rejected": -0.6803582906723022, |
|
"logps/chosen": -43.475257873535156, |
|
"logps/rejected": -578.9302978515625, |
|
"loss": 15502.7, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18612739443778992, |
|
"rewards/margins": 0.5332227945327759, |
|
"rewards/rejected": -0.34709542989730835, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 14.481012658227849, |
|
"grad_norm": 389034.05049605225, |
|
"learning_rate": 1.0748981510498275e-07, |
|
"logits/chosen": 0.192867711186409, |
|
"logits/rejected": 0.04235720634460449, |
|
"logps/chosen": -45.57283020019531, |
|
"logps/rejected": -573.7398071289062, |
|
"loss": 16059.1625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18987932801246643, |
|
"rewards/margins": 0.5239830613136292, |
|
"rewards/rejected": -0.33410370349884033, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 14.531645569620252, |
|
"grad_norm": 1027736.0673764712, |
|
"learning_rate": 1.0592290817925414e-07, |
|
"logits/chosen": -0.14229407906532288, |
|
"logits/rejected": 0.4352554380893707, |
|
"logps/chosen": -52.69159698486328, |
|
"logps/rejected": -584.4544067382812, |
|
"loss": 15405.6859, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19550864398479462, |
|
"rewards/margins": 0.5430904626846313, |
|
"rewards/rejected": -0.34758180379867554, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 14.582278481012658, |
|
"grad_norm": 384385.74028987245, |
|
"learning_rate": 1.0435600125352554e-07, |
|
"logits/chosen": -2.178337335586548, |
|
"logits/rejected": -0.7508569955825806, |
|
"logps/chosen": -59.098426818847656, |
|
"logps/rejected": -576.6027221679688, |
|
"loss": 14664.3844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18826426565647125, |
|
"rewards/margins": 0.5217211842536926, |
|
"rewards/rejected": -0.33345693349838257, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 14.632911392405063, |
|
"grad_norm": 329341.72262227273, |
|
"learning_rate": 1.0278909432779692e-07, |
|
"logits/chosen": -0.5238679647445679, |
|
"logits/rejected": 0.5422592163085938, |
|
"logps/chosen": -45.037288665771484, |
|
"logps/rejected": -568.7276000976562, |
|
"loss": 15557.125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18480226397514343, |
|
"rewards/margins": 0.5315712094306946, |
|
"rewards/rejected": -0.34676894545555115, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 14.683544303797468, |
|
"grad_norm": 543441.6169659087, |
|
"learning_rate": 1.0122218740206831e-07, |
|
"logits/chosen": -1.954636812210083, |
|
"logits/rejected": -1.2880172729492188, |
|
"logps/chosen": -42.44208908081055, |
|
"logps/rejected": -553.3023681640625, |
|
"loss": 15342.95, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.17853178083896637, |
|
"rewards/margins": 0.5080317258834839, |
|
"rewards/rejected": -0.32949990034103394, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 14.734177215189874, |
|
"grad_norm": 485286.8133606422, |
|
"learning_rate": 9.96552804763397e-08, |
|
"logits/chosen": -0.10534539073705673, |
|
"logits/rejected": -0.22817449271678925, |
|
"logps/chosen": -58.41508102416992, |
|
"logps/rejected": -589.82861328125, |
|
"loss": 14829.6719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19155274331569672, |
|
"rewards/margins": 0.5371214747428894, |
|
"rewards/rejected": -0.3455687165260315, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 14.784810126582279, |
|
"grad_norm": 443260.47292018944, |
|
"learning_rate": 9.808837355061108e-08, |
|
"logits/chosen": 0.06932596862316132, |
|
"logits/rejected": -0.2167021781206131, |
|
"logps/chosen": -47.265785217285156, |
|
"logps/rejected": -564.3973388671875, |
|
"loss": 15330.0641, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1784828007221222, |
|
"rewards/margins": 0.5103118419647217, |
|
"rewards/rejected": -0.3318290710449219, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 14.835443037974684, |
|
"grad_norm": 483368.1079372665, |
|
"learning_rate": 9.652146662488248e-08, |
|
"logits/chosen": -0.06790392100811005, |
|
"logits/rejected": 0.29011401534080505, |
|
"logps/chosen": -54.78580856323242, |
|
"logps/rejected": -574.620361328125, |
|
"loss": 15093.3531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18937523663043976, |
|
"rewards/margins": 0.5257763862609863, |
|
"rewards/rejected": -0.33640116453170776, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 14.886075949367088, |
|
"grad_norm": 955906.0887958824, |
|
"learning_rate": 9.495455969915387e-08, |
|
"logits/chosen": 1.4835760593414307, |
|
"logits/rejected": 1.6735947132110596, |
|
"logps/chosen": -46.26830291748047, |
|
"logps/rejected": -551.6137084960938, |
|
"loss": 15061.7437, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1782112419605255, |
|
"rewards/margins": 0.5047247409820557, |
|
"rewards/rejected": -0.32651349902153015, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 14.936708860759493, |
|
"grad_norm": 389874.4777367002, |
|
"learning_rate": 9.338765277342525e-08, |
|
"logits/chosen": -0.45253458619117737, |
|
"logits/rejected": 0.04955162853002548, |
|
"logps/chosen": -44.50522994995117, |
|
"logps/rejected": -556.4650268554688, |
|
"loss": 15850.6094, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1812363862991333, |
|
"rewards/margins": 0.5129731893539429, |
|
"rewards/rejected": -0.33173683285713196, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 14.987341772151899, |
|
"grad_norm": 880494.2982969056, |
|
"learning_rate": 9.182074584769664e-08, |
|
"logits/chosen": -1.3114904165267944, |
|
"logits/rejected": -0.3469497859477997, |
|
"logps/chosen": -48.75851821899414, |
|
"logps/rejected": -542.8458862304688, |
|
"loss": 14465.8125, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1728508621454239, |
|
"rewards/margins": 0.49641647934913635, |
|
"rewards/rejected": -0.32356563210487366, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 15.037974683544304, |
|
"grad_norm": 518700.7292410764, |
|
"learning_rate": 9.025383892196802e-08, |
|
"logits/chosen": 1.0516235828399658, |
|
"logits/rejected": 1.4486608505249023, |
|
"logps/chosen": -50.19924545288086, |
|
"logps/rejected": -568.3731689453125, |
|
"loss": 15371.2547, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1881760060787201, |
|
"rewards/margins": 0.5164635181427002, |
|
"rewards/rejected": -0.3282875716686249, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 15.08860759493671, |
|
"grad_norm": 331391.7564792058, |
|
"learning_rate": 8.868693199623942e-08, |
|
"logits/chosen": 2.2234063148498535, |
|
"logits/rejected": 2.0345654487609863, |
|
"logps/chosen": -52.14508819580078, |
|
"logps/rejected": -595.8091430664062, |
|
"loss": 14717.8656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1904282122850418, |
|
"rewards/margins": 0.5425348877906799, |
|
"rewards/rejected": -0.3521067202091217, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 15.139240506329115, |
|
"grad_norm": 245591.75428222032, |
|
"learning_rate": 8.712002507051081e-08, |
|
"logits/chosen": -0.6622523069381714, |
|
"logits/rejected": -0.06956877559423447, |
|
"logps/chosen": -52.00910186767578, |
|
"logps/rejected": -563.0474853515625, |
|
"loss": 15161.7313, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1884680539369583, |
|
"rewards/margins": 0.5108307003974915, |
|
"rewards/rejected": -0.32236260175704956, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 15.189873417721518, |
|
"grad_norm": 310549.6440256543, |
|
"learning_rate": 8.555311814478219e-08, |
|
"logits/chosen": 0.2366395890712738, |
|
"logits/rejected": 0.44344860315322876, |
|
"logps/chosen": -41.386192321777344, |
|
"logps/rejected": -572.7687377929688, |
|
"loss": 14740.5063, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1788499653339386, |
|
"rewards/margins": 0.5284099578857422, |
|
"rewards/rejected": -0.3495599925518036, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.240506329113924, |
|
"grad_norm": 306008.0109626414, |
|
"learning_rate": 8.398621121905358e-08, |
|
"logits/chosen": 0.007425785064697266, |
|
"logits/rejected": 0.6882709264755249, |
|
"logps/chosen": -61.54619598388672, |
|
"logps/rejected": -565.9954833984375, |
|
"loss": 14890.1531, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.18634898960590363, |
|
"rewards/margins": 0.5029118061065674, |
|
"rewards/rejected": -0.31656283140182495, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 15.291139240506329, |
|
"grad_norm": 542292.2583731171, |
|
"learning_rate": 8.241930429332496e-08, |
|
"logits/chosen": -1.8317344188690186, |
|
"logits/rejected": -1.2810354232788086, |
|
"logps/chosen": -55.94157791137695, |
|
"logps/rejected": -610.6949462890625, |
|
"loss": 14922.1328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20198726654052734, |
|
"rewards/margins": 0.5547267198562622, |
|
"rewards/rejected": -0.3527393639087677, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 15.341772151898734, |
|
"grad_norm": 246111.44147055785, |
|
"learning_rate": 8.085239736759636e-08, |
|
"logits/chosen": 0.38201937079429626, |
|
"logits/rejected": 0.48218441009521484, |
|
"logps/chosen": -49.771148681640625, |
|
"logps/rejected": -579.5675048828125, |
|
"loss": 14315.8422, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18888349831104279, |
|
"rewards/margins": 0.52850741147995, |
|
"rewards/rejected": -0.33962392807006836, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 15.39240506329114, |
|
"grad_norm": 365392.8501035466, |
|
"learning_rate": 7.928549044186775e-08, |
|
"logits/chosen": 0.2196371853351593, |
|
"logits/rejected": 0.5740281939506531, |
|
"logps/chosen": -37.870933532714844, |
|
"logps/rejected": -532.795166015625, |
|
"loss": 14228.8297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.17593248188495636, |
|
"rewards/margins": 0.4975932538509369, |
|
"rewards/rejected": -0.3216607868671417, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 15.443037974683545, |
|
"grad_norm": 601622.5104727764, |
|
"learning_rate": 7.771858351613913e-08, |
|
"logits/chosen": -0.6718970537185669, |
|
"logits/rejected": -0.666345477104187, |
|
"logps/chosen": -44.54059600830078, |
|
"logps/rejected": -578.719482421875, |
|
"loss": 15052.1406, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19072814285755157, |
|
"rewards/margins": 0.5325638055801392, |
|
"rewards/rejected": -0.3418356776237488, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 15.49367088607595, |
|
"grad_norm": 343253.0399713909, |
|
"learning_rate": 7.615167659041052e-08, |
|
"logits/chosen": -1.7298717498779297, |
|
"logits/rejected": -1.107236385345459, |
|
"logps/chosen": -48.916072845458984, |
|
"logps/rejected": -581.4259643554688, |
|
"loss": 15088.4312, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18928301334381104, |
|
"rewards/margins": 0.5350446701049805, |
|
"rewards/rejected": -0.34576165676116943, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 15.544303797468354, |
|
"grad_norm": 228770.67672990158, |
|
"learning_rate": 7.45847696646819e-08, |
|
"logits/chosen": 1.368043303489685, |
|
"logits/rejected": 2.1229677200317383, |
|
"logps/chosen": -49.823055267333984, |
|
"logps/rejected": -576.06103515625, |
|
"loss": 13555.7672, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18895366787910461, |
|
"rewards/margins": 0.5293976664543152, |
|
"rewards/rejected": -0.34044402837753296, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 15.594936708860759, |
|
"grad_norm": 292818.2312129945, |
|
"learning_rate": 7.30178627389533e-08, |
|
"logits/chosen": -0.7066992521286011, |
|
"logits/rejected": 0.058099888265132904, |
|
"logps/chosen": -52.58687210083008, |
|
"logps/rejected": -577.005859375, |
|
"loss": 14893.6594, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.19148708879947662, |
|
"rewards/margins": 0.5295326113700867, |
|
"rewards/rejected": -0.33804553747177124, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 15.645569620253164, |
|
"grad_norm": 275063.1192623706, |
|
"learning_rate": 7.145095581322469e-08, |
|
"logits/chosen": 0.057862140238285065, |
|
"logits/rejected": -0.10827471315860748, |
|
"logps/chosen": -51.52691650390625, |
|
"logps/rejected": -598.4918212890625, |
|
"loss": 14740.6531, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1917671114206314, |
|
"rewards/margins": 0.5416404008865356, |
|
"rewards/rejected": -0.34987324476242065, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 15.69620253164557, |
|
"grad_norm": 270643.231235499, |
|
"learning_rate": 6.988404888749608e-08, |
|
"logits/chosen": 0.49672946333885193, |
|
"logits/rejected": 0.9934390187263489, |
|
"logps/chosen": -53.964393615722656, |
|
"logps/rejected": -592.7462158203125, |
|
"loss": 14747.2812, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19860555231571198, |
|
"rewards/margins": 0.5442546010017395, |
|
"rewards/rejected": -0.3456490635871887, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 15.746835443037975, |
|
"grad_norm": 366703.97931916115, |
|
"learning_rate": 6.831714196176746e-08, |
|
"logits/chosen": -1.272958517074585, |
|
"logits/rejected": -1.2677191495895386, |
|
"logps/chosen": -46.67731475830078, |
|
"logps/rejected": -578.444091796875, |
|
"loss": 14561.6719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19132201373577118, |
|
"rewards/margins": 0.5392004251480103, |
|
"rewards/rejected": -0.3478783965110779, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 15.79746835443038, |
|
"grad_norm": 363431.4061189904, |
|
"learning_rate": 6.675023503603886e-08, |
|
"logits/chosen": -0.16689462959766388, |
|
"logits/rejected": 0.6665533781051636, |
|
"logps/chosen": -49.408546447753906, |
|
"logps/rejected": -587.0728759765625, |
|
"loss": 14602.2328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1951448619365692, |
|
"rewards/margins": 0.538873553276062, |
|
"rewards/rejected": -0.3437287211418152, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 15.848101265822784, |
|
"grad_norm": 1925815.481070705, |
|
"learning_rate": 6.518332811031025e-08, |
|
"logits/chosen": -0.1888163536787033, |
|
"logits/rejected": -0.3901883661746979, |
|
"logps/chosen": -37.012611389160156, |
|
"logps/rejected": -553.5242919921875, |
|
"loss": 15093.5328, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18000957369804382, |
|
"rewards/margins": 0.5157765746116638, |
|
"rewards/rejected": -0.3357670307159424, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 15.89873417721519, |
|
"grad_norm": 406865.81368112064, |
|
"learning_rate": 6.361642118458163e-08, |
|
"logits/chosen": -1.0143232345581055, |
|
"logits/rejected": -1.1421440839767456, |
|
"logps/chosen": -39.294063568115234, |
|
"logps/rejected": -572.8070068359375, |
|
"loss": 15857.7219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18329963088035583, |
|
"rewards/margins": 0.5344266891479492, |
|
"rewards/rejected": -0.351127028465271, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 15.949367088607595, |
|
"grad_norm": 283773.4922827141, |
|
"learning_rate": 6.204951425885302e-08, |
|
"logits/chosen": 0.45898357033729553, |
|
"logits/rejected": 1.1897245645523071, |
|
"logps/chosen": -47.45745086669922, |
|
"logps/rejected": -564.1045532226562, |
|
"loss": 15274.2656, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.17995783686637878, |
|
"rewards/margins": 0.516915500164032, |
|
"rewards/rejected": -0.3369576930999756, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 338639.8303682123, |
|
"learning_rate": 6.04826073331244e-08, |
|
"logits/chosen": -1.1235512495040894, |
|
"logits/rejected": 0.0012889147037640214, |
|
"logps/chosen": -41.902889251708984, |
|
"logps/rejected": -569.4451293945312, |
|
"loss": 15055.2062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18416796624660492, |
|
"rewards/margins": 0.5268322825431824, |
|
"rewards/rejected": -0.34266436100006104, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 16.050632911392405, |
|
"grad_norm": 256869.56003810524, |
|
"learning_rate": 5.8915700407395795e-08, |
|
"logits/chosen": -1.1983295679092407, |
|
"logits/rejected": -0.22695603966712952, |
|
"logps/chosen": -41.12403106689453, |
|
"logps/rejected": -573.8383178710938, |
|
"loss": 14636.0719, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1912733018398285, |
|
"rewards/margins": 0.5368129014968872, |
|
"rewards/rejected": -0.3455396294593811, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 16.10126582278481, |
|
"grad_norm": 251620.82775792846, |
|
"learning_rate": 5.734879348166719e-08, |
|
"logits/chosen": -0.662868082523346, |
|
"logits/rejected": 0.3795197606086731, |
|
"logps/chosen": -38.75691604614258, |
|
"logps/rejected": -555.0902709960938, |
|
"loss": 14758.6562, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.1840089112520218, |
|
"rewards/margins": 0.5191300511360168, |
|
"rewards/rejected": -0.335121214389801, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 16.151898734177216, |
|
"grad_norm": 386320.34193101624, |
|
"learning_rate": 5.5781886555938573e-08, |
|
"logits/chosen": 0.9088973999023438, |
|
"logits/rejected": 1.0200951099395752, |
|
"logps/chosen": -37.841434478759766, |
|
"logps/rejected": -549.9398193359375, |
|
"loss": 14645.3125, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18419453501701355, |
|
"rewards/margins": 0.5178717374801636, |
|
"rewards/rejected": -0.3336772620677948, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 16.20253164556962, |
|
"grad_norm": 323738.56127307797, |
|
"learning_rate": 5.421497963020996e-08, |
|
"logits/chosen": 1.6748106479644775, |
|
"logits/rejected": 1.7903064489364624, |
|
"logps/chosen": -43.683780670166016, |
|
"logps/rejected": -559.7962036132812, |
|
"loss": 14378.5187, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1856391578912735, |
|
"rewards/margins": 0.5182951092720032, |
|
"rewards/rejected": -0.3326559364795685, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 16.253164556962027, |
|
"grad_norm": 254204.27494940045, |
|
"learning_rate": 5.264807270448135e-08, |
|
"logits/chosen": -0.028285836800932884, |
|
"logits/rejected": 0.47511911392211914, |
|
"logps/chosen": -46.74934005737305, |
|
"logps/rejected": -582.1607666015625, |
|
"loss": 14203.1469, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19257526099681854, |
|
"rewards/margins": 0.5342021584510803, |
|
"rewards/rejected": -0.3416268825531006, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 16.303797468354432, |
|
"grad_norm": 295536.9430947363, |
|
"learning_rate": 5.108116577875274e-08, |
|
"logits/chosen": 0.9740939140319824, |
|
"logits/rejected": 0.8530548810958862, |
|
"logps/chosen": -43.95893478393555, |
|
"logps/rejected": -566.3425903320312, |
|
"loss": 14617.1531, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18452490866184235, |
|
"rewards/margins": 0.5231844782829285, |
|
"rewards/rejected": -0.3386596143245697, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 16.354430379746834, |
|
"grad_norm": 228442.89270088554, |
|
"learning_rate": 4.951425885302413e-08, |
|
"logits/chosen": -0.6641544699668884, |
|
"logits/rejected": -0.42437514662742615, |
|
"logps/chosen": -42.97655487060547, |
|
"logps/rejected": -572.6472778320312, |
|
"loss": 14575.375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19143202900886536, |
|
"rewards/margins": 0.5323026776313782, |
|
"rewards/rejected": -0.34087061882019043, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 16.40506329113924, |
|
"grad_norm": 280822.1227003712, |
|
"learning_rate": 4.7947351927295515e-08, |
|
"logits/chosen": 1.1500619649887085, |
|
"logits/rejected": 1.5377223491668701, |
|
"logps/chosen": -40.756866455078125, |
|
"logps/rejected": -555.7669067382812, |
|
"loss": 14355.8438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18818344175815582, |
|
"rewards/margins": 0.5185222029685974, |
|
"rewards/rejected": -0.3303387761116028, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 16.455696202531644, |
|
"grad_norm": 211726.7404787661, |
|
"learning_rate": 4.63804450015669e-08, |
|
"logits/chosen": -0.1092449203133583, |
|
"logits/rejected": 0.2951999306678772, |
|
"logps/chosen": -42.441200256347656, |
|
"logps/rejected": -545.1079711914062, |
|
"loss": 14375.5266, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18679597973823547, |
|
"rewards/margins": 0.5060458779335022, |
|
"rewards/rejected": -0.31924980878829956, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 16.50632911392405, |
|
"grad_norm": 356888.551437776, |
|
"learning_rate": 4.481353807583829e-08, |
|
"logits/chosen": -1.3785438537597656, |
|
"logits/rejected": -1.0880242586135864, |
|
"logps/chosen": -54.5753288269043, |
|
"logps/rejected": -585.0982666015625, |
|
"loss": 13676.1484, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19741004705429077, |
|
"rewards/margins": 0.5335227251052856, |
|
"rewards/rejected": -0.3361126780509949, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 16.556962025316455, |
|
"grad_norm": 364581.3025715214, |
|
"learning_rate": 4.324663115010968e-08, |
|
"logits/chosen": -0.7049742341041565, |
|
"logits/rejected": -0.23324167728424072, |
|
"logps/chosen": -51.56848907470703, |
|
"logps/rejected": -578.4015502929688, |
|
"loss": 14484.6266, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18512576818466187, |
|
"rewards/margins": 0.5236076712608337, |
|
"rewards/rejected": -0.3384818732738495, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 16.60759493670886, |
|
"grad_norm": 336864.8330615521, |
|
"learning_rate": 4.167972422438107e-08, |
|
"logits/chosen": -0.9721381068229675, |
|
"logits/rejected": -1.1028145551681519, |
|
"logps/chosen": -55.94579315185547, |
|
"logps/rejected": -583.2372436523438, |
|
"loss": 14945.2641, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.19380484521389008, |
|
"rewards/margins": 0.5321142673492432, |
|
"rewards/rejected": -0.3383094370365143, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 16.658227848101266, |
|
"grad_norm": 310564.956837095, |
|
"learning_rate": 4.0112817298652456e-08, |
|
"logits/chosen": -0.6065518260002136, |
|
"logits/rejected": -0.21473164856433868, |
|
"logps/chosen": -46.307228088378906, |
|
"logps/rejected": -586.7664184570312, |
|
"loss": 14667.4531, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19551894068717957, |
|
"rewards/margins": 0.5414855480194092, |
|
"rewards/rejected": -0.345966637134552, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 16.70886075949367, |
|
"grad_norm": 329301.5108160766, |
|
"learning_rate": 3.854591037292385e-08, |
|
"logits/chosen": 0.40292587876319885, |
|
"logits/rejected": 1.5396214723587036, |
|
"logps/chosen": -40.793739318847656, |
|
"logps/rejected": -570.8857421875, |
|
"loss": 14524.3094, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18828611075878143, |
|
"rewards/margins": 0.5341116189956665, |
|
"rewards/rejected": -0.3458254337310791, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 16.759493670886076, |
|
"grad_norm": 389871.220870713, |
|
"learning_rate": 3.6979003447195234e-08, |
|
"logits/chosen": -0.2180454283952713, |
|
"logits/rejected": 0.63756263256073, |
|
"logps/chosen": -48.842628479003906, |
|
"logps/rejected": -596.3530883789062, |
|
"loss": 15026.0328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19478780031204224, |
|
"rewards/margins": 0.5423206090927124, |
|
"rewards/rejected": -0.34753280878067017, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 16.810126582278482, |
|
"grad_norm": 297091.2945334893, |
|
"learning_rate": 3.541209652146662e-08, |
|
"logits/chosen": -0.4556306302547455, |
|
"logits/rejected": 0.1757240742444992, |
|
"logps/chosen": -52.64439010620117, |
|
"logps/rejected": -598.89990234375, |
|
"loss": 14405.2531, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19450917840003967, |
|
"rewards/margins": 0.5458864569664001, |
|
"rewards/rejected": -0.3513772487640381, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 16.860759493670887, |
|
"grad_norm": 1094427.122685082, |
|
"learning_rate": 3.384518959573801e-08, |
|
"logits/chosen": -0.09430136531591415, |
|
"logits/rejected": 0.669711709022522, |
|
"logps/chosen": -48.170013427734375, |
|
"logps/rejected": -584.9744873046875, |
|
"loss": 15005.1063, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.1912693828344345, |
|
"rewards/margins": 0.5353102087974548, |
|
"rewards/rejected": -0.34404081106185913, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 16.911392405063292, |
|
"grad_norm": 266675.6307359935, |
|
"learning_rate": 3.22782826700094e-08, |
|
"logits/chosen": -0.09551366418600082, |
|
"logits/rejected": -0.07008041441440582, |
|
"logps/chosen": -36.88441848754883, |
|
"logps/rejected": -568.5509033203125, |
|
"loss": 13823.6516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18999743461608887, |
|
"rewards/margins": 0.5339778661727905, |
|
"rewards/rejected": -0.34398046135902405, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 16.962025316455698, |
|
"grad_norm": 562034.347414135, |
|
"learning_rate": 3.071137574428079e-08, |
|
"logits/chosen": 0.6763383746147156, |
|
"logits/rejected": 0.4948856830596924, |
|
"logps/chosen": -46.25956726074219, |
|
"logps/rejected": -565.7184448242188, |
|
"loss": 14414.3859, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18556642532348633, |
|
"rewards/margins": 0.5159622430801392, |
|
"rewards/rejected": -0.33039581775665283, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 17.0126582278481, |
|
"grad_norm": 218651.56901322177, |
|
"learning_rate": 2.9144468818552176e-08, |
|
"logits/chosen": 0.41573429107666016, |
|
"logits/rejected": 1.103547215461731, |
|
"logps/chosen": -37.6799201965332, |
|
"logps/rejected": -569.5391235351562, |
|
"loss": 14029.3563, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18778078258037567, |
|
"rewards/margins": 0.5316546559333801, |
|
"rewards/rejected": -0.34387388825416565, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 17.063291139240505, |
|
"grad_norm": 236719.0916690887, |
|
"learning_rate": 2.7577561892823564e-08, |
|
"logits/chosen": -0.09267449378967285, |
|
"logits/rejected": 0.3535307049751282, |
|
"logps/chosen": -43.02147674560547, |
|
"logps/rejected": -571.3306884765625, |
|
"loss": 14216.225, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18815621733665466, |
|
"rewards/margins": 0.52230304479599, |
|
"rewards/rejected": -0.33414679765701294, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 17.11392405063291, |
|
"grad_norm": 151995.97062770248, |
|
"learning_rate": 2.6010654967094953e-08, |
|
"logits/chosen": 1.3600900173187256, |
|
"logits/rejected": 0.45606088638305664, |
|
"logps/chosen": -33.07421112060547, |
|
"logps/rejected": -574.2869262695312, |
|
"loss": 14569.3875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1843741536140442, |
|
"rewards/margins": 0.533474862575531, |
|
"rewards/rejected": -0.3491007089614868, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 17.164556962025316, |
|
"grad_norm": 229039.39535517112, |
|
"learning_rate": 2.4443748041366342e-08, |
|
"logits/chosen": 0.012326288037002087, |
|
"logits/rejected": -0.24337856471538544, |
|
"logps/chosen": -48.85834503173828, |
|
"logps/rejected": -591.9976806640625, |
|
"loss": 15141.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19536466896533966, |
|
"rewards/margins": 0.5443064570426941, |
|
"rewards/rejected": -0.34894177317619324, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 17.21518987341772, |
|
"grad_norm": 224579.22425486994, |
|
"learning_rate": 2.2876841115637728e-08, |
|
"logits/chosen": -0.07731113582849503, |
|
"logits/rejected": 0.8038260340690613, |
|
"logps/chosen": -42.96089172363281, |
|
"logps/rejected": -587.9930419921875, |
|
"loss": 13962.9406, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18969421088695526, |
|
"rewards/margins": 0.5471119284629822, |
|
"rewards/rejected": -0.3574177622795105, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 17.265822784810126, |
|
"grad_norm": 194108.45632178357, |
|
"learning_rate": 2.1309934189909117e-08, |
|
"logits/chosen": -1.735790491104126, |
|
"logits/rejected": -0.8417277336120605, |
|
"logps/chosen": -40.28795623779297, |
|
"logps/rejected": -577.9163208007812, |
|
"loss": 14457.5328, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19212636351585388, |
|
"rewards/margins": 0.54021155834198, |
|
"rewards/rejected": -0.3480851650238037, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 17.31645569620253, |
|
"grad_norm": 323871.0912725565, |
|
"learning_rate": 1.9743027264180506e-08, |
|
"logits/chosen": 1.0423898696899414, |
|
"logits/rejected": 1.1823880672454834, |
|
"logps/chosen": -50.077327728271484, |
|
"logps/rejected": -565.8704223632812, |
|
"loss": 14191.8531, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18371161818504333, |
|
"rewards/margins": 0.5181502103805542, |
|
"rewards/rejected": -0.3344385623931885, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 17.367088607594937, |
|
"grad_norm": 207973.13380554292, |
|
"learning_rate": 1.8176120338451895e-08, |
|
"logits/chosen": -0.8037737011909485, |
|
"logits/rejected": -0.8005819320678711, |
|
"logps/chosen": -45.626670837402344, |
|
"logps/rejected": -544.7116088867188, |
|
"loss": 14114.0906, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1760983169078827, |
|
"rewards/margins": 0.5017568469047546, |
|
"rewards/rejected": -0.32565850019454956, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 17.417721518987342, |
|
"grad_norm": 191156.31750064602, |
|
"learning_rate": 1.6609213412723284e-08, |
|
"logits/chosen": 1.2277637720108032, |
|
"logits/rejected": 0.573845386505127, |
|
"logps/chosen": -50.492279052734375, |
|
"logps/rejected": -586.3282470703125, |
|
"loss": 13957.7594, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.19206462800502777, |
|
"rewards/margins": 0.5328875184059143, |
|
"rewards/rejected": -0.34082287549972534, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 17.468354430379748, |
|
"grad_norm": 262791.84599779843, |
|
"learning_rate": 1.5042306486994673e-08, |
|
"logits/chosen": 0.29228338599205017, |
|
"logits/rejected": 0.9747223854064941, |
|
"logps/chosen": -37.640201568603516, |
|
"logps/rejected": -557.47119140625, |
|
"loss": 14478.4906, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18818514049053192, |
|
"rewards/margins": 0.5214470624923706, |
|
"rewards/rejected": -0.3332619369029999, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 17.518987341772153, |
|
"grad_norm": 227441.5548714142, |
|
"learning_rate": 1.347539956126606e-08, |
|
"logits/chosen": -0.060483645647764206, |
|
"logits/rejected": 0.41309136152267456, |
|
"logps/chosen": -46.32054138183594, |
|
"logps/rejected": -588.6563720703125, |
|
"loss": 14804.9047, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19712397456169128, |
|
"rewards/margins": 0.5437620878219604, |
|
"rewards/rejected": -0.34663814306259155, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 17.569620253164558, |
|
"grad_norm": 378558.8588589865, |
|
"learning_rate": 1.1908492635537449e-08, |
|
"logits/chosen": 2.0075535774230957, |
|
"logits/rejected": 2.772726058959961, |
|
"logps/chosen": -46.09113693237305, |
|
"logps/rejected": -582.0597534179688, |
|
"loss": 14645.9562, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18593838810920715, |
|
"rewards/margins": 0.5331605076789856, |
|
"rewards/rejected": -0.34722214937210083, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 17.620253164556964, |
|
"grad_norm": 263891.6462573049, |
|
"learning_rate": 1.0341585709808836e-08, |
|
"logits/chosen": 0.273967444896698, |
|
"logits/rejected": 1.9021276235580444, |
|
"logps/chosen": -34.29851531982422, |
|
"logps/rejected": -567.2389526367188, |
|
"loss": 15085.2313, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18956169486045837, |
|
"rewards/margins": 0.5355597734451294, |
|
"rewards/rejected": -0.3459981083869934, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 17.67088607594937, |
|
"grad_norm": 276267.9285814808, |
|
"learning_rate": 8.774678784080225e-09, |
|
"logits/chosen": -0.02632077969610691, |
|
"logits/rejected": 0.4594387114048004, |
|
"logps/chosen": -45.098960876464844, |
|
"logps/rejected": -568.720947265625, |
|
"loss": 13750.4469, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19129987061023712, |
|
"rewards/margins": 0.5272942781448364, |
|
"rewards/rejected": -0.33599433302879333, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 17.72151898734177, |
|
"grad_norm": 156087.69298121333, |
|
"learning_rate": 7.207771858351613e-09, |
|
"logits/chosen": 0.04748225212097168, |
|
"logits/rejected": 0.4610685408115387, |
|
"logps/chosen": -49.872169494628906, |
|
"logps/rejected": -603.3367919921875, |
|
"loss": 13778.1469, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.19287212193012238, |
|
"rewards/margins": 0.5504390001296997, |
|
"rewards/rejected": -0.3575669229030609, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.772151898734176, |
|
"grad_norm": 209667.9634643516, |
|
"learning_rate": 5.6408649326230014e-09, |
|
"logits/chosen": 1.4883615970611572, |
|
"logits/rejected": 2.2038960456848145, |
|
"logps/chosen": -46.18961715698242, |
|
"logps/rejected": -575.4703369140625, |
|
"loss": 13653.9672, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.18806029856204987, |
|
"rewards/margins": 0.5310976505279541, |
|
"rewards/rejected": -0.34303733706474304, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 17.82278481012658, |
|
"grad_norm": 222056.5820151951, |
|
"learning_rate": 4.07395800689439e-09, |
|
"logits/chosen": -0.582931637763977, |
|
"logits/rejected": -0.23906604945659637, |
|
"logps/chosen": -60.795921325683594, |
|
"logps/rejected": -590.2525024414062, |
|
"loss": 14149.5938, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.19504059851169586, |
|
"rewards/margins": 0.5360020399093628, |
|
"rewards/rejected": -0.34096142649650574, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 17.873417721518987, |
|
"grad_norm": 213324.38139465638, |
|
"learning_rate": 2.5070510811657785e-09, |
|
"logits/chosen": -0.3791787028312683, |
|
"logits/rejected": 0.26259681582450867, |
|
"logps/chosen": -48.315147399902344, |
|
"logps/rejected": -579.376220703125, |
|
"loss": 14028.4, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19789119064807892, |
|
"rewards/margins": 0.5311988592147827, |
|
"rewards/rejected": -0.333307683467865, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 17.924050632911392, |
|
"grad_norm": 207695.40556695752, |
|
"learning_rate": 9.40144155437167e-10, |
|
"logits/chosen": 2.0871522426605225, |
|
"logits/rejected": 2.378633975982666, |
|
"logps/chosen": -36.07915115356445, |
|
"logps/rejected": -560.524169921875, |
|
"loss": 13942.7234, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.18208447098731995, |
|
"rewards/margins": 0.5249064564704895, |
|
"rewards/rejected": -0.34282201528549194, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 17.954430379746835, |
|
"step": 3546, |
|
"total_flos": 0.0, |
|
"train_loss": 44832.452316430485, |
|
"train_runtime": 5475.4345, |
|
"train_samples_per_second": 41.5, |
|
"train_steps_per_second": 0.648 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3546, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 18, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|