|
{ |
|
"best_metric": 0.5813568234443665, |
|
"best_model_checkpoint": "models/llama-3.2-3b-dpo-2/checkpoint-1500", |
|
"epoch": 2.998693948628646, |
|
"eval_steps": 100, |
|
"global_step": 1722, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0017414018284719198, |
|
"grad_norm": 16.910327090065376, |
|
"learning_rate": 9.615384615384615e-09, |
|
"logits/chosen": 0.5875190496444702, |
|
"logits/rejected": 0.8128950595855713, |
|
"logps/chosen": -403.25274658203125, |
|
"logps/rejected": -280.435302734375, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0174140182847192, |
|
"grad_norm": 19.954384726122814, |
|
"learning_rate": 9.615384615384616e-08, |
|
"logits/chosen": 0.5852577686309814, |
|
"logits/rejected": 0.6753351092338562, |
|
"logps/chosen": -359.52099609375, |
|
"logps/rejected": -325.59088134765625, |
|
"loss": 1.0009, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00044315060949884355, |
|
"rewards/margins": -0.0015449941856786609, |
|
"rewards/rejected": 0.001101843430660665, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0348280365694384, |
|
"grad_norm": 18.013297709246753, |
|
"learning_rate": 1.9230769230769231e-07, |
|
"logits/chosen": 0.5446847677230835, |
|
"logits/rejected": 0.5242202877998352, |
|
"logps/chosen": -337.18963623046875, |
|
"logps/rejected": -339.19342041015625, |
|
"loss": 0.9995, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.001996212173253298, |
|
"rewards/margins": -0.0010775469709187746, |
|
"rewards/rejected": -0.0009186647948808968, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.052242054854157595, |
|
"grad_norm": 17.103619387941173, |
|
"learning_rate": 2.884615384615384e-07, |
|
"logits/chosen": 0.5122469067573547, |
|
"logits/rejected": 0.6459108591079712, |
|
"logps/chosen": -354.17877197265625, |
|
"logps/rejected": -327.595947265625, |
|
"loss": 0.9962, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006955576129257679, |
|
"rewards/margins": 0.014386234804987907, |
|
"rewards/rejected": -0.007430657744407654, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0696560731388768, |
|
"grad_norm": 19.165758688739448, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": 0.5354681015014648, |
|
"logits/rejected": 0.6567710638046265, |
|
"logps/chosen": -320.5543212890625, |
|
"logps/rejected": -303.88934326171875, |
|
"loss": 0.9841, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.008432401344180107, |
|
"rewards/margins": 0.04507141932845116, |
|
"rewards/rejected": -0.0366390161216259, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.087070091423596, |
|
"grad_norm": 15.004492365630895, |
|
"learning_rate": 4.807692307692307e-07, |
|
"logits/chosen": 0.7950223684310913, |
|
"logits/rejected": 0.7804186940193176, |
|
"logps/chosen": -311.0839538574219, |
|
"logps/rejected": -302.98931884765625, |
|
"loss": 0.9613, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.035525619983673096, |
|
"rewards/margins": 0.1604817509651184, |
|
"rewards/rejected": -0.12495611608028412, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10448410970831519, |
|
"grad_norm": 14.20109250939024, |
|
"learning_rate": 4.99971689431323e-07, |
|
"logits/chosen": 0.6672784686088562, |
|
"logits/rejected": 0.7780674695968628, |
|
"logps/chosen": -336.57861328125, |
|
"logps/rejected": -307.4258728027344, |
|
"loss": 0.9291, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.24874690175056458, |
|
"rewards/margins": 0.37877365946769714, |
|
"rewards/rejected": -0.13002678751945496, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12189812799303439, |
|
"grad_norm": 13.148974871311646, |
|
"learning_rate": 4.998566887351406e-07, |
|
"logits/chosen": 0.4873111844062805, |
|
"logits/rejected": 0.6285614967346191, |
|
"logps/chosen": -360.8282775878906, |
|
"logps/rejected": -331.92291259765625, |
|
"loss": 0.878, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.38520747423171997, |
|
"rewards/margins": 0.48126062750816345, |
|
"rewards/rejected": -0.09605316817760468, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1393121462777536, |
|
"grad_norm": 12.357509982546857, |
|
"learning_rate": 4.996532691658581e-07, |
|
"logits/chosen": 0.6317829489707947, |
|
"logits/rejected": 0.6504614353179932, |
|
"logps/chosen": -367.9935607910156, |
|
"logps/rejected": -340.3227844238281, |
|
"loss": 0.8333, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.40676337480545044, |
|
"rewards/margins": 1.0554689168930054, |
|
"rewards/rejected": -0.6487056016921997, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1567261645624728, |
|
"grad_norm": 11.26570144090341, |
|
"learning_rate": 4.993615027092575e-07, |
|
"logits/chosen": 0.5946422815322876, |
|
"logits/rejected": 0.6343314051628113, |
|
"logps/chosen": -284.96160888671875, |
|
"logps/rejected": -307.95623779296875, |
|
"loss": 0.7985, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.30818304419517517, |
|
"rewards/margins": 0.9103001356124878, |
|
"rewards/rejected": -0.6021171808242798, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.174140182847192, |
|
"grad_norm": 10.947878449098555, |
|
"learning_rate": 4.989814926151712e-07, |
|
"logits/chosen": 0.5656188726425171, |
|
"logits/rejected": 0.5697994232177734, |
|
"logps/chosen": -331.9007873535156, |
|
"logps/rejected": -353.94854736328125, |
|
"loss": 0.7596, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.4680165648460388, |
|
"rewards/margins": 1.718173623085022, |
|
"rewards/rejected": -1.2501569986343384, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.174140182847192, |
|
"eval_logits/chosen": 0.5481991171836853, |
|
"eval_logits/rejected": 0.6135243773460388, |
|
"eval_logps/chosen": -354.6434020996094, |
|
"eval_logps/rejected": -360.88714599609375, |
|
"eval_loss": 0.7588136792182922, |
|
"eval_rewards/accuracies": 0.699367105960846, |
|
"eval_rewards/chosen": 0.13485638797283173, |
|
"eval_rewards/margins": 1.574654459953308, |
|
"eval_rewards/rejected": -1.4397982358932495, |
|
"eval_runtime": 74.5995, |
|
"eval_samples_per_second": 33.512, |
|
"eval_steps_per_second": 1.059, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19155420113191118, |
|
"grad_norm": 10.162134567361168, |
|
"learning_rate": 4.985133733609451e-07, |
|
"logits/chosen": 0.590889573097229, |
|
"logits/rejected": 0.6486071944236755, |
|
"logps/chosen": -326.9929504394531, |
|
"logps/rejected": -329.12506103515625, |
|
"loss": 0.7376, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.4178914427757263, |
|
"rewards/margins": 2.201124668121338, |
|
"rewards/rejected": -1.7832332849502563, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.20896821941663038, |
|
"grad_norm": 10.257743402467643, |
|
"learning_rate": 4.979573106038489e-07, |
|
"logits/chosen": 0.5782706141471863, |
|
"logits/rejected": 0.6307928562164307, |
|
"logps/chosen": -318.9755859375, |
|
"logps/rejected": -345.50799560546875, |
|
"loss": 0.7276, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.34923794865608215, |
|
"rewards/margins": 2.149462938308716, |
|
"rewards/rejected": -1.800225019454956, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22638223770134958, |
|
"grad_norm": 11.74103724012099, |
|
"learning_rate": 4.973135011224546e-07, |
|
"logits/chosen": 0.4907318651676178, |
|
"logits/rejected": 0.5648502707481384, |
|
"logps/chosen": -326.4444580078125, |
|
"logps/rejected": -352.13140869140625, |
|
"loss": 0.7176, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4452616572380066, |
|
"rewards/margins": 2.2069687843322754, |
|
"rewards/rejected": -1.7617069482803345, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.24379625598606877, |
|
"grad_norm": 10.814311644424995, |
|
"learning_rate": 4.965821727469999e-07, |
|
"logits/chosen": 0.5636430382728577, |
|
"logits/rejected": 0.653995156288147, |
|
"logps/chosen": -360.30804443359375, |
|
"logps/rejected": -338.9974365234375, |
|
"loss": 0.7097, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.4193500876426697, |
|
"rewards/margins": 2.334038257598877, |
|
"rewards/rejected": -1.9146884679794312, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26121027427078797, |
|
"grad_norm": 9.638215913082206, |
|
"learning_rate": 4.957635842787648e-07, |
|
"logits/chosen": 0.4618745744228363, |
|
"logits/rejected": 0.5876752138137817, |
|
"logps/chosen": -342.8825988769531, |
|
"logps/rejected": -373.37945556640625, |
|
"loss": 0.7149, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.020968124270439148, |
|
"rewards/margins": 2.3450706005096436, |
|
"rewards/rejected": -2.3241021633148193, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2786242925555072, |
|
"grad_norm": 11.08344519424176, |
|
"learning_rate": 4.948580253984868e-07, |
|
"logits/chosen": 0.47446388006210327, |
|
"logits/rejected": 0.5650290250778198, |
|
"logps/chosen": -307.9524230957031, |
|
"logps/rejected": -348.2874450683594, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5717116594314575, |
|
"rewards/margins": 3.2698848247528076, |
|
"rewards/rejected": -2.6981732845306396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.29603831084022636, |
|
"grad_norm": 9.33184052198387, |
|
"learning_rate": 4.938658165638496e-07, |
|
"logits/chosen": 0.4030691981315613, |
|
"logits/rejected": 0.5435680150985718, |
|
"logps/chosen": -353.84063720703125, |
|
"logps/rejected": -383.1701354980469, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5485956072807312, |
|
"rewards/margins": 3.1341984272003174, |
|
"rewards/rejected": -2.5856029987335205, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3134523291249456, |
|
"grad_norm": 9.495740140284331, |
|
"learning_rate": 4.9278730889608e-07, |
|
"logits/chosen": 0.5938032269477844, |
|
"logits/rejected": 0.6139369606971741, |
|
"logps/chosen": -321.7276916503906, |
|
"logps/rejected": -362.5476989746094, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.118109941482544, |
|
"rewards/margins": 3.9192118644714355, |
|
"rewards/rejected": -2.8011021614074707, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3308663474096648, |
|
"grad_norm": 9.373432885518516, |
|
"learning_rate": 4.916228840556932e-07, |
|
"logits/chosen": 0.49382227659225464, |
|
"logits/rejected": 0.6422325968742371, |
|
"logps/chosen": -365.77471923828125, |
|
"logps/rejected": -338.041015625, |
|
"loss": 0.6736, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.5264456272125244, |
|
"rewards/margins": 3.0396347045898438, |
|
"rewards/rejected": -2.5131890773773193, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.348280365694384, |
|
"grad_norm": 10.853280262051877, |
|
"learning_rate": 4.903729541074324e-07, |
|
"logits/chosen": 0.4458913207054138, |
|
"logits/rejected": 0.560121476650238, |
|
"logps/chosen": -360.0127258300781, |
|
"logps/rejected": -370.9368896484375, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.29008936882019043, |
|
"rewards/margins": 2.945146083831787, |
|
"rewards/rejected": -2.655057191848755, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.348280365694384, |
|
"eval_logits/chosen": 0.47184163331985474, |
|
"eval_logits/rejected": 0.533531904220581, |
|
"eval_logps/chosen": -349.7451477050781, |
|
"eval_logps/rejected": -373.811767578125, |
|
"eval_loss": 0.6680147051811218, |
|
"eval_rewards/accuracies": 0.7278481125831604, |
|
"eval_rewards/chosen": 0.6246804594993591, |
|
"eval_rewards/margins": 3.356940984725952, |
|
"eval_rewards/rejected": -2.7322607040405273, |
|
"eval_runtime": 73.458, |
|
"eval_samples_per_second": 34.033, |
|
"eval_steps_per_second": 1.075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3656943839791032, |
|
"grad_norm": 9.744674847212522, |
|
"learning_rate": 4.890379613744469e-07, |
|
"logits/chosen": 0.5317627787590027, |
|
"logits/rejected": 0.6278539896011353, |
|
"logps/chosen": -336.4283142089844, |
|
"logps/rejected": -345.31683349609375, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.5727043747901917, |
|
"rewards/margins": 3.1886203289031982, |
|
"rewards/rejected": -2.6159162521362305, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.38310840226382237, |
|
"grad_norm": 9.347496298393207, |
|
"learning_rate": 4.87618378281764e-07, |
|
"logits/chosen": 0.4476338326931, |
|
"logits/rejected": 0.559643566608429, |
|
"logps/chosen": -353.37103271484375, |
|
"logps/rejected": -345.65435791015625, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.609833836555481, |
|
"rewards/margins": 3.4250805377960205, |
|
"rewards/rejected": -2.81524658203125, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4005224205485416, |
|
"grad_norm": 10.64305700872199, |
|
"learning_rate": 4.861147071891086e-07, |
|
"logits/chosen": 0.5430992245674133, |
|
"logits/rejected": 0.5794849395751953, |
|
"logps/chosen": -322.3576965332031, |
|
"logps/rejected": -377.4114074707031, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.870169997215271, |
|
"rewards/margins": 4.160640716552734, |
|
"rewards/rejected": -3.290470838546753, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.41793643883326076, |
|
"grad_norm": 9.671652973719253, |
|
"learning_rate": 4.845274802131277e-07, |
|
"logits/chosen": 0.5680447816848755, |
|
"logits/rejected": 0.604617178440094, |
|
"logps/chosen": -293.92315673828125, |
|
"logps/rejected": -329.440673828125, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.4282473623752594, |
|
"rewards/margins": 3.685936689376831, |
|
"rewards/rejected": -3.2576892375946045, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.43535045711798, |
|
"grad_norm": 10.210911077399032, |
|
"learning_rate": 4.828572590390871e-07, |
|
"logits/chosen": 0.46727150678634644, |
|
"logits/rejected": 0.6490423083305359, |
|
"logps/chosen": -329.9657287597656, |
|
"logps/rejected": -337.4406433105469, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.10539127886295319, |
|
"rewards/margins": 2.982442855834961, |
|
"rewards/rejected": -2.877051591873169, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.45276447540269915, |
|
"grad_norm": 11.398369421316715, |
|
"learning_rate": 4.811046347221025e-07, |
|
"logits/chosen": 0.403046190738678, |
|
"logits/rejected": 0.6530165672302246, |
|
"logps/chosen": -334.22760009765625, |
|
"logps/rejected": -334.1841735839844, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.48005884885787964, |
|
"rewards/margins": 3.5819506645202637, |
|
"rewards/rejected": -3.1018917560577393, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4701784936874184, |
|
"grad_norm": 10.244143897450304, |
|
"learning_rate": 4.792702274779775e-07, |
|
"logits/chosen": 0.5057202577590942, |
|
"logits/rejected": 0.6287623643875122, |
|
"logps/chosen": -352.79278564453125, |
|
"logps/rejected": -397.2536926269531, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.09758170694112778, |
|
"rewards/margins": 2.9382576942443848, |
|
"rewards/rejected": -2.8406760692596436, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.48759251197213754, |
|
"grad_norm": 11.19817676192306, |
|
"learning_rate": 4.773546864637237e-07, |
|
"logits/chosen": 0.3693320155143738, |
|
"logits/rejected": 0.47319895029067993, |
|
"logps/chosen": -362.92987060546875, |
|
"logps/rejected": -393.7507019042969, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.7348660826683044, |
|
"rewards/margins": 3.7446212768554688, |
|
"rewards/rejected": -3.0097551345825195, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5050065302568568, |
|
"grad_norm": 11.48370881876219, |
|
"learning_rate": 4.7535868954783724e-07, |
|
"logits/chosen": 0.5243381261825562, |
|
"logits/rejected": 0.5710750818252563, |
|
"logps/chosen": -337.2316589355469, |
|
"logps/rejected": -344.5747375488281, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 1.3425434827804565, |
|
"rewards/margins": 4.192605495452881, |
|
"rewards/rejected": -2.8500616550445557, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5224205485415759, |
|
"grad_norm": 11.566809258335352, |
|
"learning_rate": 4.732829430704162e-07, |
|
"logits/chosen": 0.5479331016540527, |
|
"logits/rejected": 0.6467794179916382, |
|
"logps/chosen": -357.7979736328125, |
|
"logps/rejected": -371.5728759765625, |
|
"loss": 0.6452, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.06717848777771, |
|
"rewards/margins": 4.3429388999938965, |
|
"rewards/rejected": -3.2757606506347656, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5224205485415759, |
|
"eval_logits/chosen": 0.48656851053237915, |
|
"eval_logits/rejected": 0.5476837158203125, |
|
"eval_logps/chosen": -354.2216491699219, |
|
"eval_logps/rejected": -384.525634765625, |
|
"eval_loss": 0.6514016389846802, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": 0.1770300567150116, |
|
"eval_rewards/margins": 3.98067569732666, |
|
"eval_rewards/rejected": -3.803645610809326, |
|
"eval_runtime": 70.9012, |
|
"eval_samples_per_second": 35.26, |
|
"eval_steps_per_second": 1.114, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5398345668262952, |
|
"grad_norm": 10.500375745607627, |
|
"learning_rate": 4.7112818159320187e-07, |
|
"logits/chosen": 0.5276615023612976, |
|
"logits/rejected": 0.5516486167907715, |
|
"logps/chosen": -326.74200439453125, |
|
"logps/rejected": -359.82073974609375, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.0380756855010986, |
|
"rewards/margins": 4.506098747253418, |
|
"rewards/rejected": -3.4680233001708984, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5572485851110144, |
|
"grad_norm": 10.088286466293443, |
|
"learning_rate": 4.6889516763963356e-07, |
|
"logits/chosen": 0.5277293920516968, |
|
"logits/rejected": 0.5241761207580566, |
|
"logps/chosen": -315.60980224609375, |
|
"logps/rejected": -370.9276428222656, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.2694261074066162, |
|
"rewards/margins": 4.256104469299316, |
|
"rewards/rejected": -2.9866783618927, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5746626033957336, |
|
"grad_norm": 9.154194830865354, |
|
"learning_rate": 4.665846914250079e-07, |
|
"logits/chosen": 0.36231085658073425, |
|
"logits/rejected": 0.5745257139205933, |
|
"logps/chosen": -350.9997863769531, |
|
"logps/rejected": -364.403564453125, |
|
"loss": 0.6387, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.9238360524177551, |
|
"rewards/margins": 3.7312512397766113, |
|
"rewards/rejected": -2.807415008544922, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5920766216804527, |
|
"grad_norm": 10.584885840971705, |
|
"learning_rate": 4.641975705768386e-07, |
|
"logits/chosen": 0.521003782749176, |
|
"logits/rejected": 0.566765546798706, |
|
"logps/chosen": -316.1827392578125, |
|
"logps/rejected": -351.51727294921875, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7287189364433289, |
|
"rewards/margins": 3.9497039318084717, |
|
"rewards/rejected": -3.2209854125976562, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6094906399651719, |
|
"grad_norm": 12.315232300296437, |
|
"learning_rate": 4.6173464984551615e-07, |
|
"logits/chosen": 0.4950438439846039, |
|
"logits/rejected": 0.5969202518463135, |
|
"logps/chosen": -326.7777099609375, |
|
"logps/rejected": -357.15496826171875, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.4777583181858063, |
|
"rewards/margins": 4.0313310623168945, |
|
"rewards/rejected": -3.5535731315612793, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6269046582498912, |
|
"grad_norm": 10.162516049642594, |
|
"learning_rate": 4.591968008053685e-07, |
|
"logits/chosen": 0.47471824288368225, |
|
"logits/rejected": 0.5946096181869507, |
|
"logps/chosen": -313.26104736328125, |
|
"logps/rejected": -352.19842529296875, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.6822806596755981, |
|
"rewards/margins": 3.897700548171997, |
|
"rewards/rejected": -3.2154202461242676, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6443186765346104, |
|
"grad_norm": 12.196841859188872, |
|
"learning_rate": 4.5658492154623025e-07, |
|
"logits/chosen": 0.30948159098625183, |
|
"logits/rejected": 0.3537755012512207, |
|
"logps/chosen": -361.9256896972656, |
|
"logps/rejected": -385.45635986328125, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.5001431703567505, |
|
"rewards/margins": 4.353220462799072, |
|
"rewards/rejected": -2.853076457977295, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6617326948193296, |
|
"grad_norm": 14.472880692058368, |
|
"learning_rate": 4.538999363556276e-07, |
|
"logits/chosen": 0.4529440999031067, |
|
"logits/rejected": 0.5350080728530884, |
|
"logps/chosen": -315.1925048828125, |
|
"logps/rejected": -350.2633361816406, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.4465656280517578, |
|
"rewards/margins": 4.741408348083496, |
|
"rewards/rejected": -3.294842481613159, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6791467131040487, |
|
"grad_norm": 10.826712989058954, |
|
"learning_rate": 4.5114279539169353e-07, |
|
"logits/chosen": 0.4620792269706726, |
|
"logits/rejected": 0.4736354351043701, |
|
"logps/chosen": -323.7712707519531, |
|
"logps/rejected": -383.2652282714844, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.8169974088668823, |
|
"rewards/margins": 5.20931339263916, |
|
"rewards/rejected": -4.39231538772583, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.696560731388768, |
|
"grad_norm": 10.67277873114678, |
|
"learning_rate": 4.483144743469268e-07, |
|
"logits/chosen": 0.38168865442276, |
|
"logits/rejected": 0.49841055274009705, |
|
"logps/chosen": -322.7072448730469, |
|
"logps/rejected": -355.303955078125, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.9774483442306519, |
|
"rewards/margins": 3.831352949142456, |
|
"rewards/rejected": -2.8539042472839355, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.696560731388768, |
|
"eval_logits/chosen": 0.39267322421073914, |
|
"eval_logits/rejected": 0.45306530594825745, |
|
"eval_logps/chosen": -346.1069641113281, |
|
"eval_logps/rejected": -381.87127685546875, |
|
"eval_loss": 0.6327958703041077, |
|
"eval_rewards/accuracies": 0.7721518874168396, |
|
"eval_rewards/chosen": 0.9885040521621704, |
|
"eval_rewards/margins": 4.526716232299805, |
|
"eval_rewards/rejected": -3.538212299346924, |
|
"eval_runtime": 70.7229, |
|
"eval_samples_per_second": 35.349, |
|
"eval_steps_per_second": 1.117, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7139747496734872, |
|
"grad_norm": 9.610566425967104, |
|
"learning_rate": 4.454159741029162e-07, |
|
"logits/chosen": 0.363937646150589, |
|
"logits/rejected": 0.43550723791122437, |
|
"logps/chosen": -343.9782409667969, |
|
"logps/rejected": -362.17962646484375, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.068337082862854, |
|
"rewards/margins": 4.6207051277160645, |
|
"rewards/rejected": -3.5523681640625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.7313887679582064, |
|
"grad_norm": 12.035966522406508, |
|
"learning_rate": 4.4244832037614953e-07, |
|
"logits/chosen": 0.41232219338417053, |
|
"logits/rejected": 0.47881880402565, |
|
"logps/chosen": -335.7079772949219, |
|
"logps/rejected": -382.82403564453125, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.1208299398422241, |
|
"rewards/margins": 5.079546928405762, |
|
"rewards/rejected": -3.9587173461914062, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7488027862429255, |
|
"grad_norm": 10.889845296101432, |
|
"learning_rate": 4.394125633550352e-07, |
|
"logits/chosen": 0.44531410932540894, |
|
"logits/rejected": 0.5164756774902344, |
|
"logps/chosen": -337.1859436035156, |
|
"logps/rejected": -394.09906005859375, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.9268868565559387, |
|
"rewards/margins": 5.295614719390869, |
|
"rewards/rejected": -4.368727684020996, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7662168045276447, |
|
"grad_norm": 11.219285319452393, |
|
"learning_rate": 4.363097773282631e-07, |
|
"logits/chosen": 0.4676145613193512, |
|
"logits/rejected": 0.46276092529296875, |
|
"logps/chosen": -303.9361877441406, |
|
"logps/rejected": -363.8067321777344, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.4285213947296143, |
|
"rewards/margins": 5.457498550415039, |
|
"rewards/rejected": -4.028977394104004, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.783630822812364, |
|
"grad_norm": 13.505959690725264, |
|
"learning_rate": 4.3314106030463736e-07, |
|
"logits/chosen": 0.4363015592098236, |
|
"logits/rejected": 0.5514327883720398, |
|
"logps/chosen": -316.35748291015625, |
|
"logps/rejected": -357.69384765625, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 1.2509381771087646, |
|
"rewards/margins": 4.098021507263184, |
|
"rewards/rejected": -2.847083330154419, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8010448410970832, |
|
"grad_norm": 10.355065488972091, |
|
"learning_rate": 4.299075336245144e-07, |
|
"logits/chosen": 0.3801150918006897, |
|
"logits/rejected": 0.44872745871543884, |
|
"logps/chosen": -349.3464050292969, |
|
"logps/rejected": -385.3382873535156, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.4206287860870361, |
|
"rewards/margins": 5.481839656829834, |
|
"rewards/rejected": -4.061211109161377, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8184588593818024, |
|
"grad_norm": 11.847424597987043, |
|
"learning_rate": 4.2661034156298474e-07, |
|
"logits/chosen": 0.4114614427089691, |
|
"logits/rejected": 0.48444080352783203, |
|
"logps/chosen": -317.6768493652344, |
|
"logps/rejected": -355.9494934082031, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.9520959854125977, |
|
"rewards/margins": 5.045262813568115, |
|
"rewards/rejected": -3.0931668281555176, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8358728776665215, |
|
"grad_norm": 12.525368413543276, |
|
"learning_rate": 4.232506509249397e-07, |
|
"logits/chosen": 0.3970574736595154, |
|
"logits/rejected": 0.5053227543830872, |
|
"logps/chosen": -333.1136474609375, |
|
"logps/rejected": -341.1883544921875, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.5816986560821533, |
|
"rewards/margins": 4.520462989807129, |
|
"rewards/rejected": -2.9387643337249756, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8532868959512407, |
|
"grad_norm": 13.142971908163307, |
|
"learning_rate": 4.1982965063216353e-07, |
|
"logits/chosen": 0.3946320712566376, |
|
"logits/rejected": 0.48155736923217773, |
|
"logps/chosen": -375.7856140136719, |
|
"logps/rejected": -376.10791015625, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.4027400016784668, |
|
"rewards/margins": 4.7545166015625, |
|
"rewards/rejected": -3.351776599884033, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.87070091423596, |
|
"grad_norm": 14.103469850176968, |
|
"learning_rate": 4.1634855130259996e-07, |
|
"logits/chosen": 0.4162313938140869, |
|
"logits/rejected": 0.4359792172908783, |
|
"logps/chosen": -310.9317321777344, |
|
"logps/rejected": -360.1820373535156, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.0291037559509277, |
|
"rewards/margins": 5.953708648681641, |
|
"rewards/rejected": -3.924605131149292, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.87070091423596, |
|
"eval_logits/chosen": 0.3562653958797455, |
|
"eval_logits/rejected": 0.41477009654045105, |
|
"eval_logps/chosen": -346.84149169921875, |
|
"eval_logps/rejected": -386.58038330078125, |
|
"eval_loss": 0.6218530535697937, |
|
"eval_rewards/accuracies": 0.7816455960273743, |
|
"eval_rewards/chosen": 0.915044367313385, |
|
"eval_rewards/margins": 4.924162864685059, |
|
"eval_rewards/rejected": -4.009118556976318, |
|
"eval_runtime": 70.6374, |
|
"eval_samples_per_second": 35.392, |
|
"eval_steps_per_second": 1.118, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8881149325206792, |
|
"grad_norm": 12.576666962608346, |
|
"learning_rate": 4.128085848219408e-07, |
|
"logits/chosen": 0.3370770215988159, |
|
"logits/rejected": 0.47335267066955566, |
|
"logps/chosen": -318.4114990234375, |
|
"logps/rejected": -353.0730895996094, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.7541842460632324, |
|
"rewards/margins": 3.9734835624694824, |
|
"rewards/rejected": -3.21929931640625, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9055289508053983, |
|
"grad_norm": 12.733877298048826, |
|
"learning_rate": 4.092110039076884e-07, |
|
"logits/chosen": 0.4031103253364563, |
|
"logits/rejected": 0.4697874188423157, |
|
"logps/chosen": -305.5635681152344, |
|
"logps/rejected": -355.68865966796875, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.5306105613708496, |
|
"rewards/margins": 5.436779499053955, |
|
"rewards/rejected": -3.9061686992645264, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9229429690901175, |
|
"grad_norm": 15.20563525082569, |
|
"learning_rate": 4.055570816658457e-07, |
|
"logits/chosen": 0.2911894917488098, |
|
"logits/rejected": 0.41246724128723145, |
|
"logps/chosen": -349.1706848144531, |
|
"logps/rejected": -364.3077087402344, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 1.3659398555755615, |
|
"rewards/margins": 4.93996524810791, |
|
"rewards/rejected": -3.5740256309509277, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9403569873748368, |
|
"grad_norm": 11.280219686189042, |
|
"learning_rate": 4.0184811114039184e-07, |
|
"logits/chosen": 0.42698025703430176, |
|
"logits/rejected": 0.5653012990951538, |
|
"logps/chosen": -307.64794921875, |
|
"logps/rejected": -315.4483337402344, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.7323646545410156, |
|
"rewards/margins": 4.723189353942871, |
|
"rewards/rejected": -2.9908242225646973, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.957771005659556, |
|
"grad_norm": 16.19771019567853, |
|
"learning_rate": 3.9808540485570195e-07, |
|
"logits/chosen": 0.46557921171188354, |
|
"logits/rejected": 0.5172798037528992, |
|
"logps/chosen": -310.8573303222656, |
|
"logps/rejected": -352.49908447265625, |
|
"loss": 0.6182, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.6635421514511108, |
|
"rewards/margins": 4.843376636505127, |
|
"rewards/rejected": -3.1798348426818848, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9751850239442751, |
|
"grad_norm": 9.424230661957003, |
|
"learning_rate": 3.9427029435207295e-07, |
|
"logits/chosen": 0.18435801565647125, |
|
"logits/rejected": 0.24499377608299255, |
|
"logps/chosen": -365.77032470703125, |
|
"logps/rejected": -416.8031311035156, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.7337768077850342, |
|
"rewards/margins": 5.349893093109131, |
|
"rewards/rejected": -3.6161162853240967, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9925990422289943, |
|
"grad_norm": 12.241682769598608, |
|
"learning_rate": 3.904041297145201e-07, |
|
"logits/chosen": 0.3250972628593445, |
|
"logits/rejected": 0.4511636793613434, |
|
"logps/chosen": -360.701904296875, |
|
"logps/rejected": -388.0128479003906, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 1.4425889253616333, |
|
"rewards/margins": 4.446944236755371, |
|
"rewards/rejected": -3.0043554306030273, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.0100130605137136, |
|
"grad_norm": 11.181442404802889, |
|
"learning_rate": 3.8648827909501176e-07, |
|
"logits/chosen": 0.29660564661026, |
|
"logits/rejected": 0.44677844643592834, |
|
"logps/chosen": -361.0950012207031, |
|
"logps/rejected": -377.21856689453125, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.9495298862457275, |
|
"rewards/margins": 4.22812557220459, |
|
"rewards/rejected": -2.2785956859588623, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.0274270787984328, |
|
"grad_norm": 13.05290689111488, |
|
"learning_rate": 3.825241282283096e-07, |
|
"logits/chosen": 0.27790385484695435, |
|
"logits/rejected": 0.3375425934791565, |
|
"logps/chosen": -330.90374755859375, |
|
"logps/rejected": -361.35601806640625, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.607259750366211, |
|
"rewards/margins": 4.201107025146484, |
|
"rewards/rejected": -2.5938477516174316, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.0448410970831519, |
|
"grad_norm": 11.463096590603662, |
|
"learning_rate": 3.785130799415871e-07, |
|
"logits/chosen": 0.24200713634490967, |
|
"logits/rejected": 0.35302695631980896, |
|
"logps/chosen": -299.9937744140625, |
|
"logps/rejected": -359.466064453125, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.2321202754974365, |
|
"rewards/margins": 5.646553039550781, |
|
"rewards/rejected": -4.414433002471924, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0448410970831519, |
|
"eval_logits/chosen": 0.2933425307273865, |
|
"eval_logits/rejected": 0.35039564967155457, |
|
"eval_logps/chosen": -340.9584045410156, |
|
"eval_logps/rejected": -382.8789978027344, |
|
"eval_loss": 0.6093774437904358, |
|
"eval_rewards/accuracies": 0.7721518874168396, |
|
"eval_rewards/chosen": 1.5033540725708008, |
|
"eval_rewards/margins": 5.142338275909424, |
|
"eval_rewards/rejected": -3.638984441757202, |
|
"eval_runtime": 70.631, |
|
"eval_samples_per_second": 35.395, |
|
"eval_steps_per_second": 1.118, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0622551153678712, |
|
"grad_norm": 12.526851438420609, |
|
"learning_rate": 3.7445655365799936e-07, |
|
"logits/chosen": 0.20473480224609375, |
|
"logits/rejected": 0.28544989228248596, |
|
"logps/chosen": -334.8023986816406, |
|
"logps/rejected": -346.4919128417969, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 2.128657579421997, |
|
"rewards/margins": 5.316029071807861, |
|
"rewards/rejected": -3.187371015548706, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0796691336525903, |
|
"grad_norm": 11.47710312237594, |
|
"learning_rate": 3.7035598489438006e-07, |
|
"logits/chosen": 0.233549565076828, |
|
"logits/rejected": 0.35726696252822876, |
|
"logps/chosen": -349.82196044921875, |
|
"logps/rejected": -364.85955810546875, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.5395448803901672, |
|
"rewards/margins": 4.185884952545166, |
|
"rewards/rejected": -3.6463401317596436, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0970831519373094, |
|
"grad_norm": 12.961022529112094, |
|
"learning_rate": 3.662128247532433e-07, |
|
"logits/chosen": 0.290211945772171, |
|
"logits/rejected": 0.35047486424446106, |
|
"logps/chosen": -325.6573181152344, |
|
"logps/rejected": -386.93597412109375, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 1.4243555068969727, |
|
"rewards/margins": 5.428494930267334, |
|
"rewards/rejected": -4.004139423370361, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.1144971702220288, |
|
"grad_norm": 17.398617895045366, |
|
"learning_rate": 3.6202853940926934e-07, |
|
"logits/chosen": 0.3017658293247223, |
|
"logits/rejected": 0.4195927083492279, |
|
"logps/chosen": -324.07952880859375, |
|
"logps/rejected": -373.904296875, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.8470207452774048, |
|
"rewards/margins": 6.674212455749512, |
|
"rewards/rejected": -4.827191352844238, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.1319111885067479, |
|
"grad_norm": 10.562372952225951, |
|
"learning_rate": 3.5780460959045754e-07, |
|
"logits/chosen": 0.2920629680156708, |
|
"logits/rejected": 0.47484827041625977, |
|
"logps/chosen": -352.8973693847656, |
|
"logps/rejected": -350.5732421875, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.6669800281524658, |
|
"rewards/margins": 5.411717891693115, |
|
"rewards/rejected": -3.7447381019592285, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.1493252067914672, |
|
"grad_norm": 13.783936557369373, |
|
"learning_rate": 3.5354253005412884e-07, |
|
"logits/chosen": 0.2738053798675537, |
|
"logits/rejected": 0.35663852095603943, |
|
"logps/chosen": -324.64068603515625, |
|
"logps/rejected": -380.22943115234375, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.069589614868164, |
|
"rewards/margins": 6.046194553375244, |
|
"rewards/rejected": -3.9766056537628174, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.1667392250761863, |
|
"grad_norm": 13.97568005507501, |
|
"learning_rate": 3.49243809057963e-07, |
|
"logits/chosen": 0.4103721082210541, |
|
"logits/rejected": 0.4501830041408539, |
|
"logps/chosen": -294.91845703125, |
|
"logps/rejected": -333.655517578125, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.485319972038269, |
|
"rewards/margins": 5.04614782333374, |
|
"rewards/rejected": -3.5608277320861816, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1841532433609054, |
|
"grad_norm": 16.332889829547256, |
|
"learning_rate": 3.449099678262595e-07, |
|
"logits/chosen": 0.3343231976032257, |
|
"logits/rejected": 0.3634433150291443, |
|
"logps/chosen": -338.9833984375, |
|
"logps/rejected": -394.47454833984375, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 1.4713348150253296, |
|
"rewards/margins": 4.879803657531738, |
|
"rewards/rejected": -3.408468723297119, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.2015672616456248, |
|
"grad_norm": 12.006988661080449, |
|
"learning_rate": 3.4054254001160854e-07, |
|
"logits/chosen": 0.19994381070137024, |
|
"logits/rejected": 0.29035601019859314, |
|
"logps/chosen": -322.84503173828125, |
|
"logps/rejected": -359.9244384765625, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 2.1871232986450195, |
|
"rewards/margins": 5.3036580085754395, |
|
"rewards/rejected": -3.11653470993042, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.2189812799303439, |
|
"grad_norm": 12.099919696795636, |
|
"learning_rate": 3.3614307115216525e-07, |
|
"logits/chosen": 0.3054899573326111, |
|
"logits/rejected": 0.3647094964981079, |
|
"logps/chosen": -335.3572998046875, |
|
"logps/rejected": -434.33526611328125, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.1948211193084717, |
|
"rewards/margins": 6.700743675231934, |
|
"rewards/rejected": -4.505923271179199, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2189812799303439, |
|
"eval_logits/chosen": 0.26492393016815186, |
|
"eval_logits/rejected": 0.3217140734195709, |
|
"eval_logps/chosen": -340.2962341308594, |
|
"eval_logps/rejected": -383.6949157714844, |
|
"eval_loss": 0.5991856455802917, |
|
"eval_rewards/accuracies": 0.7689873576164246, |
|
"eval_rewards/chosen": 1.569573163986206, |
|
"eval_rewards/margins": 5.29014778137207, |
|
"eval_rewards/rejected": -3.7205748558044434, |
|
"eval_runtime": 71.1179, |
|
"eval_samples_per_second": 35.153, |
|
"eval_steps_per_second": 1.111, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2363952982150632, |
|
"grad_norm": 12.738566526927597, |
|
"learning_rate": 3.317131181247166e-07, |
|
"logits/chosen": 0.37958696484565735, |
|
"logits/rejected": 0.35304760932922363, |
|
"logps/chosen": -288.7842712402344, |
|
"logps/rejected": -364.54840087890625, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.730753183364868, |
|
"rewards/margins": 6.293435096740723, |
|
"rewards/rejected": -3.5626816749572754, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.2538093164997823, |
|
"grad_norm": 13.111265189779358, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": 0.2904764413833618, |
|
"logits/rejected": 0.4464187026023865, |
|
"logps/chosen": -304.67669677734375, |
|
"logps/rejected": -352.59979248046875, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.094412326812744, |
|
"rewards/margins": 5.624947547912598, |
|
"rewards/rejected": -3.5305354595184326, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.2712233347845014, |
|
"grad_norm": 10.886620928807497, |
|
"learning_rate": 3.2276804045662483e-07, |
|
"logits/chosen": 0.27517110109329224, |
|
"logits/rejected": 0.3347955644130707, |
|
"logps/chosen": -316.2988586425781, |
|
"logps/rejected": -356.43768310546875, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.5192517042160034, |
|
"rewards/margins": 5.492722511291504, |
|
"rewards/rejected": -3.973471164703369, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.2886373530692208, |
|
"grad_norm": 13.8403479629244, |
|
"learning_rate": 3.182560812853202e-07, |
|
"logits/chosen": 0.11149139702320099, |
|
"logits/rejected": 0.3251429498195648, |
|
"logps/chosen": -330.1148376464844, |
|
"logps/rejected": -378.6295471191406, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.484678864479065, |
|
"rewards/margins": 6.525158882141113, |
|
"rewards/rejected": -5.0404791831970215, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.30605137135394, |
|
"grad_norm": 12.534702754120891, |
|
"learning_rate": 3.1371996776449617e-07, |
|
"logits/chosen": 0.23182816803455353, |
|
"logits/rejected": 0.28866177797317505, |
|
"logps/chosen": -328.12646484375, |
|
"logps/rejected": -370.0992126464844, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.179534435272217, |
|
"rewards/margins": 6.180920600891113, |
|
"rewards/rejected": -4.0013861656188965, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.3234653896386592, |
|
"grad_norm": 11.31518460314422, |
|
"learning_rate": 3.091613051265273e-07, |
|
"logits/chosen": 0.31086990237236023, |
|
"logits/rejected": 0.4172425866127014, |
|
"logps/chosen": -297.3329772949219, |
|
"logps/rejected": -360.5836181640625, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 2.1698195934295654, |
|
"rewards/margins": 5.839700222015381, |
|
"rewards/rejected": -3.6698811054229736, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.3408794079233783, |
|
"grad_norm": 10.379774861709702, |
|
"learning_rate": 3.045817065834326e-07, |
|
"logits/chosen": 0.29344993829727173, |
|
"logits/rejected": 0.32599514722824097, |
|
"logps/chosen": -327.0237121582031, |
|
"logps/rejected": -358.82928466796875, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.6931324005126953, |
|
"rewards/margins": 5.127728462219238, |
|
"rewards/rejected": -3.4345970153808594, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.3582934262080975, |
|
"grad_norm": 13.434946271526595, |
|
"learning_rate": 2.999827927559946e-07, |
|
"logits/chosen": 0.2696860730648041, |
|
"logits/rejected": 0.34511059522628784, |
|
"logps/chosen": -296.4570617675781, |
|
"logps/rejected": -353.8194274902344, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 2.2521262168884277, |
|
"rewards/margins": 5.2349138259887695, |
|
"rewards/rejected": -2.982787609100342, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.3757074444928168, |
|
"grad_norm": 11.291499182850764, |
|
"learning_rate": 2.9536619110025697e-07, |
|
"logits/chosen": 0.18604174256324768, |
|
"logits/rejected": 0.3162630796432495, |
|
"logps/chosen": -326.1578369140625, |
|
"logps/rejected": -353.59295654296875, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.702147126197815, |
|
"rewards/margins": 5.362542152404785, |
|
"rewards/rejected": -3.6603941917419434, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.393121462777536, |
|
"grad_norm": 12.702999923621702, |
|
"learning_rate": 2.907335353316027e-07, |
|
"logits/chosen": 0.26579901576042175, |
|
"logits/rejected": 0.41910189390182495, |
|
"logps/chosen": -320.19598388671875, |
|
"logps/rejected": -349.5892028808594, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 1.7423750162124634, |
|
"rewards/margins": 5.454329490661621, |
|
"rewards/rejected": -3.7119553089141846, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.393121462777536, |
|
"eval_logits/chosen": 0.2383224368095398, |
|
"eval_logits/rejected": 0.29605910181999207, |
|
"eval_logps/chosen": -338.84527587890625, |
|
"eval_logps/rejected": -383.7506408691406, |
|
"eval_loss": 0.5953884720802307, |
|
"eval_rewards/accuracies": 0.7784810066223145, |
|
"eval_rewards/chosen": 1.714667797088623, |
|
"eval_rewards/margins": 5.440816879272461, |
|
"eval_rewards/rejected": -3.726149082183838, |
|
"eval_runtime": 70.5705, |
|
"eval_samples_per_second": 35.426, |
|
"eval_steps_per_second": 1.119, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4105354810622552, |
|
"grad_norm": 15.05280589745863, |
|
"learning_rate": 2.860864648466179e-07, |
|
"logits/chosen": 0.27184057235717773, |
|
"logits/rejected": 0.3507101535797119, |
|
"logps/chosen": -332.3158264160156, |
|
"logps/rejected": -380.0777587890625, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.7377653121948242, |
|
"rewards/margins": 6.386282920837402, |
|
"rewards/rejected": -4.648517608642578, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.4279494993469743, |
|
"grad_norm": 14.457454699405789, |
|
"learning_rate": 2.814266241429447e-07, |
|
"logits/chosen": 0.15302935242652893, |
|
"logits/rejected": 0.317359060049057, |
|
"logps/chosen": -344.7713317871094, |
|
"logps/rejected": -361.2349548339844, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 1.5794878005981445, |
|
"rewards/margins": 5.287659645080566, |
|
"rewards/rejected": -3.708172559738159, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.4453635176316935, |
|
"grad_norm": 12.867419385191441, |
|
"learning_rate": 2.767556622373292e-07, |
|
"logits/chosen": 0.22975726425647736, |
|
"logits/rejected": 0.23659822344779968, |
|
"logps/chosen": -314.1470642089844, |
|
"logps/rejected": -377.7834167480469, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.079237222671509, |
|
"rewards/margins": 5.952226161956787, |
|
"rewards/rejected": -3.8729889392852783, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.4627775359164126, |
|
"grad_norm": 14.1466386443094, |
|
"learning_rate": 2.7207523208207013e-07, |
|
"logits/chosen": 0.1673222780227661, |
|
"logits/rejected": 0.2645789682865143, |
|
"logps/chosen": -338.6355285644531, |
|
"logps/rejected": -376.7158203125, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.4028663635253906, |
|
"rewards/margins": 5.203114032745361, |
|
"rewards/rejected": -3.8002476692199707, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.480191554201132, |
|
"grad_norm": 11.594267715010666, |
|
"learning_rate": 2.6738698998007466e-07, |
|
"logits/chosen": 0.15234848856925964, |
|
"logits/rejected": 0.2801158130168915, |
|
"logps/chosen": -370.71966552734375, |
|
"logps/rejected": -379.37567138671875, |
|
"loss": 0.5661, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.7123475074768066, |
|
"rewards/margins": 5.849883079528809, |
|
"rewards/rejected": -3.137535572052002, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.4976055724858512, |
|
"grad_norm": 15.37507940467823, |
|
"learning_rate": 2.62692594998729e-07, |
|
"logits/chosen": 0.1588628888130188, |
|
"logits/rejected": 0.29955458641052246, |
|
"logps/chosen": -316.0282897949219, |
|
"logps/rejected": -358.241455078125, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 2.1699862480163574, |
|
"rewards/margins": 5.592405796051025, |
|
"rewards/rejected": -3.422419786453247, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.5150195907705704, |
|
"grad_norm": 9.693159960410714, |
|
"learning_rate": 2.5799370838278996e-07, |
|
"logits/chosen": 0.3118472099304199, |
|
"logits/rejected": 0.4519326686859131, |
|
"logps/chosen": -304.1625061035156, |
|
"logps/rejected": -326.2646484375, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 2.2456982135772705, |
|
"rewards/margins": 6.166834831237793, |
|
"rewards/rejected": -3.9211363792419434, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.5324336090552895, |
|
"grad_norm": 11.9031904060217, |
|
"learning_rate": 2.5329199296650667e-07, |
|
"logits/chosen": 0.3134709596633911, |
|
"logits/rejected": 0.28860020637512207, |
|
"logps/chosen": -309.3736572265625, |
|
"logps/rejected": -387.1235656738281, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 2.08250093460083, |
|
"rewards/margins": 5.88448429107666, |
|
"rewards/rejected": -3.80198335647583, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.5498476273400086, |
|
"grad_norm": 13.923538398639188, |
|
"learning_rate": 2.4858911258517953e-07, |
|
"logits/chosen": 0.2275421917438507, |
|
"logits/rejected": 0.3342249095439911, |
|
"logps/chosen": -296.0014953613281, |
|
"logps/rejected": -338.13580322265625, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.5528757572174072, |
|
"rewards/margins": 5.536377906799316, |
|
"rewards/rejected": -3.983502149581909, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.567261645624728, |
|
"grad_norm": 14.148141568298001, |
|
"learning_rate": 2.438867314863649e-07, |
|
"logits/chosen": 0.09563969075679779, |
|
"logits/rejected": 0.15113821625709534, |
|
"logps/chosen": -343.15765380859375, |
|
"logps/rejected": -385.72100830078125, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.650477886199951, |
|
"rewards/margins": 6.407762050628662, |
|
"rewards/rejected": -3.757284164428711, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.567261645624728, |
|
"eval_logits/chosen": 0.22664774954319, |
|
"eval_logits/rejected": 0.28383785486221313, |
|
"eval_logps/chosen": -336.0577392578125, |
|
"eval_logps/rejected": -380.47088623046875, |
|
"eval_loss": 0.5929730534553528, |
|
"eval_rewards/accuracies": 0.7753164768218994, |
|
"eval_rewards/chosen": 1.993420958518982, |
|
"eval_rewards/margins": 5.391592025756836, |
|
"eval_rewards/rejected": -3.3981709480285645, |
|
"eval_runtime": 71.3572, |
|
"eval_samples_per_second": 35.035, |
|
"eval_steps_per_second": 1.107, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5846756639094473, |
|
"grad_norm": 13.105682838644256, |
|
"learning_rate": 2.391865137409335e-07, |
|
"logits/chosen": 0.22982971370220184, |
|
"logits/rejected": 0.374919593334198, |
|
"logps/chosen": -331.8807678222656, |
|
"logps/rejected": -353.5442810058594, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.277345895767212, |
|
"rewards/margins": 4.490405082702637, |
|
"rewards/rejected": -3.2130589485168457, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.6020896821941664, |
|
"grad_norm": 12.840872961914933, |
|
"learning_rate": 2.3449012265419263e-07, |
|
"logits/chosen": 0.22155573964118958, |
|
"logits/rejected": 0.3400117754936218, |
|
"logps/chosen": -313.2369079589844, |
|
"logps/rejected": -364.26910400390625, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.4575579166412354, |
|
"rewards/margins": 5.934301853179932, |
|
"rewards/rejected": -4.476743221282959, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.6195037004788855, |
|
"grad_norm": 12.101267909285811, |
|
"learning_rate": 2.297992201772775e-07, |
|
"logits/chosen": 0.17083992063999176, |
|
"logits/rejected": 0.2611861526966095, |
|
"logps/chosen": -355.92559814453125, |
|
"logps/rejected": -386.23529052734375, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.9388973712921143, |
|
"rewards/margins": 6.043047904968262, |
|
"rewards/rejected": -4.104150295257568, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.6369177187636046, |
|
"grad_norm": 15.414581786862673, |
|
"learning_rate": 2.2511546631902348e-07, |
|
"logits/chosen": 0.1613834798336029, |
|
"logits/rejected": 0.1402578502893448, |
|
"logps/chosen": -334.2019348144531, |
|
"logps/rejected": -398.59014892578125, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 2.1678028106689453, |
|
"rewards/margins": 6.486172676086426, |
|
"rewards/rejected": -4.318368911743164, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.654331737048324, |
|
"grad_norm": 13.343592771065993, |
|
"learning_rate": 2.2044051855852414e-07, |
|
"logits/chosen": 0.23705515265464783, |
|
"logits/rejected": 0.37995973229408264, |
|
"logps/chosen": -293.1175231933594, |
|
"logps/rejected": -328.06488037109375, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.2306838035583496, |
|
"rewards/margins": 6.733452796936035, |
|
"rewards/rejected": -4.502768516540527, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6717457553330433, |
|
"grad_norm": 12.544780148755235, |
|
"learning_rate": 2.15776031258586e-07, |
|
"logits/chosen": 0.16758579015731812, |
|
"logits/rejected": 0.2723161578178406, |
|
"logps/chosen": -332.1502685546875, |
|
"logps/rejected": -419.9957580566406, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 2.5997824668884277, |
|
"rewards/margins": 7.705753326416016, |
|
"rewards/rejected": -5.105970859527588, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.6891597736177624, |
|
"grad_norm": 14.398467775390248, |
|
"learning_rate": 2.1112365508028512e-07, |
|
"logits/chosen": 0.22164323925971985, |
|
"logits/rejected": 0.35882630944252014, |
|
"logps/chosen": -311.85748291015625, |
|
"logps/rejected": -345.79376220703125, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 2.5555522441864014, |
|
"rewards/margins": 6.213547229766846, |
|
"rewards/rejected": -3.6579947471618652, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.7065737919024815, |
|
"grad_norm": 10.483483913623502, |
|
"learning_rate": 2.064850363988335e-07, |
|
"logits/chosen": 0.24829120934009552, |
|
"logits/rejected": 0.26147156953811646, |
|
"logps/chosen": -323.1725158691406, |
|
"logps/rejected": -371.21734619140625, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.836071252822876, |
|
"rewards/margins": 6.013415336608887, |
|
"rewards/rejected": -4.177343368530273, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.7239878101872006, |
|
"grad_norm": 13.297935553611692, |
|
"learning_rate": 2.0186181672096303e-07, |
|
"logits/chosen": 0.22199273109436035, |
|
"logits/rejected": 0.34791290760040283, |
|
"logps/chosen": -365.37841796875, |
|
"logps/rejected": -357.319091796875, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.8863897323608398, |
|
"rewards/margins": 5.01448917388916, |
|
"rewards/rejected": -3.1280996799468994, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.74140182847192, |
|
"grad_norm": 11.129567589353632, |
|
"learning_rate": 1.9725563210403152e-07, |
|
"logits/chosen": 0.27127689123153687, |
|
"logits/rejected": 0.3801821172237396, |
|
"logps/chosen": -304.1812438964844, |
|
"logps/rejected": -333.21844482421875, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.9027084112167358, |
|
"rewards/margins": 4.797359943389893, |
|
"rewards/rejected": -2.8946516513824463, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.74140182847192, |
|
"eval_logits/chosen": 0.22198127210140228, |
|
"eval_logits/rejected": 0.2787408232688904, |
|
"eval_logps/chosen": -338.6839294433594, |
|
"eval_logps/rejected": -386.5126953125, |
|
"eval_loss": 0.5883829593658447, |
|
"eval_rewards/accuracies": 0.7816455960273743, |
|
"eval_rewards/chosen": 1.7307991981506348, |
|
"eval_rewards/margins": 5.733153343200684, |
|
"eval_rewards/rejected": -4.002354621887207, |
|
"eval_runtime": 70.4432, |
|
"eval_samples_per_second": 35.49, |
|
"eval_steps_per_second": 1.121, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.758815846756639, |
|
"grad_norm": 14.281945700020602, |
|
"learning_rate": 1.9266811257705763e-07, |
|
"logits/chosen": 0.21578781306743622, |
|
"logits/rejected": 0.30397921800613403, |
|
"logps/chosen": -329.77044677734375, |
|
"logps/rejected": -368.17926025390625, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 2.238178253173828, |
|
"rewards/margins": 6.452548980712891, |
|
"rewards/rejected": -4.214371204376221, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.7762298650413584, |
|
"grad_norm": 13.233745779571802, |
|
"learning_rate": 1.8810088156388848e-07, |
|
"logits/chosen": 0.2450670748949051, |
|
"logits/rejected": 0.3020363748073578, |
|
"logps/chosen": -298.3768615722656, |
|
"logps/rejected": -362.04327392578125, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.085683822631836, |
|
"rewards/margins": 5.677943229675293, |
|
"rewards/rejected": -3.592259645462036, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.7936438833260775, |
|
"grad_norm": 11.453241931075056, |
|
"learning_rate": 1.8355555530870564e-07, |
|
"logits/chosen": 0.338943213224411, |
|
"logits/rejected": 0.35035696625709534, |
|
"logps/chosen": -256.6951599121094, |
|
"logps/rejected": -336.91552734375, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.064159870147705, |
|
"rewards/margins": 6.696907043457031, |
|
"rewards/rejected": -4.632746696472168, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.8110579016107966, |
|
"grad_norm": 12.077192617585345, |
|
"learning_rate": 1.7903374230407104e-07, |
|
"logits/chosen": 0.2699436843395233, |
|
"logits/rejected": 0.28396037220954895, |
|
"logps/chosen": -290.0744934082031, |
|
"logps/rejected": -392.7906494140625, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.268066167831421, |
|
"rewards/margins": 6.793740749359131, |
|
"rewards/rejected": -4.525674343109131, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.828471919895516, |
|
"grad_norm": 12.946293550899172, |
|
"learning_rate": 1.7453704272171603e-07, |
|
"logits/chosen": 0.1450766772031784, |
|
"logits/rejected": 0.24746115505695343, |
|
"logps/chosen": -318.6651611328125, |
|
"logps/rejected": -367.54510498046875, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.2983639240264893, |
|
"rewards/margins": 7.265191078186035, |
|
"rewards/rejected": -4.966826915740967, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.845885938180235, |
|
"grad_norm": 14.659109006168315, |
|
"learning_rate": 1.7006704784627575e-07, |
|
"logits/chosen": 0.21030068397521973, |
|
"logits/rejected": 0.31739652156829834, |
|
"logps/chosen": -316.27313232421875, |
|
"logps/rejected": -366.7966613769531, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.333252429962158, |
|
"rewards/margins": 6.3725409507751465, |
|
"rewards/rejected": -4.0392889976501465, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.8632999564649544, |
|
"grad_norm": 11.575655038254915, |
|
"learning_rate": 1.656253395121674e-07, |
|
"logits/chosen": 0.2197595089673996, |
|
"logits/rejected": 0.3256808817386627, |
|
"logps/chosen": -326.97381591796875, |
|
"logps/rejected": -351.43341064453125, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.4176201820373535, |
|
"rewards/margins": 6.151999473571777, |
|
"rewards/rejected": -3.734379291534424, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.8807139747496735, |
|
"grad_norm": 16.012865418831396, |
|
"learning_rate": 1.6121348954381397e-07, |
|
"logits/chosen": 0.2955351173877716, |
|
"logits/rejected": 0.4262094497680664, |
|
"logps/chosen": -309.02435302734375, |
|
"logps/rejected": -357.2958068847656, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.9097408056259155, |
|
"rewards/margins": 5.701088905334473, |
|
"rewards/rejected": -3.7913482189178467, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.8981279930343926, |
|
"grad_norm": 16.856514300669417, |
|
"learning_rate": 1.5683305919940872e-07, |
|
"logits/chosen": 0.28492286801338196, |
|
"logits/rejected": 0.370318740606308, |
|
"logps/chosen": -324.5318908691406, |
|
"logps/rejected": -355.6055908203125, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.586484670639038, |
|
"rewards/margins": 5.815837860107422, |
|
"rewards/rejected": -4.229353427886963, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.9155420113191117, |
|
"grad_norm": 11.897078714973112, |
|
"learning_rate": 1.5248559861842053e-07, |
|
"logits/chosen": 0.23165082931518555, |
|
"logits/rejected": 0.2759276032447815, |
|
"logps/chosen": -305.85174560546875, |
|
"logps/rejected": -377.9584045410156, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.902627944946289, |
|
"rewards/margins": 6.008756160736084, |
|
"rewards/rejected": -4.106128692626953, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.9155420113191117, |
|
"eval_logits/chosen": 0.21340951323509216, |
|
"eval_logits/rejected": 0.2706116735935211, |
|
"eval_logps/chosen": -337.5714416503906, |
|
"eval_logps/rejected": -385.840087890625, |
|
"eval_loss": 0.5849184393882751, |
|
"eval_rewards/accuracies": 0.7911392450332642, |
|
"eval_rewards/chosen": 1.8420459032058716, |
|
"eval_rewards/margins": 5.777135372161865, |
|
"eval_rewards/rejected": -3.935089588165283, |
|
"eval_runtime": 70.3599, |
|
"eval_samples_per_second": 35.532, |
|
"eval_steps_per_second": 1.123, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.932956029603831, |
|
"grad_norm": 13.676521515428378, |
|
"learning_rate": 1.481726462730327e-07, |
|
"logits/chosen": 0.2556658387184143, |
|
"logits/rejected": 0.3973894417285919, |
|
"logps/chosen": -321.7857666015625, |
|
"logps/rejected": -359.394775390625, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.8009535074234009, |
|
"rewards/margins": 6.82882022857666, |
|
"rewards/rejected": -5.027866363525391, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.9503700478885504, |
|
"grad_norm": 14.221820537526096, |
|
"learning_rate": 1.438957284237109e-07, |
|
"logits/chosen": 0.16190308332443237, |
|
"logits/rejected": 0.28133997321128845, |
|
"logps/chosen": -370.27801513671875, |
|
"logps/rejected": -407.38848876953125, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.6363109350204468, |
|
"rewards/margins": 6.053797721862793, |
|
"rewards/rejected": -4.417486667633057, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.9677840661732695, |
|
"grad_norm": 12.575846049875057, |
|
"learning_rate": 1.396563585790927e-07, |
|
"logits/chosen": 0.23068062961101532, |
|
"logits/rejected": 0.28086987137794495, |
|
"logps/chosen": -291.02239990234375, |
|
"logps/rejected": -354.7329406738281, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.4899548292160034, |
|
"rewards/margins": 5.830671787261963, |
|
"rewards/rejected": -4.340716361999512, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.9851980844579886, |
|
"grad_norm": 12.302064991278899, |
|
"learning_rate": 1.3545603696038987e-07, |
|
"logits/chosen": 0.2145300656557083, |
|
"logits/rejected": 0.2948189675807953, |
|
"logps/chosen": -322.04754638671875, |
|
"logps/rejected": -364.0850830078125, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.6283271312713623, |
|
"rewards/margins": 5.910183429718018, |
|
"rewards/rejected": -4.281857013702393, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.0026121027427077, |
|
"grad_norm": 13.71124690365027, |
|
"learning_rate": 1.3129624997049228e-07, |
|
"logits/chosen": 0.22102287411689758, |
|
"logits/rejected": 0.25346601009368896, |
|
"logps/chosen": -299.03997802734375, |
|
"logps/rejected": -374.4776916503906, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 2.4188122749328613, |
|
"rewards/margins": 6.5757155418396, |
|
"rewards/rejected": -4.156903266906738, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.0200261210274273, |
|
"grad_norm": 15.920538460335408, |
|
"learning_rate": 1.2717846966796214e-07, |
|
"logits/chosen": 0.20584805309772491, |
|
"logits/rejected": 0.30077749490737915, |
|
"logps/chosen": -354.84576416015625, |
|
"logps/rejected": -403.926513671875, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.021745204925537, |
|
"rewards/margins": 6.796699523925781, |
|
"rewards/rejected": -4.774954319000244, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.0374401393121464, |
|
"grad_norm": 12.844613549152813, |
|
"learning_rate": 1.2310415324610435e-07, |
|
"logits/chosen": 0.17609162628650665, |
|
"logits/rejected": 0.24105580151081085, |
|
"logps/chosen": -340.9830017089844, |
|
"logps/rejected": -415.0347595214844, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.298529863357544, |
|
"rewards/margins": 6.613799095153809, |
|
"rewards/rejected": -4.315269947052002, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.0548541575968655, |
|
"grad_norm": 11.610832192571714, |
|
"learning_rate": 1.1907474251729752e-07, |
|
"logits/chosen": 0.1663503348827362, |
|
"logits/rejected": 0.31382399797439575, |
|
"logps/chosen": -316.79449462890625, |
|
"logps/rejected": -344.66070556640625, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.4878411293029785, |
|
"rewards/margins": 6.203591346740723, |
|
"rewards/rejected": -3.715749740600586, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.0722681758815846, |
|
"grad_norm": 13.49440122728778, |
|
"learning_rate": 1.1509166340276739e-07, |
|
"logits/chosen": 0.08771935105323792, |
|
"logits/rejected": 0.24409714341163635, |
|
"logps/chosen": -349.5455017089844, |
|
"logps/rejected": -384.40618896484375, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.7756285667419434, |
|
"rewards/margins": 7.52169132232666, |
|
"rewards/rejected": -4.746062278747559, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.0896821941663037, |
|
"grad_norm": 13.523283111167856, |
|
"learning_rate": 1.1115632542798467e-07, |
|
"logits/chosen": 0.2006814181804657, |
|
"logits/rejected": 0.27478426694869995, |
|
"logps/chosen": -333.44720458984375, |
|
"logps/rejected": -384.8387145996094, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.8601166009902954, |
|
"rewards/margins": 5.925691604614258, |
|
"rewards/rejected": -4.06557559967041, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0896821941663037, |
|
"eval_logits/chosen": 0.2083120197057724, |
|
"eval_logits/rejected": 0.2656678259372711, |
|
"eval_logps/chosen": -339.80426025390625, |
|
"eval_logps/rejected": -388.9606628417969, |
|
"eval_loss": 0.5841708183288574, |
|
"eval_rewards/accuracies": 0.7879746556282043, |
|
"eval_rewards/chosen": 1.6187670230865479, |
|
"eval_rewards/margins": 5.865919589996338, |
|
"eval_rewards/rejected": -4.247152805328369, |
|
"eval_runtime": 72.6682, |
|
"eval_samples_per_second": 34.403, |
|
"eval_steps_per_second": 1.087, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.1070962124510233, |
|
"grad_norm": 15.694137545891444, |
|
"learning_rate": 1.0727012122386415e-07, |
|
"logits/chosen": 0.22686560451984406, |
|
"logits/rejected": 0.31223005056381226, |
|
"logps/chosen": -321.732177734375, |
|
"logps/rejected": -370.21453857421875, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.1967837810516357, |
|
"rewards/margins": 6.164200782775879, |
|
"rewards/rejected": -3.967416286468506, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.1245102307357424, |
|
"grad_norm": 9.832217655573857, |
|
"learning_rate": 1.0343442603394282e-07, |
|
"logits/chosen": 0.15876635909080505, |
|
"logits/rejected": 0.19615477323532104, |
|
"logps/chosen": -317.2491149902344, |
|
"logps/rejected": -393.39251708984375, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.478919267654419, |
|
"rewards/margins": 6.481255531311035, |
|
"rewards/rejected": -4.002336502075195, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.1419242490204615, |
|
"grad_norm": 13.519648851089453, |
|
"learning_rate": 9.965059722771113e-08, |
|
"logits/chosen": 0.14877358078956604, |
|
"logits/rejected": 0.22332414984703064, |
|
"logps/chosen": -342.74566650390625, |
|
"logps/rejected": -403.8954162597656, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.4974751472473145, |
|
"rewards/margins": 6.861520290374756, |
|
"rewards/rejected": -4.364045143127441, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.1593382673051806, |
|
"grad_norm": 11.584711006576716, |
|
"learning_rate": 9.59199738202689e-08, |
|
"logits/chosen": 0.2872892916202545, |
|
"logits/rejected": 0.3496358394622803, |
|
"logps/chosen": -306.6114196777344, |
|
"logps/rejected": -357.1216125488281, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.1603891849517822, |
|
"rewards/margins": 7.638640403747559, |
|
"rewards/rejected": -5.478252410888672, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.1767522855898997, |
|
"grad_norm": 13.250705440919303, |
|
"learning_rate": 9.22438759984775e-08, |
|
"logits/chosen": 0.23227651417255402, |
|
"logits/rejected": 0.2910657525062561, |
|
"logps/chosen": -294.86236572265625, |
|
"logps/rejected": -350.4806213378906, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.0290393829345703, |
|
"rewards/margins": 6.93194580078125, |
|
"rewards/rejected": -4.9029059410095215, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.194166303874619, |
|
"grad_norm": 10.291356601407243, |
|
"learning_rate": 8.862360465377356e-08, |
|
"logits/chosen": 0.17067928612232208, |
|
"logits/rejected": 0.24521295726299286, |
|
"logps/chosen": -333.3260803222656, |
|
"logps/rejected": -365.29766845703125, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.595489263534546, |
|
"rewards/margins": 5.210162162780762, |
|
"rewards/rejected": -3.6146724224090576, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.2115803221593384, |
|
"grad_norm": 13.900410810772762, |
|
"learning_rate": 8.506044092181233e-08, |
|
"logits/chosen": 0.20954521000385284, |
|
"logits/rejected": 0.259429007768631, |
|
"logps/chosen": -309.11968994140625, |
|
"logps/rejected": -383.56964111328125, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 2.049609422683716, |
|
"rewards/margins": 6.555842399597168, |
|
"rewards/rejected": -4.506233215332031, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.2289943404440575, |
|
"grad_norm": 12.91211847131762, |
|
"learning_rate": 8.15556457291011e-08, |
|
"logits/chosen": 0.16506996750831604, |
|
"logits/rejected": 0.3168487846851349, |
|
"logps/chosen": -314.2242431640625, |
|
"logps/rejected": -387.1705017089844, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.2459182739257812, |
|
"rewards/margins": 6.696404933929443, |
|
"rewards/rejected": -4.45048713684082, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.2464083587287766, |
|
"grad_norm": 14.30007682532237, |
|
"learning_rate": 7.811045934678467e-08, |
|
"logits/chosen": 0.20867156982421875, |
|
"logits/rejected": 0.3092384338378906, |
|
"logps/chosen": -326.1697692871094, |
|
"logps/rejected": -394.4437561035156, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 2.4621286392211914, |
|
"rewards/margins": 6.926805019378662, |
|
"rewards/rejected": -4.464676856994629, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.2638223770134958, |
|
"grad_norm": 18.216105796084022, |
|
"learning_rate": 7.472610095174051e-08, |
|
"logits/chosen": 0.1239229068160057, |
|
"logits/rejected": 0.2724478840827942, |
|
"logps/chosen": -338.5323791503906, |
|
"logps/rejected": -356.60479736328125, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.9667659997940063, |
|
"rewards/margins": 5.838881492614746, |
|
"rewards/rejected": -3.87211537361145, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.2638223770134958, |
|
"eval_logits/chosen": 0.21147681772708893, |
|
"eval_logits/rejected": 0.26940545439720154, |
|
"eval_logps/chosen": -336.6758728027344, |
|
"eval_logps/rejected": -385.40179443359375, |
|
"eval_loss": 0.5836542844772339, |
|
"eval_rewards/accuracies": 0.7816455960273743, |
|
"eval_rewards/chosen": 1.9316082000732422, |
|
"eval_rewards/margins": 5.822873115539551, |
|
"eval_rewards/rejected": -3.8912646770477295, |
|
"eval_runtime": 70.4626, |
|
"eval_samples_per_second": 35.48, |
|
"eval_steps_per_second": 1.121, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.281236395298215, |
|
"grad_norm": 12.712872435452034, |
|
"learning_rate": 7.140376819513782e-08, |
|
"logits/chosen": 0.17473125457763672, |
|
"logits/rejected": 0.24829836189746857, |
|
"logps/chosen": -291.07281494140625, |
|
"logps/rejected": -378.64520263671875, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.978382110595703, |
|
"rewards/margins": 7.624729156494141, |
|
"rewards/rejected": -4.646347522735596, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.2986504135829344, |
|
"grad_norm": 12.944938100249528, |
|
"learning_rate": 6.814463677861556e-08, |
|
"logits/chosen": 0.20871727168560028, |
|
"logits/rejected": 0.31438320875167847, |
|
"logps/chosen": -343.16485595703125, |
|
"logps/rejected": -391.0745544433594, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.9679534435272217, |
|
"rewards/margins": 5.962490081787109, |
|
"rewards/rejected": -3.9945366382598877, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.3160644318676535, |
|
"grad_norm": 11.974646389262404, |
|
"learning_rate": 6.494986003822631e-08, |
|
"logits/chosen": 0.2468450963497162, |
|
"logits/rejected": 0.37068819999694824, |
|
"logps/chosen": -307.6761779785156, |
|
"logps/rejected": -366.2533874511719, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.4139225482940674, |
|
"rewards/margins": 6.581143379211426, |
|
"rewards/rejected": -4.167220592498779, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.3334784501523727, |
|
"grad_norm": 13.867952710064575, |
|
"learning_rate": 6.182056853629625e-08, |
|
"logits/chosen": 0.2119470089673996, |
|
"logits/rejected": 0.32034552097320557, |
|
"logps/chosen": -315.0056457519531, |
|
"logps/rejected": -343.30120849609375, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.3241541385650635, |
|
"rewards/margins": 6.567809104919434, |
|
"rewards/rejected": -4.243655204772949, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.3508924684370918, |
|
"grad_norm": 12.973425439181085, |
|
"learning_rate": 5.875786966134294e-08, |
|
"logits/chosen": 0.3225061893463135, |
|
"logits/rejected": 0.2947853207588196, |
|
"logps/chosen": -304.63616943359375, |
|
"logps/rejected": -394.45196533203125, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.3494211435317993, |
|
"rewards/margins": 5.996374130249023, |
|
"rewards/rejected": -4.6469526290893555, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.368306486721811, |
|
"grad_norm": 15.471139649509851, |
|
"learning_rate": 5.576284723619537e-08, |
|
"logits/chosen": 0.15628595650196075, |
|
"logits/rejected": 0.2223624885082245, |
|
"logps/chosen": -351.3752136230469, |
|
"logps/rejected": -382.9642333984375, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.9524848461151123, |
|
"rewards/margins": 6.2653093338012695, |
|
"rewards/rejected": -4.312824249267578, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.3857205050065304, |
|
"grad_norm": 12.394641149417646, |
|
"learning_rate": 5.283656113445181e-08, |
|
"logits/chosen": 0.20236392319202423, |
|
"logits/rejected": 0.2501729130744934, |
|
"logps/chosen": -332.77020263671875, |
|
"logps/rejected": -397.8937683105469, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 2.4705593585968018, |
|
"rewards/margins": 6.949694633483887, |
|
"rewards/rejected": -4.479134559631348, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.4031345232912495, |
|
"grad_norm": 11.158717402277203, |
|
"learning_rate": 4.998004690541358e-08, |
|
"logits/chosen": 0.18652646243572235, |
|
"logits/rejected": 0.2707822322845459, |
|
"logps/chosen": -306.2278747558594, |
|
"logps/rejected": -334.9998474121094, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.4948792457580566, |
|
"rewards/margins": 6.4533843994140625, |
|
"rewards/rejected": -3.9585044384002686, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.4205485415759687, |
|
"grad_norm": 9.57471341907718, |
|
"learning_rate": 4.719431540762697e-08, |
|
"logits/chosen": 0.3041616976261139, |
|
"logits/rejected": 0.37496739625930786, |
|
"logps/chosen": -292.21923828125, |
|
"logps/rejected": -355.483154296875, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.382023572921753, |
|
"rewards/margins": 6.094801902770996, |
|
"rewards/rejected": -3.7127785682678223, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.4379625598606878, |
|
"grad_norm": 13.814429830375937, |
|
"learning_rate": 4.44803524511618e-08, |
|
"logits/chosen": 0.2870798110961914, |
|
"logits/rejected": 0.2993796169757843, |
|
"logps/chosen": -281.04522705078125, |
|
"logps/rejected": -349.2628173828125, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 2.1859898567199707, |
|
"rewards/margins": 6.1772050857543945, |
|
"rewards/rejected": -3.991215467453003, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4379625598606878, |
|
"eval_logits/chosen": 0.18954265117645264, |
|
"eval_logits/rejected": 0.24652473628520966, |
|
"eval_logps/chosen": -337.7025451660156, |
|
"eval_logps/rejected": -387.1252746582031, |
|
"eval_loss": 0.5812255144119263, |
|
"eval_rewards/accuracies": 0.7879746556282043, |
|
"eval_rewards/chosen": 1.8289377689361572, |
|
"eval_rewards/margins": 5.892548084259033, |
|
"eval_rewards/rejected": -4.063610076904297, |
|
"eval_runtime": 70.4535, |
|
"eval_samples_per_second": 35.484, |
|
"eval_steps_per_second": 1.121, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.455376578145407, |
|
"grad_norm": 16.245030931518695, |
|
"learning_rate": 4.183911844875504e-08, |
|
"logits/chosen": 0.32038670778274536, |
|
"logits/rejected": 0.38889437913894653, |
|
"logps/chosen": -293.1011657714844, |
|
"logps/rejected": -366.3570556640625, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 2.785410165786743, |
|
"rewards/margins": 7.212292671203613, |
|
"rewards/rejected": -4.426882743835449, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.4727905964301264, |
|
"grad_norm": 15.3110536389278, |
|
"learning_rate": 3.9271548075940994e-08, |
|
"logits/chosen": 0.22183004021644592, |
|
"logits/rejected": 0.20372053980827332, |
|
"logps/chosen": -289.2795104980469, |
|
"logps/rejected": -411.65118408203125, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.527660608291626, |
|
"rewards/margins": 7.5610809326171875, |
|
"rewards/rejected": -5.033421039581299, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.4902046147148456, |
|
"grad_norm": 12.657778002838807, |
|
"learning_rate": 3.677854994029045e-08, |
|
"logits/chosen": 0.16141930222511292, |
|
"logits/rejected": 0.28222227096557617, |
|
"logps/chosen": -329.6971740722656, |
|
"logps/rejected": -367.878662109375, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.3014779090881348, |
|
"rewards/margins": 6.771458625793457, |
|
"rewards/rejected": -4.469980716705322, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.5076186329995647, |
|
"grad_norm": 15.358056422127024, |
|
"learning_rate": 3.436100625987373e-08, |
|
"logits/chosen": 0.16851040720939636, |
|
"logits/rejected": 0.21520404517650604, |
|
"logps/chosen": -313.4747619628906, |
|
"logps/rejected": -369.7133483886719, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 1.977530837059021, |
|
"rewards/margins": 5.828042507171631, |
|
"rewards/rejected": -3.8505122661590576, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.525032651284284, |
|
"grad_norm": 14.637744065096362, |
|
"learning_rate": 3.201977255106292e-08, |
|
"logits/chosen": 0.19486942887306213, |
|
"logits/rejected": 0.33268415927886963, |
|
"logps/chosen": -309.989990234375, |
|
"logps/rejected": -368.7453308105469, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.9779468774795532, |
|
"rewards/margins": 7.094501495361328, |
|
"rewards/rejected": -5.1165547370910645, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.542446669569003, |
|
"grad_norm": 14.541660553593134, |
|
"learning_rate": 2.975567732578324e-08, |
|
"logits/chosen": 0.23976822197437286, |
|
"logits/rejected": 0.220484659075737, |
|
"logps/chosen": -272.70562744140625, |
|
"logps/rejected": -348.28485107421875, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.1403632164001465, |
|
"rewards/margins": 6.054244518280029, |
|
"rewards/rejected": -3.91388201713562, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.559860687853722, |
|
"grad_norm": 15.96580948389595, |
|
"learning_rate": 2.7569521798320368e-08, |
|
"logits/chosen": 0.073675736784935, |
|
"logits/rejected": 0.2692302465438843, |
|
"logps/chosen": -336.4574279785156, |
|
"logps/rejected": -362.8096618652344, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.4895553588867188, |
|
"rewards/margins": 6.369107723236084, |
|
"rewards/rejected": -3.8795523643493652, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.5772747061384416, |
|
"grad_norm": 12.263431349665568, |
|
"learning_rate": 2.5462079601788367e-08, |
|
"logits/chosen": 0.2085539847612381, |
|
"logits/rejected": 0.31902140378952026, |
|
"logps/chosen": -305.46807861328125, |
|
"logps/rejected": -364.35003662109375, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.7462059259414673, |
|
"rewards/margins": 5.998797416687012, |
|
"rewards/rejected": -4.252591609954834, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.5946887244231607, |
|
"grad_norm": 14.474568168334054, |
|
"learning_rate": 2.3434096514357403e-08, |
|
"logits/chosen": 0.17831678688526154, |
|
"logits/rejected": 0.23579061031341553, |
|
"logps/chosen": -312.26025390625, |
|
"logps/rejected": -350.52056884765625, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 2.2567784786224365, |
|
"rewards/margins": 6.1402106285095215, |
|
"rewards/rejected": -3.883431911468506, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.61210274270788, |
|
"grad_norm": 11.85075934174815, |
|
"learning_rate": 2.1486290195339153e-08, |
|
"logits/chosen": 0.19868260622024536, |
|
"logits/rejected": 0.25412097573280334, |
|
"logps/chosen": -341.95849609375, |
|
"logps/rejected": -417.5733337402344, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.9825395345687866, |
|
"rewards/margins": 6.966047763824463, |
|
"rewards/rejected": -4.983508110046387, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.61210274270788, |
|
"eval_logits/chosen": 0.182608962059021, |
|
"eval_logits/rejected": 0.23953425884246826, |
|
"eval_logps/chosen": -338.5595703125, |
|
"eval_logps/rejected": -388.22418212890625, |
|
"eval_loss": 0.5813568234443665, |
|
"eval_rewards/accuracies": 0.7848101258277893, |
|
"eval_rewards/chosen": 1.7432384490966797, |
|
"eval_rewards/margins": 5.916741847991943, |
|
"eval_rewards/rejected": -4.173503398895264, |
|
"eval_runtime": 70.4593, |
|
"eval_samples_per_second": 35.481, |
|
"eval_steps_per_second": 1.121, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.629516760992599, |
|
"grad_norm": 14.554814181690626, |
|
"learning_rate": 1.9619349931222745e-08, |
|
"logits/chosen": 0.1401619017124176, |
|
"logits/rejected": 0.16610558331012726, |
|
"logps/chosen": -323.71453857421875, |
|
"logps/rejected": -358.86737060546875, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 1.5991357564926147, |
|
"rewards/margins": 5.076028347015381, |
|
"rewards/rejected": -3.4768924713134766, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.6469307792773185, |
|
"grad_norm": 14.072160649981779, |
|
"learning_rate": 1.783393639175096e-08, |
|
"logits/chosen": 0.2151796817779541, |
|
"logits/rejected": 0.20476499199867249, |
|
"logps/chosen": -325.95294189453125, |
|
"logps/rejected": -387.7116394042969, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.143306016921997, |
|
"rewards/margins": 6.754826545715332, |
|
"rewards/rejected": -4.611520290374756, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.6643447975620376, |
|
"grad_norm": 13.955290549780965, |
|
"learning_rate": 1.6130681396123807e-08, |
|
"logits/chosen": 0.21071608364582062, |
|
"logits/rejected": 0.3641330599784851, |
|
"logps/chosen": -315.24664306640625, |
|
"logps/rejected": -334.0530090332031, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.3173837661743164, |
|
"rewards/margins": 7.015236854553223, |
|
"rewards/rejected": -4.69785213470459, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.6817588158467567, |
|
"grad_norm": 11.853297635416354, |
|
"learning_rate": 1.4510187689411146e-08, |
|
"logits/chosen": 0.2876327931880951, |
|
"logits/rejected": 0.2770300507545471, |
|
"logps/chosen": -317.94451904296875, |
|
"logps/rejected": -363.93353271484375, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.1870007514953613, |
|
"rewards/margins": 6.509056091308594, |
|
"rewards/rejected": -4.322054386138916, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.699172834131476, |
|
"grad_norm": 12.475974805623585, |
|
"learning_rate": 1.2973028729254376e-08, |
|
"logits/chosen": 0.12307579815387726, |
|
"logits/rejected": 0.2281501293182373, |
|
"logps/chosen": -334.509033203125, |
|
"logps/rejected": -369.52850341796875, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 1.6136415004730225, |
|
"rewards/margins": 5.368951320648193, |
|
"rewards/rejected": -3.75531005859375, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.716586852416195, |
|
"grad_norm": 12.22576416710605, |
|
"learning_rate": 1.1519748482932217e-08, |
|
"logits/chosen": 0.1852325052022934, |
|
"logits/rejected": 0.23839490115642548, |
|
"logps/chosen": -318.0132751464844, |
|
"logps/rejected": -392.6969299316406, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.2853362560272217, |
|
"rewards/margins": 7.032902717590332, |
|
"rewards/rejected": -4.747566223144531, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.734000870700914, |
|
"grad_norm": 10.409074294198579, |
|
"learning_rate": 1.0150861234862362e-08, |
|
"logits/chosen": 0.207319974899292, |
|
"logits/rejected": 0.3117375075817108, |
|
"logps/chosen": -320.4088439941406, |
|
"logps/rejected": -359.54144287109375, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.6186834573745728, |
|
"rewards/margins": 6.262956142425537, |
|
"rewards/rejected": -4.644272804260254, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.7514148889856336, |
|
"grad_norm": 13.421082903045352, |
|
"learning_rate": 8.866851404607412e-09, |
|
"logits/chosen": 0.19717201590538025, |
|
"logits/rejected": 0.323054701089859, |
|
"logps/chosen": -331.4278259277344, |
|
"logps/rejected": -357.0315856933594, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.7053760290145874, |
|
"rewards/margins": 6.334009647369385, |
|
"rewards/rejected": -4.628633499145508, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.7688289072703527, |
|
"grad_norm": 11.2379650890193, |
|
"learning_rate": 7.668173375449338e-09, |
|
"logits/chosen": 0.2212512046098709, |
|
"logits/rejected": 0.34725892543792725, |
|
"logps/chosen": -327.44915771484375, |
|
"logps/rejected": -375.58099365234375, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 2.510315418243408, |
|
"rewards/margins": 6.8775482177734375, |
|
"rewards/rejected": -4.3672332763671875, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.786242925555072, |
|
"grad_norm": 13.18885343490197, |
|
"learning_rate": 6.555251333592998e-09, |
|
"logits/chosen": 0.17324404418468475, |
|
"logits/rejected": 0.2584208548069, |
|
"logps/chosen": -314.3653259277344, |
|
"logps/rejected": -364.072021484375, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 2.691312074661255, |
|
"rewards/margins": 6.843153953552246, |
|
"rewards/rejected": -4.15184211730957, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.786242925555072, |
|
"eval_logits/chosen": 0.18249349296092987, |
|
"eval_logits/rejected": 0.2394995093345642, |
|
"eval_logps/chosen": -337.3701477050781, |
|
"eval_logps/rejected": -386.60931396484375, |
|
"eval_loss": 0.5809482336044312, |
|
"eval_rewards/accuracies": 0.7879746556282043, |
|
"eval_rewards/chosen": 1.8621777296066284, |
|
"eval_rewards/margins": 5.874190807342529, |
|
"eval_rewards/rejected": -4.012012958526611, |
|
"eval_runtime": 70.7757, |
|
"eval_samples_per_second": 35.323, |
|
"eval_steps_per_second": 1.116, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.803656943839791, |
|
"grad_norm": 11.423877289010216, |
|
"learning_rate": 5.5284791180557626e-09, |
|
"logits/chosen": 0.13084809482097626, |
|
"logits/rejected": 0.2546931207180023, |
|
"logps/chosen": -317.41326904296875, |
|
"logps/rejected": -364.1874084472656, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.877049207687378, |
|
"rewards/margins": 6.358148097991943, |
|
"rewards/rejected": -4.481098175048828, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.8210709621245105, |
|
"grad_norm": 12.979992159418385, |
|
"learning_rate": 4.5882200812965846e-09, |
|
"logits/chosen": 0.05934108421206474, |
|
"logits/rejected": 0.15001478791236877, |
|
"logps/chosen": -352.37542724609375, |
|
"logps/rejected": -369.80731201171875, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.666846752166748, |
|
"rewards/margins": 6.001718997955322, |
|
"rewards/rejected": -3.3348724842071533, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.8384849804092296, |
|
"grad_norm": 10.84919385210753, |
|
"learning_rate": 3.734806960633274e-09, |
|
"logits/chosen": 0.2253892421722412, |
|
"logits/rejected": 0.22172415256500244, |
|
"logps/chosen": -297.4983825683594, |
|
"logps/rejected": -369.169189453125, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 3.0143134593963623, |
|
"rewards/margins": 7.2586469650268555, |
|
"rewards/rejected": -4.2443342208862305, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.8558989986939487, |
|
"grad_norm": 12.859015837238639, |
|
"learning_rate": 2.9685417604940755e-09, |
|
"logits/chosen": 0.07296539843082428, |
|
"logits/rejected": 0.22939100861549377, |
|
"logps/chosen": -346.80975341796875, |
|
"logps/rejected": -376.8129577636719, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.122498035430908, |
|
"rewards/margins": 5.796047210693359, |
|
"rewards/rejected": -3.6735496520996094, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.873313016978668, |
|
"grad_norm": 12.979116784826523, |
|
"learning_rate": 2.289695645544709e-09, |
|
"logits/chosen": 0.16871683299541473, |
|
"logits/rejected": 0.23157720267772675, |
|
"logps/chosen": -316.1363830566406, |
|
"logps/rejected": -375.68621826171875, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 2.305443525314331, |
|
"rewards/margins": 6.389838218688965, |
|
"rewards/rejected": -4.0843939781188965, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.890727035263387, |
|
"grad_norm": 11.980615628068511, |
|
"learning_rate": 1.698508844729274e-09, |
|
"logits/chosen": 0.18221810460090637, |
|
"logits/rejected": 0.2593774199485779, |
|
"logps/chosen": -307.32135009765625, |
|
"logps/rejected": -360.2170104980469, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 2.1851983070373535, |
|
"rewards/margins": 6.605923652648926, |
|
"rewards/rejected": -4.4207258224487305, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.908141053548106, |
|
"grad_norm": 14.609213847266954, |
|
"learning_rate": 1.195190566258164e-09, |
|
"logits/chosen": 0.17112967371940613, |
|
"logits/rejected": 0.22373953461647034, |
|
"logps/chosen": -311.63116455078125, |
|
"logps/rejected": -366.12811279296875, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 2.0072693824768066, |
|
"rewards/margins": 6.717558860778809, |
|
"rewards/rejected": -4.71028995513916, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.925555071832825, |
|
"grad_norm": 14.416615274869756, |
|
"learning_rate": 7.799189235740133e-10, |
|
"logits/chosen": 0.292160302400589, |
|
"logits/rejected": 0.35216864943504333, |
|
"logps/chosen": -307.4648132324219, |
|
"logps/rejected": -359.35198974609375, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 2.0985865592956543, |
|
"rewards/margins": 6.522464752197266, |
|
"rewards/rejected": -4.423877716064453, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.9429690901175447, |
|
"grad_norm": 11.727002920198004, |
|
"learning_rate": 4.5284087232114186e-10, |
|
"logits/chosen": 0.14393924176692963, |
|
"logits/rejected": 0.29668131470680237, |
|
"logps/chosen": -344.20501708984375, |
|
"logps/rejected": -383.1951599121094, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.8797314167022705, |
|
"rewards/margins": 6.819934844970703, |
|
"rewards/rejected": -4.940203666687012, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.960383108402264, |
|
"grad_norm": 12.672419053954089, |
|
"learning_rate": 2.1407215834112756e-10, |
|
"logits/chosen": 0.2254418432712555, |
|
"logits/rejected": 0.3040105700492859, |
|
"logps/chosen": -312.3700256347656, |
|
"logps/rejected": -358.2984619140625, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 2.4021451473236084, |
|
"rewards/margins": 6.556065559387207, |
|
"rewards/rejected": -4.153920650482178, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.960383108402264, |
|
"eval_logits/chosen": 0.1785387396812439, |
|
"eval_logits/rejected": 0.23552976548671722, |
|
"eval_logps/chosen": -337.86688232421875, |
|
"eval_logps/rejected": -387.3112487792969, |
|
"eval_loss": 0.5807977914810181, |
|
"eval_rewards/accuracies": 0.7879746556282043, |
|
"eval_rewards/chosen": 1.8125056028366089, |
|
"eval_rewards/margins": 5.89471435546875, |
|
"eval_rewards/rejected": -4.08220911026001, |
|
"eval_runtime": 70.4219, |
|
"eval_samples_per_second": 35.5, |
|
"eval_steps_per_second": 1.122, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.977797126686983, |
|
"grad_norm": 15.591584256119575, |
|
"learning_rate": 6.369727671279301e-11, |
|
"logits/chosen": 0.2616572380065918, |
|
"logits/rejected": 0.33414459228515625, |
|
"logps/chosen": -307.9795837402344, |
|
"logps/rejected": -374.9503479003906, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.143859386444092, |
|
"rewards/margins": 6.652590274810791, |
|
"rewards/rejected": -4.508730888366699, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.9952111449717025, |
|
"grad_norm": 11.823556322241616, |
|
"learning_rate": 1.7694418513736209e-12, |
|
"logits/chosen": 0.21972191333770752, |
|
"logits/rejected": 0.25740760564804077, |
|
"logps/chosen": -297.4487609863281, |
|
"logps/rejected": -355.6648864746094, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 2.5511362552642822, |
|
"rewards/margins": 6.9618659019470215, |
|
"rewards/rejected": -4.410729885101318, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.998693948628646, |
|
"step": 1722, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5855818307081304, |
|
"train_runtime": 16735.6732, |
|
"train_samples_per_second": 13.174, |
|
"train_steps_per_second": 0.103 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1722, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|