|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 391, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 210.34713052784278, |
|
"learning_rate": 2.5e-09, |
|
"logits/chosen": -4.623842239379883, |
|
"logits/rejected": -4.85917854309082, |
|
"logps/chosen": -239.31422424316406, |
|
"logps/rejected": -207.56365966796875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 198.95172630432864, |
|
"learning_rate": 2.5e-08, |
|
"logits/chosen": -4.3338446617126465, |
|
"logits/rejected": -4.64424991607666, |
|
"logps/chosen": -265.20184326171875, |
|
"logps/rejected": -215.72174072265625, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.004745930898934603, |
|
"rewards/margins": -0.004067909903824329, |
|
"rewards/rejected": -0.0006780209369026124, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 204.7891876677461, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -4.509727478027344, |
|
"logits/rejected": -4.74410343170166, |
|
"logps/chosen": -267.73052978515625, |
|
"logps/rejected": -216.7478485107422, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.010470375418663025, |
|
"rewards/margins": 0.01739482954144478, |
|
"rewards/rejected": -0.006924452725797892, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 204.94575488992174, |
|
"learning_rate": 7.5e-08, |
|
"logits/chosen": -4.5970940589904785, |
|
"logits/rejected": -4.777865409851074, |
|
"logps/chosen": -257.5598449707031, |
|
"logps/rejected": -215.4015350341797, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.04864828661084175, |
|
"rewards/margins": 0.09208732843399048, |
|
"rewards/rejected": -0.04343904182314873, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 163.67699084811588, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -4.643096923828125, |
|
"logits/rejected": -4.7387237548828125, |
|
"logps/chosen": -249.96743774414062, |
|
"logps/rejected": -223.3234405517578, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.18158790469169617, |
|
"rewards/margins": 0.36420467495918274, |
|
"rewards/rejected": -0.18261677026748657, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 125.1152304775479, |
|
"learning_rate": 9.979985922607475e-08, |
|
"logits/chosen": -4.558148384094238, |
|
"logits/rejected": -4.785082817077637, |
|
"logps/chosen": -265.6357727050781, |
|
"logps/rejected": -234.0360107421875, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.3166799247264862, |
|
"rewards/margins": 0.7249041795730591, |
|
"rewards/rejected": -0.4082241952419281, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 110.5697848266263, |
|
"learning_rate": 9.92010391574745e-08, |
|
"logits/chosen": -4.701218605041504, |
|
"logits/rejected": -4.855440139770508, |
|
"logps/chosen": -232.1560821533203, |
|
"logps/rejected": -235.8180389404297, |
|
"loss": 0.3379, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.44831886887550354, |
|
"rewards/margins": 1.4881489276885986, |
|
"rewards/rejected": -1.039829969406128, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 103.09926490168155, |
|
"learning_rate": 9.820833372667812e-08, |
|
"logits/chosen": -4.597586631774902, |
|
"logits/rejected": -4.846543312072754, |
|
"logps/chosen": -243.5035858154297, |
|
"logps/rejected": -245.3424072265625, |
|
"loss": 0.3085, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.4819186329841614, |
|
"rewards/margins": 1.8609161376953125, |
|
"rewards/rejected": -1.378997564315796, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 89.87848352821936, |
|
"learning_rate": 9.682969016701356e-08, |
|
"logits/chosen": -4.592278957366943, |
|
"logits/rejected": -4.840281963348389, |
|
"logps/chosen": -249.3519744873047, |
|
"logps/rejected": -261.445068359375, |
|
"loss": 0.2624, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.593399703502655, |
|
"rewards/margins": 2.1497161388397217, |
|
"rewards/rejected": -1.5563163757324219, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 98.45898295424381, |
|
"learning_rate": 9.507614539004081e-08, |
|
"logits/chosen": -4.667254447937012, |
|
"logits/rejected": -4.913816928863525, |
|
"logps/chosen": -235.763427734375, |
|
"logps/rejected": -244.2578582763672, |
|
"loss": 0.2462, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.7724655866622925, |
|
"rewards/margins": 2.8438591957092285, |
|
"rewards/rejected": -2.0713934898376465, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 87.96881533227138, |
|
"learning_rate": 9.296173762811083e-08, |
|
"logits/chosen": -4.5116472244262695, |
|
"logits/rejected": -4.829812049865723, |
|
"logps/chosen": -238.08468627929688, |
|
"logps/rejected": -269.5484619140625, |
|
"loss": 0.2472, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.931675910949707, |
|
"rewards/margins": 3.0536458492279053, |
|
"rewards/rejected": -2.1219699382781982, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -4.58513879776001, |
|
"eval_logits/rejected": -4.80186128616333, |
|
"eval_logps/chosen": -394.6981201171875, |
|
"eval_logps/rejected": -515.9166259765625, |
|
"eval_loss": 0.9610964059829712, |
|
"eval_rewards/accuracies": 0.390625, |
|
"eval_rewards/chosen": -0.21118265390396118, |
|
"eval_rewards/margins": -0.3347358703613281, |
|
"eval_rewards/rejected": 0.12355318665504456, |
|
"eval_runtime": 97.8315, |
|
"eval_samples_per_second": 20.443, |
|
"eval_steps_per_second": 0.327, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 84.98735748868098, |
|
"learning_rate": 9.050339404945832e-08, |
|
"logits/chosen": -4.55401611328125, |
|
"logits/rejected": -4.845933437347412, |
|
"logps/chosen": -229.4434356689453, |
|
"logps/rejected": -257.52984619140625, |
|
"loss": 0.2226, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.9871166348457336, |
|
"rewards/margins": 3.0293149948120117, |
|
"rewards/rejected": -2.042198419570923, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 75.78122724506682, |
|
"learning_rate": 8.77207952455395e-08, |
|
"logits/chosen": -4.49249792098999, |
|
"logits/rejected": -4.787415981292725, |
|
"logps/chosen": -252.7578125, |
|
"logps/rejected": -273.38555908203125, |
|
"loss": 0.2215, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.1615877151489258, |
|
"rewards/margins": 3.400435209274292, |
|
"rewards/rejected": -2.238847255706787, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 109.1136183108071, |
|
"learning_rate": 8.463621767547997e-08, |
|
"logits/chosen": -4.589264869689941, |
|
"logits/rejected": -4.87318229675293, |
|
"logps/chosen": -239.29531860351562, |
|
"logps/rejected": -265.04693603515625, |
|
"loss": 0.2169, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.9574294090270996, |
|
"rewards/margins": 3.4433422088623047, |
|
"rewards/rejected": -2.485912799835205, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 79.72525878658313, |
|
"learning_rate": 8.127435532896387e-08, |
|
"logits/chosen": -4.636221885681152, |
|
"logits/rejected": -4.9098310470581055, |
|
"logps/chosen": -267.59625244140625, |
|
"logps/rejected": -288.02349853515625, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.8299416303634644, |
|
"rewards/margins": 3.622443675994873, |
|
"rewards/rejected": -2.792501926422119, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 94.45112212404622, |
|
"learning_rate": 7.766212203526569e-08, |
|
"logits/chosen": -4.643942832946777, |
|
"logits/rejected": -4.911728382110596, |
|
"logps/chosen": -233.4263153076172, |
|
"logps/rejected": -277.07818603515625, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.9495984315872192, |
|
"rewards/margins": 3.8475449085235596, |
|
"rewards/rejected": -2.89794659614563, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 85.35291313866578, |
|
"learning_rate": 7.382843600106538e-08, |
|
"logits/chosen": -4.690377235412598, |
|
"logits/rejected": -4.9024457931518555, |
|
"logps/chosen": -233.21981811523438, |
|
"logps/rejected": -271.2682189941406, |
|
"loss": 0.1861, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9118326306343079, |
|
"rewards/margins": 3.6947906017303467, |
|
"rewards/rejected": -2.7829582691192627, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 75.89103255157417, |
|
"learning_rate": 6.980398830195784e-08, |
|
"logits/chosen": -4.554282188415527, |
|
"logits/rejected": -4.874223232269287, |
|
"logps/chosen": -236.4412078857422, |
|
"logps/rejected": -279.4911804199219, |
|
"loss": 0.1833, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.2316501140594482, |
|
"rewards/margins": 4.21605920791626, |
|
"rewards/rejected": -2.9844090938568115, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 77.81291773020575, |
|
"learning_rate": 6.562099718102787e-08, |
|
"logits/chosen": -4.651320934295654, |
|
"logits/rejected": -4.9173784255981445, |
|
"logps/chosen": -215.70126342773438, |
|
"logps/rejected": -251.5159149169922, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.0581190586090088, |
|
"rewards/margins": 3.832904815673828, |
|
"rewards/rejected": -2.7747855186462402, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 86.461876717381, |
|
"learning_rate": 6.131295012148612e-08, |
|
"logits/chosen": -4.617634296417236, |
|
"logits/rejected": -4.793360233306885, |
|
"logps/chosen": -239.32681274414062, |
|
"logps/rejected": -286.96124267578125, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.0783030986785889, |
|
"rewards/margins": 3.6680614948272705, |
|
"rewards/rejected": -2.5897579193115234, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 84.24320751887706, |
|
"learning_rate": 5.691433575823665e-08, |
|
"logits/chosen": -4.624228477478027, |
|
"logits/rejected": -4.830000877380371, |
|
"logps/chosen": -233.09713745117188, |
|
"logps/rejected": -271.84051513671875, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.9521042108535767, |
|
"rewards/margins": 3.7768027782440186, |
|
"rewards/rejected": -2.8246986865997314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -4.606511116027832, |
|
"eval_logits/rejected": -4.8388166427612305, |
|
"eval_logps/chosen": -405.0722351074219, |
|
"eval_logps/rejected": -524.7885131835938, |
|
"eval_loss": 1.102483868598938, |
|
"eval_rewards/accuracies": 0.375, |
|
"eval_rewards/chosen": -0.729888916015625, |
|
"eval_rewards/margins": -0.40984660387039185, |
|
"eval_rewards/rejected": -0.32004231214523315, |
|
"eval_runtime": 97.8012, |
|
"eval_samples_per_second": 20.45, |
|
"eval_steps_per_second": 0.327, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 70.97621814359026, |
|
"learning_rate": 5.2460367774593905e-08, |
|
"logits/chosen": -4.6944451332092285, |
|
"logits/rejected": -4.962179183959961, |
|
"logps/chosen": -243.93307495117188, |
|
"logps/rejected": -297.62066650390625, |
|
"loss": 0.1723, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.0976004600524902, |
|
"rewards/margins": 4.546332836151123, |
|
"rewards/rejected": -3.448732376098633, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 71.88477654183092, |
|
"learning_rate": 4.798670299452925e-08, |
|
"logits/chosen": -4.529160499572754, |
|
"logits/rejected": -4.8643479347229, |
|
"logps/chosen": -241.5579833984375, |
|
"logps/rejected": -293.224365234375, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.0626676082611084, |
|
"rewards/margins": 4.490227699279785, |
|
"rewards/rejected": -3.4275600910186768, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 83.09100453064212, |
|
"learning_rate": 4.3529155927297226e-08, |
|
"logits/chosen": -4.6047258377075195, |
|
"logits/rejected": -4.93651008605957, |
|
"logps/chosen": -241.11477661132812, |
|
"logps/rejected": -293.9808044433594, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9927155375480652, |
|
"rewards/margins": 4.5062031745910645, |
|
"rewards/rejected": -3.5134873390197754, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 70.11336436391163, |
|
"learning_rate": 3.9123412049691636e-08, |
|
"logits/chosen": -4.588685035705566, |
|
"logits/rejected": -4.866146087646484, |
|
"logps/chosen": -252.31533813476562, |
|
"logps/rejected": -294.6343688964844, |
|
"loss": 0.1875, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.0253931283950806, |
|
"rewards/margins": 4.710432529449463, |
|
"rewards/rejected": -3.6850390434265137, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 77.75874575792918, |
|
"learning_rate": 3.480474212128766e-08, |
|
"logits/chosen": -4.716187000274658, |
|
"logits/rejected": -4.966707229614258, |
|
"logps/chosen": -231.89279174804688, |
|
"logps/rejected": -266.51666259765625, |
|
"loss": 0.1825, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.840434193611145, |
|
"rewards/margins": 3.7858078479766846, |
|
"rewards/rejected": -2.94537353515625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 97.12524424809816, |
|
"learning_rate": 3.060771981975726e-08, |
|
"logits/chosen": -4.585513114929199, |
|
"logits/rejected": -4.878482341766357, |
|
"logps/chosen": -234.92617797851562, |
|
"logps/rejected": -297.1214904785156, |
|
"loss": 0.1837, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.9561206102371216, |
|
"rewards/margins": 4.824769973754883, |
|
"rewards/rejected": -3.86864972114563, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 92.49874438996748, |
|
"learning_rate": 2.6565944956764818e-08, |
|
"logits/chosen": -4.684746742248535, |
|
"logits/rejected": -4.911890983581543, |
|
"logps/chosen": -243.29568481445312, |
|
"logps/rejected": -288.39111328125, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8868792653083801, |
|
"rewards/margins": 4.555182456970215, |
|
"rewards/rejected": -3.6683037281036377, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 73.7028241699641, |
|
"learning_rate": 2.2711774490274766e-08, |
|
"logits/chosen": -4.634344577789307, |
|
"logits/rejected": -4.873081207275391, |
|
"logps/chosen": -245.1703338623047, |
|
"logps/rejected": -317.2539978027344, |
|
"loss": 0.1644, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.9668266177177429, |
|
"rewards/margins": 4.682557582855225, |
|
"rewards/rejected": -3.715731143951416, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 142.25337407808868, |
|
"learning_rate": 1.9076063486687256e-08, |
|
"logits/chosen": -4.503401756286621, |
|
"logits/rejected": -4.866554260253906, |
|
"logps/chosen": -250.9346160888672, |
|
"logps/rejected": -283.41046142578125, |
|
"loss": 0.1799, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 1.083687424659729, |
|
"rewards/margins": 4.472739219665527, |
|
"rewards/rejected": -3.389052152633667, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 88.66793876665662, |
|
"learning_rate": 1.5687918106563324e-08, |
|
"logits/chosen": -4.625166416168213, |
|
"logits/rejected": -4.831929683685303, |
|
"logps/chosen": -232.6981658935547, |
|
"logps/rejected": -288.00457763671875, |
|
"loss": 0.195, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 1.0354994535446167, |
|
"rewards/margins": 4.637454509735107, |
|
"rewards/rejected": -3.6019554138183594, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -4.624210357666016, |
|
"eval_logits/rejected": -4.856749057769775, |
|
"eval_logps/chosen": -411.3396911621094, |
|
"eval_logps/rejected": -531.6535034179688, |
|
"eval_loss": 1.1301820278167725, |
|
"eval_rewards/accuracies": 0.41015625, |
|
"eval_rewards/chosen": -1.0432608127593994, |
|
"eval_rewards/margins": -0.3799673318862915, |
|
"eval_rewards/rejected": -0.6632934212684631, |
|
"eval_runtime": 97.9609, |
|
"eval_samples_per_second": 20.416, |
|
"eval_steps_per_second": 0.327, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 90.90394303193246, |
|
"learning_rate": 1.257446259144494e-08, |
|
"logits/chosen": -4.541079044342041, |
|
"logits/rejected": -4.873132228851318, |
|
"logps/chosen": -239.60592651367188, |
|
"logps/rejected": -298.159423828125, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.076683759689331, |
|
"rewards/margins": 4.874758243560791, |
|
"rewards/rejected": -3.7980740070343018, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 87.85310576006609, |
|
"learning_rate": 9.760622117187234e-09, |
|
"logits/chosen": -4.597599029541016, |
|
"logits/rejected": -4.9500837326049805, |
|
"logps/chosen": -227.94247436523438, |
|
"logps/rejected": -279.3809814453125, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.8610901832580566, |
|
"rewards/margins": 4.507565498352051, |
|
"rewards/rejected": -3.6464743614196777, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 85.81889719468313, |
|
"learning_rate": 7.2689232521989885e-09, |
|
"logits/chosen": -4.554391860961914, |
|
"logits/rejected": -4.864416599273682, |
|
"logps/chosen": -249.89169311523438, |
|
"logps/rejected": -304.54913330078125, |
|
"loss": 0.1773, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.9128581881523132, |
|
"rewards/margins": 4.5053324699401855, |
|
"rewards/rejected": -3.5924744606018066, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 77.2990699180903, |
|
"learning_rate": 5.119313618049309e-09, |
|
"logits/chosen": -4.570425987243652, |
|
"logits/rejected": -4.913475513458252, |
|
"logps/chosen": -250.6792449951172, |
|
"logps/rejected": -277.26556396484375, |
|
"loss": 0.1723, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 1.1733391284942627, |
|
"rewards/margins": 4.697513580322266, |
|
"rewards/rejected": -3.524174451828003, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 74.61892537865367, |
|
"learning_rate": 3.3290021961708158e-09, |
|
"logits/chosen": -4.588479995727539, |
|
"logits/rejected": -4.761317253112793, |
|
"logps/chosen": -238.91921997070312, |
|
"logps/rejected": -291.2458190917969, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8275976181030273, |
|
"rewards/margins": 4.001389026641846, |
|
"rewards/rejected": -3.1737911701202393, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 81.22407668854541, |
|
"learning_rate": 1.9123215591052013e-09, |
|
"logits/chosen": -4.583038806915283, |
|
"logits/rejected": -4.805889129638672, |
|
"logps/chosen": -244.8368682861328, |
|
"logps/rejected": -294.9869079589844, |
|
"loss": 0.1907, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": 0.8399018049240112, |
|
"rewards/margins": 4.216121673583984, |
|
"rewards/rejected": -3.3762192726135254, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 75.17805842008224, |
|
"learning_rate": 8.806131292167618e-10, |
|
"logits/chosen": -4.595518112182617, |
|
"logits/rejected": -4.752079010009766, |
|
"logps/chosen": -239.1554412841797, |
|
"logps/rejected": -302.4869079589844, |
|
"loss": 0.1904, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.8832891583442688, |
|
"rewards/margins": 4.165283679962158, |
|
"rewards/rejected": -3.281994581222534, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 91.00267878372446, |
|
"learning_rate": 2.4213638345040867e-10, |
|
"logits/chosen": -4.70483922958374, |
|
"logits/rejected": -4.97845983505249, |
|
"logps/chosen": -242.5469207763672, |
|
"logps/rejected": -292.7474670410156, |
|
"loss": 0.1788, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9767888188362122, |
|
"rewards/margins": 4.587931156158447, |
|
"rewards/rejected": -3.61114239692688, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 100.5241948062632, |
|
"learning_rate": 2.0027310073833516e-12, |
|
"logits/chosen": -4.696263313293457, |
|
"logits/rejected": -4.96966028213501, |
|
"logps/chosen": -238.3385772705078, |
|
"logps/rejected": -292.5868835449219, |
|
"loss": 0.1773, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 1.066699743270874, |
|
"rewards/margins": 4.670289516448975, |
|
"rewards/rejected": -3.6035892963409424, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 391, |
|
"total_flos": 0.0, |
|
"train_loss": 0.256967593336959, |
|
"train_runtime": 6146.1986, |
|
"train_samples_per_second": 8.135, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 391, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|