|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.994495412844037, |
|
"eval_steps": 500, |
|
"global_step": 408, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.014678899082568808, |
|
"grad_norm": 1.6221877336502075, |
|
"learning_rate": 2.439024390243903e-07, |
|
"logits/chosen": -0.7747717499732971, |
|
"logits/rejected": -2.1629748344421387, |
|
"logps/chosen": -248.1531524658203, |
|
"logps/rejected": -193.47401428222656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.029357798165137616, |
|
"grad_norm": 1.7869738340377808, |
|
"learning_rate": 4.878048780487805e-07, |
|
"logits/chosen": -0.8566749095916748, |
|
"logits/rejected": -2.0849714279174805, |
|
"logps/chosen": -272.6571960449219, |
|
"logps/rejected": -212.95504760742188, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.004582214169204235, |
|
"rewards/margins": 0.005821239203214645, |
|
"rewards/rejected": -0.001239025266841054, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.044036697247706424, |
|
"grad_norm": 1.8637501001358032, |
|
"learning_rate": 7.317073170731707e-07, |
|
"logits/chosen": -1.0140094757080078, |
|
"logits/rejected": -2.308026075363159, |
|
"logps/chosen": -277.5355529785156, |
|
"logps/rejected": -211.09579467773438, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0007174371276050806, |
|
"rewards/margins": -0.005946946796029806, |
|
"rewards/rejected": 0.005229509435594082, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05871559633027523, |
|
"grad_norm": 1.6607173681259155, |
|
"learning_rate": 9.75609756097561e-07, |
|
"logits/chosen": -1.1254106760025024, |
|
"logits/rejected": -2.3103413581848145, |
|
"logps/chosen": -236.63705444335938, |
|
"logps/rejected": -172.72019958496094, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.003201799700036645, |
|
"rewards/margins": -0.005821935832500458, |
|
"rewards/rejected": 0.002620136830955744, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07339449541284404, |
|
"grad_norm": 1.9114854335784912, |
|
"learning_rate": 1.2195121951219514e-06, |
|
"logits/chosen": -0.8396108746528625, |
|
"logits/rejected": -2.356397867202759, |
|
"logps/chosen": -285.3742980957031, |
|
"logps/rejected": -167.82122802734375, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.00033334456384181976, |
|
"rewards/margins": -0.00014529703184962273, |
|
"rewards/rejected": -0.00018804636783897877, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08807339449541285, |
|
"grad_norm": 2.11832332611084, |
|
"learning_rate": 1.4634146341463414e-06, |
|
"logits/chosen": -0.8920382857322693, |
|
"logits/rejected": -2.228362798690796, |
|
"logps/chosen": -321.5919189453125, |
|
"logps/rejected": -234.6510467529297, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.007709708996117115, |
|
"rewards/margins": 0.00042582384776324034, |
|
"rewards/rejected": -0.008135532028973103, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.10275229357798166, |
|
"grad_norm": 1.8611751794815063, |
|
"learning_rate": 1.707317073170732e-06, |
|
"logits/chosen": -0.7588200569152832, |
|
"logits/rejected": -2.1696934700012207, |
|
"logps/chosen": -239.06419372558594, |
|
"logps/rejected": -176.63113403320312, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004243845120072365, |
|
"rewards/margins": 0.0036368640139698982, |
|
"rewards/rejected": 0.0006069811061024666, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.11743119266055047, |
|
"grad_norm": 1.886559009552002, |
|
"learning_rate": 1.951219512195122e-06, |
|
"logits/chosen": -0.9555227756500244, |
|
"logits/rejected": -2.175183057785034, |
|
"logps/chosen": -252.0240478515625, |
|
"logps/rejected": -196.8078155517578, |
|
"loss": 0.6971, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.009129801765084267, |
|
"rewards/margins": -0.006077417638152838, |
|
"rewards/rejected": -0.0030523836612701416, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.13211009174311927, |
|
"grad_norm": 2.0312628746032715, |
|
"learning_rate": 2.1951219512195125e-06, |
|
"logits/chosen": -0.8100669384002686, |
|
"logits/rejected": -2.175964832305908, |
|
"logps/chosen": -294.4822692871094, |
|
"logps/rejected": -208.19273376464844, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003589821048080921, |
|
"rewards/margins": -0.0039059999398887157, |
|
"rewards/rejected": 0.007495820987969637, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.14678899082568808, |
|
"grad_norm": 1.7934964895248413, |
|
"learning_rate": 2.4390243902439027e-06, |
|
"logits/chosen": -0.8831109404563904, |
|
"logits/rejected": -2.3040294647216797, |
|
"logps/chosen": -289.34765625, |
|
"logps/rejected": -165.73974609375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.004587325267493725, |
|
"rewards/margins": 0.0008857820648699999, |
|
"rewards/rejected": 0.003701543901115656, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1614678899082569, |
|
"grad_norm": 1.853503942489624, |
|
"learning_rate": 2.682926829268293e-06, |
|
"logits/chosen": -1.0103861093521118, |
|
"logits/rejected": -2.400630474090576, |
|
"logps/chosen": -279.53070068359375, |
|
"logps/rejected": -195.76055908203125, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.006311780773103237, |
|
"rewards/margins": 0.01845557615160942, |
|
"rewards/rejected": -0.012143796309828758, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1761467889908257, |
|
"grad_norm": 1.876775860786438, |
|
"learning_rate": 2.926829268292683e-06, |
|
"logits/chosen": -0.8252619504928589, |
|
"logits/rejected": -2.4449005126953125, |
|
"logps/chosen": -349.03436279296875, |
|
"logps/rejected": -165.7700958251953, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.009145505726337433, |
|
"rewards/margins": 0.021766357123851776, |
|
"rewards/rejected": -0.012620854191482067, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1908256880733945, |
|
"grad_norm": 1.8004379272460938, |
|
"learning_rate": 3.1707317073170736e-06, |
|
"logits/chosen": -1.0986889600753784, |
|
"logits/rejected": -2.3428730964660645, |
|
"logps/chosen": -307.81927490234375, |
|
"logps/rejected": -190.91653442382812, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0025536715984344482, |
|
"rewards/margins": 0.014828955754637718, |
|
"rewards/rejected": -0.012275285087525845, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.20550458715596331, |
|
"grad_norm": 1.6710703372955322, |
|
"learning_rate": 3.414634146341464e-06, |
|
"logits/chosen": -1.0080599784851074, |
|
"logits/rejected": -2.3728153705596924, |
|
"logps/chosen": -268.09893798828125, |
|
"logps/rejected": -173.4761199951172, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.009534865617752075, |
|
"rewards/margins": 0.010362524539232254, |
|
"rewards/rejected": -0.000827658106572926, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.22018348623853212, |
|
"grad_norm": 1.6752780675888062, |
|
"learning_rate": 3.6585365853658537e-06, |
|
"logits/chosen": -0.8720824718475342, |
|
"logits/rejected": -2.2563316822052, |
|
"logps/chosen": -272.8302001953125, |
|
"logps/rejected": -207.05535888671875, |
|
"loss": 0.6955, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.008826537057757378, |
|
"rewards/margins": -0.0032597517129033804, |
|
"rewards/rejected": -0.005566785112023354, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23486238532110093, |
|
"grad_norm": 1.8804222345352173, |
|
"learning_rate": 3.902439024390244e-06, |
|
"logits/chosen": -0.9830515384674072, |
|
"logits/rejected": -2.316348075866699, |
|
"logps/chosen": -269.2813415527344, |
|
"logps/rejected": -172.68609619140625, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.004377659410238266, |
|
"rewards/margins": 0.005325704347342253, |
|
"rewards/rejected": -0.009703361429274082, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.24954128440366974, |
|
"grad_norm": 2.0692217350006104, |
|
"learning_rate": 4.146341463414634e-06, |
|
"logits/chosen": -0.969600260257721, |
|
"logits/rejected": -2.3533740043640137, |
|
"logps/chosen": -307.40386962890625, |
|
"logps/rejected": -173.79537963867188, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.010469496250152588, |
|
"rewards/margins": 0.020308587700128555, |
|
"rewards/rejected": -0.009839091449975967, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.26422018348623855, |
|
"grad_norm": 1.9420565366744995, |
|
"learning_rate": 4.390243902439025e-06, |
|
"logits/chosen": -1.0136321783065796, |
|
"logits/rejected": -2.2236032485961914, |
|
"logps/chosen": -293.4641418457031, |
|
"logps/rejected": -213.1688232421875, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.019286828115582466, |
|
"rewards/margins": 0.028108475729823112, |
|
"rewards/rejected": -0.008821647614240646, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.27889908256880735, |
|
"grad_norm": 1.8155481815338135, |
|
"learning_rate": 4.634146341463416e-06, |
|
"logits/chosen": -1.0221481323242188, |
|
"logits/rejected": -2.349212884902954, |
|
"logps/chosen": -273.8066101074219, |
|
"logps/rejected": -209.1209259033203, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.0073402514681220055, |
|
"rewards/margins": 0.016111046075820923, |
|
"rewards/rejected": -0.023451298475265503, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.29357798165137616, |
|
"grad_norm": 1.9104901552200317, |
|
"learning_rate": 4.8780487804878055e-06, |
|
"logits/chosen": -0.8466785550117493, |
|
"logits/rejected": -2.214355945587158, |
|
"logps/chosen": -325.6095275878906, |
|
"logps/rejected": -189.9873046875, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00729791447520256, |
|
"rewards/margins": 0.011406856589019299, |
|
"rewards/rejected": -0.018704771995544434, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.30825688073394497, |
|
"grad_norm": 1.8166546821594238, |
|
"learning_rate": 4.999908404322799e-06, |
|
"logits/chosen": -0.8763958215713501, |
|
"logits/rejected": -2.4433987140655518, |
|
"logps/chosen": -300.2408752441406, |
|
"logps/rejected": -182.14207458496094, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.0032976572401821613, |
|
"rewards/margins": 0.028552744537591934, |
|
"rewards/rejected": -0.02525508403778076, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3229357798165138, |
|
"grad_norm": 1.839516043663025, |
|
"learning_rate": 4.999175679175577e-06, |
|
"logits/chosen": -0.9340443015098572, |
|
"logits/rejected": -2.2553768157958984, |
|
"logps/chosen": -234.7823486328125, |
|
"logps/rejected": -172.17637634277344, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.006214863620698452, |
|
"rewards/margins": 0.04187798872590065, |
|
"rewards/rejected": -0.03566312417387962, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3376146788990826, |
|
"grad_norm": 1.9286494255065918, |
|
"learning_rate": 4.997710443643461e-06, |
|
"logits/chosen": -0.9604765772819519, |
|
"logits/rejected": -2.187485694885254, |
|
"logps/chosen": -242.36959838867188, |
|
"logps/rejected": -215.9381103515625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.011644757352769375, |
|
"rewards/margins": 0.019570371136069298, |
|
"rewards/rejected": -0.0312151238322258, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3522935779816514, |
|
"grad_norm": 2.0474560260772705, |
|
"learning_rate": 4.995513127188151e-06, |
|
"logits/chosen": -0.9198620319366455, |
|
"logits/rejected": -2.353731155395508, |
|
"logps/chosen": -342.00421142578125, |
|
"logps/rejected": -190.65626525878906, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.014831364154815674, |
|
"rewards/margins": 0.0657879188656807, |
|
"rewards/rejected": -0.05095655843615532, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 2.0733015537261963, |
|
"learning_rate": 4.992584373844853e-06, |
|
"logits/chosen": -1.0313482284545898, |
|
"logits/rejected": -2.189700126647949, |
|
"logps/chosen": -327.1130065917969, |
|
"logps/rejected": -193.40577697753906, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.002555199433118105, |
|
"rewards/margins": 0.04951968789100647, |
|
"rewards/rejected": -0.04696448892354965, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.381651376146789, |
|
"grad_norm": 1.9505176544189453, |
|
"learning_rate": 4.98892504203351e-06, |
|
"logits/chosen": -1.071111798286438, |
|
"logits/rejected": -2.2670626640319824, |
|
"logps/chosen": -263.16058349609375, |
|
"logps/rejected": -167.900390625, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.0007026728708297014, |
|
"rewards/margins": 0.052561890333890915, |
|
"rewards/rejected": -0.05185921862721443, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3963302752293578, |
|
"grad_norm": 1.940078854560852, |
|
"learning_rate": 4.9845362043071925e-06, |
|
"logits/chosen": -0.8662849068641663, |
|
"logits/rejected": -2.2801122665405273, |
|
"logps/chosen": -272.940673828125, |
|
"logps/rejected": -171.84495544433594, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.007315092254430056, |
|
"rewards/margins": 0.07306656986474991, |
|
"rewards/rejected": -0.08038166165351868, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.41100917431192663, |
|
"grad_norm": 2.0381312370300293, |
|
"learning_rate": 4.97941914703774e-06, |
|
"logits/chosen": -0.9563165903091431, |
|
"logits/rejected": -2.2946839332580566, |
|
"logps/chosen": -269.4028015136719, |
|
"logps/rejected": -211.50933837890625, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 0.006978507619351149, |
|
"rewards/margins": 0.10549122840166092, |
|
"rewards/rejected": -0.09851271659135818, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.42568807339449544, |
|
"grad_norm": 2.180021286010742, |
|
"learning_rate": 4.973575370038718e-06, |
|
"logits/chosen": -0.8351713418960571, |
|
"logits/rejected": -2.1809074878692627, |
|
"logps/chosen": -285.6255798339844, |
|
"logps/rejected": -202.8754119873047, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.02219582349061966, |
|
"rewards/margins": 0.11899632215499878, |
|
"rewards/rejected": -0.09680050611495972, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.44036697247706424, |
|
"grad_norm": 2.023611545562744, |
|
"learning_rate": 4.967006586125827e-06, |
|
"logits/chosen": -1.099950909614563, |
|
"logits/rejected": -2.2148141860961914, |
|
"logps/chosen": -283.2462158203125, |
|
"logps/rejected": -195.86944580078125, |
|
"loss": 0.6195, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": 0.03062710538506508, |
|
"rewards/margins": 0.15735232830047607, |
|
"rewards/rejected": -0.1267252117395401, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45504587155963305, |
|
"grad_norm": 2.1152608394622803, |
|
"learning_rate": 4.959714720614871e-06, |
|
"logits/chosen": -0.9920430183410645, |
|
"logits/rejected": -2.3323516845703125, |
|
"logps/chosen": -300.185791015625, |
|
"logps/rejected": -194.13824462890625, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.019140686839818954, |
|
"rewards/margins": 0.1737564355134964, |
|
"rewards/rejected": -0.15461575984954834, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.46972477064220186, |
|
"grad_norm": 2.0386292934417725, |
|
"learning_rate": 4.951701910757446e-06, |
|
"logits/chosen": -0.9583941102027893, |
|
"logits/rejected": -2.1204147338867188, |
|
"logps/chosen": -236.59213256835938, |
|
"logps/rejected": -199.07980346679688, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.02290705032646656, |
|
"rewards/margins": 0.18515917658805847, |
|
"rewards/rejected": -0.16225211322307587, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.48440366972477067, |
|
"grad_norm": 2.033529043197632, |
|
"learning_rate": 4.942970505114514e-06, |
|
"logits/chosen": -0.8373759388923645, |
|
"logits/rejected": -2.231731414794922, |
|
"logps/chosen": -290.7356262207031, |
|
"logps/rejected": -188.03863525390625, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.026615679264068604, |
|
"rewards/margins": 0.24059633910655975, |
|
"rewards/rejected": -0.21398067474365234, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4990825688073395, |
|
"grad_norm": 1.8862334489822388, |
|
"learning_rate": 4.933523062868033e-06, |
|
"logits/chosen": -0.8499428629875183, |
|
"logits/rejected": -2.3141591548919678, |
|
"logps/chosen": -253.00765991210938, |
|
"logps/rejected": -174.625732421875, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.921875, |
|
"rewards/chosen": -0.006930825300514698, |
|
"rewards/margins": 0.21197618544101715, |
|
"rewards/rejected": -0.21890701353549957, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.5137614678899083, |
|
"grad_norm": 2.032834053039551, |
|
"learning_rate": 4.923362353070859e-06, |
|
"logits/chosen": -0.6541658639907837, |
|
"logits/rejected": -2.2294771671295166, |
|
"logps/chosen": -271.1413879394531, |
|
"logps/rejected": -170.8126220703125, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.015698038041591644, |
|
"rewards/margins": 0.261942982673645, |
|
"rewards/rejected": -0.2462449073791504, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5284403669724771, |
|
"grad_norm": 1.8714652061462402, |
|
"learning_rate": 4.912491353835138e-06, |
|
"logits/chosen": -0.9249717593193054, |
|
"logits/rejected": -2.170142650604248, |
|
"logps/chosen": -245.40841674804688, |
|
"logps/rejected": -196.6009521484375, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": 0.004103909246623516, |
|
"rewards/margins": 0.29537737369537354, |
|
"rewards/rejected": -0.29127344489097595, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.5431192660550459, |
|
"grad_norm": 1.8837171792984009, |
|
"learning_rate": 4.900913251459418e-06, |
|
"logits/chosen": -0.8780805468559265, |
|
"logits/rejected": -2.149808406829834, |
|
"logps/chosen": -252.37844848632812, |
|
"logps/rejected": -185.75148010253906, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.003393257036805153, |
|
"rewards/margins": 0.3080388009548187, |
|
"rewards/rejected": -0.3114320635795593, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5577981651376147, |
|
"grad_norm": 1.8990751504898071, |
|
"learning_rate": 4.8886314394947396e-06, |
|
"logits/chosen": -0.7858492732048035, |
|
"logits/rejected": -2.1859428882598877, |
|
"logps/chosen": -260.1080322265625, |
|
"logps/rejected": -198.69061279296875, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.011569229885935783, |
|
"rewards/margins": 0.3970867097377777, |
|
"rewards/rejected": -0.3855174779891968, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5724770642201835, |
|
"grad_norm": 1.9948140382766724, |
|
"learning_rate": 4.875649517749985e-06, |
|
"logits/chosen": -0.8377081751823425, |
|
"logits/rejected": -2.2524642944335938, |
|
"logps/chosen": -267.073486328125, |
|
"logps/rejected": -202.4425811767578, |
|
"loss": 0.5145, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.0006968197412788868, |
|
"rewards/margins": 0.4172001779079437, |
|
"rewards/rejected": -0.41650334000587463, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5871559633027523, |
|
"grad_norm": 1.8098971843719482, |
|
"learning_rate": 4.861971291236772e-06, |
|
"logits/chosen": -0.957794725894928, |
|
"logits/rejected": -2.141266345977783, |
|
"logps/chosen": -311.6953430175781, |
|
"logps/rejected": -200.5380859375, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.013315271586179733, |
|
"rewards/margins": 0.3864455223083496, |
|
"rewards/rejected": -0.3731302320957184, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6018348623853211, |
|
"grad_norm": 1.9890069961547852, |
|
"learning_rate": 4.847600769054201e-06, |
|
"logits/chosen": -0.8957269191741943, |
|
"logits/rejected": -2.102496385574341, |
|
"logps/chosen": -346.88726806640625, |
|
"logps/rejected": -232.16412353515625, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.019277315586805344, |
|
"rewards/margins": 0.4478000998497009, |
|
"rewards/rejected": -0.4285227954387665, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.6165137614678899, |
|
"grad_norm": 1.801570177078247, |
|
"learning_rate": 4.832542163213787e-06, |
|
"logits/chosen": -0.7643452882766724, |
|
"logits/rejected": -2.177706718444824, |
|
"logps/chosen": -246.15560913085938, |
|
"logps/rejected": -167.4416046142578, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.00458180159330368, |
|
"rewards/margins": 0.48400723934173584, |
|
"rewards/rejected": -0.4885890483856201, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.6311926605504588, |
|
"grad_norm": 1.8030428886413574, |
|
"learning_rate": 4.816799887404911e-06, |
|
"logits/chosen": -0.9738877415657043, |
|
"logits/rejected": -2.146649122238159, |
|
"logps/chosen": -286.7640686035156, |
|
"logps/rejected": -197.47161865234375, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.009418713860213757, |
|
"rewards/margins": 0.5085384845733643, |
|
"rewards/rejected": -0.4991198182106018, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.6458715596330276, |
|
"grad_norm": 1.814665675163269, |
|
"learning_rate": 4.800378555701168e-06, |
|
"logits/chosen": -0.7987996339797974, |
|
"logits/rejected": -2.0137877464294434, |
|
"logps/chosen": -339.0395202636719, |
|
"logps/rejected": -201.66848754882812, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0017917649820446968, |
|
"rewards/margins": 0.6508389115333557, |
|
"rewards/rejected": -0.6526306867599487, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.6605504587155964, |
|
"grad_norm": 1.9332906007766724, |
|
"learning_rate": 4.783282981207979e-06, |
|
"logits/chosen": -0.8255445957183838, |
|
"logits/rejected": -2.3335933685302734, |
|
"logps/chosen": -282.8710632324219, |
|
"logps/rejected": -184.0906524658203, |
|
"loss": 0.4509, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.0369749516248703, |
|
"rewards/margins": 0.6082916855812073, |
|
"rewards/rejected": -0.6452666521072388, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6752293577981652, |
|
"grad_norm": 1.6867598295211792, |
|
"learning_rate": 4.765518174651864e-06, |
|
"logits/chosen": -0.8639607429504395, |
|
"logits/rejected": -2.074816942214966, |
|
"logps/chosen": -271.90521240234375, |
|
"logps/rejected": -205.10400390625, |
|
"loss": 0.4227, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.017826007679104805, |
|
"rewards/margins": 0.6985148787498474, |
|
"rewards/rejected": -0.7163408398628235, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.689908256880734, |
|
"grad_norm": 1.8225523233413696, |
|
"learning_rate": 4.747089342911793e-06, |
|
"logits/chosen": -0.652176022529602, |
|
"logits/rejected": -2.121016502380371, |
|
"logps/chosen": -276.7890930175781, |
|
"logps/rejected": -188.45567321777344, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.014648550190031528, |
|
"rewards/margins": 0.7090162038803101, |
|
"rewards/rejected": -0.7236647605895996, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.7045871559633028, |
|
"grad_norm": 1.730716586112976, |
|
"learning_rate": 4.728001887493048e-06, |
|
"logits/chosen": -0.7071309685707092, |
|
"logits/rejected": -2.1385228633880615, |
|
"logps/chosen": -284.6495056152344, |
|
"logps/rejected": -207.17828369140625, |
|
"loss": 0.4037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.011353883892297745, |
|
"rewards/margins": 0.7462868690490723, |
|
"rewards/rejected": -0.7576407790184021, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.7192660550458716, |
|
"grad_norm": 1.7052290439605713, |
|
"learning_rate": 4.708261402944036e-06, |
|
"logits/chosen": -0.8458466529846191, |
|
"logits/rejected": -2.128801107406616, |
|
"logps/chosen": -300.57489013671875, |
|
"logps/rejected": -204.04148864746094, |
|
"loss": 0.3953, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.1128288209438324, |
|
"rewards/margins": 0.834511399269104, |
|
"rewards/rejected": -0.9473402500152588, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 1.7650856971740723, |
|
"learning_rate": 4.687873675216522e-06, |
|
"logits/chosen": -0.694564700126648, |
|
"logits/rejected": -1.9271796941757202, |
|
"logps/chosen": -287.0938415527344, |
|
"logps/rejected": -212.9856414794922, |
|
"loss": 0.3808, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.029861731454730034, |
|
"rewards/margins": 0.8324739933013916, |
|
"rewards/rejected": -0.862335741519928, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7486238532110092, |
|
"grad_norm": 1.5149580240249634, |
|
"learning_rate": 4.666844679969765e-06, |
|
"logits/chosen": -0.9843853116035461, |
|
"logits/rejected": -2.171410083770752, |
|
"logps/chosen": -286.80181884765625, |
|
"logps/rejected": -227.1826629638672, |
|
"loss": 0.3233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03178291767835617, |
|
"rewards/margins": 1.0707846879959106, |
|
"rewards/rejected": -1.1025675535202026, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.763302752293578, |
|
"grad_norm": 1.7084800004959106, |
|
"learning_rate": 4.6451805808190464e-06, |
|
"logits/chosen": -0.7646867632865906, |
|
"logits/rejected": -2.0860140323638916, |
|
"logps/chosen": -272.03631591796875, |
|
"logps/rejected": -193.9704132080078, |
|
"loss": 0.3502, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09451958537101746, |
|
"rewards/margins": 0.9896395206451416, |
|
"rewards/rejected": -1.0841591358184814, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7779816513761468, |
|
"grad_norm": 1.6702779531478882, |
|
"learning_rate": 4.622887727529104e-06, |
|
"logits/chosen": -0.7461971044540405, |
|
"logits/rejected": -2.0296335220336914, |
|
"logps/chosen": -245.9069061279297, |
|
"logps/rejected": -227.5930633544922, |
|
"loss": 0.3127, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.1017722487449646, |
|
"rewards/margins": 1.1482524871826172, |
|
"rewards/rejected": -1.2500247955322266, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7926605504587156, |
|
"grad_norm": 1.4205806255340576, |
|
"learning_rate": 4.599972654153018e-06, |
|
"logits/chosen": -0.6252501606941223, |
|
"logits/rejected": -2.0300350189208984, |
|
"logps/chosen": -287.1758728027344, |
|
"logps/rejected": -191.18710327148438, |
|
"loss": 0.303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03701897710561752, |
|
"rewards/margins": 1.180790901184082, |
|
"rewards/rejected": -1.217809796333313, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.8073394495412844, |
|
"grad_norm": 1.5258698463439941, |
|
"learning_rate": 4.5764420771170735e-06, |
|
"logits/chosen": -0.6961276531219482, |
|
"logits/rejected": -2.080204725265503, |
|
"logps/chosen": -265.47674560546875, |
|
"logps/rejected": -210.6199493408203, |
|
"loss": 0.317, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.10400635749101639, |
|
"rewards/margins": 1.1012389659881592, |
|
"rewards/rejected": -1.2052453756332397, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8220183486238533, |
|
"grad_norm": 1.397694706916809, |
|
"learning_rate": 4.552302893252166e-06, |
|
"logits/chosen": -0.9548951387405396, |
|
"logits/rejected": -2.0966246128082275, |
|
"logps/chosen": -294.2589111328125, |
|
"logps/rejected": -223.81103515625, |
|
"loss": 0.2781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0296751968562603, |
|
"rewards/margins": 1.2581570148468018, |
|
"rewards/rejected": -1.287832260131836, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.8366972477064221, |
|
"grad_norm": 1.4720474481582642, |
|
"learning_rate": 4.52756217777234e-06, |
|
"logits/chosen": -0.9968761801719666, |
|
"logits/rejected": -2.1294007301330566, |
|
"logps/chosen": -298.44085693359375, |
|
"logps/rejected": -224.59336853027344, |
|
"loss": 0.3122, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.12370166927576065, |
|
"rewards/margins": 1.1629935503005981, |
|
"rewards/rejected": -1.286695122718811, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.8513761467889909, |
|
"grad_norm": 1.354582667350769, |
|
"learning_rate": 4.502227182201035e-06, |
|
"logits/chosen": -0.6440415382385254, |
|
"logits/rejected": -1.8898322582244873, |
|
"logps/chosen": -253.26058959960938, |
|
"logps/rejected": -196.10592651367188, |
|
"loss": 0.2555, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.07564966380596161, |
|
"rewards/margins": 1.4416813850402832, |
|
"rewards/rejected": -1.5173311233520508, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.8660550458715597, |
|
"grad_norm": 1.3083575963974, |
|
"learning_rate": 4.476305332245662e-06, |
|
"logits/chosen": -0.7695977687835693, |
|
"logits/rejected": -2.1613001823425293, |
|
"logps/chosen": -302.2278137207031, |
|
"logps/rejected": -171.18521118164062, |
|
"loss": 0.2852, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09552773833274841, |
|
"rewards/margins": 1.2843879461288452, |
|
"rewards/rejected": -1.379915475845337, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.8807339449541285, |
|
"grad_norm": 1.4233521223068237, |
|
"learning_rate": 4.449804225621116e-06, |
|
"logits/chosen": -0.769046425819397, |
|
"logits/rejected": -2.0579347610473633, |
|
"logps/chosen": -268.5230407714844, |
|
"logps/rejected": -199.9818572998047, |
|
"loss": 0.273, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.09396351873874664, |
|
"rewards/margins": 1.3788070678710938, |
|
"rewards/rejected": -1.4727706909179688, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8954128440366973, |
|
"grad_norm": 1.5535210371017456, |
|
"learning_rate": 4.422731629822887e-06, |
|
"logits/chosen": -0.6471429467201233, |
|
"logits/rejected": -1.874241828918457, |
|
"logps/chosen": -303.5356750488281, |
|
"logps/rejected": -214.80975341796875, |
|
"loss": 0.2744, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.19331903755664825, |
|
"rewards/margins": 1.402610182762146, |
|
"rewards/rejected": -1.5959291458129883, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.9100917431192661, |
|
"grad_norm": 1.4141969680786133, |
|
"learning_rate": 4.395095479850396e-06, |
|
"logits/chosen": -0.7038341760635376, |
|
"logits/rejected": -1.7795767784118652, |
|
"logps/chosen": -278.2030334472656, |
|
"logps/rejected": -206.88665771484375, |
|
"loss": 0.298, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.1966189742088318, |
|
"rewards/margins": 1.306958794593811, |
|
"rewards/rejected": -1.503577709197998, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.9247706422018349, |
|
"grad_norm": 1.2042831182479858, |
|
"learning_rate": 4.366903875881243e-06, |
|
"logits/chosen": -0.7927061319351196, |
|
"logits/rejected": -2.196439743041992, |
|
"logps/chosen": -264.9695129394531, |
|
"logps/rejected": -187.66848754882812, |
|
"loss": 0.2327, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.16811293363571167, |
|
"rewards/margins": 1.5934040546417236, |
|
"rewards/rejected": -1.76151704788208, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.9394495412844037, |
|
"grad_norm": 1.2989264726638794, |
|
"learning_rate": 4.3381650808970365e-06, |
|
"logits/chosen": -0.7110509872436523, |
|
"logits/rejected": -1.7662936449050903, |
|
"logps/chosen": -244.37979125976562, |
|
"logps/rejected": -208.79306030273438, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.14624029397964478, |
|
"rewards/margins": 1.6342490911483765, |
|
"rewards/rejected": -1.7804893255233765, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.9541284403669725, |
|
"grad_norm": 1.248761773109436, |
|
"learning_rate": 4.308887518261507e-06, |
|
"logits/chosen": -0.5622550249099731, |
|
"logits/rejected": -1.7699952125549316, |
|
"logps/chosen": -268.9392395019531, |
|
"logps/rejected": -218.73167419433594, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.11277558654546738, |
|
"rewards/margins": 1.7473864555358887, |
|
"rewards/rejected": -1.8601620197296143, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9688073394495413, |
|
"grad_norm": 1.3512104749679565, |
|
"learning_rate": 4.279079769251617e-06, |
|
"logits/chosen": -0.8375473618507385, |
|
"logits/rejected": -2.039938449859619, |
|
"logps/chosen": -341.29595947265625, |
|
"logps/rejected": -234.00616455078125, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11729223281145096, |
|
"rewards/margins": 1.6870366334915161, |
|
"rewards/rejected": -1.8043287992477417, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9834862385321101, |
|
"grad_norm": 1.2423762083053589, |
|
"learning_rate": 4.248750570542373e-06, |
|
"logits/chosen": -0.7437604069709778, |
|
"logits/rejected": -1.9573638439178467, |
|
"logps/chosen": -260.789794921875, |
|
"logps/rejected": -202.69984436035156, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1173069104552269, |
|
"rewards/margins": 1.8479876518249512, |
|
"rewards/rejected": -1.965294599533081, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.998165137614679, |
|
"grad_norm": 1.075205683708191, |
|
"learning_rate": 4.21790881164611e-06, |
|
"logits/chosen": -0.6165802478790283, |
|
"logits/rejected": -1.9370815753936768, |
|
"logps/chosen": -272.431884765625, |
|
"logps/rejected": -222.2723388671875, |
|
"loss": 0.1783, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1736324280500412, |
|
"rewards/margins": 2.0683982372283936, |
|
"rewards/rejected": -2.2420308589935303, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.0128440366972478, |
|
"grad_norm": 1.2893800735473633, |
|
"learning_rate": 4.186563532306957e-06, |
|
"logits/chosen": -0.6493566632270813, |
|
"logits/rejected": -1.9289031028747559, |
|
"logps/chosen": -281.0344543457031, |
|
"logps/rejected": -191.85787963867188, |
|
"loss": 0.2379, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.31633660197257996, |
|
"rewards/margins": 1.6654047966003418, |
|
"rewards/rejected": -1.9817413091659546, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.0275229357798166, |
|
"grad_norm": 1.2571147680282593, |
|
"learning_rate": 4.154723919851291e-06, |
|
"logits/chosen": -0.8061354160308838, |
|
"logits/rejected": -1.9505449533462524, |
|
"logps/chosen": -283.486083984375, |
|
"logps/rejected": -197.98060607910156, |
|
"loss": 0.2287, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.296590119600296, |
|
"rewards/margins": 1.6940876245498657, |
|
"rewards/rejected": -1.990677833557129, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0422018348623854, |
|
"grad_norm": 1.1618239879608154, |
|
"learning_rate": 4.122399306494918e-06, |
|
"logits/chosen": -0.8136368989944458, |
|
"logits/rejected": -2.0863683223724365, |
|
"logps/chosen": -327.2650146484375, |
|
"logps/rejected": -223.3490753173828, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.327208012342453, |
|
"rewards/margins": 1.8999422788619995, |
|
"rewards/rejected": -2.2271502017974854, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.0568807339449542, |
|
"grad_norm": 1.1636958122253418, |
|
"learning_rate": 4.089599166607794e-06, |
|
"logits/chosen": -0.7669267058372498, |
|
"logits/rejected": -1.9041523933410645, |
|
"logps/chosen": -284.34210205078125, |
|
"logps/rejected": -217.7493133544922, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3707123100757599, |
|
"rewards/margins": 2.189767360687256, |
|
"rewards/rejected": -2.5604796409606934, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.071559633027523, |
|
"grad_norm": 1.1475207805633545, |
|
"learning_rate": 4.05633311393708e-06, |
|
"logits/chosen": -0.60993492603302, |
|
"logits/rejected": -1.7652596235275269, |
|
"logps/chosen": -250.69619750976562, |
|
"logps/rejected": -198.53050231933594, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.399620920419693, |
|
"rewards/margins": 2.0180652141571045, |
|
"rewards/rejected": -2.4176859855651855, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.0862385321100918, |
|
"grad_norm": 1.0832470655441284, |
|
"learning_rate": 4.022610898789349e-06, |
|
"logits/chosen": -0.6742774844169617, |
|
"logits/rejected": -1.9231276512145996, |
|
"logps/chosen": -258.0038757324219, |
|
"logps/rejected": -214.44337463378906, |
|
"loss": 0.1719, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2988199293613434, |
|
"rewards/margins": 2.191661834716797, |
|
"rewards/rejected": -2.4904818534851074, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 1.4428913593292236, |
|
"learning_rate": 3.988442405172755e-06, |
|
"logits/chosen": -0.49406272172927856, |
|
"logits/rejected": -1.771551489830017, |
|
"logps/chosen": -272.45654296875, |
|
"logps/rejected": -232.3162841796875, |
|
"loss": 0.1474, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.2635681927204132, |
|
"rewards/margins": 2.3189291954040527, |
|
"rewards/rejected": -2.5824971199035645, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.1155963302752294, |
|
"grad_norm": 0.7493584752082825, |
|
"learning_rate": 3.953837647900031e-06, |
|
"logits/chosen": -0.6438045501708984, |
|
"logits/rejected": -1.8923343420028687, |
|
"logps/chosen": -263.40106201171875, |
|
"logps/rejected": -226.0013427734375, |
|
"loss": 0.1388, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.19538956880569458, |
|
"rewards/margins": 2.50761342048645, |
|
"rewards/rejected": -2.703003168106079, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.1302752293577982, |
|
"grad_norm": 1.5492652654647827, |
|
"learning_rate": 3.918806769653135e-06, |
|
"logits/chosen": -0.5117900371551514, |
|
"logits/rejected": -1.775586724281311, |
|
"logps/chosen": -309.75885009765625, |
|
"logps/rejected": -225.79420471191406, |
|
"loss": 0.2011, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.4026896357536316, |
|
"rewards/margins": 2.218205451965332, |
|
"rewards/rejected": -2.6208949089050293, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.144954128440367, |
|
"grad_norm": 1.2843326330184937, |
|
"learning_rate": 3.88336003801042e-06, |
|
"logits/chosen": -0.6652198433876038, |
|
"logits/rejected": -1.7710597515106201, |
|
"logps/chosen": -246.24229431152344, |
|
"logps/rejected": -204.2578125, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.3051142990589142, |
|
"rewards/margins": 2.153491497039795, |
|
"rewards/rejected": -2.4586057662963867, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.1596330275229358, |
|
"grad_norm": 0.8037837147712708, |
|
"learning_rate": 3.847507842437205e-06, |
|
"logits/chosen": -0.5658106803894043, |
|
"logits/rejected": -1.8227128982543945, |
|
"logps/chosen": -290.1756286621094, |
|
"logps/rejected": -201.77011108398438, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3708067536354065, |
|
"rewards/margins": 2.446641206741333, |
|
"rewards/rejected": -2.8174479007720947, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1743119266055047, |
|
"grad_norm": 1.2949639558792114, |
|
"learning_rate": 3.811260691240604e-06, |
|
"logits/chosen": -0.5217835903167725, |
|
"logits/rejected": -1.8691177368164062, |
|
"logps/chosen": -331.89788818359375, |
|
"logps/rejected": -220.96426391601562, |
|
"loss": 0.1652, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.45729154348373413, |
|
"rewards/margins": 2.46223783493042, |
|
"rewards/rejected": -2.9195291996002197, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.1889908256880735, |
|
"grad_norm": 1.2344180345535278, |
|
"learning_rate": 3.774629208489547e-06, |
|
"logits/chosen": -0.631909966468811, |
|
"logits/rejected": -1.9337414503097534, |
|
"logps/chosen": -235.50601196289062, |
|
"logps/rejected": -202.8614044189453, |
|
"loss": 0.1554, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.4534618854522705, |
|
"rewards/margins": 2.3637375831604004, |
|
"rewards/rejected": -2.817199468612671, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.2036697247706423, |
|
"grad_norm": 0.9420382976531982, |
|
"learning_rate": 3.7376241309008433e-06, |
|
"logits/chosen": -0.780289888381958, |
|
"logits/rejected": -1.9408084154129028, |
|
"logps/chosen": -317.29364013671875, |
|
"logps/rejected": -214.83853149414062, |
|
"loss": 0.1339, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.27944618463516235, |
|
"rewards/margins": 2.5412490367889404, |
|
"rewards/rejected": -2.820695400238037, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.218348623853211, |
|
"grad_norm": 0.9693976640701294, |
|
"learning_rate": 3.7002563046922502e-06, |
|
"logits/chosen": -0.7415852546691895, |
|
"logits/rejected": -1.9916393756866455, |
|
"logps/chosen": -319.46490478515625, |
|
"logps/rejected": -206.1062774658203, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.4460511803627014, |
|
"rewards/margins": 2.6280391216278076, |
|
"rewards/rejected": -3.0740902423858643, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.2330275229357799, |
|
"grad_norm": 0.6352249383926392, |
|
"learning_rate": 3.6625366824034337e-06, |
|
"logits/chosen": -0.4777001142501831, |
|
"logits/rejected": -1.8029272556304932, |
|
"logps/chosen": -270.57958984375, |
|
"logps/rejected": -244.18014526367188, |
|
"loss": 0.1086, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.3066992461681366, |
|
"rewards/margins": 3.132423162460327, |
|
"rewards/rejected": -3.439122438430786, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.2477064220183487, |
|
"grad_norm": 1.2337560653686523, |
|
"learning_rate": 3.6244763196857714e-06, |
|
"logits/chosen": -0.5872082710266113, |
|
"logits/rejected": -1.7992435693740845, |
|
"logps/chosen": -289.64788818359375, |
|
"logps/rejected": -216.86793518066406, |
|
"loss": 0.141, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.5070605874061584, |
|
"rewards/margins": 2.9018495082855225, |
|
"rewards/rejected": -3.408910036087036, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2623853211009175, |
|
"grad_norm": 0.9028024673461914, |
|
"learning_rate": 3.5860863720619333e-06, |
|
"logits/chosen": -0.6659822463989258, |
|
"logits/rejected": -1.8413385152816772, |
|
"logps/chosen": -280.3963623046875, |
|
"logps/rejected": -216.70907592773438, |
|
"loss": 0.1117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3429238796234131, |
|
"rewards/margins": 2.7253475189208984, |
|
"rewards/rejected": -3.0682713985443115, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.2770642201834863, |
|
"grad_norm": 1.1501646041870117, |
|
"learning_rate": 3.547378091656186e-06, |
|
"logits/chosen": -0.5201318264007568, |
|
"logits/rejected": -1.8148276805877686, |
|
"logps/chosen": -287.64886474609375, |
|
"logps/rejected": -212.76695251464844, |
|
"loss": 0.114, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.4755076467990875, |
|
"rewards/margins": 3.0462517738342285, |
|
"rewards/rejected": -3.521759033203125, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.2917431192660551, |
|
"grad_norm": 1.032481074333191, |
|
"learning_rate": 3.5083628238963913e-06, |
|
"logits/chosen": -0.7503701448440552, |
|
"logits/rejected": -1.6679210662841797, |
|
"logps/chosen": -228.3303985595703, |
|
"logps/rejected": -207.5117645263672, |
|
"loss": 0.1515, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.3945405185222626, |
|
"rewards/margins": 2.6488091945648193, |
|
"rewards/rejected": -3.0433499813079834, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.306422018348624, |
|
"grad_norm": 0.6138296723365784, |
|
"learning_rate": 3.4690520041886473e-06, |
|
"logits/chosen": -0.5367707014083862, |
|
"logits/rejected": -1.7213959693908691, |
|
"logps/chosen": -267.9410705566406, |
|
"logps/rejected": -249.54847717285156, |
|
"loss": 0.0978, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.42533883452415466, |
|
"rewards/margins": 3.054936408996582, |
|
"rewards/rejected": -3.4802753925323486, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.3211009174311927, |
|
"grad_norm": 0.6739828586578369, |
|
"learning_rate": 3.4294571545655653e-06, |
|
"logits/chosen": -0.5461620688438416, |
|
"logits/rejected": -1.905317783355713, |
|
"logps/chosen": -284.7737731933594, |
|
"logps/rejected": -219.20343017578125, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4114089012145996, |
|
"rewards/margins": 3.219048023223877, |
|
"rewards/rejected": -3.6304564476013184, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.3357798165137615, |
|
"grad_norm": 0.8482432961463928, |
|
"learning_rate": 3.38958988030915e-06, |
|
"logits/chosen": -0.8901345729827881, |
|
"logits/rejected": -1.8333852291107178, |
|
"logps/chosen": -267.26361083984375, |
|
"logps/rejected": -262.9141845703125, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.4545396566390991, |
|
"rewards/margins": 3.137350082397461, |
|
"rewards/rejected": -3.5918896198272705, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.3504587155963304, |
|
"grad_norm": 1.3089014291763306, |
|
"learning_rate": 3.3494618665492833e-06, |
|
"logits/chosen": -0.755707859992981, |
|
"logits/rejected": -1.7024749517440796, |
|
"logps/chosen": -250.79086303710938, |
|
"logps/rejected": -227.2308349609375, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7349607348442078, |
|
"rewards/margins": 2.6569156646728516, |
|
"rewards/rejected": -3.391876459121704, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.3651376146788992, |
|
"grad_norm": 1.3347550630569458, |
|
"learning_rate": 3.3090848748388042e-06, |
|
"logits/chosen": -0.6413676738739014, |
|
"logits/rejected": -1.825758457183838, |
|
"logps/chosen": -347.5032653808594, |
|
"logps/rejected": -237.12234497070312, |
|
"loss": 0.1274, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.6479396820068359, |
|
"rewards/margins": 3.553611993789673, |
|
"rewards/rejected": -4.201551914215088, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.379816513761468, |
|
"grad_norm": 1.3518805503845215, |
|
"learning_rate": 3.2684707397061887e-06, |
|
"logits/chosen": -0.6914655566215515, |
|
"logits/rejected": -1.7866692543029785, |
|
"logps/chosen": -285.770751953125, |
|
"logps/rejected": -212.2018585205078, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.5680760741233826, |
|
"rewards/margins": 3.106520652770996, |
|
"rewards/rejected": -3.6745967864990234, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.3944954128440368, |
|
"grad_norm": 0.8207036256790161, |
|
"learning_rate": 3.2276313651868364e-06, |
|
"logits/chosen": -0.6406370997428894, |
|
"logits/rejected": -1.84329354763031, |
|
"logps/chosen": -293.601806640625, |
|
"logps/rejected": -199.5387420654297, |
|
"loss": 0.1137, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.6545761823654175, |
|
"rewards/margins": 2.952256917953491, |
|
"rewards/rejected": -3.606832981109619, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.4091743119266056, |
|
"grad_norm": 0.5552340149879456, |
|
"learning_rate": 3.1865787213339926e-06, |
|
"logits/chosen": -0.6593654155731201, |
|
"logits/rejected": -1.8798208236694336, |
|
"logps/chosen": -274.5489501953125, |
|
"logps/rejected": -228.21701049804688, |
|
"loss": 0.1071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5877947211265564, |
|
"rewards/margins": 3.3616068363189697, |
|
"rewards/rejected": -3.94940185546875, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.4238532110091744, |
|
"grad_norm": 1.2562384605407715, |
|
"learning_rate": 3.1453248407103156e-06, |
|
"logits/chosen": -0.7084455490112305, |
|
"logits/rejected": -1.8440113067626953, |
|
"logps/chosen": -284.3964538574219, |
|
"logps/rejected": -209.3497314453125, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.7787793278694153, |
|
"rewards/margins": 2.995060443878174, |
|
"rewards/rejected": -3.7738397121429443, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.4385321100917432, |
|
"grad_norm": 1.6725304126739502, |
|
"learning_rate": 3.1038818148611178e-06, |
|
"logits/chosen": -0.6947388648986816, |
|
"logits/rejected": -1.848347544670105, |
|
"logps/chosen": -306.3591003417969, |
|
"logps/rejected": -223.3616943359375, |
|
"loss": 0.135, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.6587986946105957, |
|
"rewards/margins": 3.338867664337158, |
|
"rewards/rejected": -3.997666120529175, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.453211009174312, |
|
"grad_norm": 0.9124309420585632, |
|
"learning_rate": 3.062261790770331e-06, |
|
"logits/chosen": -0.5535175800323486, |
|
"logits/rejected": -1.734500527381897, |
|
"logps/chosen": -254.01608276367188, |
|
"logps/rejected": -220.9300994873047, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.5250685214996338, |
|
"rewards/margins": 3.1529276371002197, |
|
"rewards/rejected": -3.6779963970184326, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 0.8558027744293213, |
|
"learning_rate": 3.0204769673002123e-06, |
|
"logits/chosen": -0.6569163203239441, |
|
"logits/rejected": -1.8613836765289307, |
|
"logps/chosen": -328.30047607421875, |
|
"logps/rejected": -240.08651733398438, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.5923442840576172, |
|
"rewards/margins": 3.3542637825012207, |
|
"rewards/rejected": -3.946608543395996, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4825688073394496, |
|
"grad_norm": 0.998695969581604, |
|
"learning_rate": 2.978539591615848e-06, |
|
"logits/chosen": -0.7506656050682068, |
|
"logits/rejected": -1.6931097507476807, |
|
"logps/chosen": -295.73895263671875, |
|
"logps/rejected": -241.44549560546875, |
|
"loss": 0.0991, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8281759023666382, |
|
"rewards/margins": 3.4153995513916016, |
|
"rewards/rejected": -4.243575572967529, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.4972477064220184, |
|
"grad_norm": 1.2060972452163696, |
|
"learning_rate": 2.936461955595501e-06, |
|
"logits/chosen": -0.7403116226196289, |
|
"logits/rejected": -1.9083077907562256, |
|
"logps/chosen": -292.3081970214844, |
|
"logps/rejected": -228.6778106689453, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.5762312412261963, |
|
"rewards/margins": 3.2148995399475098, |
|
"rewards/rejected": -3.7911314964294434, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.5119266055045872, |
|
"grad_norm": 0.8591629862785339, |
|
"learning_rate": 2.8942563922278487e-06, |
|
"logits/chosen": -0.732769787311554, |
|
"logits/rejected": -1.8991410732269287, |
|
"logps/chosen": -282.7076721191406, |
|
"logps/rejected": -240.99298095703125, |
|
"loss": 0.0849, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6992833018302917, |
|
"rewards/margins": 3.560328722000122, |
|
"rewards/rejected": -4.259612083435059, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.526605504587156, |
|
"grad_norm": 2.0930187702178955, |
|
"learning_rate": 2.8519352719971783e-06, |
|
"logits/chosen": -0.7622525691986084, |
|
"logits/rejected": -1.8798342943191528, |
|
"logps/chosen": -311.9097900390625, |
|
"logps/rejected": -244.85189819335938, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6880066394805908, |
|
"rewards/margins": 3.4552805423736572, |
|
"rewards/rejected": -4.143287181854248, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.5412844036697249, |
|
"grad_norm": 0.9879018068313599, |
|
"learning_rate": 2.8095109992575824e-06, |
|
"logits/chosen": -0.6440776586532593, |
|
"logits/rejected": -1.8101108074188232, |
|
"logps/chosen": -319.416748046875, |
|
"logps/rejected": -243.45770263671875, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.561879575252533, |
|
"rewards/margins": 3.422084093093872, |
|
"rewards/rejected": -3.98396372795105, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.5559633027522937, |
|
"grad_norm": 0.8792589902877808, |
|
"learning_rate": 2.7669960085972407e-06, |
|
"logits/chosen": -0.6799444556236267, |
|
"logits/rejected": -1.9857302904129028, |
|
"logps/chosen": -347.2530212402344, |
|
"logps/rejected": -264.5433044433594, |
|
"loss": 0.0919, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8007718324661255, |
|
"rewards/margins": 3.4677510261535645, |
|
"rewards/rejected": -4.2685227394104, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.5706422018348625, |
|
"grad_norm": 0.6059939861297607, |
|
"learning_rate": 2.7244027611938247e-06, |
|
"logits/chosen": -0.48384833335876465, |
|
"logits/rejected": -1.7149556875228882, |
|
"logps/chosen": -247.654296875, |
|
"logps/rejected": -265.72039794921875, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6536756157875061, |
|
"rewards/margins": 3.7947700023651123, |
|
"rewards/rejected": -4.4484453201293945, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.5853211009174313, |
|
"grad_norm": 1.9957736730575562, |
|
"learning_rate": 2.6817437411621194e-06, |
|
"logits/chosen": -0.6645992994308472, |
|
"logits/rejected": -1.8484892845153809, |
|
"logps/chosen": -338.16943359375, |
|
"logps/rejected": -281.8222351074219, |
|
"loss": 0.1234, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.8013834953308105, |
|
"rewards/margins": 3.5191164016723633, |
|
"rewards/rejected": -4.320499897003174, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.6058154106140137, |
|
"learning_rate": 2.639031451894923e-06, |
|
"logits/chosen": -0.7305362224578857, |
|
"logits/rejected": -1.720422625541687, |
|
"logps/chosen": -322.87603759765625, |
|
"logps/rejected": -265.8577880859375, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6010589599609375, |
|
"rewards/margins": 3.54419207572937, |
|
"rewards/rejected": -4.1452507972717285, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.614678899082569, |
|
"grad_norm": 1.0787535905838013, |
|
"learning_rate": 2.5962784123982843e-06, |
|
"logits/chosen": -0.815077006816864, |
|
"logits/rejected": -1.973067283630371, |
|
"logps/chosen": -302.24835205078125, |
|
"logps/rejected": -244.04307556152344, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.7761982083320618, |
|
"rewards/margins": 3.647855520248413, |
|
"rewards/rejected": -4.42405366897583, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.6293577981651377, |
|
"grad_norm": 0.6867660880088806, |
|
"learning_rate": 2.5534971536221804e-06, |
|
"logits/chosen": -0.5493606925010681, |
|
"logits/rejected": -1.746401071548462, |
|
"logps/chosen": -265.7420349121094, |
|
"logps/rejected": -235.35208129882812, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7543711066246033, |
|
"rewards/margins": 3.5390028953552246, |
|
"rewards/rejected": -4.2933735847473145, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.6440366972477065, |
|
"grad_norm": 0.44982749223709106, |
|
"learning_rate": 2.5107002147876814e-06, |
|
"logits/chosen": -0.6843823194503784, |
|
"logits/rejected": -1.5824543237686157, |
|
"logps/chosen": -257.94091796875, |
|
"logps/rejected": -254.91258239746094, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6873564124107361, |
|
"rewards/margins": 3.931499481201172, |
|
"rewards/rejected": -4.618855953216553, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.6587155963302753, |
|
"grad_norm": 1.7579375505447388, |
|
"learning_rate": 2.467900139711693e-06, |
|
"logits/chosen": -0.7247440814971924, |
|
"logits/rejected": -1.7569100856781006, |
|
"logps/chosen": -272.9012145996094, |
|
"logps/rejected": -242.78591918945312, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.8232018947601318, |
|
"rewards/margins": 3.6535580158233643, |
|
"rewards/rejected": -4.476759910583496, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.6733944954128441, |
|
"grad_norm": 0.9240662455558777, |
|
"learning_rate": 2.4251094731303586e-06, |
|
"logits/chosen": -0.641674280166626, |
|
"logits/rejected": -1.8788131475448608, |
|
"logps/chosen": -286.55889892578125, |
|
"logps/rejected": -223.2461395263672, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.6447327136993408, |
|
"rewards/margins": 3.462599754333496, |
|
"rewards/rejected": -4.107332229614258, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.688073394495413, |
|
"grad_norm": 0.6030136346817017, |
|
"learning_rate": 2.3823407570221812e-06, |
|
"logits/chosen": -0.5796603560447693, |
|
"logits/rejected": -1.8217395544052124, |
|
"logps/chosen": -295.2866516113281, |
|
"logps/rejected": -220.7737274169922, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6853188276290894, |
|
"rewards/margins": 3.6318960189819336, |
|
"rewards/rejected": -4.3172149658203125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.7027522935779817, |
|
"grad_norm": 0.568932294845581, |
|
"learning_rate": 2.3396065269319655e-06, |
|
"logits/chosen": -0.676604151725769, |
|
"logits/rejected": -1.9570648670196533, |
|
"logps/chosen": -296.9806823730469, |
|
"logps/rejected": -219.16986083984375, |
|
"loss": 0.0842, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.6757611036300659, |
|
"rewards/margins": 3.6589701175689697, |
|
"rewards/rejected": -4.334731101989746, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.7174311926605506, |
|
"grad_norm": 0.6324030756950378, |
|
"learning_rate": 2.2969193082966353e-06, |
|
"logits/chosen": -0.5455877780914307, |
|
"logits/rejected": -1.8318365812301636, |
|
"logps/chosen": -281.55133056640625, |
|
"logps/rejected": -238.96044921875, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8087357878684998, |
|
"rewards/margins": 3.9599123001098633, |
|
"rewards/rejected": -4.76864767074585, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.7321100917431194, |
|
"grad_norm": 0.5559871196746826, |
|
"learning_rate": 2.2542916127740194e-06, |
|
"logits/chosen": -0.5671895146369934, |
|
"logits/rejected": -1.5007973909378052, |
|
"logps/chosen": -308.0108642578125, |
|
"logps/rejected": -282.2166748046875, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6229729652404785, |
|
"rewards/margins": 4.207201957702637, |
|
"rewards/rejected": -4.830175399780273, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.7467889908256882, |
|
"grad_norm": 0.7297521233558655, |
|
"learning_rate": 2.211735934575674e-06, |
|
"logits/chosen": -0.5655158162117004, |
|
"logits/rejected": -1.8047404289245605, |
|
"logps/chosen": -280.84136962890625, |
|
"logps/rejected": -210.9255828857422, |
|
"loss": 0.117, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9368782043457031, |
|
"rewards/margins": 3.448589324951172, |
|
"rewards/rejected": -4.385467529296875, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.761467889908257, |
|
"grad_norm": 0.5144051909446716, |
|
"learning_rate": 2.1692647468048235e-06, |
|
"logits/chosen": -0.8172545433044434, |
|
"logits/rejected": -1.7720967531204224, |
|
"logps/chosen": -303.94207763671875, |
|
"logps/rejected": -259.8736572265625, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8221498727798462, |
|
"rewards/margins": 4.422685623168945, |
|
"rewards/rejected": -5.244835376739502, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.7761467889908258, |
|
"grad_norm": 0.9769014120101929, |
|
"learning_rate": 2.126890497800477e-06, |
|
"logits/chosen": -0.7060015797615051, |
|
"logits/rejected": -1.8099745512008667, |
|
"logps/chosen": -294.390869140625, |
|
"logps/rejected": -243.9739227294922, |
|
"loss": 0.116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8548272848129272, |
|
"rewards/margins": 3.3701939582824707, |
|
"rewards/rejected": -4.2250213623046875, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.7908256880733946, |
|
"grad_norm": 0.70880126953125, |
|
"learning_rate": 2.084625607488816e-06, |
|
"logits/chosen": -0.597684919834137, |
|
"logits/rejected": -1.7976946830749512, |
|
"logps/chosen": -271.812255859375, |
|
"logps/rejected": -240.36590576171875, |
|
"loss": 0.0667, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7783107757568359, |
|
"rewards/margins": 4.291589260101318, |
|
"rewards/rejected": -5.0699005126953125, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.8055045871559634, |
|
"grad_norm": 0.4248654842376709, |
|
"learning_rate": 2.0424824637428995e-06, |
|
"logits/chosen": -0.5851184725761414, |
|
"logits/rejected": -1.9045813083648682, |
|
"logps/chosen": -265.13494873046875, |
|
"logps/rejected": -221.10482788085938, |
|
"loss": 0.053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8591616749763489, |
|
"rewards/margins": 4.024895668029785, |
|
"rewards/rejected": -4.88405704498291, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.8201834862385322, |
|
"grad_norm": 0.7956784963607788, |
|
"learning_rate": 2.0004734187517744e-06, |
|
"logits/chosen": -0.7580417394638062, |
|
"logits/rejected": -1.747441053390503, |
|
"logps/chosen": -317.2771301269531, |
|
"logps/rejected": -223.77203369140625, |
|
"loss": 0.0986, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8614228963851929, |
|
"rewards/margins": 3.6957850456237793, |
|
"rewards/rejected": -4.557208061218262, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.5855574607849121, |
|
"learning_rate": 1.9586107854000327e-06, |
|
"logits/chosen": -0.8148155808448792, |
|
"logits/rejected": -1.9937602281570435, |
|
"logps/chosen": -292.9911804199219, |
|
"logps/rejected": -214.19070434570312, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.7950844764709473, |
|
"rewards/margins": 3.577152729034424, |
|
"rewards/rejected": -4.372237205505371, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.8495412844036698, |
|
"grad_norm": 0.8180505037307739, |
|
"learning_rate": 1.916906833658899e-06, |
|
"logits/chosen": -0.6161235570907593, |
|
"logits/rejected": -1.8707021474838257, |
|
"logps/chosen": -320.8139953613281, |
|
"logps/rejected": -268.6239929199219, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.8487557768821716, |
|
"rewards/margins": 4.137137413024902, |
|
"rewards/rejected": -4.985893249511719, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.8642201834862386, |
|
"grad_norm": 0.4743252992630005, |
|
"learning_rate": 1.8753737869898921e-06, |
|
"logits/chosen": -0.6861028671264648, |
|
"logits/rejected": -1.8028208017349243, |
|
"logps/chosen": -246.8392791748047, |
|
"logps/rejected": -235.8388214111328, |
|
"loss": 0.051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8036968111991882, |
|
"rewards/margins": 4.426541805267334, |
|
"rewards/rejected": -5.230238437652588, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.8788990825688074, |
|
"grad_norm": 0.49114933609962463, |
|
"learning_rate": 1.8340238187621185e-06, |
|
"logits/chosen": -0.5393966436386108, |
|
"logits/rejected": -1.8631210327148438, |
|
"logps/chosen": -259.17840576171875, |
|
"logps/rejected": -220.71517944335938, |
|
"loss": 0.1047, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.7826576232910156, |
|
"rewards/margins": 3.6857450008392334, |
|
"rewards/rejected": -4.468402862548828, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.8935779816513763, |
|
"grad_norm": 0.9561637043952942, |
|
"learning_rate": 1.7928690486842438e-06, |
|
"logits/chosen": -0.6505446434020996, |
|
"logits/rejected": -1.810225248336792, |
|
"logps/chosen": -251.15536499023438, |
|
"logps/rejected": -207.4978790283203, |
|
"loss": 0.0772, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8502838611602783, |
|
"rewards/margins": 3.7762393951416016, |
|
"rewards/rejected": -4.626523017883301, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.908256880733945, |
|
"grad_norm": 1.0296114683151245, |
|
"learning_rate": 1.7519215392522026e-06, |
|
"logits/chosen": -0.7036296725273132, |
|
"logits/rejected": -1.9768420457839966, |
|
"logps/chosen": -280.53668212890625, |
|
"logps/rejected": -218.00341796875, |
|
"loss": 0.0681, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8542452454566956, |
|
"rewards/margins": 4.108546733856201, |
|
"rewards/rejected": -4.96279239654541, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.9229357798165139, |
|
"grad_norm": 0.5742794275283813, |
|
"learning_rate": 1.7111932922136715e-06, |
|
"logits/chosen": -0.7176038026809692, |
|
"logits/rejected": -1.6738024950027466, |
|
"logps/chosen": -250.91835021972656, |
|
"logps/rejected": -257.7945556640625, |
|
"loss": 0.062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8737991452217102, |
|
"rewards/margins": 4.347530841827393, |
|
"rewards/rejected": -5.221330165863037, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.9376146788990827, |
|
"grad_norm": 0.4386001229286194, |
|
"learning_rate": 1.6706962450503408e-06, |
|
"logits/chosen": -0.5108492374420166, |
|
"logits/rejected": -1.8909492492675781, |
|
"logps/chosen": -278.94171142578125, |
|
"logps/rejected": -244.6939697265625, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7861737608909607, |
|
"rewards/margins": 4.580504417419434, |
|
"rewards/rejected": -5.366678714752197, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.9522935779816515, |
|
"grad_norm": 0.8080554604530334, |
|
"learning_rate": 1.630442267479034e-06, |
|
"logits/chosen": -0.4731358587741852, |
|
"logits/rejected": -1.8190574645996094, |
|
"logps/chosen": -261.9861145019531, |
|
"logps/rejected": -251.31875610351562, |
|
"loss": 0.061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7401555180549622, |
|
"rewards/margins": 4.329522609710693, |
|
"rewards/rejected": -5.06967830657959, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.9669724770642203, |
|
"grad_norm": 0.4730006754398346, |
|
"learning_rate": 1.5904431579726837e-06, |
|
"logits/chosen": -0.6266091465950012, |
|
"logits/rejected": -1.9366010427474976, |
|
"logps/chosen": -292.2561340332031, |
|
"logps/rejected": -217.6514129638672, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8204957842826843, |
|
"rewards/margins": 4.180571556091309, |
|
"rewards/rejected": -5.0010666847229, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"grad_norm": 0.9025997519493103, |
|
"learning_rate": 1.5507106403021897e-06, |
|
"logits/chosen": -0.603181004524231, |
|
"logits/rejected": -1.930855393409729, |
|
"logps/chosen": -323.4551696777344, |
|
"logps/rejected": -252.92965698242188, |
|
"loss": 0.074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6537671685218811, |
|
"rewards/margins": 4.156303405761719, |
|
"rewards/rejected": -4.810070991516113, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.996330275229358, |
|
"grad_norm": 0.5256826877593994, |
|
"learning_rate": 1.511256360100171e-06, |
|
"logits/chosen": -0.5993788242340088, |
|
"logits/rejected": -1.7707700729370117, |
|
"logps/chosen": -290.8062744140625, |
|
"logps/rejected": -243.2689971923828, |
|
"loss": 0.0755, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.7740217447280884, |
|
"rewards/margins": 4.305649757385254, |
|
"rewards/rejected": -5.079671382904053, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.0110091743119267, |
|
"grad_norm": 0.8649439215660095, |
|
"learning_rate": 1.4720918814476234e-06, |
|
"logits/chosen": -0.8103897571563721, |
|
"logits/rejected": -1.9931031465530396, |
|
"logps/chosen": -253.79019165039062, |
|
"logps/rejected": -234.64596557617188, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7869517207145691, |
|
"rewards/margins": 4.74979829788208, |
|
"rewards/rejected": -5.536749839782715, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.0256880733944955, |
|
"grad_norm": 1.317905068397522, |
|
"learning_rate": 1.4332286834844792e-06, |
|
"logits/chosen": -0.7999382019042969, |
|
"logits/rejected": -1.9115333557128906, |
|
"logps/chosen": -285.6707458496094, |
|
"logps/rejected": -239.1488037109375, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9859182834625244, |
|
"rewards/margins": 4.015196800231934, |
|
"rewards/rejected": -5.001114845275879, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0403669724770643, |
|
"grad_norm": 0.6509101986885071, |
|
"learning_rate": 1.3946781570450563e-06, |
|
"logits/chosen": -0.701880156993866, |
|
"logits/rejected": -1.9084625244140625, |
|
"logps/chosen": -299.8097839355469, |
|
"logps/rejected": -249.23460388183594, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8493328094482422, |
|
"rewards/margins": 4.347517490386963, |
|
"rewards/rejected": -5.196850299835205, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.055045871559633, |
|
"grad_norm": 0.4122222065925598, |
|
"learning_rate": 1.3564516013194023e-06, |
|
"logits/chosen": -0.4526348114013672, |
|
"logits/rejected": -1.7306469678878784, |
|
"logps/chosen": -264.879150390625, |
|
"logps/rejected": -240.08486938476562, |
|
"loss": 0.0461, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8477758169174194, |
|
"rewards/margins": 4.5718865394592285, |
|
"rewards/rejected": -5.4196624755859375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.069724770642202, |
|
"grad_norm": 0.6205916404724121, |
|
"learning_rate": 1.3185602205414894e-06, |
|
"logits/chosen": -0.7086562514305115, |
|
"logits/rejected": -1.7690290212631226, |
|
"logps/chosen": -270.14947509765625, |
|
"logps/rejected": -225.78253173828125, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8577483892440796, |
|
"rewards/margins": 4.2042083740234375, |
|
"rewards/rejected": -5.061956882476807, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.0844036697247708, |
|
"grad_norm": 0.44248032569885254, |
|
"learning_rate": 1.2810151207052465e-06, |
|
"logits/chosen": -0.7329645156860352, |
|
"logits/rejected": -1.7238507270812988, |
|
"logps/chosen": -336.89178466796875, |
|
"logps/rejected": -278.45166015625, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1881827116012573, |
|
"rewards/margins": 4.469137191772461, |
|
"rewards/rejected": -5.657320022583008, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.0990825688073396, |
|
"grad_norm": 0.5951194763183594, |
|
"learning_rate": 1.2438273063093811e-06, |
|
"logits/chosen": -0.5224804282188416, |
|
"logits/rejected": -1.7726545333862305, |
|
"logps/chosen": -276.5761413574219, |
|
"logps/rejected": -220.84750366210938, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.0991380214691162, |
|
"rewards/margins": 3.8999533653259277, |
|
"rewards/rejected": -4.999091148376465, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.1137614678899084, |
|
"grad_norm": 1.2658673524856567, |
|
"learning_rate": 1.2070076771319536e-06, |
|
"logits/chosen": -0.8442491292953491, |
|
"logits/rejected": -1.6956205368041992, |
|
"logps/chosen": -353.8062744140625, |
|
"logps/rejected": -247.41055297851562, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.956983208656311, |
|
"rewards/margins": 3.6315712928771973, |
|
"rewards/rejected": -4.588554859161377, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.128440366972477, |
|
"grad_norm": 0.564155101776123, |
|
"learning_rate": 1.1705670250356417e-06, |
|
"logits/chosen": -0.5238651633262634, |
|
"logits/rejected": -1.782932996749878, |
|
"logps/chosen": -307.71112060546875, |
|
"logps/rejected": -247.0592803955078, |
|
"loss": 0.0566, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6434872150421143, |
|
"rewards/margins": 4.430055618286133, |
|
"rewards/rejected": -5.073543548583984, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.143119266055046, |
|
"grad_norm": 0.506363570690155, |
|
"learning_rate": 1.1345160308046413e-06, |
|
"logits/chosen": -0.6401405334472656, |
|
"logits/rejected": -2.098745822906494, |
|
"logps/chosen": -381.30963134765625, |
|
"logps/rejected": -261.1710205078125, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -1.0125696659088135, |
|
"rewards/margins": 4.525051593780518, |
|
"rewards/rejected": -5.537620544433594, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.157798165137615, |
|
"grad_norm": 0.584295392036438, |
|
"learning_rate": 1.0988652610141154e-06, |
|
"logits/chosen": -0.6318463683128357, |
|
"logits/rejected": -1.676975965499878, |
|
"logps/chosen": -274.4164733886719, |
|
"logps/rejected": -269.0091247558594, |
|
"loss": 0.06, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8393394947052002, |
|
"rewards/margins": 4.4933929443359375, |
|
"rewards/rejected": -5.3327317237854, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.1724770642201836, |
|
"grad_norm": 0.2857895791530609, |
|
"learning_rate": 1.063625164933124e-06, |
|
"logits/chosen": -0.6415454149246216, |
|
"logits/rejected": -1.8679744005203247, |
|
"logps/chosen": -327.9555969238281, |
|
"logps/rejected": -269.9202880859375, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8799739480018616, |
|
"rewards/margins": 5.01440954208374, |
|
"rewards/rejected": -5.894383907318115, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.1871559633027524, |
|
"grad_norm": 0.7591383457183838, |
|
"learning_rate": 1.0288060714619359e-06, |
|
"logits/chosen": -0.8029307126998901, |
|
"logits/rejected": -1.9948023557662964, |
|
"logps/chosen": -312.4205627441406, |
|
"logps/rejected": -219.84292602539062, |
|
"loss": 0.0694, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8505744338035583, |
|
"rewards/margins": 4.322221279144287, |
|
"rewards/rejected": -5.172795295715332, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 0.6418690085411072, |
|
"learning_rate": 9.944181861046188e-07, |
|
"logits/chosen": -0.6180175542831421, |
|
"logits/rejected": -1.778718113899231, |
|
"logps/chosen": -333.8564758300781, |
|
"logps/rejected": -259.21697998046875, |
|
"loss": 0.1021, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -1.1542073488235474, |
|
"rewards/margins": 4.588565826416016, |
|
"rewards/rejected": -5.742773056030273, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.21651376146789, |
|
"grad_norm": 0.8044218420982361, |
|
"learning_rate": 9.604715879777986e-07, |
|
"logits/chosen": -0.6669445037841797, |
|
"logits/rejected": -1.950725793838501, |
|
"logps/chosen": -277.66876220703125, |
|
"logps/rejected": -205.39151000976562, |
|
"loss": 0.076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0598458051681519, |
|
"rewards/margins": 4.117983341217041, |
|
"rewards/rejected": -5.177828788757324, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.231192660550459, |
|
"grad_norm": 0.6962106823921204, |
|
"learning_rate": 9.269762268564616e-07, |
|
"logits/chosen": -0.7753230929374695, |
|
"logits/rejected": -1.840043306350708, |
|
"logps/chosen": -252.92071533203125, |
|
"logps/rejected": -211.79078674316406, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7847457528114319, |
|
"rewards/margins": 4.182995319366455, |
|
"rewards/rejected": -4.967741012573242, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.2458715596330276, |
|
"grad_norm": 0.66120445728302, |
|
"learning_rate": 8.939419202576694e-07, |
|
"logits/chosen": -0.4923960864543915, |
|
"logits/rejected": -1.4844017028808594, |
|
"logps/chosen": -257.7115173339844, |
|
"logps/rejected": -228.1881561279297, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.871938943862915, |
|
"rewards/margins": 3.5912954807281494, |
|
"rewards/rejected": -4.4632344245910645, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.2605504587155965, |
|
"grad_norm": 0.7900298237800598, |
|
"learning_rate": 8.61378350563033e-07, |
|
"logits/chosen": -0.626375675201416, |
|
"logits/rejected": -1.8259055614471436, |
|
"logps/chosen": -248.50527954101562, |
|
"logps/rejected": -247.7064208984375, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8818211555480957, |
|
"rewards/margins": 4.562689781188965, |
|
"rewards/rejected": -5.4445109367370605, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.2752293577981653, |
|
"grad_norm": 0.6607710123062134, |
|
"learning_rate": 8.292950621808022e-07, |
|
"logits/chosen": -0.7079221606254578, |
|
"logits/rejected": -1.9055808782577515, |
|
"logps/chosen": -281.2016296386719, |
|
"logps/rejected": -247.4744415283203, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8795925378799438, |
|
"rewards/margins": 4.619358539581299, |
|
"rewards/rejected": -5.498951435089111, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.289908256880734, |
|
"grad_norm": 0.6514431238174438, |
|
"learning_rate": 7.977014587483925e-07, |
|
"logits/chosen": -0.7440314888954163, |
|
"logits/rejected": -1.7653181552886963, |
|
"logps/chosen": -271.65496826171875, |
|
"logps/rejected": -283.68035888671875, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8735625147819519, |
|
"rewards/margins": 4.6589860916137695, |
|
"rewards/rejected": -5.532548904418945, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.304587155963303, |
|
"grad_norm": 0.9269477128982544, |
|
"learning_rate": 7.666068003761684e-07, |
|
"logits/chosen": -0.6487716436386108, |
|
"logits/rejected": -1.9153821468353271, |
|
"logps/chosen": -295.9359436035156, |
|
"logps/rejected": -225.51235961914062, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9806410670280457, |
|
"rewards/margins": 4.522603988647461, |
|
"rewards/rejected": -5.503244876861572, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.3192660550458717, |
|
"grad_norm": 0.8910574316978455, |
|
"learning_rate": 7.360202009332993e-07, |
|
"logits/chosen": -0.7844710350036621, |
|
"logits/rejected": -1.9116116762161255, |
|
"logps/chosen": -295.5791015625, |
|
"logps/rejected": -242.4950714111328, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8433384299278259, |
|
"rewards/margins": 4.688782691955566, |
|
"rewards/rejected": -5.532121658325195, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.3339449541284405, |
|
"grad_norm": 0.6180950403213501, |
|
"learning_rate": 7.059506253764773e-07, |
|
"logits/chosen": -0.6881747245788574, |
|
"logits/rejected": -1.776421070098877, |
|
"logps/chosen": -311.3552551269531, |
|
"logps/rejected": -251.24072265625, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9155367016792297, |
|
"rewards/margins": 4.762095928192139, |
|
"rewards/rejected": -5.6776323318481445, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.3486238532110093, |
|
"grad_norm": 0.5022120475769043, |
|
"learning_rate": 6.764068871222825e-07, |
|
"logits/chosen": -0.46809208393096924, |
|
"logits/rejected": -1.7341670989990234, |
|
"logps/chosen": -285.6492919921875, |
|
"logps/rejected": -240.606689453125, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8136368989944458, |
|
"rewards/margins": 4.417351722717285, |
|
"rewards/rejected": -5.230988025665283, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.363302752293578, |
|
"grad_norm": 0.5374433994293213, |
|
"learning_rate": 6.473976454639608e-07, |
|
"logits/chosen": -0.6733486652374268, |
|
"logits/rejected": -1.9388556480407715, |
|
"logps/chosen": -291.48248291015625, |
|
"logps/rejected": -225.9014129638672, |
|
"loss": 0.055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9106884598731995, |
|
"rewards/margins": 4.666688442230225, |
|
"rewards/rejected": -5.577376842498779, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.377981651376147, |
|
"grad_norm": 0.9357534646987915, |
|
"learning_rate": 6.189314030333796e-07, |
|
"logits/chosen": -0.49531441926956177, |
|
"logits/rejected": -1.6569920778274536, |
|
"logps/chosen": -278.2015686035156, |
|
"logps/rejected": -277.6153564453125, |
|
"loss": 0.0458, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8881136178970337, |
|
"rewards/margins": 4.91811466217041, |
|
"rewards/rejected": -5.806228160858154, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.3926605504587157, |
|
"grad_norm": 1.5413628816604614, |
|
"learning_rate": 5.910165033089e-07, |
|
"logits/chosen": -0.5966196656227112, |
|
"logits/rejected": -1.830047845840454, |
|
"logps/chosen": -315.2392883300781, |
|
"logps/rejected": -256.3215026855469, |
|
"loss": 0.058, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8759629726409912, |
|
"rewards/margins": 4.466618537902832, |
|
"rewards/rejected": -5.342581748962402, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.4073394495412845, |
|
"grad_norm": 0.5090343356132507, |
|
"learning_rate": 5.636611281698956e-07, |
|
"logits/chosen": -0.641990602016449, |
|
"logits/rejected": -1.7657126188278198, |
|
"logps/chosen": -262.68450927734375, |
|
"logps/rejected": -237.4891815185547, |
|
"loss": 0.055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9292994737625122, |
|
"rewards/margins": 4.346236228942871, |
|
"rewards/rejected": -5.275536060333252, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.4220183486238533, |
|
"grad_norm": 0.9838851094245911, |
|
"learning_rate": 5.368732954986389e-07, |
|
"logits/chosen": -0.7656599879264832, |
|
"logits/rejected": -1.8343071937561035, |
|
"logps/chosen": -278.4713134765625, |
|
"logps/rejected": -249.50767517089844, |
|
"loss": 0.0729, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9321765899658203, |
|
"rewards/margins": 4.340839385986328, |
|
"rewards/rejected": -5.27301549911499, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.436697247706422, |
|
"grad_norm": 0.7453677654266357, |
|
"learning_rate": 5.106608568302504e-07, |
|
"logits/chosen": -0.6978477239608765, |
|
"logits/rejected": -1.8032922744750977, |
|
"logps/chosen": -256.7540283203125, |
|
"logps/rejected": -254.54901123046875, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9668062329292297, |
|
"rewards/margins": 4.748128890991211, |
|
"rewards/rejected": -5.714934349060059, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.451376146788991, |
|
"grad_norm": 0.6675239205360413, |
|
"learning_rate": 4.850314950514124e-07, |
|
"logits/chosen": -0.5538415908813477, |
|
"logits/rejected": -1.6941921710968018, |
|
"logps/chosen": -279.3875732421875, |
|
"logps/rejected": -249.05357360839844, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9551371335983276, |
|
"rewards/margins": 4.742038249969482, |
|
"rewards/rejected": -5.6971755027771, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.4660550458715598, |
|
"grad_norm": 1.0032905340194702, |
|
"learning_rate": 4.599927221485034e-07, |
|
"logits/chosen": -0.5769209265708923, |
|
"logits/rejected": -1.8620692491531372, |
|
"logps/chosen": -276.2666931152344, |
|
"logps/rejected": -226.5665283203125, |
|
"loss": 0.0726, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9197105169296265, |
|
"rewards/margins": 4.4358601570129395, |
|
"rewards/rejected": -5.355570316314697, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.4807339449541286, |
|
"grad_norm": 0.46133214235305786, |
|
"learning_rate": 4.3555187700583175e-07, |
|
"logits/chosen": -0.5237099528312683, |
|
"logits/rejected": -1.8421297073364258, |
|
"logps/chosen": -263.33160400390625, |
|
"logps/rejected": -245.607177734375, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8943026661872864, |
|
"rewards/margins": 4.7068657875061035, |
|
"rewards/rejected": -5.601168155670166, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.4954128440366974, |
|
"grad_norm": 0.41858726739883423, |
|
"learning_rate": 4.1171612325460244e-07, |
|
"logits/chosen": -0.6014917492866516, |
|
"logits/rejected": -1.6842682361602783, |
|
"logps/chosen": -279.3096923828125, |
|
"logps/rejected": -236.51959228515625, |
|
"loss": 0.0792, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9955892562866211, |
|
"rewards/margins": 4.224085807800293, |
|
"rewards/rejected": -5.219675064086914, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.510091743119266, |
|
"grad_norm": 0.3274426758289337, |
|
"learning_rate": 3.8849244717325206e-07, |
|
"logits/chosen": -0.639855682849884, |
|
"logits/rejected": -1.704068899154663, |
|
"logps/chosen": -266.26715087890625, |
|
"logps/rejected": -260.8788146972656, |
|
"loss": 0.049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8671805262565613, |
|
"rewards/margins": 4.942291736602783, |
|
"rewards/rejected": -5.80947208404541, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.524770642201835, |
|
"grad_norm": 1.3120359182357788, |
|
"learning_rate": 3.658876556397628e-07, |
|
"logits/chosen": -0.8555523753166199, |
|
"logits/rejected": -1.9403518438339233, |
|
"logps/chosen": -254.93899536132812, |
|
"logps/rejected": -226.33721923828125, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.1139277219772339, |
|
"rewards/margins": 4.305876731872559, |
|
"rewards/rejected": -5.419804096221924, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.539449541284404, |
|
"grad_norm": 0.6721531748771667, |
|
"learning_rate": 3.4390837413656256e-07, |
|
"logits/chosen": -0.6973453164100647, |
|
"logits/rejected": -1.883147120475769, |
|
"logps/chosen": -277.8430480957031, |
|
"logps/rejected": -262.0320739746094, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.942565381526947, |
|
"rewards/margins": 4.684934139251709, |
|
"rewards/rejected": -5.627499580383301, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.5541284403669726, |
|
"grad_norm": 0.9806049466133118, |
|
"learning_rate": 3.225610448085903e-07, |
|
"logits/chosen": -0.6397227048873901, |
|
"logits/rejected": -1.8119735717773438, |
|
"logps/chosen": -270.50787353515625, |
|
"logps/rejected": -238.79200744628906, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -1.1652170419692993, |
|
"rewards/margins": 4.398360252380371, |
|
"rewards/rejected": -5.563577651977539, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 0.6500720381736755, |
|
"learning_rate": 3.018519245750989e-07, |
|
"logits/chosen": -0.6613521575927734, |
|
"logits/rejected": -1.692039132118225, |
|
"logps/chosen": -319.7649230957031, |
|
"logps/rejected": -278.594482421875, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8958297371864319, |
|
"rewards/margins": 4.4838547706604, |
|
"rewards/rejected": -5.3796844482421875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.5834862385321102, |
|
"grad_norm": 1.183739423751831, |
|
"learning_rate": 2.817870832957459e-07, |
|
"logits/chosen": -0.611799418926239, |
|
"logits/rejected": -1.9181246757507324, |
|
"logps/chosen": -258.9988098144531, |
|
"logps/rejected": -234.6461639404297, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0336565971374512, |
|
"rewards/margins": 4.347400665283203, |
|
"rewards/rejected": -5.381057262420654, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.598165137614679, |
|
"grad_norm": 1.2900700569152832, |
|
"learning_rate": 2.6237240199151386e-07, |
|
"logits/chosen": -0.6546847820281982, |
|
"logits/rejected": -1.7483494281768799, |
|
"logps/chosen": -262.4722595214844, |
|
"logps/rejected": -225.11770629882812, |
|
"loss": 0.0855, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -1.0690879821777344, |
|
"rewards/margins": 4.176665782928467, |
|
"rewards/rejected": -5.245754241943359, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.612844036697248, |
|
"grad_norm": 0.7886202335357666, |
|
"learning_rate": 2.436135711209786e-07, |
|
"logits/chosen": -0.9137476682662964, |
|
"logits/rejected": -1.9900403022766113, |
|
"logps/chosen": -279.0731201171875, |
|
"logps/rejected": -215.946533203125, |
|
"loss": 0.0888, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -1.0406084060668945, |
|
"rewards/margins": 3.958219528198242, |
|
"rewards/rejected": -4.998827934265137, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.6275229357798167, |
|
"grad_norm": 0.7906404137611389, |
|
"learning_rate": 2.2551608891243026e-07, |
|
"logits/chosen": -0.9193136096000671, |
|
"logits/rejected": -1.8110696077346802, |
|
"logps/chosen": -351.8749694824219, |
|
"logps/rejected": -266.59405517578125, |
|
"loss": 0.064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1595898866653442, |
|
"rewards/margins": 4.084352493286133, |
|
"rewards/rejected": -5.243941783905029, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.6422018348623855, |
|
"grad_norm": 0.37772616744041443, |
|
"learning_rate": 2.0808525975233807e-07, |
|
"logits/chosen": -0.5619891881942749, |
|
"logits/rejected": -1.8202831745147705, |
|
"logps/chosen": -285.0718078613281, |
|
"logps/rejected": -255.26644897460938, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.134505033493042, |
|
"rewards/margins": 4.348145484924316, |
|
"rewards/rejected": -5.482650279998779, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.6568807339449543, |
|
"grad_norm": 0.7285224795341492, |
|
"learning_rate": 1.9132619263063144e-07, |
|
"logits/chosen": -0.5652111768722534, |
|
"logits/rejected": -1.8313266038894653, |
|
"logps/chosen": -340.82025146484375, |
|
"logps/rejected": -269.52020263671875, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.871527373790741, |
|
"rewards/margins": 4.85342264175415, |
|
"rewards/rejected": -5.724949836730957, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.671559633027523, |
|
"grad_norm": 0.4651119112968445, |
|
"learning_rate": 1.7524379964325155e-07, |
|
"logits/chosen": -0.6477389931678772, |
|
"logits/rejected": -1.8673548698425293, |
|
"logps/chosen": -327.2629699707031, |
|
"logps/rejected": -260.98095703125, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.1671123504638672, |
|
"rewards/margins": 4.629344940185547, |
|
"rewards/rejected": -5.796456813812256, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.686238532110092, |
|
"grad_norm": 0.469937264919281, |
|
"learning_rate": 1.5984279455240975e-07, |
|
"logits/chosen": -0.6893140077590942, |
|
"logits/rejected": -1.7950717210769653, |
|
"logps/chosen": -279.4891662597656, |
|
"logps/rejected": -248.5086669921875, |
|
"loss": 0.0408, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8348989486694336, |
|
"rewards/margins": 4.693355560302734, |
|
"rewards/rejected": -5.52825403213501, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.7009174311926607, |
|
"grad_norm": 0.3594943881034851, |
|
"learning_rate": 1.451276914049818e-07, |
|
"logits/chosen": -0.6432421207427979, |
|
"logits/rejected": -1.7735950946807861, |
|
"logps/chosen": -256.6174011230469, |
|
"logps/rejected": -232.81739807128906, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8760130405426025, |
|
"rewards/margins": 4.517703056335449, |
|
"rewards/rejected": -5.393716335296631, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.7155963302752295, |
|
"grad_norm": 0.8683236241340637, |
|
"learning_rate": 1.3110280320943692e-07, |
|
"logits/chosen": -0.6071776151657104, |
|
"logits/rejected": -1.878743290901184, |
|
"logps/chosen": -268.89874267578125, |
|
"logps/rejected": -227.52093505859375, |
|
"loss": 0.045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9803122282028198, |
|
"rewards/margins": 4.631489276885986, |
|
"rewards/rejected": -5.611801624298096, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.7302752293577983, |
|
"grad_norm": 0.3329331874847412, |
|
"learning_rate": 1.1777224067169218e-07, |
|
"logits/chosen": -0.6055042147636414, |
|
"logits/rejected": -1.763778805732727, |
|
"logps/chosen": -276.3477478027344, |
|
"logps/rejected": -251.5518798828125, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8497152924537659, |
|
"rewards/margins": 4.952919960021973, |
|
"rewards/rejected": -5.8026347160339355, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.744954128440367, |
|
"grad_norm": 0.6363049149513245, |
|
"learning_rate": 1.0513991099025872e-07, |
|
"logits/chosen": -0.7439496517181396, |
|
"logits/rejected": -1.894633412361145, |
|
"logps/chosen": -322.8822326660156, |
|
"logps/rejected": -242.20285034179688, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9003086090087891, |
|
"rewards/margins": 3.9627842903137207, |
|
"rewards/rejected": -4.863093376159668, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.759633027522936, |
|
"grad_norm": 0.7462967038154602, |
|
"learning_rate": 9.320951671104194e-08, |
|
"logits/chosen": -0.6117614507675171, |
|
"logits/rejected": -1.8900599479675293, |
|
"logps/chosen": -307.9691467285156, |
|
"logps/rejected": -243.08108520507812, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6807048916816711, |
|
"rewards/margins": 4.540770053863525, |
|
"rewards/rejected": -5.221475124359131, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.7743119266055047, |
|
"grad_norm": 0.5611811876296997, |
|
"learning_rate": 8.198455464212108e-08, |
|
"logits/chosen": -0.7264785766601562, |
|
"logits/rejected": -1.8659650087356567, |
|
"logps/chosen": -290.5928955078125, |
|
"logps/rejected": -231.83981323242188, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7871606349945068, |
|
"rewards/margins": 4.830132484436035, |
|
"rewards/rejected": -5.617292881011963, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.7889908256880735, |
|
"grad_norm": 1.062226414680481, |
|
"learning_rate": 7.146831482883115e-08, |
|
"logits/chosen": -0.41257715225219727, |
|
"logits/rejected": -1.8329212665557861, |
|
"logps/chosen": -295.2815856933594, |
|
"logps/rejected": -231.45794677734375, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.9870278239250183, |
|
"rewards/margins": 4.97043514251709, |
|
"rewards/rejected": -5.957463264465332, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.8036697247706424, |
|
"grad_norm": 0.9914811253547668, |
|
"learning_rate": 6.16638795894492e-08, |
|
"logits/chosen": -0.5121580958366394, |
|
"logits/rejected": -1.6407995223999023, |
|
"logps/chosen": -260.80078125, |
|
"logps/rejected": -255.26918029785156, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9135465025901794, |
|
"rewards/margins": 4.523343563079834, |
|
"rewards/rejected": -5.4368896484375, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.818348623853211, |
|
"grad_norm": 0.7881761193275452, |
|
"learning_rate": 5.257412261176375e-08, |
|
"logits/chosen": -0.7487360835075378, |
|
"logits/rejected": -1.7608070373535156, |
|
"logps/chosen": -270.4583740234375, |
|
"logps/rejected": -244.21591186523438, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9595422744750977, |
|
"rewards/margins": 4.419933319091797, |
|
"rewards/rejected": -5.3794755935668945, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.83302752293578, |
|
"grad_norm": 1.0176548957824707, |
|
"learning_rate": 4.4201708110795384e-08, |
|
"logits/chosen": -0.635594367980957, |
|
"logits/rejected": -1.7197755575180054, |
|
"logps/chosen": -291.9073791503906, |
|
"logps/rejected": -257.1024169921875, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9439809322357178, |
|
"rewards/margins": 4.196859359741211, |
|
"rewards/rejected": -5.140840530395508, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.847706422018349, |
|
"grad_norm": 0.39054185152053833, |
|
"learning_rate": 3.654909004791152e-08, |
|
"logits/chosen": -0.704308807849884, |
|
"logits/rejected": -1.9551911354064941, |
|
"logps/chosen": -291.7530517578125, |
|
"logps/rejected": -240.93592834472656, |
|
"loss": 0.0604, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8366247415542603, |
|
"rewards/margins": 4.685898780822754, |
|
"rewards/rejected": -5.522522926330566, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.8623853211009176, |
|
"grad_norm": 0.536412239074707, |
|
"learning_rate": 2.9618511411570462e-08, |
|
"logits/chosen": -0.7265235185623169, |
|
"logits/rejected": -1.952091097831726, |
|
"logps/chosen": -284.6298522949219, |
|
"logps/rejected": -226.05140686035156, |
|
"loss": 0.0692, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -1.0189356803894043, |
|
"rewards/margins": 4.247507572174072, |
|
"rewards/rejected": -5.266443252563477, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.8770642201834864, |
|
"grad_norm": 0.7545129060745239, |
|
"learning_rate": 2.3412003559898088e-08, |
|
"logits/chosen": -0.6549080610275269, |
|
"logits/rejected": -1.6496838331222534, |
|
"logps/chosen": -270.34552001953125, |
|
"logps/rejected": -260.4422302246094, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.8946044445037842, |
|
"rewards/margins": 4.255610466003418, |
|
"rewards/rejected": -5.150214672088623, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.891743119266055, |
|
"grad_norm": 2.283723831176758, |
|
"learning_rate": 1.793138562529634e-08, |
|
"logits/chosen": -0.6721558570861816, |
|
"logits/rejected": -1.893051266670227, |
|
"logps/chosen": -344.69683837890625, |
|
"logps/rejected": -235.408447265625, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.9549738764762878, |
|
"rewards/margins": 4.040391445159912, |
|
"rewards/rejected": -4.995365142822266, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.906422018348624, |
|
"grad_norm": 0.4293775260448456, |
|
"learning_rate": 1.317826398125277e-08, |
|
"logits/chosen": -0.7664704322814941, |
|
"logits/rejected": -1.8490607738494873, |
|
"logps/chosen": -290.996337890625, |
|
"logps/rejected": -263.40814208984375, |
|
"loss": 0.0492, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9369643926620483, |
|
"rewards/margins": 5.010738849639893, |
|
"rewards/rejected": -5.9477033615112305, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.921100917431193, |
|
"grad_norm": 0.5313416123390198, |
|
"learning_rate": 9.15403177151275e-09, |
|
"logits/chosen": -0.6522485017776489, |
|
"logits/rejected": -1.8088821172714233, |
|
"logps/chosen": -272.3266296386719, |
|
"logps/rejected": -275.0253601074219, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9389075040817261, |
|
"rewards/margins": 4.837636470794678, |
|
"rewards/rejected": -5.776544094085693, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 0.7444632053375244, |
|
"learning_rate": 5.85986850174608e-09, |
|
"logits/chosen": -0.6238083839416504, |
|
"logits/rejected": -2.0871429443359375, |
|
"logps/chosen": -310.4813232421875, |
|
"logps/rejected": -242.80917358398438, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9147568345069885, |
|
"rewards/margins": 4.864949703216553, |
|
"rewards/rejected": -5.779706001281738, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.9504587155963304, |
|
"grad_norm": 1.1092686653137207, |
|
"learning_rate": 3.296739693834927e-09, |
|
"logits/chosen": -0.82442706823349, |
|
"logits/rejected": -1.7480018138885498, |
|
"logps/chosen": -306.08526611328125, |
|
"logps/rejected": -235.5576171875, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.1727603673934937, |
|
"rewards/margins": 3.904568672180176, |
|
"rewards/rejected": -5.077329158782959, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.9651376146788992, |
|
"grad_norm": 0.5375072360038757, |
|
"learning_rate": 1.4653966028774225e-09, |
|
"logits/chosen": -0.6952610611915588, |
|
"logits/rejected": -1.7414966821670532, |
|
"logps/chosen": -313.39166259765625, |
|
"logps/rejected": -276.1308288574219, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1172269582748413, |
|
"rewards/margins": 4.964841842651367, |
|
"rewards/rejected": -6.0820698738098145, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.979816513761468, |
|
"grad_norm": 0.6843758225440979, |
|
"learning_rate": 3.6637599699351766e-10, |
|
"logits/chosen": -0.6138722896575928, |
|
"logits/rejected": -1.8517141342163086, |
|
"logps/chosen": -289.3631591796875, |
|
"logps/rejected": -234.05596923828125, |
|
"loss": 0.0817, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0136228799819946, |
|
"rewards/margins": 4.223280906677246, |
|
"rewards/rejected": -5.236903667449951, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"grad_norm": 0.888327956199646, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.7368099093437195, |
|
"logits/rejected": -1.7805346250534058, |
|
"logps/chosen": -316.90399169921875, |
|
"logps/rejected": -268.7693176269531, |
|
"loss": 0.065, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.9527171850204468, |
|
"rewards/margins": 4.548114776611328, |
|
"rewards/rejected": -5.5008320808410645, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"step": 408, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_loss": 0.23205441671113172, |
|
"train_runtime": 8090.8922, |
|
"train_samples_per_second": 1.616, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 408, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |