|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996190476190476, |
|
"eval_steps": 500, |
|
"global_step": 656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.575757575757576e-08, |
|
"logits/chosen": -0.06230628490447998, |
|
"logits/rejected": 0.387611985206604, |
|
"logps/chosen": -299.96368408203125, |
|
"logps/rejected": -309.7692565917969, |
|
"loss": 0.3457, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.575757575757576e-07, |
|
"logits/chosen": 0.10603617876768112, |
|
"logits/rejected": 0.24432696402072906, |
|
"logps/chosen": -349.7105712890625, |
|
"logps/rejected": -287.6678466796875, |
|
"loss": 0.3317, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": -0.00022038168390281498, |
|
"rewards/margins": -0.0001302291639149189, |
|
"rewards/rejected": -9.01525272638537e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"logits/chosen": 0.09682648628950119, |
|
"logits/rejected": 0.25353121757507324, |
|
"logps/chosen": -334.0283203125, |
|
"logps/rejected": -270.40179443359375, |
|
"loss": 0.3365, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 8.945484069045051e-07, |
|
"rewards/margins": 2.460894847899908e-06, |
|
"rewards/rejected": -1.5663565591239603e-06, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits/chosen": 0.12519071996212006, |
|
"logits/rejected": 0.20205554366111755, |
|
"logps/chosen": -287.5645446777344, |
|
"logps/rejected": -266.9909362792969, |
|
"loss": 0.3417, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0003230299334973097, |
|
"rewards/margins": 0.0002776235924102366, |
|
"rewards/rejected": 4.540634108707309e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"logits/chosen": 0.04107561707496643, |
|
"logits/rejected": 0.20281007885932922, |
|
"logps/chosen": -319.4472351074219, |
|
"logps/rejected": -262.85418701171875, |
|
"loss": 0.3615, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.0008775835740379989, |
|
"rewards/margins": 0.0006280745146796107, |
|
"rewards/rejected": 0.0002495090593583882, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7878787878787882e-06, |
|
"logits/chosen": 0.12531228363513947, |
|
"logits/rejected": 0.27179789543151855, |
|
"logps/chosen": -359.4334411621094, |
|
"logps/rejected": -278.2669982910156, |
|
"loss": 0.3412, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.0017220426816493273, |
|
"rewards/margins": 0.001711520948447287, |
|
"rewards/rejected": 1.0521779586269986e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits/chosen": 0.07350507378578186, |
|
"logits/rejected": 0.22942480444908142, |
|
"logps/chosen": -346.56927490234375, |
|
"logps/rejected": -306.72686767578125, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.0025817144196480513, |
|
"rewards/margins": 0.003724290756508708, |
|
"rewards/rejected": -0.001142576104030013, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999432965739786e-06, |
|
"logits/chosen": 0.09025775641202927, |
|
"logits/rejected": 0.31351155042648315, |
|
"logps/chosen": -381.5528869628906, |
|
"logps/rejected": -286.2391662597656, |
|
"loss": 0.3633, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0029081101529300213, |
|
"rewards/margins": 0.0062509761191904545, |
|
"rewards/rejected": -0.003342865500599146, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9930567839810125e-06, |
|
"logits/chosen": 0.0820033922791481, |
|
"logits/rejected": 0.2647712826728821, |
|
"logps/chosen": -350.4310607910156, |
|
"logps/rejected": -280.1022644042969, |
|
"loss": 0.3175, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0049164495430886745, |
|
"rewards/margins": 0.010990149341523647, |
|
"rewards/rejected": -0.006073700729757547, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.979613761906212e-06, |
|
"logits/chosen": 0.11785046756267548, |
|
"logits/rejected": 0.3276062607765198, |
|
"logps/chosen": -370.00860595703125, |
|
"logps/rejected": -322.6431579589844, |
|
"loss": 0.3244, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.004425252787768841, |
|
"rewards/margins": 0.019340159371495247, |
|
"rewards/rejected": -0.014914905652403831, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959142005221991e-06, |
|
"logits/chosen": 0.19294002652168274, |
|
"logits/rejected": 0.3200022578239441, |
|
"logps/chosen": -369.6689147949219, |
|
"logps/rejected": -329.09893798828125, |
|
"loss": 0.2978, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.018663963302969933, |
|
"rewards/margins": 0.04274021461606026, |
|
"rewards/rejected": -0.06140417978167534, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931699543346854e-06, |
|
"logits/chosen": 0.23536458611488342, |
|
"logits/rejected": 0.35565608739852905, |
|
"logps/chosen": -432.9308166503906, |
|
"logps/rejected": -432.2447204589844, |
|
"loss": 0.2948, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07575322687625885, |
|
"rewards/margins": 0.06633279472589493, |
|
"rewards/rejected": -0.14208602905273438, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.897364164920515e-06, |
|
"logits/chosen": 0.24238090217113495, |
|
"logits/rejected": 0.2669451832771301, |
|
"logps/chosen": -502.5521545410156, |
|
"logps/rejected": -526.5723876953125, |
|
"loss": 0.286, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.16139724850654602, |
|
"rewards/margins": 0.09947613626718521, |
|
"rewards/rejected": -0.26087337732315063, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8562331973035396e-06, |
|
"logits/chosen": 0.24233976006507874, |
|
"logits/rejected": 0.37175804376602173, |
|
"logps/chosen": -561.4464721679688, |
|
"logps/rejected": -616.5276489257812, |
|
"loss": 0.2701, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1863406002521515, |
|
"rewards/margins": 0.11555895954370499, |
|
"rewards/rejected": -0.30189958214759827, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.808423230692374e-06, |
|
"logits/chosen": 0.2752463221549988, |
|
"logits/rejected": 0.41067615151405334, |
|
"logps/chosen": -544.420654296875, |
|
"logps/rejected": -609.9964599609375, |
|
"loss": 0.2437, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.16954469680786133, |
|
"rewards/margins": 0.15670140087604523, |
|
"rewards/rejected": -0.32624611258506775, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.754069787631761e-06, |
|
"logits/chosen": 0.22061054408550262, |
|
"logits/rejected": 0.38979893922805786, |
|
"logps/chosen": -499.3285217285156, |
|
"logps/rejected": -558.7503051757812, |
|
"loss": 0.2709, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1981675624847412, |
|
"rewards/margins": 0.09831173717975616, |
|
"rewards/rejected": -0.2964792847633362, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.693326938861367e-06, |
|
"logits/chosen": 0.2467188537120819, |
|
"logits/rejected": 0.35028940439224243, |
|
"logps/chosen": -526.26513671875, |
|
"logps/rejected": -599.4227294921875, |
|
"loss": 0.2605, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.1936112642288208, |
|
"rewards/margins": 0.1178494542837143, |
|
"rewards/rejected": -0.3114607036113739, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626366866585528e-06, |
|
"logits/chosen": 0.22420647740364075, |
|
"logits/rejected": 0.4213325083255768, |
|
"logps/chosen": -622.0628662109375, |
|
"logps/rejected": -708.9386596679688, |
|
"loss": 0.2402, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2440744936466217, |
|
"rewards/margins": 0.15784001350402832, |
|
"rewards/rejected": -0.40191444754600525, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.553379376404085e-06, |
|
"logits/chosen": 0.27953073382377625, |
|
"logits/rejected": 0.3603507876396179, |
|
"logps/chosen": -601.0642700195312, |
|
"logps/rejected": -663.9993896484375, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2422056943178177, |
|
"rewards/margins": 0.1294884979724884, |
|
"rewards/rejected": -0.3716942071914673, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.474571359287791e-06, |
|
"logits/chosen": 0.23259811103343964, |
|
"logits/rejected": 0.4210383892059326, |
|
"logps/chosen": -572.4783935546875, |
|
"logps/rejected": -687.89306640625, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20176899433135986, |
|
"rewards/margins": 0.18673528730869293, |
|
"rewards/rejected": -0.3885042667388916, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3901662051233755e-06, |
|
"logits/chosen": 0.2610084116458893, |
|
"logits/rejected": 0.3713424801826477, |
|
"logps/chosen": -539.9381103515625, |
|
"logps/rejected": -680.5265502929688, |
|
"loss": 0.2243, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.20527365803718567, |
|
"rewards/margins": 0.1885356605052948, |
|
"rewards/rejected": -0.3938092887401581, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.30040316949064e-06, |
|
"logits/chosen": 0.2715567946434021, |
|
"logits/rejected": 0.3666667640209198, |
|
"logps/chosen": -536.0437622070312, |
|
"logps/rejected": -629.3165283203125, |
|
"loss": 0.248, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.18539923429489136, |
|
"rewards/margins": 0.16864952445030212, |
|
"rewards/rejected": -0.35404878854751587, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.205536695466524e-06, |
|
"logits/chosen": 0.23633995652198792, |
|
"logits/rejected": 0.3769295811653137, |
|
"logps/chosen": -584.9187622070312, |
|
"logps/rejected": -705.7189331054688, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2217777669429779, |
|
"rewards/margins": 0.20478694140911102, |
|
"rewards/rejected": -0.4265647530555725, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.105835692378557e-06, |
|
"logits/chosen": 0.29004430770874023, |
|
"logits/rejected": 0.4667590260505676, |
|
"logps/chosen": -701.4776611328125, |
|
"logps/rejected": -798.4501953125, |
|
"loss": 0.2323, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.316620796918869, |
|
"rewards/margins": 0.18385566771030426, |
|
"rewards/rejected": -0.5004764795303345, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.001582773552153e-06, |
|
"logits/chosen": 0.27633368968963623, |
|
"logits/rejected": 0.42621952295303345, |
|
"logps/chosen": -621.6236572265625, |
|
"logps/rejected": -752.4913940429688, |
|
"loss": 0.2466, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2713666260242462, |
|
"rewards/margins": 0.17953212559223175, |
|
"rewards/rejected": -0.4508987367153168, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.893073455212438e-06, |
|
"logits/chosen": 0.2567233443260193, |
|
"logits/rejected": 0.3767459988594055, |
|
"logps/chosen": -553.5460205078125, |
|
"logps/rejected": -691.7283935546875, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.23452623188495636, |
|
"rewards/margins": 0.17362844944000244, |
|
"rewards/rejected": -0.4081546664237976, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7806153188114027e-06, |
|
"logits/chosen": 0.24300554394721985, |
|
"logits/rejected": 0.30316272377967834, |
|
"logps/chosen": -578.8638916015625, |
|
"logps/rejected": -715.2518310546875, |
|
"loss": 0.2468, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2516656219959259, |
|
"rewards/margins": 0.16586793959140778, |
|
"rewards/rejected": -0.4175335466861725, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6645271391548542e-06, |
|
"logits/chosen": 0.23739242553710938, |
|
"logits/rejected": 0.426413357257843, |
|
"logps/chosen": -634.893798828125, |
|
"logps/rejected": -719.962890625, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.25505417585372925, |
|
"rewards/margins": 0.18401430547237396, |
|
"rewards/rejected": -0.4390684962272644, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5451379808006014e-06, |
|
"logits/chosen": 0.2224760502576828, |
|
"logits/rejected": 0.3537066578865051, |
|
"logps/chosen": -589.7808227539062, |
|
"logps/rejected": -747.9609375, |
|
"loss": 0.2283, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2638700306415558, |
|
"rewards/margins": 0.1954762488603592, |
|
"rewards/rejected": -0.45934629440307617, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4227862652892106e-06, |
|
"logits/chosen": 0.23644611239433289, |
|
"logits/rejected": 0.4177249073982239, |
|
"logps/chosen": -645.470703125, |
|
"logps/rejected": -779.50927734375, |
|
"loss": 0.2437, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.27952614426612854, |
|
"rewards/margins": 0.19378896057605743, |
|
"rewards/rejected": -0.4733150601387024, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2978188118513814e-06, |
|
"logits/chosen": 0.2835056483745575, |
|
"logits/rejected": 0.3992118239402771, |
|
"logps/chosen": -620.6287841796875, |
|
"logps/rejected": -733.6190185546875, |
|
"loss": 0.2377, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2810095250606537, |
|
"rewards/margins": 0.17397195100784302, |
|
"rewards/rejected": -0.4549815058708191, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1705898543111576e-06, |
|
"logits/chosen": 0.2648586630821228, |
|
"logits/rejected": 0.36566048860549927, |
|
"logps/chosen": -600.49853515625, |
|
"logps/rejected": -694.8892822265625, |
|
"loss": 0.2408, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2665795385837555, |
|
"rewards/margins": 0.14690735936164856, |
|
"rewards/rejected": -0.41348689794540405, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.041460036971664e-06, |
|
"logits/chosen": 0.2214949131011963, |
|
"logits/rejected": 0.38505813479423523, |
|
"logps/chosen": -669.3818969726562, |
|
"logps/rejected": -743.3441162109375, |
|
"loss": 0.2197, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.27733737230300903, |
|
"rewards/margins": 0.16739198565483093, |
|
"rewards/rejected": -0.4447293281555176, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910795392329649e-06, |
|
"logits/chosen": 0.19933928549289703, |
|
"logits/rejected": 0.3905678391456604, |
|
"logps/chosen": -631.4853515625, |
|
"logps/rejected": -736.2476806640625, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2883307933807373, |
|
"rewards/margins": 0.17361871898174286, |
|
"rewards/rejected": -0.46194949746131897, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7789663035166035e-06, |
|
"logits/chosen": 0.19208988547325134, |
|
"logits/rejected": 0.3280327022075653, |
|
"logps/chosen": -637.7025146484375, |
|
"logps/rejected": -742.5934448242188, |
|
"loss": 0.229, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2803892493247986, |
|
"rewards/margins": 0.18691286444664001, |
|
"rewards/rejected": -0.46730202436447144, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6463464544075344e-06, |
|
"logits/chosen": 0.2192709892988205, |
|
"logits/rejected": 0.354716956615448, |
|
"logps/chosen": -666.2679443359375, |
|
"logps/rejected": -781.5311889648438, |
|
"loss": 0.2365, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.313618928194046, |
|
"rewards/margins": 0.2000730335712433, |
|
"rewards/rejected": -0.5136920213699341, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.513311770373421e-06, |
|
"logits/chosen": 0.24396447837352753, |
|
"logits/rejected": 0.3740311563014984, |
|
"logps/chosen": -632.1403198242188, |
|
"logps/rejected": -780.8329467773438, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.29758018255233765, |
|
"rewards/margins": 0.20380613207817078, |
|
"rewards/rejected": -0.501386284828186, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.380239352679908e-06, |
|
"logits/chosen": 0.2605392634868622, |
|
"logits/rejected": 0.3702816367149353, |
|
"logps/chosen": -659.3692626953125, |
|
"logps/rejected": -771.1994018554688, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2961762547492981, |
|
"rewards/margins": 0.19712337851524353, |
|
"rewards/rejected": -0.49329957365989685, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.247506409552795e-06, |
|
"logits/chosen": 0.2626807987689972, |
|
"logits/rejected": 0.4523330330848694, |
|
"logps/chosen": -723.3246459960938, |
|
"logps/rejected": -843.2516479492188, |
|
"loss": 0.2222, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.33206573128700256, |
|
"rewards/margins": 0.2155541181564331, |
|
"rewards/rejected": -0.5476198792457581, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1154891869403436e-06, |
|
"logits/chosen": 0.2673259675502777, |
|
"logits/rejected": 0.3809369206428528, |
|
"logps/chosen": -627.7708740234375, |
|
"logps/rejected": -776.5219116210938, |
|
"loss": 0.2345, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2913658320903778, |
|
"rewards/margins": 0.19852493703365326, |
|
"rewards/rejected": -0.48989081382751465, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9845619020032552e-06, |
|
"logits/chosen": 0.22264230251312256, |
|
"logits/rejected": 0.36639919877052307, |
|
"logps/chosen": -656.4054565429688, |
|
"logps/rejected": -789.1956787109375, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.30770599842071533, |
|
"rewards/margins": 0.20198026299476624, |
|
"rewards/rejected": -0.5096862316131592, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8550956823554708e-06, |
|
"logits/chosen": 0.25498437881469727, |
|
"logits/rejected": 0.39235639572143555, |
|
"logps/chosen": -597.4380493164062, |
|
"logps/rejected": -743.4146728515625, |
|
"loss": 0.2504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2715555429458618, |
|
"rewards/margins": 0.1882806420326233, |
|
"rewards/rejected": -0.4598361849784851, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 0.2268383502960205, |
|
"logits/rejected": 0.3757990598678589, |
|
"logps/chosen": -628.30517578125, |
|
"logps/rejected": -744.66455078125, |
|
"loss": 0.2356, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2700210213661194, |
|
"rewards/margins": 0.19132563471794128, |
|
"rewards/rejected": -0.4613465666770935, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6020092013802002e-06, |
|
"logits/chosen": 0.26629549264907837, |
|
"logits/rejected": 0.38806790113449097, |
|
"logps/chosen": -643.1737060546875, |
|
"logps/rejected": -752.5285034179688, |
|
"loss": 0.2192, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.28404101729393005, |
|
"rewards/margins": 0.18914134800434113, |
|
"rewards/rejected": -0.4731822907924652, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4791063411799938e-06, |
|
"logits/chosen": 0.17316266894340515, |
|
"logits/rejected": 0.3880611062049866, |
|
"logps/chosen": -670.3546142578125, |
|
"logps/rejected": -764.1770629882812, |
|
"loss": 0.2366, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3103855550289154, |
|
"rewards/margins": 0.17021675407886505, |
|
"rewards/rejected": -0.48060232400894165, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3590973149722103e-06, |
|
"logits/chosen": 0.24187886714935303, |
|
"logits/rejected": 0.37198665738105774, |
|
"logps/chosen": -628.3953857421875, |
|
"logps/rejected": -773.7864990234375, |
|
"loss": 0.2157, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3083949089050293, |
|
"rewards/margins": 0.18738897144794464, |
|
"rewards/rejected": -0.49578380584716797, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2423223013801946e-06, |
|
"logits/chosen": 0.19144193828105927, |
|
"logits/rejected": 0.3342723846435547, |
|
"logps/chosen": -679.7613525390625, |
|
"logps/rejected": -820.7508544921875, |
|
"loss": 0.2274, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3411591649055481, |
|
"rewards/margins": 0.18641141057014465, |
|
"rewards/rejected": -0.5275705456733704, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1291123118671665e-06, |
|
"logits/chosen": 0.21762657165527344, |
|
"logits/rejected": 0.307900607585907, |
|
"logps/chosen": -651.0516967773438, |
|
"logps/rejected": -805.5900268554688, |
|
"loss": 0.2194, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.309937983751297, |
|
"rewards/margins": 0.20782515406608582, |
|
"rewards/rejected": -0.5177631378173828, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.019788252448267e-06, |
|
"logits/chosen": 0.184129536151886, |
|
"logits/rejected": 0.3611859083175659, |
|
"logps/chosen": -619.1923828125, |
|
"logps/rejected": -762.8614501953125, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30183008313179016, |
|
"rewards/margins": 0.18515999615192413, |
|
"rewards/rejected": -0.4869900643825531, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.146600140475945e-07, |
|
"logits/chosen": 0.20372629165649414, |
|
"logits/rejected": 0.33465132117271423, |
|
"logps/chosen": -636.4930419921875, |
|
"logps/rejected": -756.65576171875, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.2839277386665344, |
|
"rewards/margins": 0.197633758187294, |
|
"rewards/rejected": -0.4815615117549896, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.140255940787059e-07, |
|
"logits/chosen": 0.18791750073432922, |
|
"logits/rejected": 0.4121164381504059, |
|
"logps/chosen": -636.9640502929688, |
|
"logps/rejected": -798.8194580078125, |
|
"loss": 0.2107, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.27898019552230835, |
|
"rewards/margins": 0.21688583493232727, |
|
"rewards/rejected": -0.49586600065231323, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.181702517385789e-07, |
|
"logits/chosen": 0.19490660727024078, |
|
"logits/rejected": 0.3919000029563904, |
|
"logps/chosen": -693.1792602539062, |
|
"logps/rejected": -796.6826171875, |
|
"loss": 0.2212, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.30116355419158936, |
|
"rewards/margins": 0.22377757728099823, |
|
"rewards/rejected": -0.524941086769104, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.273656994094232e-07, |
|
"logits/chosen": 0.21901503205299377, |
|
"logits/rejected": 0.33780670166015625, |
|
"logps/chosen": -620.2666015625, |
|
"logps/rejected": -768.0850830078125, |
|
"loss": 0.2174, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.29874101281166077, |
|
"rewards/margins": 0.19197914004325867, |
|
"rewards/rejected": -0.49072012305259705, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.418693324604082e-07, |
|
"logits/chosen": 0.16905078291893005, |
|
"logits/rejected": 0.3485548198223114, |
|
"logps/chosen": -657.9064331054688, |
|
"logps/rejected": -817.0252075195312, |
|
"loss": 0.22, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3066551983356476, |
|
"rewards/margins": 0.22827258706092834, |
|
"rewards/rejected": -0.5349277853965759, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.619234996325314e-07, |
|
"logits/chosen": 0.2440187931060791, |
|
"logits/rejected": 0.34625181555747986, |
|
"logps/chosen": -640.2761840820312, |
|
"logps/rejected": -762.24609375, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2988249659538269, |
|
"rewards/margins": 0.19742272794246674, |
|
"rewards/rejected": -0.49624767899513245, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.877548160747768e-07, |
|
"logits/chosen": 0.20869365334510803, |
|
"logits/rejected": 0.31174546480178833, |
|
"logps/chosen": -660.8912353515625, |
|
"logps/rejected": -801.5670166015625, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3307650685310364, |
|
"rewards/margins": 0.18925593793392181, |
|
"rewards/rejected": -0.5200210213661194, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.195735209788528e-07, |
|
"logits/chosen": 0.20122747123241425, |
|
"logits/rejected": 0.35053348541259766, |
|
"logps/chosen": -679.0135498046875, |
|
"logps/rejected": -827.4410400390625, |
|
"loss": 0.2419, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3423236012458801, |
|
"rewards/margins": 0.19195982813835144, |
|
"rewards/rejected": -0.534283459186554, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5757288163336806e-07, |
|
"logits/chosen": 0.26682180166244507, |
|
"logits/rejected": 0.3439808785915375, |
|
"logps/chosen": -672.2916259765625, |
|
"logps/rejected": -777.9793701171875, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.312061071395874, |
|
"rewards/margins": 0.18959736824035645, |
|
"rewards/rejected": -0.5016584396362305, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.019286455866981e-07, |
|
"logits/chosen": 0.23167696595191956, |
|
"logits/rejected": 0.3346760869026184, |
|
"logps/chosen": -632.02587890625, |
|
"logps/rejected": -782.4437255859375, |
|
"loss": 0.224, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.29868921637535095, |
|
"rewards/margins": 0.1968630850315094, |
|
"rewards/rejected": -0.49555230140686035, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5279854247146703e-07, |
|
"logits/chosen": 0.19181525707244873, |
|
"logits/rejected": 0.3878282904624939, |
|
"logps/chosen": -682.7297973632812, |
|
"logps/rejected": -799.3133544921875, |
|
"loss": 0.221, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3136585056781769, |
|
"rewards/margins": 0.19802789390087128, |
|
"rewards/rejected": -0.5116864442825317, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1032183690276754e-07, |
|
"logits/chosen": 0.20669761300086975, |
|
"logits/rejected": 0.3533198833465576, |
|
"logps/chosen": -694.8823852539062, |
|
"logps/rejected": -831.9597778320312, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3144986033439636, |
|
"rewards/margins": 0.2219509333372116, |
|
"rewards/rejected": -0.5364495515823364, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.46189337174788e-08, |
|
"logits/chosen": 0.2405429631471634, |
|
"logits/rejected": 0.3472765386104584, |
|
"logps/chosen": -657.0903930664062, |
|
"logps/rejected": -797.1058959960938, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.310764878988266, |
|
"rewards/margins": 0.20058993995189667, |
|
"rewards/rejected": -0.5113548040390015, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.579103667367385e-08, |
|
"logits/chosen": 0.2070445716381073, |
|
"logits/rejected": 0.4006822109222412, |
|
"logps/chosen": -655.3494873046875, |
|
"logps/rejected": -780.321533203125, |
|
"loss": 0.222, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.32885631918907166, |
|
"rewards/margins": 0.17823253571987152, |
|
"rewards/rejected": -0.507088840007782, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3919861577572924e-08, |
|
"logits/chosen": 0.23205919563770294, |
|
"logits/rejected": 0.3889433741569519, |
|
"logps/chosen": -668.5777587890625, |
|
"logps/rejected": -799.8650512695312, |
|
"loss": 0.235, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3039870858192444, |
|
"rewards/margins": 0.20831787586212158, |
|
"rewards/rejected": -0.5123049020767212, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.067404651211808e-09, |
|
"logits/chosen": 0.1636372208595276, |
|
"logits/rejected": 0.3309451639652252, |
|
"logps/chosen": -615.0830688476562, |
|
"logps/rejected": -778.7999267578125, |
|
"loss": 0.2582, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3035675883293152, |
|
"rewards/margins": 0.19927778840065002, |
|
"rewards/rejected": -0.5028454065322876, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2757667974155896e-09, |
|
"logits/chosen": 0.23857417702674866, |
|
"logits/rejected": 0.35767877101898193, |
|
"logps/chosen": -685.1931762695312, |
|
"logps/rejected": -803.7652587890625, |
|
"loss": 0.2486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3033232092857361, |
|
"rewards/margins": 0.2285349816083908, |
|
"rewards/rejected": -0.5318582057952881, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 656, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2497515143236009, |
|
"train_runtime": 7980.5775, |
|
"train_samples_per_second": 2.631, |
|
"train_steps_per_second": 0.082 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 656, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|