|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9996190476190476, |
|
"eval_steps": 500, |
|
"global_step": 1312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.787878787878788e-08, |
|
"logits/chosen": 0.17224127054214478, |
|
"logits/rejected": 0.18124699592590332, |
|
"logps/chosen": -379.32623291015625, |
|
"logps/rejected": -349.5926208496094, |
|
"loss": 0.2902, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.787878787878788e-07, |
|
"logits/chosen": 0.07856506109237671, |
|
"logits/rejected": 0.2510358691215515, |
|
"logps/chosen": -334.9958190917969, |
|
"logps/rejected": -283.54034423828125, |
|
"loss": 0.3745, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 1.0113296411873307e-05, |
|
"rewards/margins": 5.3244151786202565e-05, |
|
"rewards/rejected": -4.313084718887694e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.575757575757576e-07, |
|
"logits/chosen": 0.07835443317890167, |
|
"logits/rejected": 0.23690445721149445, |
|
"logps/chosen": -343.5411682128906, |
|
"logps/rejected": -300.49774169921875, |
|
"loss": 0.345, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 9.991687193178223e-07, |
|
"rewards/margins": 5.578011041507125e-05, |
|
"rewards/rejected": -5.478093953570351e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"logits/chosen": 0.09362699836492538, |
|
"logits/rejected": 0.24969105422496796, |
|
"logps/chosen": -384.11199951171875, |
|
"logps/rejected": -290.873779296875, |
|
"loss": 0.3667, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.00015662855003029108, |
|
"rewards/margins": 9.987165867642034e-06, |
|
"rewards/rejected": 0.0001466413668822497, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"logits/chosen": 0.08275317400693893, |
|
"logits/rejected": 0.20892572402954102, |
|
"logps/chosen": -361.24462890625, |
|
"logps/rejected": -296.53094482421875, |
|
"loss": 0.3012, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00019404105842113495, |
|
"rewards/margins": 0.000132537359604612, |
|
"rewards/rejected": 6.150371336843818e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8939393939393941e-06, |
|
"logits/chosen": 0.14996236562728882, |
|
"logits/rejected": 0.2148081511259079, |
|
"logps/chosen": -339.32916259765625, |
|
"logps/rejected": -292.900146484375, |
|
"loss": 0.3391, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0002820778754539788, |
|
"rewards/margins": 0.00035016084439121187, |
|
"rewards/rejected": -6.80829762131907e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits/chosen": 0.09292325377464294, |
|
"logits/rejected": 0.25342074036598206, |
|
"logps/chosen": -357.79498291015625, |
|
"logps/rejected": -279.56475830078125, |
|
"loss": 0.3523, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0005591081571765244, |
|
"rewards/margins": 0.0007476316532120109, |
|
"rewards/rejected": -0.00018852358334697783, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6515151515151514e-06, |
|
"logits/chosen": 0.1520170122385025, |
|
"logits/rejected": 0.23838527500629425, |
|
"logps/chosen": -344.97381591796875, |
|
"logps/rejected": -272.6285400390625, |
|
"loss": 0.333, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.000808493874501437, |
|
"rewards/margins": 0.0009522804175503552, |
|
"rewards/rejected": -0.0001437865139450878, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"logits/chosen": 0.10541319847106934, |
|
"logits/rejected": 0.2519112229347229, |
|
"logps/chosen": -337.99932861328125, |
|
"logps/rejected": -285.3942565917969, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0017709163948893547, |
|
"rewards/margins": 0.0019623935222625732, |
|
"rewards/rejected": -0.00019147712737321854, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.409090909090909e-06, |
|
"logits/chosen": 0.08055099099874496, |
|
"logits/rejected": 0.23055055737495422, |
|
"logps/chosen": -358.501220703125, |
|
"logps/rejected": -301.07373046875, |
|
"loss": 0.325, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.0031805038452148438, |
|
"rewards/margins": 0.0035764030180871487, |
|
"rewards/rejected": -0.0003958995803259313, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.7878787878787882e-06, |
|
"logits/chosen": 0.1381727159023285, |
|
"logits/rejected": 0.36128586530685425, |
|
"logps/chosen": -408.2177734375, |
|
"logps/rejected": -295.70391845703125, |
|
"loss": 0.3551, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.005324442870914936, |
|
"rewards/margins": 0.006567128002643585, |
|
"rewards/rejected": -0.00124268583022058, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": 0.11063258349895477, |
|
"logits/rejected": 0.3397473692893982, |
|
"logps/chosen": -365.5294494628906, |
|
"logps/rejected": -296.68212890625, |
|
"loss": 0.3176, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0065561323426663876, |
|
"rewards/margins": 0.009804257191717625, |
|
"rewards/rejected": -0.0032481239177286625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits/chosen": 0.10760724544525146, |
|
"logits/rejected": 0.3315739035606384, |
|
"logps/chosen": -369.8674011230469, |
|
"logps/rejected": -290.553955078125, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.009247648529708385, |
|
"rewards/margins": 0.017721759155392647, |
|
"rewards/rejected": -0.008474110625684261, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.924242424242425e-06, |
|
"logits/chosen": 0.19570864737033844, |
|
"logits/rejected": 0.29995661973953247, |
|
"logps/chosen": -357.7752685546875, |
|
"logps/rejected": -298.80865478515625, |
|
"loss": 0.3091, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0016540288925170898, |
|
"rewards/margins": 0.030770784243941307, |
|
"rewards/rejected": -0.029116755351424217, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999432965739786e-06, |
|
"logits/chosen": 0.18279646337032318, |
|
"logits/rejected": 0.24193449318408966, |
|
"logps/chosen": -326.20623779296875, |
|
"logps/rejected": -340.66204833984375, |
|
"loss": 0.3146, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02705274522304535, |
|
"rewards/margins": 0.038378529250621796, |
|
"rewards/rejected": -0.06543128192424774, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.997129829895409e-06, |
|
"logits/chosen": 0.14213022589683533, |
|
"logits/rejected": 0.2512076199054718, |
|
"logps/chosen": -408.73541259765625, |
|
"logps/rejected": -432.90771484375, |
|
"loss": 0.2979, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.0731552243232727, |
|
"rewards/margins": 0.08651129901409149, |
|
"rewards/rejected": -0.159666508436203, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9930567839810125e-06, |
|
"logits/chosen": 0.17261534929275513, |
|
"logits/rejected": 0.27892106771469116, |
|
"logps/chosen": -483.6229553222656, |
|
"logps/rejected": -527.606201171875, |
|
"loss": 0.2727, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.121568962931633, |
|
"rewards/margins": 0.10641299188137054, |
|
"rewards/rejected": -0.22798196971416473, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987216714880929e-06, |
|
"logits/chosen": 0.19240622222423553, |
|
"logits/rejected": 0.22908082604408264, |
|
"logps/chosen": -516.4906005859375, |
|
"logps/rejected": -531.3421020507812, |
|
"loss": 0.278, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.17584487795829773, |
|
"rewards/margins": 0.10210248082876205, |
|
"rewards/rejected": -0.27794739603996277, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.979613761906212e-06, |
|
"logits/chosen": 0.12470928579568863, |
|
"logits/rejected": 0.2596343755722046, |
|
"logps/chosen": -551.5357666015625, |
|
"logps/rejected": -654.6400756835938, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.21089038252830505, |
|
"rewards/margins": 0.15304332971572876, |
|
"rewards/rejected": -0.3639337420463562, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970253313860788e-06, |
|
"logits/chosen": 0.1833106130361557, |
|
"logits/rejected": 0.2698153257369995, |
|
"logps/chosen": -562.19677734375, |
|
"logps/rejected": -634.8347778320312, |
|
"loss": 0.2832, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2445872575044632, |
|
"rewards/margins": 0.11998845636844635, |
|
"rewards/rejected": -0.36457571387290955, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.959142005221991e-06, |
|
"logits/chosen": 0.13080090284347534, |
|
"logits/rejected": 0.21319513022899628, |
|
"logps/chosen": -603.390380859375, |
|
"logps/rejected": -718.3425903320312, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.25654521584510803, |
|
"rewards/margins": 0.16353540122509003, |
|
"rewards/rejected": -0.42008060216903687, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.94628771143819e-06, |
|
"logits/chosen": 0.17947080731391907, |
|
"logits/rejected": 0.2654314935207367, |
|
"logps/chosen": -653.5524291992188, |
|
"logps/rejected": -726.80126953125, |
|
"loss": 0.3026, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2890639305114746, |
|
"rewards/margins": 0.12797169387340546, |
|
"rewards/rejected": -0.41703566908836365, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931699543346854e-06, |
|
"logits/chosen": 0.1152615174651146, |
|
"logits/rejected": 0.25124651193618774, |
|
"logps/chosen": -588.23046875, |
|
"logps/rejected": -704.0632934570312, |
|
"loss": 0.2673, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2535329759120941, |
|
"rewards/margins": 0.17576026916503906, |
|
"rewards/rejected": -0.4292932152748108, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9153878407169815e-06, |
|
"logits/chosen": 0.13138779997825623, |
|
"logits/rejected": 0.1749398410320282, |
|
"logps/chosen": -536.669921875, |
|
"logps/rejected": -619.0616455078125, |
|
"loss": 0.2609, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2183808982372284, |
|
"rewards/margins": 0.12902414798736572, |
|
"rewards/rejected": -0.3474050462245941, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.897364164920515e-06, |
|
"logits/chosen": 0.14722837507724762, |
|
"logits/rejected": 0.2979207932949066, |
|
"logps/chosen": -644.4813232421875, |
|
"logps/rejected": -710.7215576171875, |
|
"loss": 0.2364, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2495686262845993, |
|
"rewards/margins": 0.14611390233039856, |
|
"rewards/rejected": -0.39568251371383667, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": 0.14406076073646545, |
|
"logits/rejected": 0.24922068417072296, |
|
"logps/chosen": -602.2944946289062, |
|
"logps/rejected": -689.9060668945312, |
|
"loss": 0.2803, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.25432199239730835, |
|
"rewards/margins": 0.14366620779037476, |
|
"rewards/rejected": -0.3979881703853607, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8562331973035396e-06, |
|
"logits/chosen": 0.14575393497943878, |
|
"logits/rejected": 0.28581660985946655, |
|
"logps/chosen": -567.1799926757812, |
|
"logps/rejected": -649.4906005859375, |
|
"loss": 0.2541, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.21804265677928925, |
|
"rewards/margins": 0.13621987402439117, |
|
"rewards/rejected": -0.3542625308036804, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.833155058197842e-06, |
|
"logits/chosen": 0.20403075218200684, |
|
"logits/rejected": 0.32390326261520386, |
|
"logps/chosen": -611.0584106445312, |
|
"logps/rejected": -643.7525634765625, |
|
"loss": 0.2827, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.23078171908855438, |
|
"rewards/margins": 0.12669074535369873, |
|
"rewards/rejected": -0.3574724495410919, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.808423230692374e-06, |
|
"logits/chosen": 0.16620513796806335, |
|
"logits/rejected": 0.3157016634941101, |
|
"logps/chosen": -557.9627075195312, |
|
"logps/rejected": -638.0758666992188, |
|
"loss": 0.2304, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1864185631275177, |
|
"rewards/margins": 0.15307244658470154, |
|
"rewards/rejected": -0.33949097990989685, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7820552441562625e-06, |
|
"logits/chosen": 0.18919572234153748, |
|
"logits/rejected": 0.24673417210578918, |
|
"logps/chosen": -532.9969482421875, |
|
"logps/rejected": -591.529296875, |
|
"loss": 0.2764, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.19002854824066162, |
|
"rewards/margins": 0.12101318687200546, |
|
"rewards/rejected": -0.3110417425632477, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.754069787631761e-06, |
|
"logits/chosen": 0.15965518355369568, |
|
"logits/rejected": 0.3048693537712097, |
|
"logps/chosen": -550.864990234375, |
|
"logps/rejected": -679.41357421875, |
|
"loss": 0.25, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.21647436916828156, |
|
"rewards/margins": 0.16393651068210602, |
|
"rewards/rejected": -0.3804108500480652, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.724486696587862e-06, |
|
"logits/chosen": 0.17140202224254608, |
|
"logits/rejected": 0.21409063041210175, |
|
"logps/chosen": -635.85791015625, |
|
"logps/rejected": -747.188232421875, |
|
"loss": 0.258, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.29053109884262085, |
|
"rewards/margins": 0.15401865541934967, |
|
"rewards/rejected": -0.4445497393608093, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.693326938861367e-06, |
|
"logits/chosen": 0.19796046614646912, |
|
"logits/rejected": 0.25672799348831177, |
|
"logps/chosen": -643.25537109375, |
|
"logps/rejected": -801.0643310546875, |
|
"loss": 0.2427, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.31196296215057373, |
|
"rewards/margins": 0.19956240057945251, |
|
"rewards/rejected": -0.5115253925323486, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.660612599795343e-06, |
|
"logits/chosen": 0.11408114433288574, |
|
"logits/rejected": 0.22174029052257538, |
|
"logps/chosen": -647.49658203125, |
|
"logps/rejected": -756.5814208984375, |
|
"loss": 0.2472, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.29143720865249634, |
|
"rewards/margins": 0.1790235936641693, |
|
"rewards/rejected": -0.47046083211898804, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626366866585528e-06, |
|
"logits/chosen": 0.08651003241539001, |
|
"logits/rejected": 0.29140934348106384, |
|
"logps/chosen": -647.8692626953125, |
|
"logps/rejected": -736.3836669921875, |
|
"loss": 0.2449, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.28671354055404663, |
|
"rewards/margins": 0.16735221445560455, |
|
"rewards/rejected": -0.45406574010849, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.590614011845758e-06, |
|
"logits/chosen": 0.14404548704624176, |
|
"logits/rejected": 0.26181453466415405, |
|
"logps/chosen": -638.6263427734375, |
|
"logps/rejected": -730.7149658203125, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2571524977684021, |
|
"rewards/margins": 0.1780269742012024, |
|
"rewards/rejected": -0.4351794719696045, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.553379376404085e-06, |
|
"logits/chosen": 0.17719906568527222, |
|
"logits/rejected": 0.17130860686302185, |
|
"logps/chosen": -565.2915649414062, |
|
"logps/rejected": -659.3453369140625, |
|
"loss": 0.252, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.23078814148902893, |
|
"rewards/margins": 0.16517826914787292, |
|
"rewards/rejected": -0.39596638083457947, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.514689351341751e-06, |
|
"logits/chosen": 0.18946215510368347, |
|
"logits/rejected": 0.2520686089992523, |
|
"logps/chosen": -707.220458984375, |
|
"logps/rejected": -799.5474853515625, |
|
"loss": 0.2296, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.29390206933021545, |
|
"rewards/margins": 0.1939956247806549, |
|
"rewards/rejected": -0.48789769411087036, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.474571359287791e-06, |
|
"logits/chosen": 0.1794353723526001, |
|
"logits/rejected": 0.20889122784137726, |
|
"logps/chosen": -659.88720703125, |
|
"logps/rejected": -792.5078125, |
|
"loss": 0.2767, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.32972732186317444, |
|
"rewards/margins": 0.16797736287117004, |
|
"rewards/rejected": -0.4977046847343445, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4330538349824684e-06, |
|
"logits/chosen": 0.15636876225471497, |
|
"logits/rejected": 0.2390608787536621, |
|
"logps/chosen": -675.450927734375, |
|
"logps/rejected": -797.5559692382812, |
|
"loss": 0.2478, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3252645432949066, |
|
"rewards/margins": 0.17893439531326294, |
|
"rewards/rejected": -0.5041989684104919, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.3901662051233755e-06, |
|
"logits/chosen": 0.1320812702178955, |
|
"logits/rejected": 0.25596413016319275, |
|
"logps/chosen": -722.6912841796875, |
|
"logps/rejected": -806.4666748046875, |
|
"loss": 0.2426, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.33014723658561707, |
|
"rewards/margins": 0.17146170139312744, |
|
"rewards/rejected": -0.5016089677810669, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.345938867508439e-06, |
|
"logits/chosen": 0.15700757503509521, |
|
"logits/rejected": 0.2542612552642822, |
|
"logps/chosen": -715.8280029296875, |
|
"logps/rejected": -812.6696166992188, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.34785327315330505, |
|
"rewards/margins": 0.17259207367897034, |
|
"rewards/rejected": -0.5204453468322754, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.30040316949064e-06, |
|
"logits/chosen": 0.12117477506399155, |
|
"logits/rejected": 0.21865728497505188, |
|
"logps/chosen": -640.8743896484375, |
|
"logps/rejected": -720.25439453125, |
|
"loss": 0.2724, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.30353325605392456, |
|
"rewards/margins": 0.136855810880661, |
|
"rewards/rejected": -0.44038906693458557, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.253591385759705e-06, |
|
"logits/chosen": 0.1048569455742836, |
|
"logits/rejected": 0.2262450009584427, |
|
"logps/chosen": -654.50634765625, |
|
"logps/rejected": -762.971435546875, |
|
"loss": 0.2437, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.30736270546913147, |
|
"rewards/margins": 0.18053661286830902, |
|
"rewards/rejected": -0.48789939284324646, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.205536695466524e-06, |
|
"logits/chosen": 0.1525915265083313, |
|
"logits/rejected": 0.27372902631759644, |
|
"logps/chosen": -654.9791259765625, |
|
"logps/rejected": -811.6696166992188, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.319768488407135, |
|
"rewards/margins": 0.19004443287849426, |
|
"rewards/rejected": -0.5098129510879517, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.15627315870651e-06, |
|
"logits/chosen": 0.136866956949234, |
|
"logits/rejected": 0.21996262669563293, |
|
"logps/chosen": -676.5054931640625, |
|
"logps/rejected": -751.3809204101562, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3240048289299011, |
|
"rewards/margins": 0.1569635421037674, |
|
"rewards/rejected": -0.48096832633018494, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.105835692378557e-06, |
|
"logits/chosen": 0.17371432483196259, |
|
"logits/rejected": 0.2416466474533081, |
|
"logps/chosen": -675.8497314453125, |
|
"logps/rejected": -758.9337158203125, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.28624922037124634, |
|
"rewards/margins": 0.17619088292121887, |
|
"rewards/rejected": -0.4624401032924652, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.05426004543672e-06, |
|
"logits/chosen": 0.12746404111385345, |
|
"logits/rejected": 0.2692243754863739, |
|
"logps/chosen": -697.2105712890625, |
|
"logps/rejected": -788.6505126953125, |
|
"loss": 0.2271, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.3282613158226013, |
|
"rewards/margins": 0.18242862820625305, |
|
"rewards/rejected": -0.510689914226532, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.001582773552153e-06, |
|
"logits/chosen": 0.19310589134693146, |
|
"logits/rejected": 0.24649909138679504, |
|
"logps/chosen": -676.9974365234375, |
|
"logps/rejected": -746.8987426757812, |
|
"loss": 0.2236, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.31384724378585815, |
|
"rewards/margins": 0.16469937562942505, |
|
"rewards/rejected": -0.478546679019928, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.947841213203262e-06, |
|
"logits/chosen": 0.15690350532531738, |
|
"logits/rejected": 0.300692081451416, |
|
"logps/chosen": -735.6814575195312, |
|
"logps/rejected": -848.1085815429688, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.34843936562538147, |
|
"rewards/margins": 0.20286861062049866, |
|
"rewards/rejected": -0.5513080358505249, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.893073455212438e-06, |
|
"logits/chosen": 0.18581806123256683, |
|
"logits/rejected": 0.28662142157554626, |
|
"logps/chosen": -699.9567260742188, |
|
"logps/rejected": -875.0812377929688, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.35302621126174927, |
|
"rewards/margins": 0.2367512434720993, |
|
"rewards/rejected": -0.589777410030365, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.837318317748134e-06, |
|
"logits/chosen": 0.2250034064054489, |
|
"logits/rejected": 0.25657814741134644, |
|
"logps/chosen": -629.2000732421875, |
|
"logps/rejected": -729.8438720703125, |
|
"loss": 0.2881, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2759498357772827, |
|
"rewards/margins": 0.16293799877166748, |
|
"rewards/rejected": -0.43888789415359497, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7806153188114027e-06, |
|
"logits/chosen": 0.15463793277740479, |
|
"logits/rejected": 0.26033270359039307, |
|
"logps/chosen": -536.32861328125, |
|
"logps/rejected": -633.5609130859375, |
|
"loss": 0.2673, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.219068244099617, |
|
"rewards/margins": 0.13708043098449707, |
|
"rewards/rejected": -0.3561486601829529, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7230046482264256e-06, |
|
"logits/chosen": 0.1772613823413849, |
|
"logits/rejected": 0.3070564270019531, |
|
"logps/chosen": -638.0252685546875, |
|
"logps/rejected": -699.7626953125, |
|
"loss": 0.2509, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.24965958297252655, |
|
"rewards/margins": 0.15243235230445862, |
|
"rewards/rejected": -0.402091920375824, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6645271391548542e-06, |
|
"logits/chosen": 0.1249006986618042, |
|
"logits/rejected": 0.20080497860908508, |
|
"logps/chosen": -620.0784912109375, |
|
"logps/rejected": -716.3678588867188, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.2955653667449951, |
|
"rewards/margins": 0.15287812054157257, |
|
"rewards/rejected": -0.4484435021877289, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6052242391541746e-06, |
|
"logits/chosen": 0.1353389322757721, |
|
"logits/rejected": 0.22389094531536102, |
|
"logps/chosen": -614.9147338867188, |
|
"logps/rejected": -771.8558349609375, |
|
"loss": 0.2102, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2764993906021118, |
|
"rewards/margins": 0.18298561871051788, |
|
"rewards/rejected": -0.4594849944114685, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5451379808006014e-06, |
|
"logits/chosen": 0.17011868953704834, |
|
"logits/rejected": 0.27556750178337097, |
|
"logps/chosen": -658.0120849609375, |
|
"logps/rejected": -755.5858764648438, |
|
"loss": 0.2345, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.29901498556137085, |
|
"rewards/margins": 0.18377789855003357, |
|
"rewards/rejected": -0.48279285430908203, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.484310951897323e-06, |
|
"logits/chosen": 0.1761491745710373, |
|
"logits/rejected": 0.30945947766304016, |
|
"logps/chosen": -717.3760986328125, |
|
"logps/rejected": -787.5486450195312, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.32107868790626526, |
|
"rewards/margins": 0.1693280041217804, |
|
"rewards/rejected": -0.49040669202804565, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4227862652892106e-06, |
|
"logits/chosen": 0.20576810836791992, |
|
"logits/rejected": 0.26615187525749207, |
|
"logps/chosen": -668.9432983398438, |
|
"logps/rejected": -773.5794067382812, |
|
"loss": 0.2502, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.31081390380859375, |
|
"rewards/margins": 0.15438678860664368, |
|
"rewards/rejected": -0.46520066261291504, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3606075283054005e-06, |
|
"logits/chosen": 0.18598072230815887, |
|
"logits/rejected": 0.2837832570075989, |
|
"logps/chosen": -603.8995361328125, |
|
"logps/rejected": -735.7412719726562, |
|
"loss": 0.208, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2771075367927551, |
|
"rewards/margins": 0.1824244260787964, |
|
"rewards/rejected": -0.4595320224761963, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2978188118513814e-06, |
|
"logits/chosen": 0.1718396097421646, |
|
"logits/rejected": 0.3636724054813385, |
|
"logps/chosen": -653.9473266601562, |
|
"logps/rejected": -759.1709594726562, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.28921762108802795, |
|
"rewards/margins": 0.19078542292118073, |
|
"rewards/rejected": -0.4800030589103699, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.234464619172522e-06, |
|
"logits/chosen": 0.1497826725244522, |
|
"logits/rejected": 0.2777649164199829, |
|
"logps/chosen": -638.6043090820312, |
|
"logps/rejected": -779.666748046875, |
|
"loss": 0.223, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.28172770142555237, |
|
"rewards/margins": 0.22779683768749237, |
|
"rewards/rejected": -0.5095245242118835, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1705898543111576e-06, |
|
"logits/chosen": 0.13684847950935364, |
|
"logits/rejected": 0.24817728996276855, |
|
"logps/chosen": -634.4373779296875, |
|
"logps/rejected": -796.2052612304688, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2933855652809143, |
|
"rewards/margins": 0.20933008193969727, |
|
"rewards/rejected": -0.5027156472206116, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.106239790279606e-06, |
|
"logits/chosen": 0.16658630967140198, |
|
"logits/rejected": 0.2954631447792053, |
|
"logps/chosen": -654.8861083984375, |
|
"logps/rejected": -780.149169921875, |
|
"loss": 0.2097, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3087119460105896, |
|
"rewards/margins": 0.1822930872440338, |
|
"rewards/rejected": -0.4910050928592682, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.041460036971664e-06, |
|
"logits/chosen": 0.13499195873737335, |
|
"logits/rejected": 0.27665549516677856, |
|
"logps/chosen": -655.3276977539062, |
|
"logps/rejected": -844.3450927734375, |
|
"loss": 0.2251, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3263702094554901, |
|
"rewards/margins": 0.22413411736488342, |
|
"rewards/rejected": -0.5505043268203735, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.976296508835326e-06, |
|
"logits/chosen": 0.16133855283260345, |
|
"logits/rejected": 0.256599485874176, |
|
"logps/chosen": -660.0172119140625, |
|
"logps/rejected": -769.0188598632812, |
|
"loss": 0.2228, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3122148811817169, |
|
"rewards/margins": 0.18627096712589264, |
|
"rewards/rejected": -0.49848586320877075, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.910795392329649e-06, |
|
"logits/chosen": 0.1639477163553238, |
|
"logits/rejected": 0.24110262095928192, |
|
"logps/chosen": -739.8809204101562, |
|
"logps/rejected": -895.7755126953125, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.33884376287460327, |
|
"rewards/margins": 0.23572292923927307, |
|
"rewards/rejected": -0.5745667219161987, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8450031131888147e-06, |
|
"logits/chosen": 0.13471753895282745, |
|
"logits/rejected": 0.27453264594078064, |
|
"logps/chosen": -660.6944580078125, |
|
"logps/rejected": -760.9178466796875, |
|
"loss": 0.2316, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.32461482286453247, |
|
"rewards/margins": 0.18133333325386047, |
|
"rewards/rejected": -0.5059481859207153, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7789663035166035e-06, |
|
"logits/chosen": 0.07190994918346405, |
|
"logits/rejected": 0.22703304886817932, |
|
"logps/chosen": -670.5394287109375, |
|
"logps/rejected": -823.9978637695312, |
|
"loss": 0.2036, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.3125961720943451, |
|
"rewards/margins": 0.2316323220729828, |
|
"rewards/rejected": -0.5442285537719727, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7127317687345973e-06, |
|
"logits/chosen": 0.07092462480068207, |
|
"logits/rejected": 0.26120471954345703, |
|
"logps/chosen": -671.69189453125, |
|
"logps/rejected": -811.6243286132812, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.30282872915267944, |
|
"rewards/margins": 0.21179743111133575, |
|
"rewards/rejected": -0.514626145362854, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6463464544075344e-06, |
|
"logits/chosen": 0.14573340117931366, |
|
"logits/rejected": 0.23916473984718323, |
|
"logps/chosen": -684.8336181640625, |
|
"logps/rejected": -777.0270385742188, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3061198592185974, |
|
"rewards/margins": 0.19952335953712463, |
|
"rewards/rejected": -0.5056431889533997, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.579857412969345e-06, |
|
"logits/chosen": 0.11962984502315521, |
|
"logits/rejected": 0.23462708294391632, |
|
"logps/chosen": -730.8359375, |
|
"logps/rejected": -856.9847412109375, |
|
"loss": 0.237, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.37815654277801514, |
|
"rewards/margins": 0.18362250924110413, |
|
"rewards/rejected": -0.5617790818214417, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.513311770373421e-06, |
|
"logits/chosen": 0.11839403957128525, |
|
"logits/rejected": 0.16228660941123962, |
|
"logps/chosen": -644.6649780273438, |
|
"logps/rejected": -824.513671875, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.32936322689056396, |
|
"rewards/margins": 0.2137618511915207, |
|
"rewards/rejected": -0.5431250929832458, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.446756692690804e-06, |
|
"logits/chosen": 0.13511911034584045, |
|
"logits/rejected": 0.2399536371231079, |
|
"logps/chosen": -717.3846435546875, |
|
"logps/rejected": -858.6282958984375, |
|
"loss": 0.2365, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3688200116157532, |
|
"rewards/margins": 0.2047378122806549, |
|
"rewards/rejected": -0.5735577940940857, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.380239352679908e-06, |
|
"logits/chosen": 0.14800789952278137, |
|
"logits/rejected": 0.2702687382698059, |
|
"logps/chosen": -674.1021728515625, |
|
"logps/rejected": -830.0198974609375, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.31319013237953186, |
|
"rewards/margins": 0.2306814193725586, |
|
"rewards/rejected": -0.5438715219497681, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.313806896351529e-06, |
|
"logits/chosen": 0.07657043635845184, |
|
"logits/rejected": 0.2402833253145218, |
|
"logps/chosen": -611.2683715820312, |
|
"logps/rejected": -762.6300048828125, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3043960928916931, |
|
"rewards/margins": 0.188668355345726, |
|
"rewards/rejected": -0.49306440353393555, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.247506409552795e-06, |
|
"logits/chosen": 0.11987291276454926, |
|
"logits/rejected": 0.1521257907152176, |
|
"logps/chosen": -654.2996826171875, |
|
"logps/rejected": -776.6150512695312, |
|
"loss": 0.248, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2802131772041321, |
|
"rewards/margins": 0.1828833967447281, |
|
"rewards/rejected": -0.463096559047699, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1813848845937695e-06, |
|
"logits/chosen": 0.07216247916221619, |
|
"logits/rejected": 0.16098852455615997, |
|
"logps/chosen": -612.78759765625, |
|
"logps/rejected": -761.1871948242188, |
|
"loss": 0.2486, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.31463655829429626, |
|
"rewards/margins": 0.16312028467655182, |
|
"rewards/rejected": -0.4777568280696869, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1154891869403436e-06, |
|
"logits/chosen": 0.056468479335308075, |
|
"logits/rejected": 0.21281781792640686, |
|
"logps/chosen": -676.2210693359375, |
|
"logps/rejected": -826.8810424804688, |
|
"loss": 0.202, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.31608521938323975, |
|
"rewards/margins": 0.22052684426307678, |
|
"rewards/rejected": -0.5366120934486389, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0498660219970395e-06, |
|
"logits/chosen": 0.14304831624031067, |
|
"logits/rejected": 0.2865908741950989, |
|
"logps/chosen": -720.0322875976562, |
|
"logps/rejected": -834.4865112304688, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.3310641646385193, |
|
"rewards/margins": 0.21627302467823029, |
|
"rewards/rejected": -0.5473372340202332, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9845619020032552e-06, |
|
"logits/chosen": 0.0913710817694664, |
|
"logits/rejected": 0.2621229588985443, |
|
"logps/chosen": -681.4385986328125, |
|
"logps/rejected": -793.8607788085938, |
|
"loss": 0.2232, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.33381250500679016, |
|
"rewards/margins": 0.18326430022716522, |
|
"rewards/rejected": -0.5170767903327942, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9196231130664282e-06, |
|
"logits/chosen": 0.12347328662872314, |
|
"logits/rejected": 0.22142863273620605, |
|
"logps/chosen": -656.6223754882812, |
|
"logps/rejected": -849.30908203125, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.32625263929367065, |
|
"rewards/margins": 0.2373245507478714, |
|
"rewards/rejected": -0.5635771751403809, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8550956823554708e-06, |
|
"logits/chosen": 0.12958547472953796, |
|
"logits/rejected": 0.20247094333171844, |
|
"logps/chosen": -676.3146362304688, |
|
"logps/rejected": -852.1516723632812, |
|
"loss": 0.2231, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3258567452430725, |
|
"rewards/margins": 0.22926822304725647, |
|
"rewards/rejected": -0.5551249384880066, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7910253454777346e-06, |
|
"logits/chosen": 0.1100487932562828, |
|
"logits/rejected": 0.178089901804924, |
|
"logps/chosen": -628.2322387695312, |
|
"logps/rejected": -760.2506103515625, |
|
"loss": 0.2364, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3000151813030243, |
|
"rewards/margins": 0.20657257735729218, |
|
"rewards/rejected": -0.5065878033638, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 0.06159307807683945, |
|
"logits/rejected": 0.21589604020118713, |
|
"logps/chosen": -658.613525390625, |
|
"logps/rejected": -839.2703247070312, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3004804253578186, |
|
"rewards/margins": 0.23831859230995178, |
|
"rewards/rejected": -0.5387989282608032, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6644372435748823e-06, |
|
"logits/chosen": 0.13025906682014465, |
|
"logits/rejected": 0.25792089104652405, |
|
"logps/chosen": -657.7048950195312, |
|
"logps/rejected": -741.805908203125, |
|
"loss": 0.2056, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3026004731655121, |
|
"rewards/margins": 0.18632087111473083, |
|
"rewards/rejected": -0.4889214038848877, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6020092013802002e-06, |
|
"logits/chosen": 0.13131192326545715, |
|
"logits/rejected": 0.2267313450574875, |
|
"logps/chosen": -609.1980590820312, |
|
"logps/rejected": -762.2578735351562, |
|
"loss": 0.2173, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.2911318838596344, |
|
"rewards/margins": 0.19050107896327972, |
|
"rewards/rejected": -0.48163294792175293, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5402176350860653e-06, |
|
"logits/chosen": 0.1238790899515152, |
|
"logits/rejected": 0.215298131108284, |
|
"logps/chosen": -654.3424072265625, |
|
"logps/rejected": -766.5152587890625, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.311137855052948, |
|
"rewards/margins": 0.18526899814605713, |
|
"rewards/rejected": -0.4964068531990051, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4791063411799938e-06, |
|
"logits/chosen": 0.07573570311069489, |
|
"logits/rejected": 0.16694195568561554, |
|
"logps/chosen": -611.1826782226562, |
|
"logps/rejected": -747.8428955078125, |
|
"loss": 0.225, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.28111016750335693, |
|
"rewards/margins": 0.19318345189094543, |
|
"rewards/rejected": -0.47429361939430237, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4187186339875697e-06, |
|
"logits/chosen": 0.14150698482990265, |
|
"logits/rejected": 0.19218984246253967, |
|
"logps/chosen": -653.740478515625, |
|
"logps/rejected": -790.3258666992188, |
|
"loss": 0.1959, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.2978840172290802, |
|
"rewards/margins": 0.19249173998832703, |
|
"rewards/rejected": -0.4903757572174072, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3590973149722103e-06, |
|
"logits/chosen": 0.07968850433826447, |
|
"logits/rejected": 0.24394066631793976, |
|
"logps/chosen": -660.4170532226562, |
|
"logps/rejected": -787.5786743164062, |
|
"loss": 0.2189, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.3150777518749237, |
|
"rewards/margins": 0.19490757584571838, |
|
"rewards/rejected": -0.5099853277206421, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.300284642398445e-06, |
|
"logits/chosen": 0.1339327096939087, |
|
"logits/rejected": 0.19529958069324493, |
|
"logps/chosen": -637.8862915039062, |
|
"logps/rejected": -804.20263671875, |
|
"loss": 0.2179, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.30193617939949036, |
|
"rewards/margins": 0.2071986198425293, |
|
"rewards/rejected": -0.5091347694396973, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2423223013801946e-06, |
|
"logits/chosen": 0.09140697866678238, |
|
"logits/rejected": 0.23906917870044708, |
|
"logps/chosen": -615.2650146484375, |
|
"logps/rejected": -794.8267211914062, |
|
"loss": 0.2345, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.290291965007782, |
|
"rewards/margins": 0.21678297221660614, |
|
"rewards/rejected": -0.5070749521255493, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1852513743352886e-06, |
|
"logits/chosen": 0.09063401818275452, |
|
"logits/rejected": 0.18860659003257751, |
|
"logps/chosen": -603.1500854492188, |
|
"logps/rejected": -759.2166748046875, |
|
"loss": 0.1973, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.26861757040023804, |
|
"rewards/margins": 0.2037418633699417, |
|
"rewards/rejected": -0.47235947847366333, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1291123118671665e-06, |
|
"logits/chosen": 0.082930788397789, |
|
"logits/rejected": 0.177327498793602, |
|
"logps/chosen": -685.0097045898438, |
|
"logps/rejected": -753.899658203125, |
|
"loss": 0.2698, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.31101205945014954, |
|
"rewards/margins": 0.16297343373298645, |
|
"rewards/rejected": -0.473985493183136, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.073944904094385e-06, |
|
"logits/chosen": 0.12176795303821564, |
|
"logits/rejected": 0.24055452644824982, |
|
"logps/chosen": -727.9547729492188, |
|
"logps/rejected": -807.3270874023438, |
|
"loss": 0.2284, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.3418019115924835, |
|
"rewards/margins": 0.1853446215391159, |
|
"rewards/rejected": -0.5271465182304382, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.019788252448267e-06, |
|
"logits/chosen": 0.1278046816587448, |
|
"logits/rejected": 0.18657834827899933, |
|
"logps/chosen": -629.8650512695312, |
|
"logps/rejected": -775.0892333984375, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.31090590357780457, |
|
"rewards/margins": 0.19522252678871155, |
|
"rewards/rejected": -0.5061284899711609, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.66680741958685e-07, |
|
"logits/chosen": 0.06174594908952713, |
|
"logits/rejected": 0.21714496612548828, |
|
"logps/chosen": -706.654052734375, |
|
"logps/rejected": -845.1712646484375, |
|
"loss": 0.2177, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.33396342396736145, |
|
"rewards/margins": 0.21265073120594025, |
|
"rewards/rejected": -0.5466141700744629, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.146600140475945e-07, |
|
"logits/chosen": 0.09592024236917496, |
|
"logits/rejected": 0.1995583027601242, |
|
"logps/chosen": -628.862548828125, |
|
"logps/rejected": -779.5205688476562, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3146194815635681, |
|
"rewards/margins": 0.17806106805801392, |
|
"rewards/rejected": -0.49268054962158203, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.637629398496378e-07, |
|
"logits/chosen": 0.06787069141864777, |
|
"logits/rejected": 0.19926394522190094, |
|
"logps/chosen": -675.4822387695312, |
|
"logps/rejected": -792.4046630859375, |
|
"loss": 0.2406, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.31674814224243164, |
|
"rewards/margins": 0.19142326712608337, |
|
"rewards/rejected": -0.5081714391708374, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.140255940787059e-07, |
|
"logits/chosen": 0.14530155062675476, |
|
"logits/rejected": 0.2198048084974289, |
|
"logps/chosen": -646.3609008789062, |
|
"logps/rejected": -774.7366333007812, |
|
"loss": 0.2234, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.30126315355300903, |
|
"rewards/margins": 0.1865108758211136, |
|
"rewards/rejected": -0.4877740442752838, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.654832294589776e-07, |
|
"logits/chosen": 0.14776812493801117, |
|
"logits/rejected": 0.18824756145477295, |
|
"logps/chosen": -676.3685302734375, |
|
"logps/rejected": -820.2717895507812, |
|
"loss": 0.242, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.3261755406856537, |
|
"rewards/margins": 0.19091561436653137, |
|
"rewards/rejected": -0.5170911550521851, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.181702517385789e-07, |
|
"logits/chosen": 0.15526942908763885, |
|
"logits/rejected": 0.20596864819526672, |
|
"logps/chosen": -720.7452392578125, |
|
"logps/rejected": -820.1512451171875, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.33318132162094116, |
|
"rewards/margins": 0.173573836684227, |
|
"rewards/rejected": -0.5067551732063293, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.721201953035511e-07, |
|
"logits/chosen": 0.09918368607759476, |
|
"logits/rejected": 0.23762516677379608, |
|
"logps/chosen": -690.7906494140625, |
|
"logps/rejected": -799.6129150390625, |
|
"loss": 0.222, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.31717661023139954, |
|
"rewards/margins": 0.19311536848545074, |
|
"rewards/rejected": -0.5102919340133667, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.273656994094232e-07, |
|
"logits/chosen": 0.08452818542718887, |
|
"logits/rejected": 0.15428626537322998, |
|
"logps/chosen": -674.3331298828125, |
|
"logps/rejected": -852.9098510742188, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3417903482913971, |
|
"rewards/margins": 0.20718708634376526, |
|
"rewards/rejected": -0.5489774942398071, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.839384850472359e-07, |
|
"logits/chosen": 0.11137109994888306, |
|
"logits/rejected": 0.23986658453941345, |
|
"logps/chosen": -681.7805786132812, |
|
"logps/rejected": -840.8401489257812, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.33553558588027954, |
|
"rewards/margins": 0.22832973301410675, |
|
"rewards/rejected": -0.5638653039932251, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.418693324604082e-07, |
|
"logits/chosen": 0.054320525377988815, |
|
"logits/rejected": 0.2136838734149933, |
|
"logps/chosen": -713.266357421875, |
|
"logps/rejected": -872.5158081054688, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3621819019317627, |
|
"rewards/margins": 0.21344491839408875, |
|
"rewards/rejected": -0.5756268501281738, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.01188059328386e-07, |
|
"logits/chosen": 0.10655899345874786, |
|
"logits/rejected": 0.20958073437213898, |
|
"logps/chosen": -671.4863891601562, |
|
"logps/rejected": -796.0863647460938, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.31292790174484253, |
|
"rewards/margins": 0.21162652969360352, |
|
"rewards/rejected": -0.5245543718338013, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.619234996325314e-07, |
|
"logits/chosen": 0.1319715678691864, |
|
"logits/rejected": 0.21460673213005066, |
|
"logps/chosen": -709.2652587890625, |
|
"logps/rejected": -860.7151489257812, |
|
"loss": 0.2324, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3620796799659729, |
|
"rewards/margins": 0.1911153942346573, |
|
"rewards/rejected": -0.5531951189041138, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.241034832192434e-07, |
|
"logits/chosen": 0.10131983458995819, |
|
"logits/rejected": 0.1885637789964676, |
|
"logps/chosen": -677.9981079101562, |
|
"logps/rejected": -864.3063354492188, |
|
"loss": 0.2159, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3420425057411194, |
|
"rewards/margins": 0.23228974640369415, |
|
"rewards/rejected": -0.5743322372436523, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.877548160747768e-07, |
|
"logits/chosen": 0.11466535180807114, |
|
"logits/rejected": 0.24565303325653076, |
|
"logps/chosen": -680.638671875, |
|
"logps/rejected": -798.11376953125, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.30832600593566895, |
|
"rewards/margins": 0.23151281476020813, |
|
"rewards/rejected": -0.5398387908935547, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.529032613257574e-07, |
|
"logits/chosen": 0.10837472975254059, |
|
"logits/rejected": 0.17577466368675232, |
|
"logps/chosen": -682.72216796875, |
|
"logps/rejected": -828.9464111328125, |
|
"loss": 0.2376, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3374750316143036, |
|
"rewards/margins": 0.20901791751384735, |
|
"rewards/rejected": -0.5464929342269897, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.195735209788528e-07, |
|
"logits/chosen": 0.1015692800283432, |
|
"logits/rejected": 0.1785646677017212, |
|
"logps/chosen": -639.4000244140625, |
|
"logps/rejected": -781.9521484375, |
|
"loss": 0.2455, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.33476072549819946, |
|
"rewards/margins": 0.18203067779541016, |
|
"rewards/rejected": -0.5167914032936096, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8778921841253774e-07, |
|
"logits/chosen": 0.06415721774101257, |
|
"logits/rejected": 0.2145998477935791, |
|
"logps/chosen": -690.4867553710938, |
|
"logps/rejected": -865.8243408203125, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3243727684020996, |
|
"rewards/margins": 0.251103937625885, |
|
"rewards/rejected": -0.5754767656326294, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5757288163336806e-07, |
|
"logits/chosen": 0.08277393132448196, |
|
"logits/rejected": 0.20556513965129852, |
|
"logps/chosen": -701.6412353515625, |
|
"logps/rejected": -884.2093505859375, |
|
"loss": 0.2016, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.34318509697914124, |
|
"rewards/margins": 0.23703384399414062, |
|
"rewards/rejected": -0.5802189707756042, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2894592730863336e-07, |
|
"logits/chosen": 0.08898656070232391, |
|
"logits/rejected": 0.19494621455669403, |
|
"logps/chosen": -663.3425903320312, |
|
"logps/rejected": -838.2100830078125, |
|
"loss": 0.2081, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.3233157992362976, |
|
"rewards/margins": 0.22612051665782928, |
|
"rewards/rejected": -0.5494363903999329, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.019286455866981e-07, |
|
"logits/chosen": 0.09930244833230972, |
|
"logits/rejected": 0.25030988454818726, |
|
"logps/chosen": -665.492919921875, |
|
"logps/rejected": -791.5203857421875, |
|
"loss": 0.2154, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.3297829031944275, |
|
"rewards/margins": 0.19049373269081116, |
|
"rewards/rejected": -0.520276665687561, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7654018571579557e-07, |
|
"logits/chosen": 0.07714973390102386, |
|
"logits/rejected": 0.1515274941921234, |
|
"logps/chosen": -710.2567749023438, |
|
"logps/rejected": -828.8133544921875, |
|
"loss": 0.2217, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3351573944091797, |
|
"rewards/margins": 0.20143434405326843, |
|
"rewards/rejected": -0.5365917086601257, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5279854247146703e-07, |
|
"logits/chosen": 0.08655952662229538, |
|
"logits/rejected": 0.18316319584846497, |
|
"logps/chosen": -664.911376953125, |
|
"logps/rejected": -804.0281982421875, |
|
"loss": 0.2363, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3494827151298523, |
|
"rewards/margins": 0.19858424365520477, |
|
"rewards/rejected": -0.5480669140815735, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.307205434022671e-07, |
|
"logits/chosen": 0.0854581817984581, |
|
"logits/rejected": 0.21030446887016296, |
|
"logps/chosen": -680.3010864257812, |
|
"logps/rejected": -877.068359375, |
|
"loss": 0.1977, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.33833056688308716, |
|
"rewards/margins": 0.23077313601970673, |
|
"rewards/rejected": -0.5691036581993103, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1032183690276754e-07, |
|
"logits/chosen": 0.07660888135433197, |
|
"logits/rejected": 0.1921808272600174, |
|
"logps/chosen": -673.0133056640625, |
|
"logps/rejected": -855.6375732421875, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.32487696409225464, |
|
"rewards/margins": 0.2425091564655304, |
|
"rewards/rejected": -0.5673861503601074, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.161688112232836e-08, |
|
"logits/chosen": 0.10119873285293579, |
|
"logits/rejected": 0.22447247803211212, |
|
"logps/chosen": -711.1212158203125, |
|
"logps/rejected": -857.6790771484375, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.35159099102020264, |
|
"rewards/margins": 0.22090163826942444, |
|
"rewards/rejected": -0.5724925994873047, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.46189337174788e-08, |
|
"logits/chosen": 0.0487164705991745, |
|
"logits/rejected": 0.1770685911178589, |
|
"logps/chosen": -655.6456298828125, |
|
"logps/rejected": -825.86572265625, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3402259647846222, |
|
"rewards/margins": 0.2170572578907013, |
|
"rewards/rejected": -0.5572832822799683, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.934004245518793e-08, |
|
"logits/chosen": 0.09085109829902649, |
|
"logits/rejected": 0.22920957207679749, |
|
"logps/chosen": -656.9745483398438, |
|
"logps/rejected": -807.7351684570312, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.3091570734977722, |
|
"rewards/margins": 0.2053556740283966, |
|
"rewards/rejected": -0.5145127177238464, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.579103667367385e-08, |
|
"logits/chosen": 0.10511846840381622, |
|
"logits/rejected": 0.22097325325012207, |
|
"logps/chosen": -677.5391235351562, |
|
"logps/rejected": -815.5825805664062, |
|
"loss": 0.2188, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3394906222820282, |
|
"rewards/margins": 0.2081030309200287, |
|
"rewards/rejected": -0.5475937128067017, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.398151960681162e-08, |
|
"logits/chosen": 0.04612383991479874, |
|
"logits/rejected": 0.16620075702667236, |
|
"logps/chosen": -679.2454833984375, |
|
"logps/rejected": -800.4119262695312, |
|
"loss": 0.2539, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.35038450360298157, |
|
"rewards/margins": 0.1713865101337433, |
|
"rewards/rejected": -0.5217710733413696, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3919861577572924e-08, |
|
"logits/chosen": 0.11022261530160904, |
|
"logits/rejected": 0.166605606675148, |
|
"logps/chosen": -690.1370239257812, |
|
"logps/rejected": -797.578369140625, |
|
"loss": 0.2181, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.32749998569488525, |
|
"rewards/margins": 0.20777150988578796, |
|
"rewards/rejected": -0.5352715253829956, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5613194065327854e-08, |
|
"logits/chosen": 0.052184127271175385, |
|
"logits/rejected": 0.22973528504371643, |
|
"logps/chosen": -629.1040649414062, |
|
"logps/rejected": -766.4088134765625, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2912130355834961, |
|
"rewards/margins": 0.22654838860034943, |
|
"rewards/rejected": -0.5177614092826843, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.067404651211808e-09, |
|
"logits/chosen": 0.11986621469259262, |
|
"logits/rejected": 0.21088480949401855, |
|
"logps/chosen": -687.1105346679688, |
|
"logps/rejected": -877.3173828125, |
|
"loss": 0.2028, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3289037048816681, |
|
"rewards/margins": 0.23036351799964905, |
|
"rewards/rejected": -0.5592672228813171, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.287132845137709e-09, |
|
"logits/chosen": 0.08242613077163696, |
|
"logits/rejected": 0.20786967873573303, |
|
"logps/chosen": -682.5579223632812, |
|
"logps/rejected": -803.6607666015625, |
|
"loss": 0.2108, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.32676413655281067, |
|
"rewards/margins": 0.19439134001731873, |
|
"rewards/rejected": -0.5211554765701294, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2757667974155896e-09, |
|
"logits/chosen": 0.03548216074705124, |
|
"logits/rejected": 0.12074669450521469, |
|
"logps/chosen": -665.2953491210938, |
|
"logps/rejected": -846.1324462890625, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3495555818080902, |
|
"rewards/margins": 0.1981724500656128, |
|
"rewards/rejected": -0.5477280020713806, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.544089730633804e-11, |
|
"logits/chosen": 0.10499806702136993, |
|
"logits/rejected": 0.2223556488752365, |
|
"logps/chosen": -656.1048583984375, |
|
"logps/rejected": -808.8086547851562, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.3139723837375641, |
|
"rewards/margins": 0.22497034072875977, |
|
"rewards/rejected": -0.5389427542686462, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.24134081795175627, |
|
"train_runtime": 12108.13, |
|
"train_samples_per_second": 1.734, |
|
"train_steps_per_second": 0.108 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|