|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00026171159382360636, |
|
"grad_norm": 3.590468168258667, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": 0.6792653799057007, |
|
"logits/rejected": 1.31020188331604, |
|
"logps/chosen": -469.49981689453125, |
|
"logps/rejected": -525.3796997070312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0026171159382360636, |
|
"grad_norm": 3.1591908931732178, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": 1.5021909475326538, |
|
"logits/rejected": 1.427976131439209, |
|
"logps/chosen": -398.0495300292969, |
|
"logps/rejected": -356.86016845703125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": -8.217601134674624e-05, |
|
"rewards/margins": 0.0007430262048728764, |
|
"rewards/rejected": -0.000825202208943665, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 3.51601243019104, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": 1.3317842483520508, |
|
"logits/rejected": 1.638771414756775, |
|
"logps/chosen": -435.8251953125, |
|
"logps/rejected": -342.2559509277344, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.00043620201176963747, |
|
"rewards/margins": 0.0005908687599003315, |
|
"rewards/rejected": -0.00015466664626728743, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007851347814708191, |
|
"grad_norm": 3.3784544467926025, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": 1.3975493907928467, |
|
"logits/rejected": 1.4588085412979126, |
|
"logps/chosen": -377.9482727050781, |
|
"logps/rejected": -355.25885009765625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0005684455973096192, |
|
"rewards/margins": 0.0005999829736538231, |
|
"rewards/rejected": -3.1537445465801284e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 2.979464292526245, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": 1.6848558187484741, |
|
"logits/rejected": 1.9189517498016357, |
|
"logps/chosen": -316.1363525390625, |
|
"logps/rejected": -315.66058349609375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.00017661902529653162, |
|
"rewards/margins": -0.0004952313611283898, |
|
"rewards/rejected": 0.00031861235038377345, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01308557969118032, |
|
"grad_norm": 3.1099374294281006, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": 1.5289150476455688, |
|
"logits/rejected": 1.5212490558624268, |
|
"logps/chosen": -398.5328674316406, |
|
"logps/rejected": -336.2831115722656, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0001419751497451216, |
|
"rewards/margins": 0.00047931409790180624, |
|
"rewards/rejected": -0.0003373388899490237, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 2.992774248123169, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": 1.4803454875946045, |
|
"logits/rejected": 1.6450494527816772, |
|
"logps/chosen": -373.1556396484375, |
|
"logps/rejected": -328.5540771484375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00015133472334127873, |
|
"rewards/margins": -8.949339098762721e-05, |
|
"rewards/rejected": 0.00024082818708848208, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018319811567652448, |
|
"grad_norm": 3.166975498199463, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": 1.3975117206573486, |
|
"logits/rejected": 1.632108449935913, |
|
"logps/chosen": -385.858154296875, |
|
"logps/rejected": -334.81219482421875, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.00018815476505551487, |
|
"rewards/margins": -0.0004339146544225514, |
|
"rewards/rejected": 0.0002457600203342736, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 3.23146915435791, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": 1.7188682556152344, |
|
"logits/rejected": 1.6811161041259766, |
|
"logps/chosen": -383.59771728515625, |
|
"logps/rejected": -346.7442321777344, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.00110594870056957, |
|
"rewards/margins": -0.0007629155879840255, |
|
"rewards/rejected": -0.00034303305437788367, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023554043444124574, |
|
"grad_norm": 3.223733901977539, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": 1.5662400722503662, |
|
"logits/rejected": 1.790412187576294, |
|
"logps/chosen": -364.4346923828125, |
|
"logps/rejected": -327.1966552734375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0004470300336834043, |
|
"rewards/margins": 0.0006952629191800952, |
|
"rewards/rejected": -0.00114229298196733, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 2.721588373184204, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": 1.4209253787994385, |
|
"logits/rejected": 1.6146681308746338, |
|
"logps/chosen": -368.99169921875, |
|
"logps/rejected": -334.1181640625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0008529865299351513, |
|
"rewards/margins": -8.540081762475893e-05, |
|
"rewards/rejected": -0.0007675857050344348, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"eval_logits/chosen": 1.2742817401885986, |
|
"eval_logits/rejected": 1.4772121906280518, |
|
"eval_logps/chosen": -388.4209289550781, |
|
"eval_logps/rejected": -344.7744140625, |
|
"eval_loss": 0.6929848194122314, |
|
"eval_rewards/accuracies": 0.5210000276565552, |
|
"eval_rewards/chosen": -0.0005557815893553197, |
|
"eval_rewards/margins": 0.00034635106567293406, |
|
"eval_rewards/rejected": -0.0009021326550282538, |
|
"eval_runtime": 233.4714, |
|
"eval_samples_per_second": 8.566, |
|
"eval_steps_per_second": 1.071, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028788275320596704, |
|
"grad_norm": 3.0894269943237305, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": 1.4099972248077393, |
|
"logits/rejected": 1.5973542928695679, |
|
"logps/chosen": -405.90130615234375, |
|
"logps/rejected": -338.3550720214844, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0014196943957358599, |
|
"rewards/margins": 0.00048225713544525206, |
|
"rewards/rejected": -0.0019019513856619596, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 3.1108322143554688, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": 1.4038909673690796, |
|
"logits/rejected": 1.6449644565582275, |
|
"logps/chosen": -425.57330322265625, |
|
"logps/rejected": -380.5539855957031, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0003092998522333801, |
|
"rewards/margins": 0.0012004419695585966, |
|
"rewards/rejected": -0.0015097421128302813, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03402250719706883, |
|
"grad_norm": 3.5928196907043457, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": 1.4526954889297485, |
|
"logits/rejected": 1.6924293041229248, |
|
"logps/chosen": -368.2237548828125, |
|
"logps/rejected": -353.4678955078125, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0008503898861818016, |
|
"rewards/margins": 0.0015992727130651474, |
|
"rewards/rejected": -0.00244966265745461, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 3.48256778717041, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": 1.583683729171753, |
|
"logits/rejected": 1.6599153280258179, |
|
"logps/chosen": -401.0623474121094, |
|
"logps/rejected": -320.5968017578125, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0012277833884581923, |
|
"rewards/margins": 0.0012606108793988824, |
|
"rewards/rejected": -0.0024883942678570747, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03925673907354096, |
|
"grad_norm": 3.6008684635162354, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": 1.608473539352417, |
|
"logits/rejected": 1.6299419403076172, |
|
"logps/chosen": -419.42083740234375, |
|
"logps/rejected": -340.1468811035156, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0011100767878815532, |
|
"rewards/margins": 0.0020758803002536297, |
|
"rewards/rejected": -0.003185956971719861, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 3.3119959831237793, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": 1.191816806793213, |
|
"logits/rejected": 1.4365403652191162, |
|
"logps/chosen": -375.5677185058594, |
|
"logps/rejected": -358.6431884765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0033069555647671223, |
|
"rewards/margins": 0.0001626126904739067, |
|
"rewards/rejected": -0.0034695682115852833, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04449097095001309, |
|
"grad_norm": 3.996933937072754, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": 1.554457426071167, |
|
"logits/rejected": 1.7651093006134033, |
|
"logps/chosen": -324.51995849609375, |
|
"logps/rejected": -305.23175048828125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0038299753796309233, |
|
"rewards/margins": 0.0007525371038354933, |
|
"rewards/rejected": -0.004582512192428112, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 2.7113592624664307, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": 1.3284379243850708, |
|
"logits/rejected": 1.6581776142120361, |
|
"logps/chosen": -373.4314880371094, |
|
"logps/rejected": -329.12628173828125, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0027942766901105642, |
|
"rewards/margins": 0.0031869211234152317, |
|
"rewards/rejected": -0.005981197580695152, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04972520282648522, |
|
"grad_norm": 3.2230942249298096, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": 1.333396315574646, |
|
"logits/rejected": 1.4282341003417969, |
|
"logps/chosen": -385.22369384765625, |
|
"logps/rejected": -338.3575744628906, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0038786418735980988, |
|
"rewards/margins": 0.0016467362875118852, |
|
"rewards/rejected": -0.005525378044694662, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 2.9138338565826416, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": 1.4856141805648804, |
|
"logits/rejected": 1.6409976482391357, |
|
"logps/chosen": -361.8666687011719, |
|
"logps/rejected": -304.0251159667969, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.005739121697843075, |
|
"rewards/margins": 0.00320308655500412, |
|
"rewards/rejected": -0.00894220918416977, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"eval_logits/chosen": 1.275007724761963, |
|
"eval_logits/rejected": 1.4769618511199951, |
|
"eval_logps/chosen": -388.9444274902344, |
|
"eval_logps/rejected": -345.5613098144531, |
|
"eval_loss": 0.6916878819465637, |
|
"eval_rewards/accuracies": 0.5799999833106995, |
|
"eval_rewards/chosen": -0.005790840368717909, |
|
"eval_rewards/margins": 0.0029809444677084684, |
|
"eval_rewards/rejected": -0.008771784603595734, |
|
"eval_runtime": 233.1655, |
|
"eval_samples_per_second": 8.578, |
|
"eval_steps_per_second": 1.072, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05495943470295734, |
|
"grad_norm": 3.762040615081787, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": 1.5017220973968506, |
|
"logits/rejected": 1.6038427352905273, |
|
"logps/chosen": -397.37506103515625, |
|
"logps/rejected": -332.4270324707031, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.006693325936794281, |
|
"rewards/margins": 0.004596198443323374, |
|
"rewards/rejected": -0.011289524845778942, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 3.389441728591919, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": 1.4440081119537354, |
|
"logits/rejected": 1.5644251108169556, |
|
"logps/chosen": -370.3480224609375, |
|
"logps/rejected": -320.7288818359375, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.008492258377373219, |
|
"rewards/margins": 0.005523340776562691, |
|
"rewards/rejected": -0.014015598222613335, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06019366657942947, |
|
"grad_norm": 3.2270307540893555, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": 1.2713916301727295, |
|
"logits/rejected": 1.3412346839904785, |
|
"logps/chosen": -443.724853515625, |
|
"logps/rejected": -387.1663513183594, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.011385348625481129, |
|
"rewards/margins": 0.00459087360650301, |
|
"rewards/rejected": -0.01597622036933899, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 3.316723346710205, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": 1.3093677759170532, |
|
"logits/rejected": 1.5563104152679443, |
|
"logps/chosen": -427.25787353515625, |
|
"logps/rejected": -385.2057189941406, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.016488298773765564, |
|
"rewards/margins": 0.005019473843276501, |
|
"rewards/rejected": -0.02150776982307434, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06542789845590159, |
|
"grad_norm": 3.3876242637634277, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": 1.4990646839141846, |
|
"logits/rejected": 1.7924457788467407, |
|
"logps/chosen": -395.66754150390625, |
|
"logps/rejected": -346.0106506347656, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.021374255418777466, |
|
"rewards/margins": 0.006892757024616003, |
|
"rewards/rejected": -0.028267016634345055, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 3.321073055267334, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": 1.3270446062088013, |
|
"logits/rejected": 1.5052978992462158, |
|
"logps/chosen": -402.4129638671875, |
|
"logps/rejected": -366.2283630371094, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.028442109003663063, |
|
"rewards/margins": 0.010941008105874062, |
|
"rewards/rejected": -0.039383117109537125, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07066213033237373, |
|
"grad_norm": 2.7087340354919434, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": 1.3542237281799316, |
|
"logits/rejected": 1.5894794464111328, |
|
"logps/chosen": -386.6691589355469, |
|
"logps/rejected": -324.80499267578125, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.035862237215042114, |
|
"rewards/margins": 0.014425190165638924, |
|
"rewards/rejected": -0.05028742551803589, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 3.6236772537231445, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": 1.2839621305465698, |
|
"logits/rejected": 1.604859709739685, |
|
"logps/chosen": -376.8522033691406, |
|
"logps/rejected": -334.2494812011719, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.046802129596471786, |
|
"rewards/margins": 0.012729302048683167, |
|
"rewards/rejected": -0.05953143909573555, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07589636220884585, |
|
"grad_norm": 4.159788131713867, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": 1.5032262802124023, |
|
"logits/rejected": 1.5165659189224243, |
|
"logps/chosen": -416.37554931640625, |
|
"logps/rejected": -369.2384338378906, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.05019887164235115, |
|
"rewards/margins": 0.017000939697027206, |
|
"rewards/rejected": -0.06719981133937836, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 3.8966574668884277, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": 1.5554125308990479, |
|
"logits/rejected": 1.7502315044403076, |
|
"logps/chosen": -363.2331237792969, |
|
"logps/rejected": -331.22332763671875, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.056589674204587936, |
|
"rewards/margins": 0.014802152290940285, |
|
"rewards/rejected": -0.07139183580875397, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"eval_logits/chosen": 1.225297212600708, |
|
"eval_logits/rejected": 1.4230777025222778, |
|
"eval_logps/chosen": -394.262451171875, |
|
"eval_logps/rejected": -352.1134338378906, |
|
"eval_loss": 0.6859813332557678, |
|
"eval_rewards/accuracies": 0.5989999771118164, |
|
"eval_rewards/chosen": -0.05897095054388046, |
|
"eval_rewards/margins": 0.015322154387831688, |
|
"eval_rewards/rejected": -0.0742930993437767, |
|
"eval_runtime": 233.2642, |
|
"eval_samples_per_second": 8.574, |
|
"eval_steps_per_second": 1.072, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08113059408531798, |
|
"grad_norm": 4.554100513458252, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": 1.3358051776885986, |
|
"logits/rejected": 1.3814319372177124, |
|
"logps/chosen": -428.4581604003906, |
|
"logps/rejected": -350.5341491699219, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.053123779594898224, |
|
"rewards/margins": 0.026760786771774292, |
|
"rewards/rejected": -0.07988456636667252, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 3.4826040267944336, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": 1.4008252620697021, |
|
"logits/rejected": 1.653390884399414, |
|
"logps/chosen": -378.8886413574219, |
|
"logps/rejected": -348.64361572265625, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06168674677610397, |
|
"rewards/margins": 0.017807736992836, |
|
"rewards/rejected": -0.07949449121952057, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08636482596179011, |
|
"grad_norm": 3.816162586212158, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": 1.3938804864883423, |
|
"logits/rejected": 1.5117136240005493, |
|
"logps/chosen": -373.7430114746094, |
|
"logps/rejected": -343.16607666015625, |
|
"loss": 0.6838, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.07209007441997528, |
|
"rewards/margins": 0.020073365420103073, |
|
"rewards/rejected": -0.09216342866420746, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 4.358788013458252, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": 1.266187071800232, |
|
"logits/rejected": 1.4517450332641602, |
|
"logps/chosen": -425.5559997558594, |
|
"logps/rejected": -387.93243408203125, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.06175302714109421, |
|
"rewards/margins": 0.02637804113328457, |
|
"rewards/rejected": -0.08813107013702393, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09159905783826224, |
|
"grad_norm": 3.675837755203247, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": 1.0587990283966064, |
|
"logits/rejected": 1.394778847694397, |
|
"logps/chosen": -435.980224609375, |
|
"logps/rejected": -399.3922424316406, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.07773401588201523, |
|
"rewards/margins": 0.02536655031144619, |
|
"rewards/rejected": -0.10310056060552597, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 2.5249226093292236, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": 1.3578028678894043, |
|
"logits/rejected": 1.5769567489624023, |
|
"logps/chosen": -368.1285705566406, |
|
"logps/rejected": -327.1725769042969, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1062256470322609, |
|
"rewards/margins": 0.025300273671746254, |
|
"rewards/rejected": -0.131525918841362, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09683328971473436, |
|
"grad_norm": 4.508261680603027, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": 1.4787975549697876, |
|
"logits/rejected": 1.530562400817871, |
|
"logps/chosen": -423.5401306152344, |
|
"logps/rejected": -350.29522705078125, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11056496202945709, |
|
"rewards/margins": 0.047700513154268265, |
|
"rewards/rejected": -0.15826547145843506, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 4.113176345825195, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": 1.3677705526351929, |
|
"logits/rejected": 1.590041995048523, |
|
"logps/chosen": -425.14569091796875, |
|
"logps/rejected": -369.8869934082031, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12270758301019669, |
|
"rewards/margins": 0.03834443539381027, |
|
"rewards/rejected": -0.16105201840400696, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1020675215912065, |
|
"grad_norm": 4.8123579025268555, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": 1.3154010772705078, |
|
"logits/rejected": 1.4253056049346924, |
|
"logps/chosen": -410.3934631347656, |
|
"logps/rejected": -374.3392639160156, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.11665485054254532, |
|
"rewards/margins": 0.062035609036684036, |
|
"rewards/rejected": -0.17869044840335846, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 4.263484001159668, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": 1.333717703819275, |
|
"logits/rejected": 1.4957849979400635, |
|
"logps/chosen": -405.444580078125, |
|
"logps/rejected": -337.0886535644531, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1346513330936432, |
|
"rewards/margins": 0.039198193699121475, |
|
"rewards/rejected": -0.17384955286979675, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": 1.2136365175247192, |
|
"eval_logits/rejected": 1.3996493816375732, |
|
"eval_logps/chosen": -403.8213195800781, |
|
"eval_logps/rejected": -363.8988037109375, |
|
"eval_loss": 0.6773815751075745, |
|
"eval_rewards/accuracies": 0.6025000214576721, |
|
"eval_rewards/chosen": -0.15455959737300873, |
|
"eval_rewards/margins": 0.03758702799677849, |
|
"eval_rewards/rejected": -0.19214662909507751, |
|
"eval_runtime": 232.5337, |
|
"eval_samples_per_second": 8.601, |
|
"eval_steps_per_second": 1.075, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10730175346767862, |
|
"grad_norm": 4.715285301208496, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": 1.370822548866272, |
|
"logits/rejected": 1.4222373962402344, |
|
"logps/chosen": -386.025146484375, |
|
"logps/rejected": -370.2654724121094, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.15298400819301605, |
|
"rewards/margins": 0.026256907731294632, |
|
"rewards/rejected": -0.17924091219902039, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 4.306619167327881, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": 1.382204294204712, |
|
"logits/rejected": 1.4024231433868408, |
|
"logps/chosen": -378.82562255859375, |
|
"logps/rejected": -337.8069152832031, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.15577563643455505, |
|
"rewards/margins": 0.039799682796001434, |
|
"rewards/rejected": -0.1955752968788147, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11253598534415074, |
|
"grad_norm": 4.360618591308594, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": 1.2584686279296875, |
|
"logits/rejected": 1.6165319681167603, |
|
"logps/chosen": -416.8006286621094, |
|
"logps/rejected": -379.16168212890625, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1570083498954773, |
|
"rewards/margins": 0.043786775320768356, |
|
"rewards/rejected": -0.20079509913921356, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 3.9530985355377197, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": 1.2687292098999023, |
|
"logits/rejected": 1.2826169729232788, |
|
"logps/chosen": -423.39794921875, |
|
"logps/rejected": -383.66290283203125, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.14532431960105896, |
|
"rewards/margins": 0.042948439717292786, |
|
"rewards/rejected": -0.18827277421951294, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11777021722062288, |
|
"grad_norm": 4.231596946716309, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": 1.1246349811553955, |
|
"logits/rejected": 1.26097571849823, |
|
"logps/chosen": -403.4371032714844, |
|
"logps/rejected": -370.98431396484375, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.12421522289514542, |
|
"rewards/margins": 0.0634991005063057, |
|
"rewards/rejected": -0.18771430850028992, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 4.21218729019165, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": 1.165198564529419, |
|
"logits/rejected": 1.3747196197509766, |
|
"logps/chosen": -396.4094543457031, |
|
"logps/rejected": -366.3015441894531, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.1397528052330017, |
|
"rewards/margins": 0.06794790923595428, |
|
"rewards/rejected": -0.2077006995677948, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.123004449097095, |
|
"grad_norm": 5.95582389831543, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": 1.2146246433258057, |
|
"logits/rejected": 1.2889845371246338, |
|
"logps/chosen": -386.59368896484375, |
|
"logps/rejected": -354.281005859375, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.13470637798309326, |
|
"rewards/margins": 0.06365668773651123, |
|
"rewards/rejected": -0.1983630657196045, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 4.180170059204102, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": 1.0521671772003174, |
|
"logits/rejected": 1.2878140211105347, |
|
"logps/chosen": -396.9014587402344, |
|
"logps/rejected": -366.1920166015625, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1293117105960846, |
|
"rewards/margins": 0.06927163153886795, |
|
"rewards/rejected": -0.19858333468437195, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12823868097356714, |
|
"grad_norm": 4.745175361633301, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": 1.0240387916564941, |
|
"logits/rejected": 1.2297166585922241, |
|
"logps/chosen": -430.8408203125, |
|
"logps/rejected": -386.9019470214844, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1291750818490982, |
|
"rewards/margins": 0.07389305531978607, |
|
"rewards/rejected": -0.20306813716888428, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 4.876718521118164, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": 0.956185519695282, |
|
"logits/rejected": 1.1582107543945312, |
|
"logps/chosen": -418.2518615722656, |
|
"logps/rejected": -358.01739501953125, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.15920230746269226, |
|
"rewards/margins": 0.08411301672458649, |
|
"rewards/rejected": -0.24331530928611755, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"eval_logits/chosen": 0.9564015865325928, |
|
"eval_logits/rejected": 1.1466065645217896, |
|
"eval_logps/chosen": -404.720947265625, |
|
"eval_logps/rejected": -367.4447326660156, |
|
"eval_loss": 0.6681177020072937, |
|
"eval_rewards/accuracies": 0.6240000128746033, |
|
"eval_rewards/chosen": -0.16355587542057037, |
|
"eval_rewards/margins": 0.06404965370893478, |
|
"eval_rewards/rejected": -0.22760552167892456, |
|
"eval_runtime": 232.074, |
|
"eval_samples_per_second": 8.618, |
|
"eval_steps_per_second": 1.077, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13347291285003926, |
|
"grad_norm": 5.33396053314209, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": 1.1448547840118408, |
|
"logits/rejected": 1.1940263509750366, |
|
"logps/chosen": -420.2618713378906, |
|
"logps/rejected": -372.01531982421875, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1364787369966507, |
|
"rewards/margins": 0.08503785729408264, |
|
"rewards/rejected": -0.22151657938957214, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 5.8554887771606445, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": 1.2189310789108276, |
|
"logits/rejected": 1.1558836698532104, |
|
"logps/chosen": -412.2118225097656, |
|
"logps/rejected": -386.9334411621094, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.18587224185466766, |
|
"rewards/margins": 0.07138343900442123, |
|
"rewards/rejected": -0.2572557032108307, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13870714472651138, |
|
"grad_norm": 5.761895656585693, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": 1.0874278545379639, |
|
"logits/rejected": 1.3288378715515137, |
|
"logps/chosen": -404.8669738769531, |
|
"logps/rejected": -405.3680419921875, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.18935520946979523, |
|
"rewards/margins": 0.0935024693608284, |
|
"rewards/rejected": -0.28285765647888184, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 5.3668413162231445, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": 1.3489412069320679, |
|
"logits/rejected": 1.4930012226104736, |
|
"logps/chosen": -424.3651428222656, |
|
"logps/rejected": -362.0076599121094, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.20577266812324524, |
|
"rewards/margins": 0.13636724650859833, |
|
"rewards/rejected": -0.3421398997306824, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1439413766029835, |
|
"grad_norm": 5.151000022888184, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": 1.1908769607543945, |
|
"logits/rejected": 1.2403991222381592, |
|
"logps/chosen": -418.28729248046875, |
|
"logps/rejected": -380.58282470703125, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.16425392031669617, |
|
"rewards/margins": 0.0870148092508316, |
|
"rewards/rejected": -0.2512687146663666, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 5.040604591369629, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": 1.0223934650421143, |
|
"logits/rejected": 1.145374059677124, |
|
"logps/chosen": -404.68829345703125, |
|
"logps/rejected": -392.9267578125, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.19183708727359772, |
|
"rewards/margins": 0.08984865993261337, |
|
"rewards/rejected": -0.2816857099533081, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14917560847945563, |
|
"grad_norm": 6.167598724365234, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": 1.0712225437164307, |
|
"logits/rejected": 1.3134175539016724, |
|
"logps/chosen": -472.494384765625, |
|
"logps/rejected": -424.765380859375, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.29787617921829224, |
|
"rewards/margins": 0.08284131437540054, |
|
"rewards/rejected": -0.3807174861431122, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 5.881194114685059, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": 0.8614290356636047, |
|
"logits/rejected": 1.003142237663269, |
|
"logps/chosen": -461.50421142578125, |
|
"logps/rejected": -403.1651916503906, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2679108679294586, |
|
"rewards/margins": 0.11834441125392914, |
|
"rewards/rejected": -0.38625526428222656, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15440984035592778, |
|
"grad_norm": 5.650794506072998, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": 0.8646121025085449, |
|
"logits/rejected": 1.1550391912460327, |
|
"logps/chosen": -393.10260009765625, |
|
"logps/rejected": -357.4300231933594, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.21748176217079163, |
|
"rewards/margins": 0.07719887048006058, |
|
"rewards/rejected": -0.2946805953979492, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 5.714654922485352, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": 0.9980852007865906, |
|
"logits/rejected": 1.1865313053131104, |
|
"logps/chosen": -385.1533508300781, |
|
"logps/rejected": -367.5033264160156, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.20146425068378448, |
|
"rewards/margins": 0.09325651824474335, |
|
"rewards/rejected": -0.29472076892852783, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"eval_logits/chosen": 0.9498924016952515, |
|
"eval_logits/rejected": 1.1416826248168945, |
|
"eval_logps/chosen": -415.15386962890625, |
|
"eval_logps/rejected": -380.47955322265625, |
|
"eval_loss": 0.6596394181251526, |
|
"eval_rewards/accuracies": 0.6234999895095825, |
|
"eval_rewards/chosen": -0.2678852677345276, |
|
"eval_rewards/margins": 0.09006918221712112, |
|
"eval_rewards/rejected": -0.3579544723033905, |
|
"eval_runtime": 231.9217, |
|
"eval_samples_per_second": 8.624, |
|
"eval_steps_per_second": 1.078, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1596440722323999, |
|
"grad_norm": 5.238458156585693, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": 1.1951860189437866, |
|
"logits/rejected": 1.289475679397583, |
|
"logps/chosen": -383.46795654296875, |
|
"logps/rejected": -362.46417236328125, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.29597288370132446, |
|
"rewards/margins": 0.07488597929477692, |
|
"rewards/rejected": -0.3708588182926178, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 6.363269805908203, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": 1.051048994064331, |
|
"logits/rejected": 1.357006311416626, |
|
"logps/chosen": -452.8377380371094, |
|
"logps/rejected": -374.8196716308594, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.29147928953170776, |
|
"rewards/margins": 0.10656224191188812, |
|
"rewards/rejected": -0.3980415463447571, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16487830410887203, |
|
"grad_norm": 6.250793933868408, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": 0.9079286456108093, |
|
"logits/rejected": 1.184887170791626, |
|
"logps/chosen": -387.5869445800781, |
|
"logps/rejected": -360.51885986328125, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.17826662957668304, |
|
"rewards/margins": 0.12794804573059082, |
|
"rewards/rejected": -0.30621469020843506, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 7.378194332122803, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": 0.7079142332077026, |
|
"logits/rejected": 0.9907251596450806, |
|
"logps/chosen": -420.49560546875, |
|
"logps/rejected": -371.4837951660156, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.23468086123466492, |
|
"rewards/margins": 0.09351176023483276, |
|
"rewards/rejected": -0.3281926214694977, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17011253598534415, |
|
"grad_norm": 10.734910011291504, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": 0.8249115943908691, |
|
"logits/rejected": 1.072989583015442, |
|
"logps/chosen": -398.7706604003906, |
|
"logps/rejected": -382.7182922363281, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.24996908009052277, |
|
"rewards/margins": 0.1392596811056137, |
|
"rewards/rejected": -0.3892287611961365, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 6.875140190124512, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": 0.8695880770683289, |
|
"logits/rejected": 1.1250814199447632, |
|
"logps/chosen": -394.7156982421875, |
|
"logps/rejected": -351.32427978515625, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.18945911526679993, |
|
"rewards/margins": 0.13935169577598572, |
|
"rewards/rejected": -0.32881081104278564, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17534676786181627, |
|
"grad_norm": 7.728440761566162, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": 0.9572404623031616, |
|
"logits/rejected": 1.0353472232818604, |
|
"logps/chosen": -424.74468994140625, |
|
"logps/rejected": -388.115234375, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.19141948223114014, |
|
"rewards/margins": 0.1302730143070221, |
|
"rewards/rejected": -0.32169249653816223, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 8.876616477966309, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": 0.7888752222061157, |
|
"logits/rejected": 0.9395732879638672, |
|
"logps/chosen": -428.37847900390625, |
|
"logps/rejected": -380.4373779296875, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.34512442350387573, |
|
"rewards/margins": 0.17706379294395447, |
|
"rewards/rejected": -0.5221882462501526, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1805809997382884, |
|
"grad_norm": 7.472078800201416, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": 1.0732382535934448, |
|
"logits/rejected": 1.275301218032837, |
|
"logps/chosen": -385.16851806640625, |
|
"logps/rejected": -399.53167724609375, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3828611969947815, |
|
"rewards/margins": 0.16961130499839783, |
|
"rewards/rejected": -0.5524724721908569, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 6.828216552734375, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": 0.8568657636642456, |
|
"logits/rejected": 0.9334108233451843, |
|
"logps/chosen": -401.2918395996094, |
|
"logps/rejected": -391.73291015625, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.2788159251213074, |
|
"rewards/margins": 0.14835360646247864, |
|
"rewards/rejected": -0.4271695017814636, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"eval_logits/chosen": 0.7495799660682678, |
|
"eval_logits/rejected": 0.9458017349243164, |
|
"eval_logps/chosen": -412.70025634765625, |
|
"eval_logps/rejected": -382.4018859863281, |
|
"eval_loss": 0.6480182409286499, |
|
"eval_rewards/accuracies": 0.640999972820282, |
|
"eval_rewards/chosen": -0.24334919452667236, |
|
"eval_rewards/margins": 0.13382813334465027, |
|
"eval_rewards/rejected": -0.37717729806900024, |
|
"eval_runtime": 232.1999, |
|
"eval_samples_per_second": 8.613, |
|
"eval_steps_per_second": 1.077, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18581523161476055, |
|
"grad_norm": 7.924125671386719, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": 0.7438673377037048, |
|
"logits/rejected": 1.0076682567596436, |
|
"logps/chosen": -439.7687072753906, |
|
"logps/rejected": -405.40509033203125, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19243761897087097, |
|
"rewards/margins": 0.12698553502559662, |
|
"rewards/rejected": -0.3194231390953064, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 9.067682266235352, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": 0.6921774744987488, |
|
"logits/rejected": 1.028187870979309, |
|
"logps/chosen": -423.8224182128906, |
|
"logps/rejected": -385.47412109375, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.3247512876987457, |
|
"rewards/margins": 0.11575271934270859, |
|
"rewards/rejected": -0.4405040144920349, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19104946349123267, |
|
"grad_norm": 7.999166965484619, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": 0.8577788472175598, |
|
"logits/rejected": 1.1802966594696045, |
|
"logps/chosen": -425.0936584472656, |
|
"logps/rejected": -394.3600769042969, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.457479327917099, |
|
"rewards/margins": 0.16357269883155823, |
|
"rewards/rejected": -0.6210519671440125, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 8.635887145996094, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": 1.0259983539581299, |
|
"logits/rejected": 1.2564369440078735, |
|
"logps/chosen": -366.40948486328125, |
|
"logps/rejected": -322.6649475097656, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.2946811616420746, |
|
"rewards/margins": 0.22826531529426575, |
|
"rewards/rejected": -0.5229464769363403, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1962836953677048, |
|
"grad_norm": 7.924373626708984, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": 0.9873319864273071, |
|
"logits/rejected": 1.1646772623062134, |
|
"logps/chosen": -393.21209716796875, |
|
"logps/rejected": -388.07452392578125, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.27078738808631897, |
|
"rewards/margins": 0.1725112944841385, |
|
"rewards/rejected": -0.44329872727394104, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 8.054422378540039, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": 0.6722021102905273, |
|
"logits/rejected": 0.8681543469429016, |
|
"logps/chosen": -433.37750244140625, |
|
"logps/rejected": -429.5255432128906, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.43984508514404297, |
|
"rewards/margins": 0.16787569224834442, |
|
"rewards/rejected": -0.6077207326889038, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20151792724417691, |
|
"grad_norm": 7.1459150314331055, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": 0.7385894060134888, |
|
"logits/rejected": 0.9126585721969604, |
|
"logps/chosen": -414.93292236328125, |
|
"logps/rejected": -372.8153381347656, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4202548861503601, |
|
"rewards/margins": 0.1190139502286911, |
|
"rewards/rejected": -0.5392688512802124, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 9.462651252746582, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": 0.6594001650810242, |
|
"logits/rejected": 0.8065937161445618, |
|
"logps/chosen": -452.4710388183594, |
|
"logps/rejected": -414.4306640625, |
|
"loss": 0.6375, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2922487258911133, |
|
"rewards/margins": 0.16755884885787964, |
|
"rewards/rejected": -0.4598075747489929, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20675215912064904, |
|
"grad_norm": 9.359882354736328, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": 0.8586047887802124, |
|
"logits/rejected": 1.0241984128952026, |
|
"logps/chosen": -411.26116943359375, |
|
"logps/rejected": -411.509033203125, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.47147512435913086, |
|
"rewards/margins": 0.1179923564195633, |
|
"rewards/rejected": -0.589467465877533, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 10.965747833251953, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": 0.6815871000289917, |
|
"logits/rejected": 0.9268299341201782, |
|
"logps/chosen": -432.5615234375, |
|
"logps/rejected": -362.436767578125, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5902279019355774, |
|
"rewards/margins": 0.18446585536003113, |
|
"rewards/rejected": -0.7746937870979309, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": 0.6242519021034241, |
|
"eval_logits/rejected": 0.8198153972625732, |
|
"eval_logps/chosen": -442.35064697265625, |
|
"eval_logps/rejected": -415.1210632324219, |
|
"eval_loss": 0.6389787793159485, |
|
"eval_rewards/accuracies": 0.6514999866485596, |
|
"eval_rewards/chosen": -0.5398533940315247, |
|
"eval_rewards/margins": 0.16451531648635864, |
|
"eval_rewards/rejected": -0.7043687105178833, |
|
"eval_runtime": 232.5028, |
|
"eval_samples_per_second": 8.602, |
|
"eval_steps_per_second": 1.075, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21198639099712116, |
|
"grad_norm": 9.325600624084473, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": 0.5215608477592468, |
|
"logits/rejected": 0.7568296194076538, |
|
"logps/chosen": -452.6517639160156, |
|
"logps/rejected": -429.9583435058594, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4800487160682678, |
|
"rewards/margins": 0.09066037833690643, |
|
"rewards/rejected": -0.5707091093063354, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 9.269844055175781, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": 1.0071885585784912, |
|
"logits/rejected": 1.0069457292556763, |
|
"logps/chosen": -374.1867370605469, |
|
"logps/rejected": -393.0777893066406, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.38367071747779846, |
|
"rewards/margins": 0.1374969780445099, |
|
"rewards/rejected": -0.5211676955223083, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2172206228735933, |
|
"grad_norm": 9.348451614379883, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": 0.6886093616485596, |
|
"logits/rejected": 0.855501651763916, |
|
"logps/chosen": -426.2472229003906, |
|
"logps/rejected": -433.111083984375, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3134116232395172, |
|
"rewards/margins": 0.13843798637390137, |
|
"rewards/rejected": -0.4518495500087738, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 9.539639472961426, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": 0.7601666450500488, |
|
"logits/rejected": 0.965703010559082, |
|
"logps/chosen": -439.4012756347656, |
|
"logps/rejected": -400.94439697265625, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5403390526771545, |
|
"rewards/margins": 0.17277175188064575, |
|
"rewards/rejected": -0.7131107449531555, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22245485475006543, |
|
"grad_norm": 8.668850898742676, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": 0.8507216572761536, |
|
"logits/rejected": 0.9332345724105835, |
|
"logps/chosen": -440.80999755859375, |
|
"logps/rejected": -420.0621643066406, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.583116352558136, |
|
"rewards/margins": 0.16593685746192932, |
|
"rewards/rejected": -0.7490531206130981, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 12.724005699157715, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": 0.8601281046867371, |
|
"logits/rejected": 0.9346219897270203, |
|
"logps/chosen": -455.83502197265625, |
|
"logps/rejected": -436.79815673828125, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6050734519958496, |
|
"rewards/margins": 0.1777031421661377, |
|
"rewards/rejected": -0.7827765941619873, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22768908662653756, |
|
"grad_norm": 11.345113754272461, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": 0.7511667013168335, |
|
"logits/rejected": 0.9087456464767456, |
|
"logps/chosen": -427.225830078125, |
|
"logps/rejected": -417.6124572753906, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.47927623987197876, |
|
"rewards/margins": 0.181664377450943, |
|
"rewards/rejected": -0.6609406471252441, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 11.099597930908203, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": 0.8890976905822754, |
|
"logits/rejected": 0.843266487121582, |
|
"logps/chosen": -428.44091796875, |
|
"logps/rejected": -412.8895568847656, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4830327033996582, |
|
"rewards/margins": 0.21945062279701233, |
|
"rewards/rejected": -0.7024833559989929, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23292331850300968, |
|
"grad_norm": 11.448017120361328, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": 0.9304903745651245, |
|
"logits/rejected": 1.2368038892745972, |
|
"logps/chosen": -408.22283935546875, |
|
"logps/rejected": -436.41815185546875, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.45535406470298767, |
|
"rewards/margins": 0.22440317273139954, |
|
"rewards/rejected": -0.6797571778297424, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 14.195487976074219, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": 0.8509295582771301, |
|
"logits/rejected": 1.094995141029358, |
|
"logps/chosen": -421.479248046875, |
|
"logps/rejected": -408.42083740234375, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5554867386817932, |
|
"rewards/margins": 0.23488807678222656, |
|
"rewards/rejected": -0.7903748750686646, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"eval_logits/chosen": 0.7130433917045593, |
|
"eval_logits/rejected": 0.9079583883285522, |
|
"eval_logps/chosen": -454.4474792480469, |
|
"eval_logps/rejected": -431.30230712890625, |
|
"eval_loss": 0.6320670247077942, |
|
"eval_rewards/accuracies": 0.6485000252723694, |
|
"eval_rewards/chosen": -0.6608208417892456, |
|
"eval_rewards/margins": 0.2053609937429428, |
|
"eval_rewards/rejected": -0.8661818504333496, |
|
"eval_runtime": 232.3653, |
|
"eval_samples_per_second": 8.607, |
|
"eval_steps_per_second": 1.076, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2381575503794818, |
|
"grad_norm": 9.822765350341797, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": 0.9885305166244507, |
|
"logits/rejected": 0.9318181872367859, |
|
"logps/chosen": -475.46728515625, |
|
"logps/rejected": -405.56658935546875, |
|
"loss": 0.6189, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5485895872116089, |
|
"rewards/margins": 0.2235954999923706, |
|
"rewards/rejected": -0.7721850872039795, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 8.151389122009277, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": 0.8163010478019714, |
|
"logits/rejected": 0.7590088844299316, |
|
"logps/chosen": -439.08563232421875, |
|
"logps/rejected": -413.9178771972656, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5029494166374207, |
|
"rewards/margins": 0.2026442587375641, |
|
"rewards/rejected": -0.7055937051773071, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24339178225595393, |
|
"grad_norm": 12.819987297058105, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": 0.6603835225105286, |
|
"logits/rejected": 0.8101722598075867, |
|
"logps/chosen": -468.04083251953125, |
|
"logps/rejected": -449.27423095703125, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5709540247917175, |
|
"rewards/margins": 0.2979514002799988, |
|
"rewards/rejected": -0.8689054250717163, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 11.70290470123291, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": 0.6703850030899048, |
|
"logits/rejected": 1.1179869174957275, |
|
"logps/chosen": -492.76904296875, |
|
"logps/rejected": -475.07244873046875, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8049885630607605, |
|
"rewards/margins": 0.24929532408714294, |
|
"rewards/rejected": -1.054283857345581, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24862601413242608, |
|
"grad_norm": 9.882152557373047, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": 0.8308131098747253, |
|
"logits/rejected": 0.9495989084243774, |
|
"logps/chosen": -425.8990173339844, |
|
"logps/rejected": -416.7378845214844, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6653159856796265, |
|
"rewards/margins": 0.2662131190299988, |
|
"rewards/rejected": -0.9315292239189148, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 10.33724308013916, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": 0.7533870935440063, |
|
"logits/rejected": 0.8427373766899109, |
|
"logps/chosen": -445.78387451171875, |
|
"logps/rejected": -416.93218994140625, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.417325496673584, |
|
"rewards/margins": 0.21080616116523743, |
|
"rewards/rejected": -0.6281316876411438, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25386024600889817, |
|
"grad_norm": 12.866530418395996, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": 0.6672025322914124, |
|
"logits/rejected": 0.8596879243850708, |
|
"logps/chosen": -435.473388671875, |
|
"logps/rejected": -387.2782287597656, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.4055888056755066, |
|
"rewards/margins": 0.25641921162605286, |
|
"rewards/rejected": -0.6620079278945923, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 12.976202964782715, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": 0.6292930841445923, |
|
"logits/rejected": 0.7828453183174133, |
|
"logps/chosen": -468.067138671875, |
|
"logps/rejected": -448.0206604003906, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5305265784263611, |
|
"rewards/margins": 0.2725897431373596, |
|
"rewards/rejected": -0.8031163215637207, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2590944778853703, |
|
"grad_norm": 11.27432632446289, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": 0.7714609503746033, |
|
"logits/rejected": 1.057544469833374, |
|
"logps/chosen": -416.63323974609375, |
|
"logps/rejected": -403.56793212890625, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6991704702377319, |
|
"rewards/margins": 0.3505772650241852, |
|
"rewards/rejected": -1.0497477054595947, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 11.419754981994629, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": 0.7187921404838562, |
|
"logits/rejected": 1.0341460704803467, |
|
"logps/chosen": -520.6634521484375, |
|
"logps/rejected": -485.7464904785156, |
|
"loss": 0.6255, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8484965562820435, |
|
"rewards/margins": 0.24263262748718262, |
|
"rewards/rejected": -1.091129183769226, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": 0.5235105156898499, |
|
"eval_logits/rejected": 0.711112916469574, |
|
"eval_logps/chosen": -473.25457763671875, |
|
"eval_logps/rejected": -453.2762756347656, |
|
"eval_loss": 0.6269846558570862, |
|
"eval_rewards/accuracies": 0.6455000042915344, |
|
"eval_rewards/chosen": -0.848892092704773, |
|
"eval_rewards/margins": 0.2370292991399765, |
|
"eval_rewards/rejected": -1.0859214067459106, |
|
"eval_runtime": 232.3711, |
|
"eval_samples_per_second": 8.607, |
|
"eval_steps_per_second": 1.076, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2643287097618425, |
|
"grad_norm": 21.2219295501709, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": 0.5445064306259155, |
|
"logits/rejected": 0.8773614764213562, |
|
"logps/chosen": -415.87518310546875, |
|
"logps/rejected": -414.38336181640625, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7228370904922485, |
|
"rewards/margins": 0.2761087417602539, |
|
"rewards/rejected": -0.9989458322525024, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 12.491921424865723, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": 0.7395948767662048, |
|
"logits/rejected": 1.0148208141326904, |
|
"logps/chosen": -416.64801025390625, |
|
"logps/rejected": -378.2071838378906, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.36920469999313354, |
|
"rewards/margins": 0.19941550493240356, |
|
"rewards/rejected": -0.5686202645301819, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26956294163831457, |
|
"grad_norm": 11.955611228942871, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": 0.5796680450439453, |
|
"logits/rejected": 0.7352942228317261, |
|
"logps/chosen": -382.8646545410156, |
|
"logps/rejected": -367.3566589355469, |
|
"loss": 0.6535, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.2818133234977722, |
|
"rewards/margins": 0.14685805141925812, |
|
"rewards/rejected": -0.42867136001586914, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 12.265511512756348, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": 0.3429097533226013, |
|
"logits/rejected": 0.5369440913200378, |
|
"logps/chosen": -484.8700256347656, |
|
"logps/rejected": -457.33905029296875, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4302898943424225, |
|
"rewards/margins": 0.34176695346832275, |
|
"rewards/rejected": -0.7720568180084229, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2747971735147867, |
|
"grad_norm": 13.330986022949219, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": 0.48474931716918945, |
|
"logits/rejected": 0.6024073362350464, |
|
"logps/chosen": -467.50018310546875, |
|
"logps/rejected": -466.6949768066406, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7076524496078491, |
|
"rewards/margins": 0.3223820626735687, |
|
"rewards/rejected": -1.0300344228744507, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 18.709505081176758, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": 0.4200347363948822, |
|
"logits/rejected": 0.6729756593704224, |
|
"logps/chosen": -439.62579345703125, |
|
"logps/rejected": -422.578857421875, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8081327676773071, |
|
"rewards/margins": 0.21756339073181152, |
|
"rewards/rejected": -1.0256961584091187, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2800314053912588, |
|
"grad_norm": 14.954957962036133, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": 0.44082099199295044, |
|
"logits/rejected": 0.5635146498680115, |
|
"logps/chosen": -464.96136474609375, |
|
"logps/rejected": -430.59521484375, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6111140847206116, |
|
"rewards/margins": 0.41279348731040955, |
|
"rewards/rejected": -1.0239075422286987, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 14.649702072143555, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": 0.3772805631160736, |
|
"logits/rejected": 0.5361444354057312, |
|
"logps/chosen": -446.0318298339844, |
|
"logps/rejected": -424.20208740234375, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5018765926361084, |
|
"rewards/margins": 0.24910588562488556, |
|
"rewards/rejected": -0.7509824633598328, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28526563726773096, |
|
"grad_norm": 12.78677749633789, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": 0.40019339323043823, |
|
"logits/rejected": 0.5373650789260864, |
|
"logps/chosen": -439.0340881347656, |
|
"logps/rejected": -412.9305725097656, |
|
"loss": 0.6463, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6333376169204712, |
|
"rewards/margins": 0.1645718812942505, |
|
"rewards/rejected": -0.7979093790054321, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 13.784514427185059, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": 0.5023793578147888, |
|
"logits/rejected": 0.6304869651794434, |
|
"logps/chosen": -468.329345703125, |
|
"logps/rejected": -410.9677734375, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7563449144363403, |
|
"rewards/margins": 0.21642132103443146, |
|
"rewards/rejected": -0.9727662205696106, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"eval_logits/chosen": 0.2741233706474304, |
|
"eval_logits/rejected": 0.45645061135292053, |
|
"eval_logps/chosen": -476.776611328125, |
|
"eval_logps/rejected": -455.3140563964844, |
|
"eval_loss": 0.6249045133590698, |
|
"eval_rewards/accuracies": 0.6539999842643738, |
|
"eval_rewards/chosen": -0.8841127753257751, |
|
"eval_rewards/margins": 0.22218641638755798, |
|
"eval_rewards/rejected": -1.1062991619110107, |
|
"eval_runtime": 232.1027, |
|
"eval_samples_per_second": 8.617, |
|
"eval_steps_per_second": 1.077, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2904998691442031, |
|
"grad_norm": 13.539247512817383, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": 0.508640468120575, |
|
"logits/rejected": 0.6552512049674988, |
|
"logps/chosen": -493.59344482421875, |
|
"logps/rejected": -498.8395080566406, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9703060984611511, |
|
"rewards/margins": 0.22064876556396484, |
|
"rewards/rejected": -1.1909549236297607, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 11.673506736755371, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": 0.32166963815689087, |
|
"logits/rejected": 0.6035802960395813, |
|
"logps/chosen": -456.86944580078125, |
|
"logps/rejected": -448.83294677734375, |
|
"loss": 0.6182, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8095690608024597, |
|
"rewards/margins": 0.2476453334093094, |
|
"rewards/rejected": -1.0572144985198975, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2957341010206752, |
|
"grad_norm": 9.967942237854004, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": 0.2381783425807953, |
|
"logits/rejected": 0.4971030354499817, |
|
"logps/chosen": -444.052001953125, |
|
"logps/rejected": -437.64495849609375, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6728368997573853, |
|
"rewards/margins": 0.2183237075805664, |
|
"rewards/rejected": -0.8911606073379517, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 12.411190032958984, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": 0.4094735085964203, |
|
"logits/rejected": 0.5186284184455872, |
|
"logps/chosen": -472.64093017578125, |
|
"logps/rejected": -419.64739990234375, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5493424534797668, |
|
"rewards/margins": 0.3005039095878601, |
|
"rewards/rejected": -0.849846363067627, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.30096833289714736, |
|
"grad_norm": 17.98678207397461, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": 0.19235338270664215, |
|
"logits/rejected": 0.4098784029483795, |
|
"logps/chosen": -451.133056640625, |
|
"logps/rejected": -423.66448974609375, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.474482923746109, |
|
"rewards/margins": 0.28159815073013306, |
|
"rewards/rejected": -0.7560810446739197, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 11.179768562316895, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": 0.5191640853881836, |
|
"logits/rejected": 0.6151641607284546, |
|
"logps/chosen": -423.7428283691406, |
|
"logps/rejected": -409.55303955078125, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.469729483127594, |
|
"rewards/margins": 0.2616717517375946, |
|
"rewards/rejected": -0.7314012050628662, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.30620256477361946, |
|
"grad_norm": 13.860902786254883, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": 0.3877313733100891, |
|
"logits/rejected": 0.25373178720474243, |
|
"logps/chosen": -457.0462951660156, |
|
"logps/rejected": -414.69024658203125, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.8331391215324402, |
|
"rewards/margins": 0.2332063466310501, |
|
"rewards/rejected": -1.066345453262329, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 10.883024215698242, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": 0.5446051955223083, |
|
"logits/rejected": 0.45434585213661194, |
|
"logps/chosen": -477.3246154785156, |
|
"logps/rejected": -486.14422607421875, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1283986568450928, |
|
"rewards/margins": 0.22864672541618347, |
|
"rewards/rejected": -1.3570451736450195, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3114367966500916, |
|
"grad_norm": 11.807137489318848, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": 0.48607057332992554, |
|
"logits/rejected": 0.6121966242790222, |
|
"logps/chosen": -470.0353088378906, |
|
"logps/rejected": -458.76055908203125, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.05269455909729, |
|
"rewards/margins": 0.19717691838741302, |
|
"rewards/rejected": -1.2498712539672852, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 14.276663780212402, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": 0.42629900574684143, |
|
"logits/rejected": 0.4847659170627594, |
|
"logps/chosen": -411.36407470703125, |
|
"logps/rejected": -420.76239013671875, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7268288731575012, |
|
"rewards/margins": 0.17092524468898773, |
|
"rewards/rejected": -0.8977540731430054, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": 0.18179067969322205, |
|
"eval_logits/rejected": 0.35566091537475586, |
|
"eval_logps/chosen": -452.5130920410156, |
|
"eval_logps/rejected": -432.3995056152344, |
|
"eval_loss": 0.6197048425674438, |
|
"eval_rewards/accuracies": 0.6629999876022339, |
|
"eval_rewards/chosen": -0.6414775252342224, |
|
"eval_rewards/margins": 0.23567558825016022, |
|
"eval_rewards/rejected": -0.8771531581878662, |
|
"eval_runtime": 232.4956, |
|
"eval_samples_per_second": 8.602, |
|
"eval_steps_per_second": 1.075, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3166710285265637, |
|
"grad_norm": 9.877076148986816, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": 0.31032776832580566, |
|
"logits/rejected": 0.4457179009914398, |
|
"logps/chosen": -426.0264587402344, |
|
"logps/rejected": -397.55316162109375, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5681090950965881, |
|
"rewards/margins": 0.26915818452835083, |
|
"rewards/rejected": -0.837267279624939, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 11.549198150634766, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": 0.3116544485092163, |
|
"logits/rejected": 0.5375791788101196, |
|
"logps/chosen": -420.4837951660156, |
|
"logps/rejected": -431.21929931640625, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.3886328339576721, |
|
"rewards/margins": 0.19474461674690247, |
|
"rewards/rejected": -0.5833774209022522, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32190526040303585, |
|
"grad_norm": 11.519137382507324, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": 0.19896575808525085, |
|
"logits/rejected": 0.25549182295799255, |
|
"logps/chosen": -476.87017822265625, |
|
"logps/rejected": -413.09197998046875, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.39009106159210205, |
|
"rewards/margins": 0.20869462192058563, |
|
"rewards/rejected": -0.5987856984138489, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 11.55118465423584, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": 0.39684659242630005, |
|
"logits/rejected": 0.3274468183517456, |
|
"logps/chosen": -453.70587158203125, |
|
"logps/rejected": -417.8851623535156, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4496288299560547, |
|
"rewards/margins": 0.3144444525241852, |
|
"rewards/rejected": -0.7640732526779175, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.327139492279508, |
|
"grad_norm": 10.041382789611816, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": 0.13150617480278015, |
|
"logits/rejected": 0.4586234986782074, |
|
"logps/chosen": -432.53570556640625, |
|
"logps/rejected": -412.88726806640625, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.56169593334198, |
|
"rewards/margins": 0.2593781054019928, |
|
"rewards/rejected": -0.8210738897323608, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 15.793495178222656, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": 0.2116355448961258, |
|
"logits/rejected": 0.34284886717796326, |
|
"logps/chosen": -430.3287658691406, |
|
"logps/rejected": -423.35980224609375, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6224905252456665, |
|
"rewards/margins": 0.2327694147825241, |
|
"rewards/rejected": -0.8552600145339966, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3323737241559801, |
|
"grad_norm": 11.136332511901855, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": 0.19596245884895325, |
|
"logits/rejected": 0.3888585865497589, |
|
"logps/chosen": -438.890625, |
|
"logps/rejected": -403.1433410644531, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6415246725082397, |
|
"rewards/margins": 0.2291472852230072, |
|
"rewards/rejected": -0.8706718683242798, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 20.09882926940918, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": 0.2686145603656769, |
|
"logits/rejected": 0.49140438437461853, |
|
"logps/chosen": -447.68115234375, |
|
"logps/rejected": -423.4256896972656, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7579549551010132, |
|
"rewards/margins": 0.31806570291519165, |
|
"rewards/rejected": -1.07602059841156, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33760795603245225, |
|
"grad_norm": 15.394529342651367, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": 0.0032246888149529696, |
|
"logits/rejected": 0.3693595230579376, |
|
"logps/chosen": -438.0142517089844, |
|
"logps/rejected": -418.50775146484375, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6667572259902954, |
|
"rewards/margins": 0.2417328804731369, |
|
"rewards/rejected": -0.9084900617599487, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 17.138986587524414, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": 0.1562187373638153, |
|
"logits/rejected": 0.4008878171443939, |
|
"logps/chosen": -412.3981018066406, |
|
"logps/rejected": -425.25299072265625, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4970017075538635, |
|
"rewards/margins": 0.32930120825767517, |
|
"rewards/rejected": -0.8263028860092163, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"eval_logits/chosen": 0.12899738550186157, |
|
"eval_logits/rejected": 0.29827845096588135, |
|
"eval_logps/chosen": -457.74737548828125, |
|
"eval_logps/rejected": -441.21051025390625, |
|
"eval_loss": 0.6130329370498657, |
|
"eval_rewards/accuracies": 0.6735000014305115, |
|
"eval_rewards/chosen": -0.693820059299469, |
|
"eval_rewards/margins": 0.27144384384155273, |
|
"eval_rewards/rejected": -0.965263843536377, |
|
"eval_runtime": 232.1045, |
|
"eval_samples_per_second": 8.617, |
|
"eval_steps_per_second": 1.077, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34284218790892435, |
|
"grad_norm": 15.050552368164062, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": 0.3422376215457916, |
|
"logits/rejected": 0.3849483132362366, |
|
"logps/chosen": -431.77166748046875, |
|
"logps/rejected": -416.0318298339844, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.7273216843605042, |
|
"rewards/margins": 0.3494306206703186, |
|
"rewards/rejected": -1.0767523050308228, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 16.636903762817383, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": 0.0636025071144104, |
|
"logits/rejected": 0.21294847130775452, |
|
"logps/chosen": -461.449951171875, |
|
"logps/rejected": -475.20361328125, |
|
"loss": 0.6341, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8283727765083313, |
|
"rewards/margins": 0.2532418668270111, |
|
"rewards/rejected": -1.0816147327423096, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3480764197853965, |
|
"grad_norm": 18.111169815063477, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": 0.23635880649089813, |
|
"logits/rejected": 0.25746363401412964, |
|
"logps/chosen": -399.59442138671875, |
|
"logps/rejected": -405.0094299316406, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.571780800819397, |
|
"rewards/margins": 0.21083179116249084, |
|
"rewards/rejected": -0.7826126217842102, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 16.408119201660156, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": 0.3088940680027008, |
|
"logits/rejected": 0.4904406666755676, |
|
"logps/chosen": -449.12713623046875, |
|
"logps/rejected": -436.40716552734375, |
|
"loss": 0.6052, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.4821189045906067, |
|
"rewards/margins": 0.28531405329704285, |
|
"rewards/rejected": -0.7674329280853271, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35331065166186865, |
|
"grad_norm": 13.181236267089844, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": 0.26650765538215637, |
|
"logits/rejected": 0.46960416436195374, |
|
"logps/chosen": -435.21270751953125, |
|
"logps/rejected": -414.5130310058594, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4914736747741699, |
|
"rewards/margins": 0.24917948246002197, |
|
"rewards/rejected": -0.7406532168388367, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 14.832420349121094, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": 0.24336537718772888, |
|
"logits/rejected": 0.30810093879699707, |
|
"logps/chosen": -410.4248962402344, |
|
"logps/rejected": -418.159423828125, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5495951771736145, |
|
"rewards/margins": 0.3245389461517334, |
|
"rewards/rejected": -0.8741341829299927, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35854488353834074, |
|
"grad_norm": 17.649641036987305, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": 0.168039470911026, |
|
"logits/rejected": 0.265805184841156, |
|
"logps/chosen": -489.49468994140625, |
|
"logps/rejected": -486.09100341796875, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8853768110275269, |
|
"rewards/margins": 0.28231385350227356, |
|
"rewards/rejected": -1.167690634727478, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 15.066794395446777, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": 0.2061309516429901, |
|
"logits/rejected": 0.3621533513069153, |
|
"logps/chosen": -498.93890380859375, |
|
"logps/rejected": -477.98974609375, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9270712733268738, |
|
"rewards/margins": 0.3421303629875183, |
|
"rewards/rejected": -1.2692015171051025, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3637791154148129, |
|
"grad_norm": 14.546058654785156, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": 0.295467346906662, |
|
"logits/rejected": 0.3572823405265808, |
|
"logps/chosen": -464.5303649902344, |
|
"logps/rejected": -447.1439514160156, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8640359044075012, |
|
"rewards/margins": 0.27750641107559204, |
|
"rewards/rejected": -1.1415421962738037, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 24.52462387084961, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": 0.19618520140647888, |
|
"logits/rejected": 0.33636996150016785, |
|
"logps/chosen": -475.90826416015625, |
|
"logps/rejected": -455.0912170410156, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.7381902933120728, |
|
"rewards/margins": 0.2615019679069519, |
|
"rewards/rejected": -0.9996922612190247, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"eval_logits/chosen": 0.1471870094537735, |
|
"eval_logits/rejected": 0.3099009096622467, |
|
"eval_logps/chosen": -460.8104553222656, |
|
"eval_logps/rejected": -446.4751281738281, |
|
"eval_loss": 0.608772337436676, |
|
"eval_rewards/accuracies": 0.6790000200271606, |
|
"eval_rewards/chosen": -0.7244512438774109, |
|
"eval_rewards/margins": 0.29345834255218506, |
|
"eval_rewards/rejected": -1.0179095268249512, |
|
"eval_runtime": 232.5568, |
|
"eval_samples_per_second": 8.6, |
|
"eval_steps_per_second": 1.075, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.369013347291285, |
|
"grad_norm": 18.559682846069336, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": 0.2730047106742859, |
|
"logits/rejected": 0.5137112140655518, |
|
"logps/chosen": -473.2476501464844, |
|
"logps/rejected": -456.55609130859375, |
|
"loss": 0.5941, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.868022084236145, |
|
"rewards/margins": 0.3129437267780304, |
|
"rewards/rejected": -1.1809656620025635, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 15.352100372314453, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": 0.22451026737689972, |
|
"logits/rejected": 0.3914637267589569, |
|
"logps/chosen": -482.42010498046875, |
|
"logps/rejected": -452.3109436035156, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7052127718925476, |
|
"rewards/margins": 0.31599652767181396, |
|
"rewards/rejected": -1.0212092399597168, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37424757916775714, |
|
"grad_norm": 20.098033905029297, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": 0.1833394318819046, |
|
"logits/rejected": 0.30950406193733215, |
|
"logps/chosen": -431.68438720703125, |
|
"logps/rejected": -421.0174255371094, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5920284986495972, |
|
"rewards/margins": 0.2607758641242981, |
|
"rewards/rejected": -0.8528043627738953, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 16.98012351989746, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": 0.39584654569625854, |
|
"logits/rejected": 0.6170969605445862, |
|
"logps/chosen": -442.19915771484375, |
|
"logps/rejected": -441.6769104003906, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8056619763374329, |
|
"rewards/margins": 0.3388480842113495, |
|
"rewards/rejected": -1.1445101499557495, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37948181104422923, |
|
"grad_norm": 13.900838851928711, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": 0.3026077151298523, |
|
"logits/rejected": 0.5092092156410217, |
|
"logps/chosen": -435.91571044921875, |
|
"logps/rejected": -435.0750427246094, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8758746981620789, |
|
"rewards/margins": 0.34648483991622925, |
|
"rewards/rejected": -1.2223594188690186, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 17.466562271118164, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": 0.2131034880876541, |
|
"logits/rejected": 0.42597731947898865, |
|
"logps/chosen": -423.911376953125, |
|
"logps/rejected": -408.1385192871094, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6358692049980164, |
|
"rewards/margins": 0.3088337481021881, |
|
"rewards/rejected": -0.9447029232978821, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3847160429207014, |
|
"grad_norm": 11.749993324279785, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": 0.30351871252059937, |
|
"logits/rejected": 0.3591347336769104, |
|
"logps/chosen": -443.84735107421875, |
|
"logps/rejected": -447.32763671875, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6212112903594971, |
|
"rewards/margins": 0.44100433588027954, |
|
"rewards/rejected": -1.0622155666351318, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 15.301169395446777, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": 0.30313563346862793, |
|
"logits/rejected": 0.3141325116157532, |
|
"logps/chosen": -478.4485778808594, |
|
"logps/rejected": -414.6806640625, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.9370290040969849, |
|
"rewards/margins": 0.28061023354530334, |
|
"rewards/rejected": -1.2176392078399658, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38995027479717354, |
|
"grad_norm": 16.264097213745117, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": 0.2156684696674347, |
|
"logits/rejected": 0.3405511975288391, |
|
"logps/chosen": -454.978515625, |
|
"logps/rejected": -446.1961975097656, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8145572543144226, |
|
"rewards/margins": 0.3832133412361145, |
|
"rewards/rejected": -1.1977707147598267, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 17.33570098876953, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": 0.197922945022583, |
|
"logits/rejected": 0.31282711029052734, |
|
"logps/chosen": -465.8154296875, |
|
"logps/rejected": -456.82562255859375, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8404709696769714, |
|
"rewards/margins": 0.37942442297935486, |
|
"rewards/rejected": -1.219895362854004, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"eval_logits/chosen": 0.08960460871458054, |
|
"eval_logits/rejected": 0.23796047270298004, |
|
"eval_logps/chosen": -480.02130126953125, |
|
"eval_logps/rejected": -468.2979431152344, |
|
"eval_loss": 0.6048462986946106, |
|
"eval_rewards/accuracies": 0.6754999756813049, |
|
"eval_rewards/chosen": -0.9165594577789307, |
|
"eval_rewards/margins": 0.3195783197879791, |
|
"eval_rewards/rejected": -1.236137866973877, |
|
"eval_runtime": 231.9563, |
|
"eval_samples_per_second": 8.622, |
|
"eval_steps_per_second": 1.078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39518450667364563, |
|
"grad_norm": 14.863387107849121, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": 0.23345918953418732, |
|
"logits/rejected": 0.28646907210350037, |
|
"logps/chosen": -454.75897216796875, |
|
"logps/rejected": -453.5748596191406, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.0461337566375732, |
|
"rewards/margins": 0.21926303207874298, |
|
"rewards/rejected": -1.2653969526290894, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 16.0816593170166, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": 0.2003081738948822, |
|
"logits/rejected": 0.36653000116348267, |
|
"logps/chosen": -488.4042053222656, |
|
"logps/rejected": -490.30853271484375, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9618334770202637, |
|
"rewards/margins": 0.38063231110572815, |
|
"rewards/rejected": -1.3424657583236694, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4004187385501178, |
|
"grad_norm": 15.07925033569336, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -0.012658292427659035, |
|
"logits/rejected": 0.16820164024829865, |
|
"logps/chosen": -478.895751953125, |
|
"logps/rejected": -465.083984375, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.834586501121521, |
|
"rewards/margins": 0.23068487644195557, |
|
"rewards/rejected": -1.0652713775634766, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 13.859041213989258, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": 0.3084440231323242, |
|
"logits/rejected": 0.4003655016422272, |
|
"logps/chosen": -465.21844482421875, |
|
"logps/rejected": -456.33465576171875, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1239235401153564, |
|
"rewards/margins": 0.2595583498477936, |
|
"rewards/rejected": -1.3834818601608276, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4056529704265899, |
|
"grad_norm": 12.961313247680664, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": 0.20876283943653107, |
|
"logits/rejected": 0.4126996099948883, |
|
"logps/chosen": -499.909912109375, |
|
"logps/rejected": -486.24749755859375, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9955336451530457, |
|
"rewards/margins": 0.3382071256637573, |
|
"rewards/rejected": -1.3337408304214478, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 15.985527038574219, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": 0.12506040930747986, |
|
"logits/rejected": 0.17536480724811554, |
|
"logps/chosen": -477.6473083496094, |
|
"logps/rejected": -436.26751708984375, |
|
"loss": 0.6062, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7066253423690796, |
|
"rewards/margins": 0.3402264714241028, |
|
"rewards/rejected": -1.0468518733978271, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.410887202303062, |
|
"grad_norm": 22.8974666595459, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": 0.0588788278400898, |
|
"logits/rejected": 0.12763305008411407, |
|
"logps/chosen": -440.4716796875, |
|
"logps/rejected": -444.61834716796875, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7403033375740051, |
|
"rewards/margins": 0.37406405806541443, |
|
"rewards/rejected": -1.1143672466278076, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 15.58066463470459, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": 0.27244722843170166, |
|
"logits/rejected": 0.3991895318031311, |
|
"logps/chosen": -443.55645751953125, |
|
"logps/rejected": -494.92254638671875, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7955921292304993, |
|
"rewards/margins": 0.36010387539863586, |
|
"rewards/rejected": -1.1556960344314575, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4161214341795342, |
|
"grad_norm": 13.258322715759277, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -0.04510800167918205, |
|
"logits/rejected": 0.03904765844345093, |
|
"logps/chosen": -436.66802978515625, |
|
"logps/rejected": -471.71142578125, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.744075357913971, |
|
"rewards/margins": 0.400721937417984, |
|
"rewards/rejected": -1.1447973251342773, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 14.653417587280273, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": 0.014941488392651081, |
|
"logits/rejected": 0.006946629378944635, |
|
"logps/chosen": -499.9811096191406, |
|
"logps/rejected": -463.3321228027344, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9098555445671082, |
|
"rewards/margins": 0.21712689101696014, |
|
"rewards/rejected": -1.1269824504852295, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -0.08284498751163483, |
|
"eval_logits/rejected": 0.05344332382082939, |
|
"eval_logps/chosen": -487.8919982910156, |
|
"eval_logps/rejected": -479.4829406738281, |
|
"eval_loss": 0.6062743067741394, |
|
"eval_rewards/accuracies": 0.6704999804496765, |
|
"eval_rewards/chosen": -0.9952664971351624, |
|
"eval_rewards/margins": 0.35272136330604553, |
|
"eval_rewards/rejected": -1.3479877710342407, |
|
"eval_runtime": 231.9207, |
|
"eval_samples_per_second": 8.624, |
|
"eval_steps_per_second": 1.078, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4213556660560063, |
|
"grad_norm": 14.061836242675781, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": 0.043830014765262604, |
|
"logits/rejected": 0.0922635942697525, |
|
"logps/chosen": -452.1427307128906, |
|
"logps/rejected": -491.6693420410156, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8795869946479797, |
|
"rewards/margins": 0.42901507019996643, |
|
"rewards/rejected": -1.3086020946502686, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 27.227012634277344, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": 0.06352569162845612, |
|
"logits/rejected": 0.29197776317596436, |
|
"logps/chosen": -464.83154296875, |
|
"logps/rejected": -456.54644775390625, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.966151237487793, |
|
"rewards/margins": 0.261862576007843, |
|
"rewards/rejected": -1.2280137538909912, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4265898979324784, |
|
"grad_norm": 17.129220962524414, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": 0.09727749973535538, |
|
"logits/rejected": 0.07895330339670181, |
|
"logps/chosen": -418.5646057128906, |
|
"logps/rejected": -414.50347900390625, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9049051403999329, |
|
"rewards/margins": 0.2770025134086609, |
|
"rewards/rejected": -1.1819076538085938, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 13.82589340209961, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -0.012812698259949684, |
|
"logits/rejected": -0.03551667556166649, |
|
"logps/chosen": -469.703857421875, |
|
"logps/rejected": -462.33489990234375, |
|
"loss": 0.6404, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.8767460584640503, |
|
"rewards/margins": 0.29123008251190186, |
|
"rewards/rejected": -1.1679762601852417, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4318241298089505, |
|
"grad_norm": 15.320180892944336, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": 0.16704775393009186, |
|
"logits/rejected": 0.20051440596580505, |
|
"logps/chosen": -468.2303161621094, |
|
"logps/rejected": -435.6004333496094, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9747709035873413, |
|
"rewards/margins": 0.2773420214653015, |
|
"rewards/rejected": -1.2521127462387085, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 14.137253761291504, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": 0.20161625742912292, |
|
"logits/rejected": 0.34558919072151184, |
|
"logps/chosen": -457.50775146484375, |
|
"logps/rejected": -459.1556091308594, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1610692739486694, |
|
"rewards/margins": 0.21469669044017792, |
|
"rewards/rejected": -1.3757660388946533, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43705836168542267, |
|
"grad_norm": 14.53886604309082, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": 0.08533845096826553, |
|
"logits/rejected": 0.3017124533653259, |
|
"logps/chosen": -496.14801025390625, |
|
"logps/rejected": -463.46807861328125, |
|
"loss": 0.601, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9761697053909302, |
|
"rewards/margins": 0.33948642015457153, |
|
"rewards/rejected": -1.3156561851501465, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 24.965370178222656, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": 0.06942877918481827, |
|
"logits/rejected": 0.1279851198196411, |
|
"logps/chosen": -469.521728515625, |
|
"logps/rejected": -445.25592041015625, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6999729871749878, |
|
"rewards/margins": 0.4319036900997162, |
|
"rewards/rejected": -1.1318767070770264, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44229259356189476, |
|
"grad_norm": 15.348676681518555, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": 0.06874585151672363, |
|
"logits/rejected": 0.13581883907318115, |
|
"logps/chosen": -501.0218200683594, |
|
"logps/rejected": -451.73345947265625, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9231773614883423, |
|
"rewards/margins": 0.31995171308517456, |
|
"rewards/rejected": -1.243129014968872, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 19.28348731994629, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": 0.1793755143880844, |
|
"logits/rejected": 0.4792613983154297, |
|
"logps/chosen": -450.6305236816406, |
|
"logps/rejected": -468.32086181640625, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2974417209625244, |
|
"rewards/margins": 0.2531173825263977, |
|
"rewards/rejected": -1.5505590438842773, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"eval_logits/chosen": 0.0606597363948822, |
|
"eval_logits/rejected": 0.20719292759895325, |
|
"eval_logps/chosen": -515.066162109375, |
|
"eval_logps/rejected": -504.28570556640625, |
|
"eval_loss": 0.6020700931549072, |
|
"eval_rewards/accuracies": 0.6744999885559082, |
|
"eval_rewards/chosen": -1.2670079469680786, |
|
"eval_rewards/margins": 0.32900768518447876, |
|
"eval_rewards/rejected": -1.5960155725479126, |
|
"eval_runtime": 232.1756, |
|
"eval_samples_per_second": 8.614, |
|
"eval_steps_per_second": 1.077, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4475268254383669, |
|
"grad_norm": 26.0466365814209, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": 0.11054261028766632, |
|
"logits/rejected": 0.11302468925714493, |
|
"logps/chosen": -463.68212890625, |
|
"logps/rejected": -505.44464111328125, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1472562551498413, |
|
"rewards/margins": 0.3336459994316101, |
|
"rewards/rejected": -1.4809024333953857, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 12.329084396362305, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": 0.14257648587226868, |
|
"logits/rejected": 0.2127263993024826, |
|
"logps/chosen": -476.0755920410156, |
|
"logps/rejected": -482.536376953125, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8113042712211609, |
|
"rewards/margins": 0.41295966506004333, |
|
"rewards/rejected": -1.2242640256881714, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45276105731483907, |
|
"grad_norm": 14.099928855895996, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": 0.11843159049749374, |
|
"logits/rejected": 0.2315410077571869, |
|
"logps/chosen": -438.8074645996094, |
|
"logps/rejected": -451.05816650390625, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6928716897964478, |
|
"rewards/margins": 0.3718397617340088, |
|
"rewards/rejected": -1.064711570739746, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 12.286911010742188, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": 0.025634441524744034, |
|
"logits/rejected": 0.12711207568645477, |
|
"logps/chosen": -447.642578125, |
|
"logps/rejected": -428.96246337890625, |
|
"loss": 0.6114, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6357846856117249, |
|
"rewards/margins": 0.2916674017906189, |
|
"rewards/rejected": -0.9274520874023438, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45799528919131116, |
|
"grad_norm": 14.236140251159668, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": 0.07329438626766205, |
|
"logits/rejected": 0.2325226366519928, |
|
"logps/chosen": -458.25213623046875, |
|
"logps/rejected": -445.90557861328125, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8362909555435181, |
|
"rewards/margins": 0.3581870198249817, |
|
"rewards/rejected": -1.1944780349731445, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 15.764031410217285, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": 0.11211331933736801, |
|
"logits/rejected": 0.12331026792526245, |
|
"logps/chosen": -460.3636169433594, |
|
"logps/rejected": -454.080078125, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8475208282470703, |
|
"rewards/margins": 0.3742133677005768, |
|
"rewards/rejected": -1.2217340469360352, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4632295210677833, |
|
"grad_norm": 23.4930362701416, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": 0.24953755736351013, |
|
"logits/rejected": 0.34629741311073303, |
|
"logps/chosen": -477.17791748046875, |
|
"logps/rejected": -515.0440673828125, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0197489261627197, |
|
"rewards/margins": 0.4735226035118103, |
|
"rewards/rejected": -1.4932715892791748, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 13.870096206665039, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": 0.12339513003826141, |
|
"logits/rejected": 0.26224666833877563, |
|
"logps/chosen": -495.1161193847656, |
|
"logps/rejected": -489.4485778808594, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2092769145965576, |
|
"rewards/margins": 0.3337065279483795, |
|
"rewards/rejected": -1.5429834127426147, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4684637529442554, |
|
"grad_norm": 19.58800506591797, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": 0.04813681170344353, |
|
"logits/rejected": 0.21235807240009308, |
|
"logps/chosen": -539.0248413085938, |
|
"logps/rejected": -540.2000732421875, |
|
"loss": 0.6242, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3390930891036987, |
|
"rewards/margins": 0.3306949734687805, |
|
"rewards/rejected": -1.669788122177124, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 16.04212760925293, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": 0.1568802297115326, |
|
"logits/rejected": 0.3320377767086029, |
|
"logps/chosen": -475.43487548828125, |
|
"logps/rejected": -508.42083740234375, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2451696395874023, |
|
"rewards/margins": 0.4058682918548584, |
|
"rewards/rejected": -1.6510378122329712, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"eval_logits/chosen": -0.0006541285547427833, |
|
"eval_logits/rejected": 0.12322327494621277, |
|
"eval_logps/chosen": -501.73223876953125, |
|
"eval_logps/rejected": -494.2189636230469, |
|
"eval_loss": 0.5999693870544434, |
|
"eval_rewards/accuracies": 0.6654999852180481, |
|
"eval_rewards/chosen": -1.133669137954712, |
|
"eval_rewards/margins": 0.3616788983345032, |
|
"eval_rewards/rejected": -1.4953482151031494, |
|
"eval_runtime": 232.2077, |
|
"eval_samples_per_second": 8.613, |
|
"eval_steps_per_second": 1.077, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47369798482072756, |
|
"grad_norm": 16.250635147094727, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -0.059101611375808716, |
|
"logits/rejected": 0.07854647934436798, |
|
"logps/chosen": -468.9906311035156, |
|
"logps/rejected": -467.6946716308594, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1028201580047607, |
|
"rewards/margins": 0.41374215483665466, |
|
"rewards/rejected": -1.5165622234344482, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 17.706560134887695, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": 0.029452210292220116, |
|
"logits/rejected": 0.0658307746052742, |
|
"logps/chosen": -503.1075134277344, |
|
"logps/rejected": -495.166259765625, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.9831246137619019, |
|
"rewards/margins": 0.2420085221529007, |
|
"rewards/rejected": -1.2251330614089966, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4789322166971997, |
|
"grad_norm": 11.094071388244629, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": 0.15399818122386932, |
|
"logits/rejected": 0.09563325345516205, |
|
"logps/chosen": -425.6133728027344, |
|
"logps/rejected": -413.8519592285156, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6909037828445435, |
|
"rewards/margins": 0.21615329384803772, |
|
"rewards/rejected": -0.907056987285614, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 13.76625919342041, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": 0.03457440435886383, |
|
"logits/rejected": 0.3043617010116577, |
|
"logps/chosen": -482.749267578125, |
|
"logps/rejected": -466.3192443847656, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.6658245921134949, |
|
"rewards/margins": 0.4141596257686615, |
|
"rewards/rejected": -1.079984188079834, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4841664485736718, |
|
"grad_norm": 13.214118003845215, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": 0.10331498086452484, |
|
"logits/rejected": 0.20391520857810974, |
|
"logps/chosen": -504.2576599121094, |
|
"logps/rejected": -493.19635009765625, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9062315225601196, |
|
"rewards/margins": 0.4142914414405823, |
|
"rewards/rejected": -1.3205230236053467, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 15.12294864654541, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": 0.22254931926727295, |
|
"logits/rejected": 0.15011247992515564, |
|
"logps/chosen": -541.047119140625, |
|
"logps/rejected": -546.2518310546875, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3822373151779175, |
|
"rewards/margins": 0.3731249272823334, |
|
"rewards/rejected": -1.7553622722625732, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48940068045014395, |
|
"grad_norm": 19.084123611450195, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": 0.16859467327594757, |
|
"logits/rejected": 0.2447008639574051, |
|
"logps/chosen": -518.3531494140625, |
|
"logps/rejected": -513.4488525390625, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2804909944534302, |
|
"rewards/margins": 0.4136219620704651, |
|
"rewards/rejected": -1.69411301612854, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 15.792978286743164, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": 0.11333123594522476, |
|
"logits/rejected": 0.27916672825813293, |
|
"logps/chosen": -503.6768493652344, |
|
"logps/rejected": -482.2354431152344, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0980170965194702, |
|
"rewards/margins": 0.46111616492271423, |
|
"rewards/rejected": -1.5591331720352173, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49463491232661605, |
|
"grad_norm": 16.74842071533203, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -0.023114752024412155, |
|
"logits/rejected": 0.1362176537513733, |
|
"logps/chosen": -517.9249267578125, |
|
"logps/rejected": -498.3343200683594, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0479462146759033, |
|
"rewards/margins": 0.3232496380805969, |
|
"rewards/rejected": -1.3711960315704346, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 14.8408784866333, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -0.03425337374210358, |
|
"logits/rejected": 0.01658450812101364, |
|
"logps/chosen": -510.95263671875, |
|
"logps/rejected": -505.4061584472656, |
|
"loss": 0.5875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.101326584815979, |
|
"rewards/margins": 0.37253618240356445, |
|
"rewards/rejected": -1.4738627672195435, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"eval_logits/chosen": 0.06292513012886047, |
|
"eval_logits/rejected": 0.18740317225456238, |
|
"eval_logps/chosen": -508.2808532714844, |
|
"eval_logps/rejected": -502.21710205078125, |
|
"eval_loss": 0.5985915660858154, |
|
"eval_rewards/accuracies": 0.6744999885559082, |
|
"eval_rewards/chosen": -1.1991546154022217, |
|
"eval_rewards/margins": 0.3761745095252991, |
|
"eval_rewards/rejected": -1.5753291845321655, |
|
"eval_runtime": 232.6562, |
|
"eval_samples_per_second": 8.596, |
|
"eval_steps_per_second": 1.075, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4998691442030882, |
|
"grad_norm": 13.105058670043945, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": 0.08050940185785294, |
|
"logits/rejected": 0.27998119592666626, |
|
"logps/chosen": -500.57720947265625, |
|
"logps/rejected": -478.83624267578125, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2193187475204468, |
|
"rewards/margins": 0.34275728464126587, |
|
"rewards/rejected": -1.5620760917663574, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 17.656320571899414, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": 0.2168809473514557, |
|
"logits/rejected": 0.3105737566947937, |
|
"logps/chosen": -496.070556640625, |
|
"logps/rejected": -499.7674865722656, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1544568538665771, |
|
"rewards/margins": 0.4054805636405945, |
|
"rewards/rejected": -1.5599374771118164, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5051033760795604, |
|
"grad_norm": 14.08234977722168, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": 0.2844335436820984, |
|
"logits/rejected": 0.31109169125556946, |
|
"logps/chosen": -513.3364868164062, |
|
"logps/rejected": -496.12200927734375, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3235498666763306, |
|
"rewards/margins": 0.4011419713497162, |
|
"rewards/rejected": -1.7246919870376587, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 15.062522888183594, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": 0.230653315782547, |
|
"logits/rejected": 0.3991672396659851, |
|
"logps/chosen": -498.00933837890625, |
|
"logps/rejected": -510.3081970214844, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1839993000030518, |
|
"rewards/margins": 0.4453356862068176, |
|
"rewards/rejected": -1.6293350458145142, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5103376079560324, |
|
"grad_norm": 19.26543426513672, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": 0.14215265214443207, |
|
"logits/rejected": 0.23736266791820526, |
|
"logps/chosen": -510.92657470703125, |
|
"logps/rejected": -494.1617126464844, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0615785121917725, |
|
"rewards/margins": 0.360460102558136, |
|
"rewards/rejected": -1.4220386743545532, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 13.139251708984375, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": 0.15903696417808533, |
|
"logits/rejected": 0.29216212034225464, |
|
"logps/chosen": -534.9984741210938, |
|
"logps/rejected": -512.8052978515625, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2044841051101685, |
|
"rewards/margins": 0.41181907057762146, |
|
"rewards/rejected": -1.6163032054901123, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5155718398325045, |
|
"grad_norm": 21.8537654876709, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": 0.16536223888397217, |
|
"logits/rejected": 0.2516060173511505, |
|
"logps/chosen": -516.1781005859375, |
|
"logps/rejected": -486.567138671875, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.140059471130371, |
|
"rewards/margins": 0.38968387246131897, |
|
"rewards/rejected": -1.5297433137893677, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 18.018861770629883, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": 0.16366654634475708, |
|
"logits/rejected": 0.278939425945282, |
|
"logps/chosen": -435.4315490722656, |
|
"logps/rejected": -477.2860412597656, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.983991265296936, |
|
"rewards/margins": 0.28731080889701843, |
|
"rewards/rejected": -1.2713019847869873, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5208060717089767, |
|
"grad_norm": 15.223612785339355, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": 0.05067938566207886, |
|
"logits/rejected": 0.06571893393993378, |
|
"logps/chosen": -452.55804443359375, |
|
"logps/rejected": -454.023193359375, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8877269625663757, |
|
"rewards/margins": 0.38499319553375244, |
|
"rewards/rejected": -1.2727200984954834, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 17.48723602294922, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -0.015536749735474586, |
|
"logits/rejected": 0.16148407757282257, |
|
"logps/chosen": -518.9827880859375, |
|
"logps/rejected": -480.99163818359375, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1106268167495728, |
|
"rewards/margins": 0.4065285623073578, |
|
"rewards/rejected": -1.5171552896499634, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -0.0025373969692736864, |
|
"eval_logits/rejected": 0.12474588304758072, |
|
"eval_logps/chosen": -524.7777099609375, |
|
"eval_logps/rejected": -517.0887451171875, |
|
"eval_loss": 0.5969316959381104, |
|
"eval_rewards/accuracies": 0.6819999814033508, |
|
"eval_rewards/chosen": -1.3641233444213867, |
|
"eval_rewards/margins": 0.3599224388599396, |
|
"eval_rewards/rejected": -1.7240456342697144, |
|
"eval_runtime": 232.6406, |
|
"eval_samples_per_second": 8.597, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5260403035854488, |
|
"grad_norm": 17.689542770385742, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": 0.2274688184261322, |
|
"logits/rejected": 0.25372716784477234, |
|
"logps/chosen": -526.2362060546875, |
|
"logps/rejected": -533.8927001953125, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3567404747009277, |
|
"rewards/margins": 0.4240929186344147, |
|
"rewards/rejected": -1.7808334827423096, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 15.211199760437012, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": 0.058562636375427246, |
|
"logits/rejected": 0.18520574271678925, |
|
"logps/chosen": -499.6806640625, |
|
"logps/rejected": -491.3092346191406, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1945785284042358, |
|
"rewards/margins": 0.3566603362560272, |
|
"rewards/rejected": -1.551238775253296, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5312745354619209, |
|
"grad_norm": 15.991866111755371, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -0.09116406738758087, |
|
"logits/rejected": -0.05098678544163704, |
|
"logps/chosen": -533.4771728515625, |
|
"logps/rejected": -504.9336853027344, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0096272230148315, |
|
"rewards/margins": 0.49922627210617065, |
|
"rewards/rejected": -1.508853554725647, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 15.529874801635742, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": 0.0014547407627105713, |
|
"logits/rejected": -0.014313450083136559, |
|
"logps/chosen": -489.4344177246094, |
|
"logps/rejected": -481.03558349609375, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1068042516708374, |
|
"rewards/margins": 0.29210469126701355, |
|
"rewards/rejected": -1.3989089727401733, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5365087673383931, |
|
"grad_norm": 14.723438262939453, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": 0.06219317764043808, |
|
"logits/rejected": 0.29134300351142883, |
|
"logps/chosen": -482.73651123046875, |
|
"logps/rejected": -454.9242248535156, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2507166862487793, |
|
"rewards/margins": 0.40432801842689514, |
|
"rewards/rejected": -1.6550447940826416, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 14.240797996520996, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -0.00904160737991333, |
|
"logits/rejected": -0.042715176939964294, |
|
"logps/chosen": -539.594482421875, |
|
"logps/rejected": -535.2293701171875, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3310154676437378, |
|
"rewards/margins": 0.48368293046951294, |
|
"rewards/rejected": -1.814698576927185, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5417429992148652, |
|
"grad_norm": 14.852096557617188, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -0.025297870859503746, |
|
"logits/rejected": 0.19150254130363464, |
|
"logps/chosen": -528.1922607421875, |
|
"logps/rejected": -553.71142578125, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.5110130310058594, |
|
"rewards/margins": 0.3934626281261444, |
|
"rewards/rejected": -1.9044758081436157, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 16.273681640625, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -0.0205762330442667, |
|
"logits/rejected": 0.12017925083637238, |
|
"logps/chosen": -543.7810668945312, |
|
"logps/rejected": -533.4263916015625, |
|
"loss": 0.6134, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7067127227783203, |
|
"rewards/margins": 0.3284439742565155, |
|
"rewards/rejected": -2.035156726837158, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5469772310913373, |
|
"grad_norm": 23.068370819091797, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": 0.06104808300733566, |
|
"logits/rejected": 0.21988165378570557, |
|
"logps/chosen": -568.421630859375, |
|
"logps/rejected": -585.5074462890625, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.414439082145691, |
|
"rewards/margins": 0.4884655475616455, |
|
"rewards/rejected": -1.902904748916626, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 18.669837951660156, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": 0.03404618427157402, |
|
"logits/rejected": 0.2708562910556793, |
|
"logps/chosen": -476.51324462890625, |
|
"logps/rejected": -472.4541931152344, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2663251161575317, |
|
"rewards/margins": 0.32750216126441956, |
|
"rewards/rejected": -1.5938273668289185, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"eval_logits/chosen": -0.016485024243593216, |
|
"eval_logits/rejected": 0.10226120799779892, |
|
"eval_logps/chosen": -514.2800903320312, |
|
"eval_logps/rejected": -508.7902526855469, |
|
"eval_loss": 0.5930544137954712, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": -1.2591471672058105, |
|
"eval_rewards/margins": 0.38191384077072144, |
|
"eval_rewards/rejected": -1.6410611867904663, |
|
"eval_runtime": 232.4639, |
|
"eval_samples_per_second": 8.603, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5522114629678094, |
|
"grad_norm": 17.219968795776367, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": 0.07551795244216919, |
|
"logits/rejected": 0.11292078346014023, |
|
"logps/chosen": -530.4259033203125, |
|
"logps/rejected": -491.468017578125, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1608909368515015, |
|
"rewards/margins": 0.4003227651119232, |
|
"rewards/rejected": -1.561213731765747, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 19.8704776763916, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": 0.16289404034614563, |
|
"logits/rejected": 0.24848175048828125, |
|
"logps/chosen": -452.452880859375, |
|
"logps/rejected": -434.36090087890625, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9118096232414246, |
|
"rewards/margins": 0.3754151463508606, |
|
"rewards/rejected": -1.2872246503829956, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5574456948442816, |
|
"grad_norm": 15.798819541931152, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": 0.11843502521514893, |
|
"logits/rejected": 0.15708817541599274, |
|
"logps/chosen": -505.29168701171875, |
|
"logps/rejected": -489.7765197753906, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.04365873336792, |
|
"rewards/margins": 0.4483584761619568, |
|
"rewards/rejected": -1.492017149925232, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 17.67369842529297, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": 0.17071916162967682, |
|
"logits/rejected": 0.209875226020813, |
|
"logps/chosen": -521.3018798828125, |
|
"logps/rejected": -482.3414001464844, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2791138887405396, |
|
"rewards/margins": 0.3632059097290039, |
|
"rewards/rejected": -1.642319679260254, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5626799267207537, |
|
"grad_norm": 21.954570770263672, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": 0.3113669753074646, |
|
"logits/rejected": 0.2932060956954956, |
|
"logps/chosen": -475.5943908691406, |
|
"logps/rejected": -454.9044494628906, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2669929265975952, |
|
"rewards/margins": 0.3418061137199402, |
|
"rewards/rejected": -1.6087989807128906, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 25.539379119873047, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": 0.1442708671092987, |
|
"logits/rejected": 0.23462197184562683, |
|
"logps/chosen": -535.3931884765625, |
|
"logps/rejected": -477.28369140625, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.161911964416504, |
|
"rewards/margins": 0.46348389983177185, |
|
"rewards/rejected": -1.6253957748413086, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5679141585972258, |
|
"grad_norm": 19.380157470703125, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": 0.21937327086925507, |
|
"logits/rejected": 0.30190104246139526, |
|
"logps/chosen": -508.240966796875, |
|
"logps/rejected": -481.8168029785156, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4066482782363892, |
|
"rewards/margins": 0.38738176226615906, |
|
"rewards/rejected": -1.7940301895141602, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 18.660661697387695, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": 0.22592106461524963, |
|
"logits/rejected": 0.28465738892555237, |
|
"logps/chosen": -527.2122802734375, |
|
"logps/rejected": -483.3441467285156, |
|
"loss": 0.6442, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.577532410621643, |
|
"rewards/margins": 0.2664092779159546, |
|
"rewards/rejected": -1.8439416885375977, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.573148390473698, |
|
"grad_norm": 20.082679748535156, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": 0.16324841976165771, |
|
"logits/rejected": 0.29000192880630493, |
|
"logps/chosen": -521.3306274414062, |
|
"logps/rejected": -535.11328125, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5316604375839233, |
|
"rewards/margins": 0.3131243586540222, |
|
"rewards/rejected": -1.8447847366333008, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 15.897833824157715, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": 0.2278885841369629, |
|
"logits/rejected": 0.2919641137123108, |
|
"logps/chosen": -514.6935424804688, |
|
"logps/rejected": -508.80096435546875, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2934855222702026, |
|
"rewards/margins": 0.41693955659866333, |
|
"rewards/rejected": -1.7104251384735107, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"eval_logits/chosen": 0.14214755594730377, |
|
"eval_logits/rejected": 0.2720797061920166, |
|
"eval_logps/chosen": -512.494873046875, |
|
"eval_logps/rejected": -506.34222412109375, |
|
"eval_loss": 0.594093382358551, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -1.2412952184677124, |
|
"eval_rewards/margins": 0.3752853572368622, |
|
"eval_rewards/rejected": -1.616580605506897, |
|
"eval_runtime": 232.5254, |
|
"eval_samples_per_second": 8.601, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5783826223501701, |
|
"grad_norm": 15.479647636413574, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": 0.140645831823349, |
|
"logits/rejected": 0.33969563245773315, |
|
"logps/chosen": -515.4466552734375, |
|
"logps/rejected": -524.2593994140625, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2855024337768555, |
|
"rewards/margins": 0.35720211267471313, |
|
"rewards/rejected": -1.6427046060562134, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 15.445068359375, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": 0.2617935538291931, |
|
"logits/rejected": 0.2599295973777771, |
|
"logps/chosen": -544.3765869140625, |
|
"logps/rejected": -532.53271484375, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3259150981903076, |
|
"rewards/margins": 0.3112506568431854, |
|
"rewards/rejected": -1.6371657848358154, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5836168542266422, |
|
"grad_norm": 16.804216384887695, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": 0.33556073904037476, |
|
"logits/rejected": 0.504830539226532, |
|
"logps/chosen": -567.1902465820312, |
|
"logps/rejected": -558.8784790039062, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4798108339309692, |
|
"rewards/margins": 0.43180447816848755, |
|
"rewards/rejected": -1.9116153717041016, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 15.775924682617188, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": 0.34685009717941284, |
|
"logits/rejected": 0.377047598361969, |
|
"logps/chosen": -602.8464965820312, |
|
"logps/rejected": -586.4207763671875, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6576731204986572, |
|
"rewards/margins": 0.36601829528808594, |
|
"rewards/rejected": -2.0236916542053223, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5888510861031143, |
|
"grad_norm": 14.079179763793945, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": 0.2139190137386322, |
|
"logits/rejected": 0.42656293511390686, |
|
"logps/chosen": -567.9656982421875, |
|
"logps/rejected": -579.9403076171875, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7237701416015625, |
|
"rewards/margins": 0.4748326241970062, |
|
"rewards/rejected": -2.1986026763916016, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 18.067665100097656, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": 0.4777112603187561, |
|
"logits/rejected": 0.43725594878196716, |
|
"logps/chosen": -557.2151489257812, |
|
"logps/rejected": -545.1485595703125, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.7369823455810547, |
|
"rewards/margins": 0.29405802488327026, |
|
"rewards/rejected": -2.0310404300689697, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5940853179795865, |
|
"grad_norm": 19.236621856689453, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": 0.40373674035072327, |
|
"logits/rejected": 0.3856516480445862, |
|
"logps/chosen": -574.0790405273438, |
|
"logps/rejected": -524.0260009765625, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5017083883285522, |
|
"rewards/margins": 0.43927374482154846, |
|
"rewards/rejected": -1.9409822225570679, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 16.725879669189453, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": 0.3718245029449463, |
|
"logits/rejected": 0.4380251467227936, |
|
"logps/chosen": -513.0089111328125, |
|
"logps/rejected": -508.85760498046875, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3701801300048828, |
|
"rewards/margins": 0.5219612717628479, |
|
"rewards/rejected": -1.892141342163086, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5993195498560586, |
|
"grad_norm": 17.321361541748047, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": 0.25947481393814087, |
|
"logits/rejected": 0.3379734754562378, |
|
"logps/chosen": -519.076904296875, |
|
"logps/rejected": -498.84307861328125, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3756697177886963, |
|
"rewards/margins": 0.35469603538513184, |
|
"rewards/rejected": -1.730365514755249, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 27.503204345703125, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": 0.13081859052181244, |
|
"logits/rejected": 0.22465069591999054, |
|
"logps/chosen": -527.054931640625, |
|
"logps/rejected": -524.3948974609375, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3721132278442383, |
|
"rewards/margins": 0.4912486672401428, |
|
"rewards/rejected": -1.8633617162704468, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"eval_logits/chosen": 0.20482583343982697, |
|
"eval_logits/rejected": 0.3316424489021301, |
|
"eval_logps/chosen": -533.3546752929688, |
|
"eval_logps/rejected": -529.7435302734375, |
|
"eval_loss": 0.5939305424690247, |
|
"eval_rewards/accuracies": 0.6819999814033508, |
|
"eval_rewards/chosen": -1.4498937129974365, |
|
"eval_rewards/margins": 0.40070000290870667, |
|
"eval_rewards/rejected": -1.8505936861038208, |
|
"eval_runtime": 232.6476, |
|
"eval_samples_per_second": 8.597, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6045537817325307, |
|
"grad_norm": 14.945230484008789, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": 0.3673093914985657, |
|
"logits/rejected": 0.3980174660682678, |
|
"logps/chosen": -465.44775390625, |
|
"logps/rejected": -453.03076171875, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4102133512496948, |
|
"rewards/margins": 0.36651554703712463, |
|
"rewards/rejected": -1.7767289876937866, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 14.364018440246582, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": 0.34174802899360657, |
|
"logits/rejected": 0.3505280613899231, |
|
"logps/chosen": -476.4820861816406, |
|
"logps/rejected": -462.1893615722656, |
|
"loss": 0.5789, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1048295497894287, |
|
"rewards/margins": 0.4239567816257477, |
|
"rewards/rejected": -1.5287864208221436, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6097880136090029, |
|
"grad_norm": 19.915328979492188, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": 0.22554683685302734, |
|
"logits/rejected": 0.3321411609649658, |
|
"logps/chosen": -512.6522827148438, |
|
"logps/rejected": -518.2416381835938, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.0541471242904663, |
|
"rewards/margins": 0.31440818309783936, |
|
"rewards/rejected": -1.3685553073883057, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 27.91488265991211, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": 0.38059157133102417, |
|
"logits/rejected": 0.4925920367240906, |
|
"logps/chosen": -471.82098388671875, |
|
"logps/rejected": -469.058837890625, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1800696849822998, |
|
"rewards/margins": 0.3158087134361267, |
|
"rewards/rejected": -1.4958784580230713, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.615022245485475, |
|
"grad_norm": 13.445463180541992, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": 0.22165732085704803, |
|
"logits/rejected": 0.3755477964878082, |
|
"logps/chosen": -512.750244140625, |
|
"logps/rejected": -493.84869384765625, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2496296167373657, |
|
"rewards/margins": 0.4642421305179596, |
|
"rewards/rejected": -1.713871717453003, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 17.654233932495117, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": 0.22464020550251007, |
|
"logits/rejected": 0.412786066532135, |
|
"logps/chosen": -504.1710510253906, |
|
"logps/rejected": -522.4702758789062, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2194569110870361, |
|
"rewards/margins": 0.5263023972511292, |
|
"rewards/rejected": -1.7457596063613892, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6202564773619471, |
|
"grad_norm": 14.313925743103027, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": 0.3136211335659027, |
|
"logits/rejected": 0.38369834423065186, |
|
"logps/chosen": -522.3583984375, |
|
"logps/rejected": -514.4608154296875, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2894423007965088, |
|
"rewards/margins": 0.50419020652771, |
|
"rewards/rejected": -1.7936325073242188, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 22.269620895385742, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": 0.28245988488197327, |
|
"logits/rejected": 0.3623971939086914, |
|
"logps/chosen": -478.1153869628906, |
|
"logps/rejected": -492.8341369628906, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2947511672973633, |
|
"rewards/margins": 0.3845621645450592, |
|
"rewards/rejected": -1.6793134212493896, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6254907092384192, |
|
"grad_norm": 15.111832618713379, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": 0.28817129135131836, |
|
"logits/rejected": 0.31284889578819275, |
|
"logps/chosen": -510.97576904296875, |
|
"logps/rejected": -507.06689453125, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2796354293823242, |
|
"rewards/margins": 0.38231566548347473, |
|
"rewards/rejected": -1.6619510650634766, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 17.842960357666016, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": 0.2098797857761383, |
|
"logits/rejected": 0.3998289704322815, |
|
"logps/chosen": -493.5147399902344, |
|
"logps/rejected": -490.57366943359375, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1564563512802124, |
|
"rewards/margins": 0.40797433257102966, |
|
"rewards/rejected": -1.564430594444275, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": 0.1449422538280487, |
|
"eval_logits/rejected": 0.2640918791294098, |
|
"eval_logps/chosen": -505.70361328125, |
|
"eval_logps/rejected": -501.2297668457031, |
|
"eval_loss": 0.5944039225578308, |
|
"eval_rewards/accuracies": 0.6869999766349792, |
|
"eval_rewards/chosen": -1.1733826398849487, |
|
"eval_rewards/margins": 0.39207327365875244, |
|
"eval_rewards/rejected": -1.5654560327529907, |
|
"eval_runtime": 232.3606, |
|
"eval_samples_per_second": 8.607, |
|
"eval_steps_per_second": 1.076, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6307249411148914, |
|
"grad_norm": 17.78997802734375, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": 0.18455150723457336, |
|
"logits/rejected": 0.3481082618236542, |
|
"logps/chosen": -462.8374938964844, |
|
"logps/rejected": -461.21051025390625, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1027500629425049, |
|
"rewards/margins": 0.3634553551673889, |
|
"rewards/rejected": -1.466205358505249, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 20.840116500854492, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": 0.2612837255001068, |
|
"logits/rejected": 0.32097476720809937, |
|
"logps/chosen": -474.12774658203125, |
|
"logps/rejected": -475.11376953125, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1177818775177002, |
|
"rewards/margins": 0.450967937707901, |
|
"rewards/rejected": -1.5687499046325684, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6359591729913635, |
|
"grad_norm": 17.94804573059082, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": 0.24861130118370056, |
|
"logits/rejected": 0.3079971969127655, |
|
"logps/chosen": -495.91021728515625, |
|
"logps/rejected": -495.01434326171875, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1746553182601929, |
|
"rewards/margins": 0.4058234691619873, |
|
"rewards/rejected": -1.5804787874221802, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 16.069711685180664, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": 0.05449223518371582, |
|
"logits/rejected": 0.3324371874332428, |
|
"logps/chosen": -504.40509033203125, |
|
"logps/rejected": -470.92529296875, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2023115158081055, |
|
"rewards/margins": 0.4562528729438782, |
|
"rewards/rejected": -1.6585643291473389, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6411934048678356, |
|
"grad_norm": 23.202844619750977, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": 0.07136271893978119, |
|
"logits/rejected": 0.224413201212883, |
|
"logps/chosen": -545.9983520507812, |
|
"logps/rejected": -521.3710327148438, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2068856954574585, |
|
"rewards/margins": 0.4760914742946625, |
|
"rewards/rejected": -1.682977318763733, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 13.684337615966797, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": 0.2635645270347595, |
|
"logits/rejected": 0.22264519333839417, |
|
"logps/chosen": -511.80584716796875, |
|
"logps/rejected": -482.666015625, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1622530221939087, |
|
"rewards/margins": 0.5128196477890015, |
|
"rewards/rejected": -1.6750726699829102, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6464276367443078, |
|
"grad_norm": 16.29932975769043, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": 0.17669948935508728, |
|
"logits/rejected": 0.28178560733795166, |
|
"logps/chosen": -506.5672912597656, |
|
"logps/rejected": -530.7100830078125, |
|
"loss": 0.5961, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.264910340309143, |
|
"rewards/margins": 0.4237841069698334, |
|
"rewards/rejected": -1.6886943578720093, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 22.445497512817383, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": 0.25904372334480286, |
|
"logits/rejected": 0.34389209747314453, |
|
"logps/chosen": -510.6214294433594, |
|
"logps/rejected": -497.93377685546875, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5339219570159912, |
|
"rewards/margins": 0.4442078173160553, |
|
"rewards/rejected": -1.9781297445297241, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6516618686207799, |
|
"grad_norm": 13.630302429199219, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": 0.07676917314529419, |
|
"logits/rejected": 0.0776657983660698, |
|
"logps/chosen": -501.290771484375, |
|
"logps/rejected": -504.23504638671875, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.433424711227417, |
|
"rewards/margins": 0.43815937638282776, |
|
"rewards/rejected": -1.871584177017212, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 17.672494888305664, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": 0.06636019051074982, |
|
"logits/rejected": 0.24429766833782196, |
|
"logps/chosen": -503.6896057128906, |
|
"logps/rejected": -528.3905029296875, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2013368606567383, |
|
"rewards/margins": 0.5229911208152771, |
|
"rewards/rejected": -1.724327802658081, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"eval_logits/chosen": 0.06589578092098236, |
|
"eval_logits/rejected": 0.17816391587257385, |
|
"eval_logps/chosen": -510.28302001953125, |
|
"eval_logps/rejected": -507.39532470703125, |
|
"eval_loss": 0.5968104004859924, |
|
"eval_rewards/accuracies": 0.6775000095367432, |
|
"eval_rewards/chosen": -1.219177007675171, |
|
"eval_rewards/margins": 0.4079345464706421, |
|
"eval_rewards/rejected": -1.627111554145813, |
|
"eval_runtime": 232.5509, |
|
"eval_samples_per_second": 8.6, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.656896100497252, |
|
"grad_norm": 21.569828033447266, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": 0.22845594584941864, |
|
"logits/rejected": 0.29911938309669495, |
|
"logps/chosen": -505.616943359375, |
|
"logps/rejected": -486.45947265625, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.05279541015625, |
|
"rewards/margins": 0.43759018182754517, |
|
"rewards/rejected": -1.4903854131698608, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 19.897043228149414, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -0.006847086362540722, |
|
"logits/rejected": 0.17560932040214539, |
|
"logps/chosen": -525.3328857421875, |
|
"logps/rejected": -500.6075134277344, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1965337991714478, |
|
"rewards/margins": 0.5665737390518188, |
|
"rewards/rejected": -1.7631075382232666, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6621303323737242, |
|
"grad_norm": 14.37187385559082, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": 0.15944847464561462, |
|
"logits/rejected": 0.2913690209388733, |
|
"logps/chosen": -473.12652587890625, |
|
"logps/rejected": -505.186279296875, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2122926712036133, |
|
"rewards/margins": 0.5305719375610352, |
|
"rewards/rejected": -1.7428646087646484, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 14.125144958496094, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": 0.17984794080257416, |
|
"logits/rejected": 0.2315172702074051, |
|
"logps/chosen": -485.05938720703125, |
|
"logps/rejected": -505.0470275878906, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2600443363189697, |
|
"rewards/margins": 0.4279232621192932, |
|
"rewards/rejected": -1.6879676580429077, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6673645642501963, |
|
"grad_norm": 27.50576400756836, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": 0.17406558990478516, |
|
"logits/rejected": 0.3330245018005371, |
|
"logps/chosen": -472.1449279785156, |
|
"logps/rejected": -494.12677001953125, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.262279987335205, |
|
"rewards/margins": 0.39653074741363525, |
|
"rewards/rejected": -1.6588106155395508, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 27.87693977355957, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": 0.2273554801940918, |
|
"logits/rejected": 0.20852844417095184, |
|
"logps/chosen": -547.5466918945312, |
|
"logps/rejected": -501.408203125, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1462863683700562, |
|
"rewards/margins": 0.48758673667907715, |
|
"rewards/rejected": -1.6338729858398438, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6725987961266684, |
|
"grad_norm": 23.339811325073242, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": 0.1783367097377777, |
|
"logits/rejected": 0.3085087239742279, |
|
"logps/chosen": -486.72100830078125, |
|
"logps/rejected": -489.66387939453125, |
|
"loss": 0.6158, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1978503465652466, |
|
"rewards/margins": 0.3335895240306854, |
|
"rewards/rejected": -1.5314397811889648, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 15.076952934265137, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": 0.16626477241516113, |
|
"logits/rejected": 0.18546536564826965, |
|
"logps/chosen": -478.89300537109375, |
|
"logps/rejected": -508.7444763183594, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0055302381515503, |
|
"rewards/margins": 0.4730769693851471, |
|
"rewards/rejected": -1.478607177734375, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6778330280031405, |
|
"grad_norm": 27.09433364868164, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": 0.2768346667289734, |
|
"logits/rejected": 0.35653841495513916, |
|
"logps/chosen": -494.996337890625, |
|
"logps/rejected": -491.51116943359375, |
|
"loss": 0.6019, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.140509009361267, |
|
"rewards/margins": 0.38551202416419983, |
|
"rewards/rejected": -1.5260212421417236, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 22.913333892822266, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": 0.15527863800525665, |
|
"logits/rejected": 0.21972744166851044, |
|
"logps/chosen": -525.5530395507812, |
|
"logps/rejected": -504.53814697265625, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2042077779769897, |
|
"rewards/margins": 0.5288984179496765, |
|
"rewards/rejected": -1.7331063747406006, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"eval_logits/chosen": 0.08117599785327911, |
|
"eval_logits/rejected": 0.1933600753545761, |
|
"eval_logps/chosen": -520.3548583984375, |
|
"eval_logps/rejected": -520.2850952148438, |
|
"eval_loss": 0.5958514213562012, |
|
"eval_rewards/accuracies": 0.6765000224113464, |
|
"eval_rewards/chosen": -1.3198949098587036, |
|
"eval_rewards/margins": 0.43611443042755127, |
|
"eval_rewards/rejected": -1.7560093402862549, |
|
"eval_runtime": 232.575, |
|
"eval_samples_per_second": 8.599, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6830672598796127, |
|
"grad_norm": 23.55072593688965, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": 0.03950003907084465, |
|
"logits/rejected": 0.09041625261306763, |
|
"logps/chosen": -530.7230224609375, |
|
"logps/rejected": -526.6931762695312, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3074090480804443, |
|
"rewards/margins": 0.4792531430721283, |
|
"rewards/rejected": -1.7866621017456055, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 52.15616226196289, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": 0.2288985550403595, |
|
"logits/rejected": 0.39459601044654846, |
|
"logps/chosen": -510.23687744140625, |
|
"logps/rejected": -519.2042236328125, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2479314804077148, |
|
"rewards/margins": 0.4636309742927551, |
|
"rewards/rejected": -1.7115623950958252, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6883014917560848, |
|
"grad_norm": 17.42203712463379, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": 0.1373758614063263, |
|
"logits/rejected": 0.24959711730480194, |
|
"logps/chosen": -511.2616271972656, |
|
"logps/rejected": -500.71527099609375, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1762168407440186, |
|
"rewards/margins": 0.40532511472702026, |
|
"rewards/rejected": -1.5815417766571045, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 23.382965087890625, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": 0.24336513876914978, |
|
"logits/rejected": 0.290175199508667, |
|
"logps/chosen": -476.31549072265625, |
|
"logps/rejected": -488.55010986328125, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.297133207321167, |
|
"rewards/margins": 0.47855645418167114, |
|
"rewards/rejected": -1.775689721107483, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6935357236325569, |
|
"grad_norm": 27.794416427612305, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": 0.029675770550966263, |
|
"logits/rejected": 0.17278780043125153, |
|
"logps/chosen": -564.1571655273438, |
|
"logps/rejected": -551.5831909179688, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2936184406280518, |
|
"rewards/margins": 0.5748482942581177, |
|
"rewards/rejected": -1.8684667348861694, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 27.250629425048828, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": 0.1551249921321869, |
|
"logits/rejected": 0.22874709963798523, |
|
"logps/chosen": -531.533447265625, |
|
"logps/rejected": -539.8661499023438, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3435332775115967, |
|
"rewards/margins": 0.5102267265319824, |
|
"rewards/rejected": -1.8537601232528687, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6987699555090291, |
|
"grad_norm": 17.067325592041016, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": 0.08357984572649002, |
|
"logits/rejected": 0.1700626015663147, |
|
"logps/chosen": -526.3895263671875, |
|
"logps/rejected": -539.6080322265625, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2903873920440674, |
|
"rewards/margins": 0.6351937651634216, |
|
"rewards/rejected": -1.9255812168121338, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 21.607013702392578, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": 0.07735034078359604, |
|
"logits/rejected": 0.2136649787425995, |
|
"logps/chosen": -531.7027587890625, |
|
"logps/rejected": -509.68701171875, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3712577819824219, |
|
"rewards/margins": 0.37780770659446716, |
|
"rewards/rejected": -1.749065637588501, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7040041873855012, |
|
"grad_norm": 22.4666748046875, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": 0.19079174101352692, |
|
"logits/rejected": 0.2903195023536682, |
|
"logps/chosen": -524.7943725585938, |
|
"logps/rejected": -556.605224609375, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.4239646196365356, |
|
"rewards/margins": 0.49202004075050354, |
|
"rewards/rejected": -1.9159847497940063, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 26.54542350769043, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": 0.18427929282188416, |
|
"logits/rejected": 0.3345043957233429, |
|
"logps/chosen": -519.188720703125, |
|
"logps/rejected": -516.6758422851562, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3359040021896362, |
|
"rewards/margins": 0.4217115342617035, |
|
"rewards/rejected": -1.7576156854629517, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"eval_logits/chosen": 0.08023716509342194, |
|
"eval_logits/rejected": 0.194534033536911, |
|
"eval_logps/chosen": -529.7188720703125, |
|
"eval_logps/rejected": -528.66455078125, |
|
"eval_loss": 0.594265878200531, |
|
"eval_rewards/accuracies": 0.6784999966621399, |
|
"eval_rewards/chosen": -1.4135349988937378, |
|
"eval_rewards/margins": 0.4262690842151642, |
|
"eval_rewards/rejected": -1.8398040533065796, |
|
"eval_runtime": 232.327, |
|
"eval_samples_per_second": 8.609, |
|
"eval_steps_per_second": 1.076, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7092384192619733, |
|
"grad_norm": 21.013721466064453, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": 0.23000892996788025, |
|
"logits/rejected": 0.30790433287620544, |
|
"logps/chosen": -526.0445556640625, |
|
"logps/rejected": -554.0514526367188, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3565577268600464, |
|
"rewards/margins": 0.46391409635543823, |
|
"rewards/rejected": -1.8204717636108398, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 18.518587112426758, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": 0.19795748591423035, |
|
"logits/rejected": 0.2533331513404846, |
|
"logps/chosen": -560.046630859375, |
|
"logps/rejected": -535.3902587890625, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4724786281585693, |
|
"rewards/margins": 0.31183096766471863, |
|
"rewards/rejected": -1.7843097448349, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7144726511384454, |
|
"grad_norm": 18.251569747924805, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": 0.37381523847579956, |
|
"logits/rejected": 0.42608457803726196, |
|
"logps/chosen": -497.0757751464844, |
|
"logps/rejected": -489.75286865234375, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.37063729763031, |
|
"rewards/margins": 0.5976725816726685, |
|
"rewards/rejected": -1.9683096408843994, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 33.466217041015625, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": 0.33738958835601807, |
|
"logits/rejected": 0.4564022123813629, |
|
"logps/chosen": -546.6898193359375, |
|
"logps/rejected": -542.0783081054688, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.379553198814392, |
|
"rewards/margins": 0.47975024580955505, |
|
"rewards/rejected": -1.8593032360076904, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7197068830149176, |
|
"grad_norm": 16.89527702331543, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": 0.325847327709198, |
|
"logits/rejected": 0.5274810791015625, |
|
"logps/chosen": -478.98846435546875, |
|
"logps/rejected": -527.6720581054688, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1666440963745117, |
|
"rewards/margins": 0.46127814054489136, |
|
"rewards/rejected": -1.6279222965240479, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 26.121501922607422, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": 0.14125783741474152, |
|
"logits/rejected": 0.25838667154312134, |
|
"logps/chosen": -488.41827392578125, |
|
"logps/rejected": -485.6160583496094, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.267000436782837, |
|
"rewards/margins": 0.5229637622833252, |
|
"rewards/rejected": -1.7899643182754517, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7249411148913897, |
|
"grad_norm": 26.272478103637695, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": 0.3034301698207855, |
|
"logits/rejected": 0.33288899064064026, |
|
"logps/chosen": -546.6353149414062, |
|
"logps/rejected": -535.7976684570312, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4825105667114258, |
|
"rewards/margins": 0.4382646679878235, |
|
"rewards/rejected": -1.920775055885315, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 24.98711585998535, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": 0.2563607692718506, |
|
"logits/rejected": 0.35739272832870483, |
|
"logps/chosen": -576.2966918945312, |
|
"logps/rejected": -583.8340454101562, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4835859537124634, |
|
"rewards/margins": 0.6780148148536682, |
|
"rewards/rejected": -2.1616008281707764, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7301753467678618, |
|
"grad_norm": 28.010181427001953, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": 0.23113617300987244, |
|
"logits/rejected": 0.324890673160553, |
|
"logps/chosen": -531.7484130859375, |
|
"logps/rejected": -531.4666748046875, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5267302989959717, |
|
"rewards/margins": 0.5725789666175842, |
|
"rewards/rejected": -2.0993094444274902, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 32.755393981933594, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": 0.2566104531288147, |
|
"logits/rejected": 0.37072187662124634, |
|
"logps/chosen": -558.6234741210938, |
|
"logps/rejected": -554.4527587890625, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.625914216041565, |
|
"rewards/margins": 0.4005354344844818, |
|
"rewards/rejected": -2.026449680328369, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": 0.11360778659582138, |
|
"eval_logits/rejected": 0.23131908476352692, |
|
"eval_logps/chosen": -545.891845703125, |
|
"eval_logps/rejected": -547.037109375, |
|
"eval_loss": 0.5920617580413818, |
|
"eval_rewards/accuracies": 0.6784999966621399, |
|
"eval_rewards/chosen": -1.5752650499343872, |
|
"eval_rewards/margins": 0.44826528429985046, |
|
"eval_rewards/rejected": -2.0235302448272705, |
|
"eval_runtime": 232.6397, |
|
"eval_samples_per_second": 8.597, |
|
"eval_steps_per_second": 1.075, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.735409578644334, |
|
"grad_norm": 14.789016723632812, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": 0.17329376935958862, |
|
"logits/rejected": 0.3116983473300934, |
|
"logps/chosen": -529.4105224609375, |
|
"logps/rejected": -557.4210815429688, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6027743816375732, |
|
"rewards/margins": 0.5013204216957092, |
|
"rewards/rejected": -2.104094982147217, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 22.683055877685547, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": 0.33349448442459106, |
|
"logits/rejected": 0.34794288873672485, |
|
"logps/chosen": -520.3793334960938, |
|
"logps/rejected": -538.2870483398438, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6407073736190796, |
|
"rewards/margins": 0.37328147888183594, |
|
"rewards/rejected": -2.013988971710205, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7406438105208061, |
|
"grad_norm": 27.596086502075195, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": 0.3088427484035492, |
|
"logits/rejected": 0.48099619150161743, |
|
"logps/chosen": -532.8345947265625, |
|
"logps/rejected": -520.9815673828125, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.565934181213379, |
|
"rewards/margins": 0.41419801115989685, |
|
"rewards/rejected": -1.9801323413848877, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 27.7423095703125, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": 0.35369396209716797, |
|
"logits/rejected": 0.2665908932685852, |
|
"logps/chosen": -524.5148315429688, |
|
"logps/rejected": -510.5908203125, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4806346893310547, |
|
"rewards/margins": 0.37274661660194397, |
|
"rewards/rejected": -1.8533813953399658, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7458780423972782, |
|
"grad_norm": 16.51445770263672, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": 0.1338292360305786, |
|
"logits/rejected": 0.34962359070777893, |
|
"logps/chosen": -557.58154296875, |
|
"logps/rejected": -550.517578125, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4360605478286743, |
|
"rewards/margins": 0.5636521577835083, |
|
"rewards/rejected": -1.9997127056121826, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 25.57477378845215, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": 0.12778687477111816, |
|
"logits/rejected": 0.31376713514328003, |
|
"logps/chosen": -526.8626098632812, |
|
"logps/rejected": -553.7708740234375, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3927412033081055, |
|
"rewards/margins": 0.521297812461853, |
|
"rewards/rejected": -1.9140390157699585, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7511122742737504, |
|
"grad_norm": 18.769468307495117, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": 0.10975948721170425, |
|
"logits/rejected": 0.36845940351486206, |
|
"logps/chosen": -543.9274291992188, |
|
"logps/rejected": -546.5499267578125, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3972618579864502, |
|
"rewards/margins": 0.3473066985607147, |
|
"rewards/rejected": -1.7445685863494873, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 22.453828811645508, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": 0.16480425000190735, |
|
"logits/rejected": 0.2969481647014618, |
|
"logps/chosen": -519.5490112304688, |
|
"logps/rejected": -512.5626831054688, |
|
"loss": 0.6033, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5151922702789307, |
|
"rewards/margins": 0.39446836709976196, |
|
"rewards/rejected": -1.9096605777740479, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7563465061502225, |
|
"grad_norm": 20.046194076538086, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": 0.31565916538238525, |
|
"logits/rejected": 0.39657875895500183, |
|
"logps/chosen": -525.2118530273438, |
|
"logps/rejected": -516.3604736328125, |
|
"loss": 0.5859, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.533144235610962, |
|
"rewards/margins": 0.43004053831100464, |
|
"rewards/rejected": -1.9631845951080322, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 19.951534271240234, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": 0.24349746108055115, |
|
"logits/rejected": 0.3447602689266205, |
|
"logps/chosen": -516.9713134765625, |
|
"logps/rejected": -535.793212890625, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4326432943344116, |
|
"rewards/margins": 0.43857187032699585, |
|
"rewards/rejected": -1.8712152242660522, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"eval_logits/chosen": 0.12212979793548584, |
|
"eval_logits/rejected": 0.24171759188175201, |
|
"eval_logps/chosen": -535.29541015625, |
|
"eval_logps/rejected": -534.5630493164062, |
|
"eval_loss": 0.5905064940452576, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": -1.4693007469177246, |
|
"eval_rewards/margins": 0.4294882118701935, |
|
"eval_rewards/rejected": -1.8987890481948853, |
|
"eval_runtime": 232.2009, |
|
"eval_samples_per_second": 8.613, |
|
"eval_steps_per_second": 1.077, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7615807380266946, |
|
"grad_norm": 15.65670394897461, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": 0.3191450834274292, |
|
"logits/rejected": 0.452759325504303, |
|
"logps/chosen": -491.33831787109375, |
|
"logps/rejected": -494.2073669433594, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4124243259429932, |
|
"rewards/margins": 0.518468976020813, |
|
"rewards/rejected": -1.9308933019638062, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 17.081989288330078, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": 0.25279700756073, |
|
"logits/rejected": 0.3255782127380371, |
|
"logps/chosen": -534.6649169921875, |
|
"logps/rejected": -551.4168701171875, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3950837850570679, |
|
"rewards/margins": 0.39895501732826233, |
|
"rewards/rejected": -1.7940387725830078, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7668149699031667, |
|
"grad_norm": 21.055017471313477, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": 0.20293910801410675, |
|
"logits/rejected": 0.274809867143631, |
|
"logps/chosen": -526.1995849609375, |
|
"logps/rejected": -538.4735717773438, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3020565509796143, |
|
"rewards/margins": 0.5154568552970886, |
|
"rewards/rejected": -1.8175132274627686, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 17.768627166748047, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": 0.3839934468269348, |
|
"logits/rejected": 0.3556897044181824, |
|
"logps/chosen": -529.2373046875, |
|
"logps/rejected": -502.952880859375, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4653526544570923, |
|
"rewards/margins": 0.48818325996398926, |
|
"rewards/rejected": -1.953536033630371, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7720492017796389, |
|
"grad_norm": 19.30914878845215, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": 0.23126430809497833, |
|
"logits/rejected": 0.22901423275470734, |
|
"logps/chosen": -515.2665405273438, |
|
"logps/rejected": -515.766357421875, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.515284538269043, |
|
"rewards/margins": 0.42790335416793823, |
|
"rewards/rejected": -1.9431880712509155, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 20.416574478149414, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": 0.31689321994781494, |
|
"logits/rejected": 0.37642043828964233, |
|
"logps/chosen": -539.6643676757812, |
|
"logps/rejected": -538.9885864257812, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.518949270248413, |
|
"rewards/margins": 0.5324884653091431, |
|
"rewards/rejected": -2.0514376163482666, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.777283433656111, |
|
"grad_norm": 28.220792770385742, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": 0.07478724420070648, |
|
"logits/rejected": 0.2702252268791199, |
|
"logps/chosen": -551.8621215820312, |
|
"logps/rejected": -564.0684814453125, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.43454909324646, |
|
"rewards/margins": 0.5544053912162781, |
|
"rewards/rejected": -1.988954782485962, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 18.90873146057129, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": 0.16967260837554932, |
|
"logits/rejected": 0.33861225843429565, |
|
"logps/chosen": -534.6746215820312, |
|
"logps/rejected": -526.2569580078125, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4762811660766602, |
|
"rewards/margins": 0.38691508769989014, |
|
"rewards/rejected": -1.8631963729858398, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7825176655325831, |
|
"grad_norm": 15.61099910736084, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": 0.2207004576921463, |
|
"logits/rejected": 0.2613358795642853, |
|
"logps/chosen": -489.48529052734375, |
|
"logps/rejected": -498.1441955566406, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4658935070037842, |
|
"rewards/margins": 0.5164823532104492, |
|
"rewards/rejected": -1.9823758602142334, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 29.250232696533203, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": 0.1829605996608734, |
|
"logits/rejected": 0.3020482361316681, |
|
"logps/chosen": -495.75927734375, |
|
"logps/rejected": -530.975830078125, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4143750667572021, |
|
"rewards/margins": 0.502804696559906, |
|
"rewards/rejected": -1.9171797037124634, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": 0.1228410005569458, |
|
"eval_logits/rejected": 0.24235635995864868, |
|
"eval_logps/chosen": -534.0778198242188, |
|
"eval_logps/rejected": -533.9715576171875, |
|
"eval_loss": 0.5899218916893005, |
|
"eval_rewards/accuracies": 0.6794999837875366, |
|
"eval_rewards/chosen": -1.4571242332458496, |
|
"eval_rewards/margins": 0.4357497990131378, |
|
"eval_rewards/rejected": -1.8928741216659546, |
|
"eval_runtime": 232.4162, |
|
"eval_samples_per_second": 8.605, |
|
"eval_steps_per_second": 1.076, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7877518974090553, |
|
"grad_norm": 21.612812042236328, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": 0.28465574979782104, |
|
"logits/rejected": 0.3168385922908783, |
|
"logps/chosen": -515.8154296875, |
|
"logps/rejected": -536.3228759765625, |
|
"loss": 0.5725, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4145126342773438, |
|
"rewards/margins": 0.49681711196899414, |
|
"rewards/rejected": -1.9113296270370483, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 19.919925689697266, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": 0.25551754236221313, |
|
"logits/rejected": 0.3729092478752136, |
|
"logps/chosen": -478.81451416015625, |
|
"logps/rejected": -482.24725341796875, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4676393270492554, |
|
"rewards/margins": 0.47457486391067505, |
|
"rewards/rejected": -1.9422142505645752, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7929861292855274, |
|
"grad_norm": 25.699249267578125, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": 0.3613748848438263, |
|
"logits/rejected": 0.464524507522583, |
|
"logps/chosen": -483.13775634765625, |
|
"logps/rejected": -510.97576904296875, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.4159023761749268, |
|
"rewards/margins": 0.4669385850429535, |
|
"rewards/rejected": -1.882840871810913, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 18.529563903808594, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": 0.1572251319885254, |
|
"logits/rejected": 0.26168403029441833, |
|
"logps/chosen": -538.1602783203125, |
|
"logps/rejected": -491.727783203125, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.387502908706665, |
|
"rewards/margins": 0.5043372511863708, |
|
"rewards/rejected": -1.8918402194976807, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7982203611619995, |
|
"grad_norm": 18.66206169128418, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": 0.15295560657978058, |
|
"logits/rejected": 0.2732795178890228, |
|
"logps/chosen": -466.65350341796875, |
|
"logps/rejected": -510.0077209472656, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4113075733184814, |
|
"rewards/margins": 0.5156591534614563, |
|
"rewards/rejected": -1.926966667175293, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 32.57720947265625, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": 0.18402081727981567, |
|
"logits/rejected": 0.25058144330978394, |
|
"logps/chosen": -528.1961059570312, |
|
"logps/rejected": -616.8970947265625, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4876148700714111, |
|
"rewards/margins": 0.5890025496482849, |
|
"rewards/rejected": -2.0766172409057617, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8034545930384716, |
|
"grad_norm": 22.268369674682617, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": 0.08234192430973053, |
|
"logits/rejected": 0.09182853996753693, |
|
"logps/chosen": -555.3732299804688, |
|
"logps/rejected": -554.161865234375, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.4457889795303345, |
|
"rewards/margins": 0.41241535544395447, |
|
"rewards/rejected": -1.8582042455673218, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 18.810224533081055, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": 0.11081822216510773, |
|
"logits/rejected": 0.25048089027404785, |
|
"logps/chosen": -493.99493408203125, |
|
"logps/rejected": -511.19464111328125, |
|
"loss": 0.5361, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3138765096664429, |
|
"rewards/margins": 0.5303700566291809, |
|
"rewards/rejected": -1.844246506690979, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8086888249149438, |
|
"grad_norm": 20.452537536621094, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": 0.1712993085384369, |
|
"logits/rejected": 0.28175634145736694, |
|
"logps/chosen": -530.8388671875, |
|
"logps/rejected": -525.1220703125, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.279170274734497, |
|
"rewards/margins": 0.5140555500984192, |
|
"rewards/rejected": -1.793225884437561, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 18.429285049438477, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": 0.1318766474723816, |
|
"logits/rejected": 0.12211046367883682, |
|
"logps/chosen": -517.6220092773438, |
|
"logps/rejected": -529.6044921875, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.189562201499939, |
|
"rewards/margins": 0.5340698957443237, |
|
"rewards/rejected": -1.7236320972442627, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"eval_logits/chosen": 0.08669886738061905, |
|
"eval_logits/rejected": 0.2020561546087265, |
|
"eval_logps/chosen": -520.4529418945312, |
|
"eval_logps/rejected": -520.4334106445312, |
|
"eval_loss": 0.5915951132774353, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -1.3208762407302856, |
|
"eval_rewards/margins": 0.43661609292030334, |
|
"eval_rewards/rejected": -1.757492184638977, |
|
"eval_runtime": 232.359, |
|
"eval_samples_per_second": 8.607, |
|
"eval_steps_per_second": 1.076, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8139230567914159, |
|
"grad_norm": 21.272666931152344, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": 0.22215643525123596, |
|
"logits/rejected": 0.2583310008049011, |
|
"logps/chosen": -535.0568237304688, |
|
"logps/rejected": -544.3440551757812, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.234817624092102, |
|
"rewards/margins": 0.4773196280002594, |
|
"rewards/rejected": -1.71213698387146, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 26.98297882080078, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": 0.21868661046028137, |
|
"logits/rejected": 0.32554829120635986, |
|
"logps/chosen": -545.9993896484375, |
|
"logps/rejected": -515.1287231445312, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.3831281661987305, |
|
"rewards/margins": 0.3917158544063568, |
|
"rewards/rejected": -1.7748441696166992, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.819157288667888, |
|
"grad_norm": 19.549560546875, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": 0.072993703186512, |
|
"logits/rejected": 0.19295060634613037, |
|
"logps/chosen": -512.5438842773438, |
|
"logps/rejected": -527.2860107421875, |
|
"loss": 0.5642, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.34549880027771, |
|
"rewards/margins": 0.4567781090736389, |
|
"rewards/rejected": -1.8022769689559937, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 22.311878204345703, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": 0.20619972050189972, |
|
"logits/rejected": 0.35505905747413635, |
|
"logps/chosen": -488.4278869628906, |
|
"logps/rejected": -507.36785888671875, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3550437688827515, |
|
"rewards/margins": 0.4867793619632721, |
|
"rewards/rejected": -1.841822862625122, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8243915205443602, |
|
"grad_norm": 16.643848419189453, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": 0.21577072143554688, |
|
"logits/rejected": 0.33193427324295044, |
|
"logps/chosen": -521.0413208007812, |
|
"logps/rejected": -540.6106567382812, |
|
"loss": 0.5785, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3828619718551636, |
|
"rewards/margins": 0.4862847924232483, |
|
"rewards/rejected": -1.869146704673767, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 17.693086624145508, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": 0.11184321343898773, |
|
"logits/rejected": 0.19088369607925415, |
|
"logps/chosen": -536.9678955078125, |
|
"logps/rejected": -554.2827758789062, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.369461178779602, |
|
"rewards/margins": 0.48122233152389526, |
|
"rewards/rejected": -1.850683569908142, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8296257524208323, |
|
"grad_norm": 16.465007781982422, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": 0.24646055698394775, |
|
"logits/rejected": 0.306671679019928, |
|
"logps/chosen": -524.2401123046875, |
|
"logps/rejected": -516.0563354492188, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3744683265686035, |
|
"rewards/margins": 0.4835619032382965, |
|
"rewards/rejected": -1.8580303192138672, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 22.869001388549805, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": 0.07770199328660965, |
|
"logits/rejected": 0.25347238779067993, |
|
"logps/chosen": -455.5186462402344, |
|
"logps/rejected": -507.84356689453125, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2709424495697021, |
|
"rewards/margins": 0.4785892367362976, |
|
"rewards/rejected": -1.7495317459106445, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8348599842973043, |
|
"grad_norm": 20.31063461303711, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": 0.24958959221839905, |
|
"logits/rejected": 0.255808025598526, |
|
"logps/chosen": -538.5, |
|
"logps/rejected": -554.1825561523438, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4217197895050049, |
|
"rewards/margins": 0.490082323551178, |
|
"rewards/rejected": -1.9118019342422485, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 17.181446075439453, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": 0.1832321137189865, |
|
"logits/rejected": 0.20756061375141144, |
|
"logps/chosen": -523.9074096679688, |
|
"logps/rejected": -562.7293701171875, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.473734736442566, |
|
"rewards/margins": 0.465187132358551, |
|
"rewards/rejected": -1.9389216899871826, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": 0.07104705274105072, |
|
"eval_logits/rejected": 0.18436755239963531, |
|
"eval_logps/chosen": -532.9956665039062, |
|
"eval_logps/rejected": -534.0466918945312, |
|
"eval_loss": 0.590150773525238, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": -1.4463036060333252, |
|
"eval_rewards/margins": 0.44732123613357544, |
|
"eval_rewards/rejected": -1.8936247825622559, |
|
"eval_runtime": 232.4834, |
|
"eval_samples_per_second": 8.603, |
|
"eval_steps_per_second": 1.075, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8400942161737766, |
|
"grad_norm": 15.772056579589844, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": 0.17947904765605927, |
|
"logits/rejected": 0.38756972551345825, |
|
"logps/chosen": -536.089599609375, |
|
"logps/rejected": -513.5861206054688, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4111121892929077, |
|
"rewards/margins": 0.3696535527706146, |
|
"rewards/rejected": -1.7807658910751343, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 22.664976119995117, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": 0.21588890254497528, |
|
"logits/rejected": 0.31630846858024597, |
|
"logps/chosen": -577.00146484375, |
|
"logps/rejected": -551.9093017578125, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4189621210098267, |
|
"rewards/margins": 0.4751489758491516, |
|
"rewards/rejected": -1.8941110372543335, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8453284480502486, |
|
"grad_norm": 30.950883865356445, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": 0.08771739155054092, |
|
"logits/rejected": 0.16890794038772583, |
|
"logps/chosen": -532.2547607421875, |
|
"logps/rejected": -527.4913330078125, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.4851154088974, |
|
"rewards/margins": 0.3038921654224396, |
|
"rewards/rejected": -1.789007544517517, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 22.29638671875, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": 0.05612761899828911, |
|
"logits/rejected": 0.13626542687416077, |
|
"logps/chosen": -520.4820556640625, |
|
"logps/rejected": -517.2299194335938, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3912557363510132, |
|
"rewards/margins": 0.4790167808532715, |
|
"rewards/rejected": -1.8702728748321533, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8505626799267207, |
|
"grad_norm": 18.589750289916992, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": 0.04296109080314636, |
|
"logits/rejected": 0.2635151743888855, |
|
"logps/chosen": -563.4909057617188, |
|
"logps/rejected": -560.6986694335938, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3561357259750366, |
|
"rewards/margins": 0.6071338653564453, |
|
"rewards/rejected": -1.963269591331482, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 23.977201461791992, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": 0.2044192999601364, |
|
"logits/rejected": 0.20763865113258362, |
|
"logps/chosen": -533.2398681640625, |
|
"logps/rejected": -539.3159790039062, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5438867807388306, |
|
"rewards/margins": 0.37856417894363403, |
|
"rewards/rejected": -1.9224510192871094, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8557969118031928, |
|
"grad_norm": 19.610824584960938, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": 0.09625023603439331, |
|
"logits/rejected": 0.09076298773288727, |
|
"logps/chosen": -519.69287109375, |
|
"logps/rejected": -526.6182861328125, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3638288974761963, |
|
"rewards/margins": 0.4889647364616394, |
|
"rewards/rejected": -1.8527934551239014, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 17.58568000793457, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": 0.24739189445972443, |
|
"logits/rejected": 0.3013184666633606, |
|
"logps/chosen": -541.0255126953125, |
|
"logps/rejected": -576.6962890625, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3129136562347412, |
|
"rewards/margins": 0.46883121132850647, |
|
"rewards/rejected": -1.781745195388794, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.861031143679665, |
|
"grad_norm": 12.26266860961914, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": 0.14454945921897888, |
|
"logits/rejected": 0.2927783131599426, |
|
"logps/chosen": -537.5690307617188, |
|
"logps/rejected": -562.3256225585938, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4469431638717651, |
|
"rewards/margins": 0.4720059335231781, |
|
"rewards/rejected": -1.9189491271972656, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 20.302330017089844, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": 0.06817831099033356, |
|
"logits/rejected": 0.22290131449699402, |
|
"logps/chosen": -533.7552490234375, |
|
"logps/rejected": -575.96875, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4392116069793701, |
|
"rewards/margins": 0.5773890018463135, |
|
"rewards/rejected": -2.0166003704071045, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"eval_logits/chosen": 0.06103089079260826, |
|
"eval_logits/rejected": 0.17368435859680176, |
|
"eval_logps/chosen": -540.1639404296875, |
|
"eval_logps/rejected": -541.0390625, |
|
"eval_loss": 0.5897097587585449, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": -1.5179859399795532, |
|
"eval_rewards/margins": 0.44556280970573425, |
|
"eval_rewards/rejected": -1.9635487794876099, |
|
"eval_runtime": 232.0365, |
|
"eval_samples_per_second": 8.619, |
|
"eval_steps_per_second": 1.077, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8662653755561371, |
|
"grad_norm": 45.810630798339844, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": 0.32271671295166016, |
|
"logits/rejected": 0.39709824323654175, |
|
"logps/chosen": -474.7618103027344, |
|
"logps/rejected": -456.5228576660156, |
|
"loss": 0.5846, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.407684087753296, |
|
"rewards/margins": 0.46905022859573364, |
|
"rewards/rejected": -1.8767343759536743, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 20.996036529541016, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": 0.15398995578289032, |
|
"logits/rejected": 0.3004111349582672, |
|
"logps/chosen": -523.9410400390625, |
|
"logps/rejected": -531.103759765625, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.478339672088623, |
|
"rewards/margins": 0.49801668524742126, |
|
"rewards/rejected": -1.9763562679290771, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8714996074326092, |
|
"grad_norm": 21.416170120239258, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": 0.09900239109992981, |
|
"logits/rejected": 0.1008148342370987, |
|
"logps/chosen": -542.8681640625, |
|
"logps/rejected": -543.7512817382812, |
|
"loss": 0.562, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3520573377609253, |
|
"rewards/margins": 0.5512968301773071, |
|
"rewards/rejected": -1.9033544063568115, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 20.47162437438965, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": 0.07932907342910767, |
|
"logits/rejected": 0.2061280757188797, |
|
"logps/chosen": -550.5972290039062, |
|
"logps/rejected": -576.6636962890625, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.505156397819519, |
|
"rewards/margins": 0.4955100119113922, |
|
"rewards/rejected": -2.000666379928589, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8767338393090814, |
|
"grad_norm": 18.11985969543457, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": 0.16951636970043182, |
|
"logits/rejected": 0.21532103419303894, |
|
"logps/chosen": -531.1329345703125, |
|
"logps/rejected": -555.5535278320312, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3905164003372192, |
|
"rewards/margins": 0.5168679356575012, |
|
"rewards/rejected": -1.9073841571807861, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 23.173076629638672, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": 0.10670924186706543, |
|
"logits/rejected": 0.25408512353897095, |
|
"logps/chosen": -534.140625, |
|
"logps/rejected": -571.8042602539062, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2978779077529907, |
|
"rewards/margins": 0.7049534320831299, |
|
"rewards/rejected": -2.00283145904541, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8819680711855535, |
|
"grad_norm": 26.09234619140625, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": 0.1856442391872406, |
|
"logits/rejected": 0.2775765657424927, |
|
"logps/chosen": -504.78472900390625, |
|
"logps/rejected": -510.97772216796875, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4099186658859253, |
|
"rewards/margins": 0.48924511671066284, |
|
"rewards/rejected": -1.899163842201233, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 18.44064712524414, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": 0.18279746174812317, |
|
"logits/rejected": 0.25130945444107056, |
|
"logps/chosen": -501.90191650390625, |
|
"logps/rejected": -510.632080078125, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.326120138168335, |
|
"rewards/margins": 0.5073062777519226, |
|
"rewards/rejected": -1.8334262371063232, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8872023030620256, |
|
"grad_norm": 20.44449234008789, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": 0.26278918981552124, |
|
"logits/rejected": 0.3742792010307312, |
|
"logps/chosen": -547.27490234375, |
|
"logps/rejected": -502.9208984375, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3170632123947144, |
|
"rewards/margins": 0.5174871683120728, |
|
"rewards/rejected": -1.8345504999160767, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 23.813940048217773, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": 0.15093761682510376, |
|
"logits/rejected": 0.15371516346931458, |
|
"logps/chosen": -576.1170654296875, |
|
"logps/rejected": -542.66357421875, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2780691385269165, |
|
"rewards/margins": 0.5459933280944824, |
|
"rewards/rejected": -1.8240623474121094, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"eval_logits/chosen": 0.049354203045368195, |
|
"eval_logits/rejected": 0.16207368671894073, |
|
"eval_logps/chosen": -524.5573120117188, |
|
"eval_logps/rejected": -524.9038696289062, |
|
"eval_loss": 0.5905484557151794, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": -1.3619197607040405, |
|
"eval_rewards/margins": 0.4402773082256317, |
|
"eval_rewards/rejected": -1.8021970987319946, |
|
"eval_runtime": 232.2898, |
|
"eval_samples_per_second": 8.61, |
|
"eval_steps_per_second": 1.076, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8924365349384977, |
|
"grad_norm": 22.652015686035156, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": 0.09193596988916397, |
|
"logits/rejected": 0.12269000709056854, |
|
"logps/chosen": -560.306884765625, |
|
"logps/rejected": -547.3897705078125, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3574447631835938, |
|
"rewards/margins": 0.512332558631897, |
|
"rewards/rejected": -1.8697776794433594, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 20.60724449157715, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": 0.11068376153707504, |
|
"logits/rejected": 0.2603934407234192, |
|
"logps/chosen": -541.2059326171875, |
|
"logps/rejected": -526.3267211914062, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.222874402999878, |
|
"rewards/margins": 0.528007447719574, |
|
"rewards/rejected": -1.7508817911148071, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8976707668149699, |
|
"grad_norm": 19.67418098449707, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": 0.07019755989313126, |
|
"logits/rejected": 0.25848856568336487, |
|
"logps/chosen": -518.75146484375, |
|
"logps/rejected": -542.2596435546875, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2500635385513306, |
|
"rewards/margins": 0.5684723854064941, |
|
"rewards/rejected": -1.8185360431671143, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 21.947776794433594, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": 0.2768905758857727, |
|
"logits/rejected": 0.3939592242240906, |
|
"logps/chosen": -490.6131896972656, |
|
"logps/rejected": -477.58380126953125, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2620429992675781, |
|
"rewards/margins": 0.421647846698761, |
|
"rewards/rejected": -1.6836907863616943, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.902904998691442, |
|
"grad_norm": 23.7869930267334, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": 0.28201746940612793, |
|
"logits/rejected": 0.20732636749744415, |
|
"logps/chosen": -523.0221557617188, |
|
"logps/rejected": -520.0089721679688, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2637730836868286, |
|
"rewards/margins": 0.4759213328361511, |
|
"rewards/rejected": -1.739694595336914, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 24.20222282409668, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": 0.11287301778793335, |
|
"logits/rejected": 0.17427489161491394, |
|
"logps/chosen": -487.00262451171875, |
|
"logps/rejected": -517.9505615234375, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.300456166267395, |
|
"rewards/margins": 0.5554144382476807, |
|
"rewards/rejected": -1.8558704853057861, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9081392305679141, |
|
"grad_norm": 23.366870880126953, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": 0.08213616907596588, |
|
"logits/rejected": 0.1927454173564911, |
|
"logps/chosen": -560.6731567382812, |
|
"logps/rejected": -532.0469970703125, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3204230070114136, |
|
"rewards/margins": 0.5710306763648987, |
|
"rewards/rejected": -1.891453742980957, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 21.51507568359375, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": 0.22756421566009521, |
|
"logits/rejected": 0.25787419080734253, |
|
"logps/chosen": -500.9013671875, |
|
"logps/rejected": -513.2033081054688, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4208608865737915, |
|
"rewards/margins": 0.40193843841552734, |
|
"rewards/rejected": -1.8227994441986084, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9133734624443863, |
|
"grad_norm": 22.2541446685791, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": 0.16574542224407196, |
|
"logits/rejected": 0.22871682047843933, |
|
"logps/chosen": -550.3922119140625, |
|
"logps/rejected": -538.7455444335938, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4138902425765991, |
|
"rewards/margins": 0.44176238775253296, |
|
"rewards/rejected": -1.8556525707244873, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 17.759902954101562, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": 0.006184411235153675, |
|
"logits/rejected": 0.05719981715083122, |
|
"logps/chosen": -541.510009765625, |
|
"logps/rejected": -514.5938110351562, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4001209735870361, |
|
"rewards/margins": 0.437242329120636, |
|
"rewards/rejected": -1.8373632431030273, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"eval_logits/chosen": 0.056149620562791824, |
|
"eval_logits/rejected": 0.16871705651283264, |
|
"eval_logps/chosen": -528.9203491210938, |
|
"eval_logps/rejected": -529.6995239257812, |
|
"eval_loss": 0.589903712272644, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -1.405550241470337, |
|
"eval_rewards/margins": 0.44460350275039673, |
|
"eval_rewards/rejected": -1.8501536846160889, |
|
"eval_runtime": 232.5763, |
|
"eval_samples_per_second": 8.599, |
|
"eval_steps_per_second": 1.075, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9186076943208584, |
|
"grad_norm": 16.307315826416016, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": 0.26652759313583374, |
|
"logits/rejected": 0.27262991666793823, |
|
"logps/chosen": -505.27471923828125, |
|
"logps/rejected": -525.823974609375, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2464582920074463, |
|
"rewards/margins": 0.6129791140556335, |
|
"rewards/rejected": -1.859437346458435, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 19.756351470947266, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": 0.0008102863794192672, |
|
"logits/rejected": 0.19065120816230774, |
|
"logps/chosen": -516.8560791015625, |
|
"logps/rejected": -553.0278930664062, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2412116527557373, |
|
"rewards/margins": 0.5996343493461609, |
|
"rewards/rejected": -1.8408458232879639, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9238419261973305, |
|
"grad_norm": 20.353652954101562, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": 0.12386944144964218, |
|
"logits/rejected": 0.27118274569511414, |
|
"logps/chosen": -481.002685546875, |
|
"logps/rejected": -510.55230712890625, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.342132806777954, |
|
"rewards/margins": 0.4541274905204773, |
|
"rewards/rejected": -1.7962604761123657, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 21.46174430847168, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": 0.1211334615945816, |
|
"logits/rejected": 0.4012266993522644, |
|
"logps/chosen": -564.7858276367188, |
|
"logps/rejected": -576.843994140625, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3764750957489014, |
|
"rewards/margins": 0.5135365724563599, |
|
"rewards/rejected": -1.8900115489959717, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9290761580738026, |
|
"grad_norm": 21.205127716064453, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": 0.26407915353775024, |
|
"logits/rejected": 0.28373825550079346, |
|
"logps/chosen": -528.8353271484375, |
|
"logps/rejected": -558.7482299804688, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.4041508436203003, |
|
"rewards/margins": 0.4070183336734772, |
|
"rewards/rejected": -1.8111692667007446, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 19.687849044799805, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": 0.2693914771080017, |
|
"logits/rejected": 0.3067939281463623, |
|
"logps/chosen": -492.1351623535156, |
|
"logps/rejected": -476.29388427734375, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.274938941001892, |
|
"rewards/margins": 0.5187603235244751, |
|
"rewards/rejected": -1.7936992645263672, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9343103899502748, |
|
"grad_norm": 21.987916946411133, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": 0.05399775505065918, |
|
"logits/rejected": 0.1626650094985962, |
|
"logps/chosen": -542.186767578125, |
|
"logps/rejected": -550.5693359375, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.230676293373108, |
|
"rewards/margins": 0.5804111361503601, |
|
"rewards/rejected": -1.8110872507095337, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 13.982370376586914, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": 0.10340269654989243, |
|
"logits/rejected": 0.30877891182899475, |
|
"logps/chosen": -515.5814819335938, |
|
"logps/rejected": -562.509521484375, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2640832662582397, |
|
"rewards/margins": 0.5411828756332397, |
|
"rewards/rejected": -1.8052661418914795, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9395446218267469, |
|
"grad_norm": 18.1751708984375, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": 0.06784630566835403, |
|
"logits/rejected": 0.2033408135175705, |
|
"logps/chosen": -519.9273681640625, |
|
"logps/rejected": -530.5051879882812, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2889801263809204, |
|
"rewards/margins": 0.5127965211868286, |
|
"rewards/rejected": -1.801776647567749, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 20.21278953552246, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": 0.056394852697849274, |
|
"logits/rejected": 0.23709776997566223, |
|
"logps/chosen": -462.7340393066406, |
|
"logps/rejected": -502.6092834472656, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2453665733337402, |
|
"rewards/margins": 0.4758334755897522, |
|
"rewards/rejected": -1.7211999893188477, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": 0.051600489765405655, |
|
"eval_logits/rejected": 0.1634536236524582, |
|
"eval_logps/chosen": -525.2161254882812, |
|
"eval_logps/rejected": -525.723388671875, |
|
"eval_loss": 0.5906327962875366, |
|
"eval_rewards/accuracies": 0.6815000176429749, |
|
"eval_rewards/chosen": -1.3685081005096436, |
|
"eval_rewards/margins": 0.4418841004371643, |
|
"eval_rewards/rejected": -1.8103920221328735, |
|
"eval_runtime": 232.6085, |
|
"eval_samples_per_second": 8.598, |
|
"eval_steps_per_second": 1.075, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.944778853703219, |
|
"grad_norm": 24.784046173095703, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": 0.1571771204471588, |
|
"logits/rejected": 0.32081273198127747, |
|
"logps/chosen": -489.2625427246094, |
|
"logps/rejected": -496.1024475097656, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3370298147201538, |
|
"rewards/margins": 0.49337905645370483, |
|
"rewards/rejected": -1.830409049987793, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 31.509357452392578, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": 0.29533377289772034, |
|
"logits/rejected": 0.265569269657135, |
|
"logps/chosen": -500.92803955078125, |
|
"logps/rejected": -486.66973876953125, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.3463305234909058, |
|
"rewards/margins": 0.3861822783946991, |
|
"rewards/rejected": -1.7325128316879272, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9500130855796912, |
|
"grad_norm": 21.457324981689453, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": 0.2158433496952057, |
|
"logits/rejected": 0.3043590486049652, |
|
"logps/chosen": -524.4801025390625, |
|
"logps/rejected": -508.20953369140625, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4179335832595825, |
|
"rewards/margins": 0.46254149079322815, |
|
"rewards/rejected": -1.8804748058319092, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 25.418264389038086, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": 0.06112390756607056, |
|
"logits/rejected": 0.14306578040122986, |
|
"logps/chosen": -532.900390625, |
|
"logps/rejected": -495.4185485839844, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.300320029258728, |
|
"rewards/margins": 0.4503448009490967, |
|
"rewards/rejected": -1.7506647109985352, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9552473174561633, |
|
"grad_norm": 17.69974708557129, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": 0.006334272213280201, |
|
"logits/rejected": 0.10581526905298233, |
|
"logps/chosen": -490.5302734375, |
|
"logps/rejected": -496.6221618652344, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2481968402862549, |
|
"rewards/margins": 0.5572769045829773, |
|
"rewards/rejected": -1.8054739236831665, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 21.300947189331055, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": 0.26631540060043335, |
|
"logits/rejected": 0.2855226993560791, |
|
"logps/chosen": -517.7824096679688, |
|
"logps/rejected": -547.991455078125, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3668075799942017, |
|
"rewards/margins": 0.38124316930770874, |
|
"rewards/rejected": -1.7480506896972656, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9604815493326354, |
|
"grad_norm": 20.375226974487305, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": 0.05325336381793022, |
|
"logits/rejected": 0.11790470033884048, |
|
"logps/chosen": -533.8929443359375, |
|
"logps/rejected": -533.8286743164062, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.4277513027191162, |
|
"rewards/margins": 0.46243423223495483, |
|
"rewards/rejected": -1.8901855945587158, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 22.46816062927246, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": 0.17507997155189514, |
|
"logits/rejected": 0.20632827281951904, |
|
"logps/chosen": -517.567138671875, |
|
"logps/rejected": -549.365234375, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3189188241958618, |
|
"rewards/margins": 0.5807808637619019, |
|
"rewards/rejected": -1.8996999263763428, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9657157812091076, |
|
"grad_norm": 17.85133934020996, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": 0.027010198682546616, |
|
"logits/rejected": 0.21749186515808105, |
|
"logps/chosen": -594.4285278320312, |
|
"logps/rejected": -558.7322387695312, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.2619202136993408, |
|
"rewards/margins": 0.5537473559379578, |
|
"rewards/rejected": -1.8156675100326538, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 28.183317184448242, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": 0.10956914722919464, |
|
"logits/rejected": 0.19487416744232178, |
|
"logps/chosen": -572.9104614257812, |
|
"logps/rejected": -580.4852905273438, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4588168859481812, |
|
"rewards/margins": 0.5328525304794312, |
|
"rewards/rejected": -1.9916694164276123, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"eval_logits/chosen": 0.05304437875747681, |
|
"eval_logits/rejected": 0.1655142903327942, |
|
"eval_logps/chosen": -525.91552734375, |
|
"eval_logps/rejected": -526.5010375976562, |
|
"eval_loss": 0.5903691649436951, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": -1.375501275062561, |
|
"eval_rewards/margins": 0.4426679015159607, |
|
"eval_rewards/rejected": -1.818169116973877, |
|
"eval_runtime": 232.1562, |
|
"eval_samples_per_second": 8.615, |
|
"eval_steps_per_second": 1.077, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9709500130855797, |
|
"grad_norm": 33.79894256591797, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": 0.24655885994434357, |
|
"logits/rejected": 0.3253239095211029, |
|
"logps/chosen": -476.681396484375, |
|
"logps/rejected": -499.00494384765625, |
|
"loss": 0.6091, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3145527839660645, |
|
"rewards/margins": 0.4425618648529053, |
|
"rewards/rejected": -1.7571147680282593, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 20.961793899536133, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": 0.16103433072566986, |
|
"logits/rejected": 0.22914746403694153, |
|
"logps/chosen": -490.27777099609375, |
|
"logps/rejected": -488.0320739746094, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3744248151779175, |
|
"rewards/margins": 0.5473370552062988, |
|
"rewards/rejected": -1.9217618703842163, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9761842449620518, |
|
"grad_norm": 18.385028839111328, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": 0.1808285266160965, |
|
"logits/rejected": 0.2911062240600586, |
|
"logps/chosen": -493.48321533203125, |
|
"logps/rejected": -501.9202575683594, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.277074933052063, |
|
"rewards/margins": 0.5030657649040222, |
|
"rewards/rejected": -1.7801406383514404, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 19.80834197998047, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": 0.11059341579675674, |
|
"logits/rejected": 0.3094932436943054, |
|
"logps/chosen": -525.2141723632812, |
|
"logps/rejected": -548.9354248046875, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2858692407608032, |
|
"rewards/margins": 0.6285649538040161, |
|
"rewards/rejected": -1.9144340753555298, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9814184768385239, |
|
"grad_norm": 17.796337127685547, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": 0.08546547591686249, |
|
"logits/rejected": 0.2145168036222458, |
|
"logps/chosen": -523.2117919921875, |
|
"logps/rejected": -539.4160766601562, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.280202865600586, |
|
"rewards/margins": 0.5271276831626892, |
|
"rewards/rejected": -1.8073304891586304, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 19.8670654296875, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": 0.17666058242321014, |
|
"logits/rejected": 0.23271194100379944, |
|
"logps/chosen": -512.79736328125, |
|
"logps/rejected": -507.11505126953125, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.3305553197860718, |
|
"rewards/margins": 0.4106437563896179, |
|
"rewards/rejected": -1.7411991357803345, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9866527087149961, |
|
"grad_norm": 21.29877281188965, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": 0.22256436944007874, |
|
"logits/rejected": 0.329804003238678, |
|
"logps/chosen": -550.2042236328125, |
|
"logps/rejected": -548.9179077148438, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2466299533843994, |
|
"rewards/margins": 0.5815029144287109, |
|
"rewards/rejected": -1.8281329870224, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 21.455854415893555, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": 0.06916506588459015, |
|
"logits/rejected": 0.15464463829994202, |
|
"logps/chosen": -504.8829040527344, |
|
"logps/rejected": -547.6551513671875, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.3374298810958862, |
|
"rewards/margins": 0.33050769567489624, |
|
"rewards/rejected": -1.6679375171661377, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9918869405914682, |
|
"grad_norm": 24.640779495239258, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": 0.07244547456502914, |
|
"logits/rejected": 0.21235093474388123, |
|
"logps/chosen": -528.9513549804688, |
|
"logps/rejected": -539.2667236328125, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3598108291625977, |
|
"rewards/margins": 0.5505877733230591, |
|
"rewards/rejected": -1.9103988409042358, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 23.488046646118164, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": 0.006186048500239849, |
|
"logits/rejected": 0.1412689983844757, |
|
"logps/chosen": -535.95263671875, |
|
"logps/rejected": -533.4951782226562, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.274775505065918, |
|
"rewards/margins": 0.5218435525894165, |
|
"rewards/rejected": -1.7966190576553345, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"eval_logits/chosen": 0.052397292107343674, |
|
"eval_logits/rejected": 0.16461612284183502, |
|
"eval_logps/chosen": -525.971923828125, |
|
"eval_logps/rejected": -526.6827392578125, |
|
"eval_loss": 0.5901351571083069, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": -1.3760651350021362, |
|
"eval_rewards/margins": 0.44392091035842896, |
|
"eval_rewards/rejected": -1.81998610496521, |
|
"eval_runtime": 232.5667, |
|
"eval_samples_per_second": 8.6, |
|
"eval_steps_per_second": 1.075, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9971211724679403, |
|
"grad_norm": 21.794204711914062, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": 0.02779226377606392, |
|
"logits/rejected": 0.21272841095924377, |
|
"logps/chosen": -511.3211364746094, |
|
"logps/rejected": -515.0205688476562, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2764627933502197, |
|
"rewards/margins": 0.537833571434021, |
|
"rewards/rejected": -1.8142963647842407, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 21.642515182495117, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": 0.1357661634683609, |
|
"logits/rejected": 0.14389568567276, |
|
"logps/chosen": -550.05810546875, |
|
"logps/rejected": -537.67041015625, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.337266206741333, |
|
"rewards/margins": 0.5570909380912781, |
|
"rewards/rejected": -1.8943573236465454, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0101, |
|
"train_samples_per_second": 6068984.159, |
|
"train_steps_per_second": 379323.919 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|