|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9995451099317665, |
|
"eval_steps": 100, |
|
"global_step": 1648, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 3.03030303030303e-09, |
|
"logits/chosen": -3.4050943851470947, |
|
"logits/rejected": -3.1368675231933594, |
|
"logps/chosen": -118.80651092529297, |
|
"logps/rejected": -84.5186767578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 3.0303030303030305e-08, |
|
"logits/chosen": -3.4118552207946777, |
|
"logits/rejected": -3.234715700149536, |
|
"logps/chosen": -112.32723236083984, |
|
"logps/rejected": -153.78240966796875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.0004033078148495406, |
|
"rewards/margins": 0.00041662290459498763, |
|
"rewards/margins_max": 0.0021899566054344177, |
|
"rewards/margins_min": -0.0013567109126597643, |
|
"rewards/margins_std": 0.002507872646674514, |
|
"rewards/rejected": -1.3315144315129146e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 6.060606060606061e-08, |
|
"logits/chosen": -3.4354801177978516, |
|
"logits/rejected": -3.176407814025879, |
|
"logps/chosen": -118.2829360961914, |
|
"logps/rejected": -184.0032958984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.00016470803529955447, |
|
"rewards/margins": 1.1057045412599109e-05, |
|
"rewards/margins_max": 0.00216041412204504, |
|
"rewards/margins_min": -0.002138300333172083, |
|
"rewards/margins_std": 0.0030396501533687115, |
|
"rewards/rejected": 0.0001536509662400931, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.8671875, |
|
"learning_rate": 9.09090909090909e-08, |
|
"logits/chosen": -3.4078497886657715, |
|
"logits/rejected": -3.205293655395508, |
|
"logps/chosen": -127.51212310791016, |
|
"logps/rejected": -157.24716186523438, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.00028022227343171835, |
|
"rewards/margins": 0.0013433375861495733, |
|
"rewards/margins_max": 0.0038831476122140884, |
|
"rewards/margins_min": -0.0011964720906689763, |
|
"rewards/margins_std": 0.003591833170503378, |
|
"rewards/rejected": -0.0010631154291331768, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.71875, |
|
"learning_rate": 1.2121212121212122e-07, |
|
"logits/chosen": -3.4350059032440186, |
|
"logits/rejected": -3.2142701148986816, |
|
"logps/chosen": -121.0025634765625, |
|
"logps/rejected": -145.43264770507812, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0012690603034570813, |
|
"rewards/margins": 0.003167077898979187, |
|
"rewards/margins_max": 0.005492820404469967, |
|
"rewards/margins_min": 0.0008413357427343726, |
|
"rewards/margins_std": 0.003289096523076296, |
|
"rewards/rejected": -0.0018980179447680712, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.75, |
|
"learning_rate": 1.5151515151515152e-07, |
|
"logits/chosen": -3.4711899757385254, |
|
"logits/rejected": -3.23637056350708, |
|
"logps/chosen": -114.65794372558594, |
|
"logps/rejected": -166.53250122070312, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0023323404602706432, |
|
"rewards/margins": 0.004964292515069246, |
|
"rewards/margins_max": 0.007555422373116016, |
|
"rewards/margins_min": 0.0023731617256999016, |
|
"rewards/margins_std": 0.0036644123028963804, |
|
"rewards/rejected": -0.0026319522876292467, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0, |
|
"learning_rate": 1.818181818181818e-07, |
|
"logits/chosen": -3.4538276195526123, |
|
"logits/rejected": -3.1886672973632812, |
|
"logps/chosen": -109.4487533569336, |
|
"logps/rejected": -172.9461669921875, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.003310429397970438, |
|
"rewards/margins": 0.007032909896224737, |
|
"rewards/margins_max": 0.011247309856116772, |
|
"rewards/margins_min": 0.0028185099363327026, |
|
"rewards/margins_std": 0.005960061680525541, |
|
"rewards/rejected": -0.003722480731084943, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.8359375, |
|
"learning_rate": 2.121212121212121e-07, |
|
"logits/chosen": -3.4295284748077393, |
|
"logits/rejected": -3.1960196495056152, |
|
"logps/chosen": -125.6326904296875, |
|
"logps/rejected": -177.14407348632812, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0038899246137589216, |
|
"rewards/margins": 0.009991476312279701, |
|
"rewards/margins_max": 0.013599397614598274, |
|
"rewards/margins_min": 0.006383554544299841, |
|
"rewards/margins_std": 0.005102371331304312, |
|
"rewards/rejected": -0.006101551465690136, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 2.4242424242424244e-07, |
|
"logits/chosen": -3.4546008110046387, |
|
"logits/rejected": -3.259620189666748, |
|
"logps/chosen": -105.1754150390625, |
|
"logps/rejected": -149.17739868164062, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.004929172340780497, |
|
"rewards/margins": 0.013479220680892467, |
|
"rewards/margins_max": 0.019636893644928932, |
|
"rewards/margins_min": 0.007321546785533428, |
|
"rewards/margins_std": 0.008708265610039234, |
|
"rewards/rejected": -0.008550046943128109, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.8828125, |
|
"learning_rate": 2.727272727272727e-07, |
|
"logits/chosen": -3.4643356800079346, |
|
"logits/rejected": -3.227538585662842, |
|
"logps/chosen": -122.521240234375, |
|
"logps/rejected": -154.5928497314453, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.008893799968063831, |
|
"rewards/margins": 0.021132633090019226, |
|
"rewards/margins_max": 0.027842596173286438, |
|
"rewards/margins_min": 0.014422670006752014, |
|
"rewards/margins_std": 0.00948932021856308, |
|
"rewards/rejected": -0.012238833121955395, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.71875, |
|
"learning_rate": 3.0303030303030305e-07, |
|
"logits/chosen": -3.4493160247802734, |
|
"logits/rejected": -3.2681262493133545, |
|
"logps/chosen": -111.17362976074219, |
|
"logps/rejected": -195.10665893554688, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.009519929066300392, |
|
"rewards/margins": 0.023902228102087975, |
|
"rewards/margins_max": 0.0342855267226696, |
|
"rewards/margins_min": 0.013518924824893475, |
|
"rewards/margins_std": 0.014684207737445831, |
|
"rewards/rejected": -0.014382297173142433, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.8125, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -3.3599190711975098, |
|
"logits/rejected": -3.1862587928771973, |
|
"logps/chosen": -117.963134765625, |
|
"logps/rejected": -156.41465759277344, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.008118102326989174, |
|
"rewards/margins": 0.030242860317230225, |
|
"rewards/margins_max": 0.040411077439785004, |
|
"rewards/margins_min": 0.020074646919965744, |
|
"rewards/margins_std": 0.014380025677382946, |
|
"rewards/rejected": -0.0221247561275959, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 3.636363636363636e-07, |
|
"logits/chosen": -3.4270176887512207, |
|
"logits/rejected": -3.26020884513855, |
|
"logps/chosen": -109.92674255371094, |
|
"logps/rejected": -144.8209228515625, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.010196666233241558, |
|
"rewards/margins": 0.03683094307780266, |
|
"rewards/margins_max": 0.0503312349319458, |
|
"rewards/margins_min": 0.023330653086304665, |
|
"rewards/margins_std": 0.019092293456196785, |
|
"rewards/rejected": -0.026634279638528824, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 3.939393939393939e-07, |
|
"logits/chosen": -3.4294886589050293, |
|
"logits/rejected": -3.2369658946990967, |
|
"logps/chosen": -126.5963363647461, |
|
"logps/rejected": -178.168212890625, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.009229905903339386, |
|
"rewards/margins": 0.043803971260786057, |
|
"rewards/margins_max": 0.06445904076099396, |
|
"rewards/margins_min": 0.023148905485868454, |
|
"rewards/margins_std": 0.02921067550778389, |
|
"rewards/rejected": -0.03457406908273697, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.7421875, |
|
"learning_rate": 4.242424242424242e-07, |
|
"logits/chosen": -3.441509246826172, |
|
"logits/rejected": -3.2002804279327393, |
|
"logps/chosen": -130.19882202148438, |
|
"logps/rejected": -178.93299865722656, |
|
"loss": 0.6643, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.0069100684486329556, |
|
"rewards/margins": 0.05727902799844742, |
|
"rewards/margins_max": 0.07939378917217255, |
|
"rewards/margins_min": 0.03516425937414169, |
|
"rewards/margins_std": 0.03127499669790268, |
|
"rewards/rejected": -0.050368957221508026, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.125, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -3.466477155685425, |
|
"logits/rejected": -3.275334119796753, |
|
"logps/chosen": -124.30558776855469, |
|
"logps/rejected": -181.0602569580078, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.008361677639186382, |
|
"rewards/margins": 0.06292758136987686, |
|
"rewards/margins_max": 0.08513649553060532, |
|
"rewards/margins_min": 0.04071866348385811, |
|
"rewards/margins_std": 0.0314081534743309, |
|
"rewards/rejected": -0.054565899074077606, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.9140625, |
|
"learning_rate": 4.848484848484849e-07, |
|
"logits/chosen": -3.3910937309265137, |
|
"logits/rejected": -3.2401318550109863, |
|
"logps/chosen": -99.83372497558594, |
|
"logps/rejected": -162.27804565429688, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.006624563131481409, |
|
"rewards/margins": 0.06912867724895477, |
|
"rewards/margins_max": 0.1061328873038292, |
|
"rewards/margins_min": 0.032124463468790054, |
|
"rewards/margins_std": 0.05233185365796089, |
|
"rewards/rejected": -0.0625041052699089, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.9375, |
|
"learning_rate": 4.999859762744229e-07, |
|
"logits/chosen": -3.403299331665039, |
|
"logits/rejected": -3.2240326404571533, |
|
"logps/chosen": -101.01579284667969, |
|
"logps/rejected": -159.98538208007812, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.00876183807849884, |
|
"rewards/margins": 0.08230775594711304, |
|
"rewards/margins_max": 0.11581530421972275, |
|
"rewards/margins_min": 0.048800211399793625, |
|
"rewards/margins_std": 0.047386832535266876, |
|
"rewards/rejected": -0.0735459253191948, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.953125, |
|
"learning_rate": 4.998737959095448e-07, |
|
"logits/chosen": -3.4143004417419434, |
|
"logits/rejected": -3.1833884716033936, |
|
"logps/chosen": -105.91117095947266, |
|
"logps/rejected": -135.10708618164062, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.002522200345993042, |
|
"rewards/margins": 0.09561987221240997, |
|
"rewards/margins_max": 0.14069953560829163, |
|
"rewards/margins_min": 0.05054020881652832, |
|
"rewards/margins_std": 0.06375227868556976, |
|
"rewards/rejected": -0.09309767186641693, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.046875, |
|
"learning_rate": 4.996494855203493e-07, |
|
"logits/chosen": -3.47766375541687, |
|
"logits/rejected": -3.207594633102417, |
|
"logps/chosen": -113.1792221069336, |
|
"logps/rejected": -175.5690460205078, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.0006620381027460098, |
|
"rewards/margins": 0.11444854736328125, |
|
"rewards/margins_max": 0.15897879004478455, |
|
"rewards/margins_min": 0.06991832703351974, |
|
"rewards/margins_std": 0.06297525763511658, |
|
"rewards/rejected": -0.11378651857376099, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 4.993131457653681e-07, |
|
"logits/chosen": -3.4641525745391846, |
|
"logits/rejected": -3.2808594703674316, |
|
"logps/chosen": -100.21434020996094, |
|
"logps/rejected": -156.946044921875, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01082407496869564, |
|
"rewards/margins": 0.11547299474477768, |
|
"rewards/margins_max": 0.16020886600017548, |
|
"rewards/margins_min": 0.07073714584112167, |
|
"rewards/margins_std": 0.06326606869697571, |
|
"rewards/rejected": -0.12629708647727966, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.34375, |
|
"learning_rate": 4.988649275759334e-07, |
|
"logits/chosen": -3.428915500640869, |
|
"logits/rejected": -3.1432971954345703, |
|
"logps/chosen": -110.36918640136719, |
|
"logps/rejected": -165.47640991210938, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.012739740312099457, |
|
"rewards/margins": 0.12643049657344818, |
|
"rewards/margins_max": 0.18977002799510956, |
|
"rewards/margins_min": 0.0630909651517868, |
|
"rewards/margins_std": 0.0895756185054779, |
|
"rewards/rejected": -0.13917024433612823, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.875, |
|
"learning_rate": 4.983050320884483e-07, |
|
"logits/chosen": -3.4887309074401855, |
|
"logits/rejected": -3.2058892250061035, |
|
"logps/chosen": -126.2535629272461, |
|
"logps/rejected": -187.00015258789062, |
|
"loss": 0.6223, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01476545911282301, |
|
"rewards/margins": 0.15231844782829285, |
|
"rewards/margins_max": 0.2008267194032669, |
|
"rewards/margins_min": 0.10381016880273819, |
|
"rewards/margins_std": 0.06860103458166122, |
|
"rewards/rejected": -0.16708388924598694, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.125, |
|
"learning_rate": 4.976337105541267e-07, |
|
"logits/chosen": -3.403496503829956, |
|
"logits/rejected": -3.164135217666626, |
|
"logps/chosen": -130.16421508789062, |
|
"logps/rejected": -158.1027374267578, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.04382479190826416, |
|
"rewards/margins": 0.14249354600906372, |
|
"rewards/margins_max": 0.20711453258991241, |
|
"rewards/margins_min": 0.07787257432937622, |
|
"rewards/margins_std": 0.09138786792755127, |
|
"rewards/rejected": -0.1863183230161667, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.234375, |
|
"learning_rate": 4.968512642262464e-07, |
|
"logits/chosen": -3.423377513885498, |
|
"logits/rejected": -3.2418792247772217, |
|
"logps/chosen": -104.84086608886719, |
|
"logps/rejected": -180.81430053710938, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.016986923292279243, |
|
"rewards/margins": 0.21991240978240967, |
|
"rewards/margins_max": 0.29189637303352356, |
|
"rewards/margins_min": 0.14792843163013458, |
|
"rewards/margins_std": 0.10180072486400604, |
|
"rewards/rejected": -0.23689934611320496, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 4.959580442249614e-07, |
|
"logits/chosen": -3.5027713775634766, |
|
"logits/rejected": -3.174872875213623, |
|
"logps/chosen": -121.6041259765625, |
|
"logps/rejected": -184.39622497558594, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.03534800559282303, |
|
"rewards/margins": 0.20791885256767273, |
|
"rewards/margins_max": 0.3044799268245697, |
|
"rewards/margins_min": 0.11135780811309814, |
|
"rewards/margins_std": 0.13655796647071838, |
|
"rewards/rejected": -0.24326686561107635, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 4.94954451379739e-07, |
|
"logits/chosen": -3.4629738330841064, |
|
"logits/rejected": -3.254920244216919, |
|
"logps/chosen": -126.48948669433594, |
|
"logps/rejected": -186.8356475830078, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0572446808218956, |
|
"rewards/margins": 0.242076113820076, |
|
"rewards/margins_max": 0.3453850746154785, |
|
"rewards/margins_min": 0.13876716792583466, |
|
"rewards/margins_std": 0.1461009383201599, |
|
"rewards/rejected": -0.2993208169937134, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 4.938409360494883e-07, |
|
"logits/chosen": -3.4049344062805176, |
|
"logits/rejected": -3.1644232273101807, |
|
"logps/chosen": -117.92335510253906, |
|
"logps/rejected": -183.36587524414062, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.053103696554899216, |
|
"rewards/margins": 0.23206424713134766, |
|
"rewards/margins_max": 0.3247820734977722, |
|
"rewards/margins_min": 0.1393464058637619, |
|
"rewards/margins_std": 0.13112285733222961, |
|
"rewards/rejected": -0.28516796231269836, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.296875, |
|
"learning_rate": 4.926179979204632e-07, |
|
"logits/chosen": -3.4576289653778076, |
|
"logits/rejected": -3.24690318107605, |
|
"logps/chosen": -123.93232727050781, |
|
"logps/rejected": -194.07188415527344, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0886482372879982, |
|
"rewards/margins": 0.2548428773880005, |
|
"rewards/margins_max": 0.3780335485935211, |
|
"rewards/margins_min": 0.13165222108364105, |
|
"rewards/margins_std": 0.17421790957450867, |
|
"rewards/rejected": -0.3434911370277405, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 4.912861857820302e-07, |
|
"logits/chosen": -3.3650596141815186, |
|
"logits/rejected": -3.2302684783935547, |
|
"logps/chosen": -111.41851806640625, |
|
"logps/rejected": -206.7620849609375, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.10818646103143692, |
|
"rewards/margins": 0.2999250292778015, |
|
"rewards/margins_max": 0.4337504506111145, |
|
"rewards/margins_min": 0.1660996377468109, |
|
"rewards/margins_std": 0.18925771117210388, |
|
"rewards/rejected": -0.40811148285865784, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 4.898460972804008e-07, |
|
"logits/chosen": -3.420971632003784, |
|
"logits/rejected": -3.1563363075256348, |
|
"logps/chosen": -122.5914077758789, |
|
"logps/rejected": -199.3488006591797, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.08986975252628326, |
|
"rewards/margins": 0.31944385170936584, |
|
"rewards/margins_max": 0.4357198178768158, |
|
"rewards/margins_min": 0.2031678408384323, |
|
"rewards/margins_std": 0.16443908214569092, |
|
"rewards/rejected": -0.4093135893344879, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.625, |
|
"learning_rate": 4.882983786504399e-07, |
|
"logits/chosen": -3.4148566722869873, |
|
"logits/rejected": -3.1982669830322266, |
|
"logps/chosen": -136.39987182617188, |
|
"logps/rejected": -236.8019256591797, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.15588413178920746, |
|
"rewards/margins": 0.3820186257362366, |
|
"rewards/margins_max": 0.5593412518501282, |
|
"rewards/margins_min": 0.20469605922698975, |
|
"rewards/margins_std": 0.25077205896377563, |
|
"rewards/rejected": -0.5379027724266052, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.421875, |
|
"learning_rate": 4.866437244256695e-07, |
|
"logits/chosen": -3.411226987838745, |
|
"logits/rejected": -3.205670118331909, |
|
"logps/chosen": -129.0833282470703, |
|
"logps/rejected": -206.7379913330078, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.14963629841804504, |
|
"rewards/margins": 0.37762314081192017, |
|
"rewards/margins_max": 0.5498967170715332, |
|
"rewards/margins_min": 0.20534953474998474, |
|
"rewards/margins_std": 0.24363164603710175, |
|
"rewards/rejected": -0.5272594690322876, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.625, |
|
"learning_rate": 4.848828771266001e-07, |
|
"logits/chosen": -3.5033020973205566, |
|
"logits/rejected": -3.273409366607666, |
|
"logps/chosen": -156.30401611328125, |
|
"logps/rejected": -197.61549377441406, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19874437153339386, |
|
"rewards/margins": 0.3051304817199707, |
|
"rewards/margins_max": 0.47203540802001953, |
|
"rewards/margins_min": 0.13822560012340546, |
|
"rewards/margins_std": 0.2360391616821289, |
|
"rewards/rejected": -0.5038748979568481, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 4.830166269275266e-07, |
|
"logits/chosen": -3.443110942840576, |
|
"logits/rejected": -3.2243683338165283, |
|
"logps/chosen": -148.33139038085938, |
|
"logps/rejected": -215.1509246826172, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.20774701237678528, |
|
"rewards/margins": 0.386242538690567, |
|
"rewards/margins_max": 0.5645895600318909, |
|
"rewards/margins_min": 0.20789547264575958, |
|
"rewards/margins_std": 0.2522208094596863, |
|
"rewards/rejected": -0.5939895510673523, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 4.8104581130194e-07, |
|
"logits/chosen": -3.4214928150177, |
|
"logits/rejected": -3.2243239879608154, |
|
"logps/chosen": -139.30186462402344, |
|
"logps/rejected": -219.8242645263672, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.22474750876426697, |
|
"rewards/margins": 0.41614609956741333, |
|
"rewards/margins_max": 0.6199635863304138, |
|
"rewards/margins_min": 0.21232867240905762, |
|
"rewards/margins_std": 0.2882413864135742, |
|
"rewards/rejected": -0.6408936977386475, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 4.789713146467143e-07, |
|
"logits/chosen": -3.401512861251831, |
|
"logits/rejected": -3.190495252609253, |
|
"logps/chosen": -146.44760131835938, |
|
"logps/rejected": -218.2511444091797, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2993583083152771, |
|
"rewards/margins": 0.397621214389801, |
|
"rewards/margins_max": 0.5621191263198853, |
|
"rewards/margins_min": 0.2331233024597168, |
|
"rewards/margins_std": 0.23263517022132874, |
|
"rewards/rejected": -0.6969794631004333, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 4.767940678852368e-07, |
|
"logits/chosen": -3.405986785888672, |
|
"logits/rejected": -3.1692707538604736, |
|
"logps/chosen": -153.48361206054688, |
|
"logps/rejected": -218.69650268554688, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.3115563988685608, |
|
"rewards/margins": 0.4646086096763611, |
|
"rewards/margins_max": 0.6958065032958984, |
|
"rewards/margins_min": 0.23341062664985657, |
|
"rewards/margins_std": 0.32696327567100525, |
|
"rewards/rejected": -0.7761648893356323, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 4.7451504804965823e-07, |
|
"logits/chosen": -3.3532516956329346, |
|
"logits/rejected": -3.2124714851379395, |
|
"logps/chosen": -138.7807159423828, |
|
"logps/rejected": -285.9217834472656, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.31872931122779846, |
|
"rewards/margins": 0.5318517088890076, |
|
"rewards/margins_max": 0.7545040249824524, |
|
"rewards/margins_min": 0.30919957160949707, |
|
"rewards/margins_std": 0.31487777829170227, |
|
"rewards/rejected": -0.850581169128418, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.078125, |
|
"learning_rate": 4.7213527784245395e-07, |
|
"logits/chosen": -3.4123377799987793, |
|
"logits/rejected": -3.1568984985351562, |
|
"logps/chosen": -145.73069763183594, |
|
"logps/rejected": -233.9403533935547, |
|
"loss": 0.4702, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.35064345598220825, |
|
"rewards/margins": 0.5569049715995789, |
|
"rewards/margins_max": 0.7923904657363892, |
|
"rewards/margins_min": 0.3214194178581238, |
|
"rewards/margins_std": 0.33302679657936096, |
|
"rewards/rejected": -0.9075484275817871, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.375, |
|
"learning_rate": 4.6965582517748917e-07, |
|
"logits/chosen": -3.3509891033172607, |
|
"logits/rejected": -3.1365480422973633, |
|
"logps/chosen": -157.23922729492188, |
|
"logps/rejected": -269.90496826171875, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.4497006833553314, |
|
"rewards/margins": 0.6059035062789917, |
|
"rewards/margins_max": 0.8811772465705872, |
|
"rewards/margins_min": 0.33062973618507385, |
|
"rewards/margins_std": 0.38929590582847595, |
|
"rewards/rejected": -1.0556042194366455, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 4.6707780270079635e-07, |
|
"logits/chosen": -3.4341049194335938, |
|
"logits/rejected": -3.1959335803985596, |
|
"logps/chosen": -172.27877807617188, |
|
"logps/rejected": -291.41204833984375, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.44683152437210083, |
|
"rewards/margins": 0.6504033207893372, |
|
"rewards/margins_max": 0.9344033002853394, |
|
"rewards/margins_min": 0.36640337109565735, |
|
"rewards/margins_std": 0.40163666009902954, |
|
"rewards/rejected": -1.0972349643707275, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.0, |
|
"learning_rate": 4.6440236729127876e-07, |
|
"logits/chosen": -3.3718056678771973, |
|
"logits/rejected": -3.1097371578216553, |
|
"logps/chosen": -169.48446655273438, |
|
"logps/rejected": -302.61016845703125, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.5628658533096313, |
|
"rewards/margins": 0.7150717973709106, |
|
"rewards/margins_max": 1.017971396446228, |
|
"rewards/margins_min": 0.4121721684932709, |
|
"rewards/margins_std": 0.42836475372314453, |
|
"rewards/rejected": -1.277937650680542, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 4.616307195415654e-07, |
|
"logits/chosen": -3.3012547492980957, |
|
"logits/rejected": -3.127958059310913, |
|
"logps/chosen": -167.62088012695312, |
|
"logps/rejected": -272.51800537109375, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5819985270500183, |
|
"rewards/margins": 0.7929113507270813, |
|
"rewards/margins_max": 1.1863847970962524, |
|
"rewards/margins_min": 0.3994379937648773, |
|
"rewards/margins_std": 0.5564553737640381, |
|
"rewards/rejected": -1.3749098777770996, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.421875, |
|
"learning_rate": 4.587641032192488e-07, |
|
"logits/chosen": -3.333758592605591, |
|
"logits/rejected": -3.151676654815674, |
|
"logps/chosen": -181.68984985351562, |
|
"logps/rejected": -296.03082275390625, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7118282914161682, |
|
"rewards/margins": 0.7851115465164185, |
|
"rewards/margins_max": 1.1220605373382568, |
|
"rewards/margins_min": 0.4481624662876129, |
|
"rewards/margins_std": 0.4765179753303528, |
|
"rewards/rejected": -1.4969398975372314, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.558038047087486e-07, |
|
"logits/chosen": -3.2601521015167236, |
|
"logits/rejected": -3.0268001556396484, |
|
"logps/chosen": -185.16275024414062, |
|
"logps/rejected": -308.6865234375, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7758798599243164, |
|
"rewards/margins": 0.886875331401825, |
|
"rewards/margins_max": 1.3197309970855713, |
|
"rewards/margins_min": 0.454019695520401, |
|
"rewards/margins_std": 0.6121503114700317, |
|
"rewards/rejected": -1.6627552509307861, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.527511524340508e-07, |
|
"logits/chosen": -3.229076862335205, |
|
"logits/rejected": -3.024235486984253, |
|
"logps/chosen": -197.80221557617188, |
|
"logps/rejected": -347.35479736328125, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.8850440979003906, |
|
"rewards/margins": 1.0945155620574951, |
|
"rewards/margins_max": 1.7435153722763062, |
|
"rewards/margins_min": 0.4455157220363617, |
|
"rewards/margins_std": 0.9178244471549988, |
|
"rewards/rejected": -1.9795596599578857, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 4.49607516262582e-07, |
|
"logits/chosen": -3.2619071006774902, |
|
"logits/rejected": -3.051602840423584, |
|
"logps/chosen": -231.6276397705078, |
|
"logps/rejected": -371.31866455078125, |
|
"loss": 0.3899, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.0247620344161987, |
|
"rewards/margins": 1.10407555103302, |
|
"rewards/margins_max": 1.7106988430023193, |
|
"rewards/margins_min": 0.49745243787765503, |
|
"rewards/margins_std": 0.8578945994377136, |
|
"rewards/rejected": -2.1288375854492188, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 4.4637430689048626e-07, |
|
"logits/chosen": -3.2792510986328125, |
|
"logits/rejected": -3.040688991546631, |
|
"logps/chosen": -198.93948364257812, |
|
"logps/rejected": -341.1888427734375, |
|
"loss": 0.3362, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.007499098777771, |
|
"rewards/margins": 1.0881038904190063, |
|
"rewards/margins_max": 1.634576439857483, |
|
"rewards/margins_min": 0.5416311025619507, |
|
"rewards/margins_std": 0.772828996181488, |
|
"rewards/rejected": -2.0956027507781982, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 4.4305297520957944e-07, |
|
"logits/chosen": -3.227466106414795, |
|
"logits/rejected": -3.0447263717651367, |
|
"logps/chosen": -218.6245574951172, |
|
"logps/rejected": -389.6742248535156, |
|
"loss": 0.325, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.055724024772644, |
|
"rewards/margins": 1.3826014995574951, |
|
"rewards/margins_max": 1.864708662033081, |
|
"rewards/margins_min": 0.9004942178726196, |
|
"rewards/margins_std": 0.6818027496337891, |
|
"rewards/rejected": -2.438325881958008, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.125, |
|
"learning_rate": 4.396450116562669e-07, |
|
"logits/chosen": -3.2189323902130127, |
|
"logits/rejected": -3.0369110107421875, |
|
"logps/chosen": -239.6689910888672, |
|
"logps/rejected": -412.5196838378906, |
|
"loss": 0.3732, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3009732961654663, |
|
"rewards/margins": 1.1667307615280151, |
|
"rewards/margins_max": 1.9371881484985352, |
|
"rewards/margins_min": 0.39627307653427124, |
|
"rewards/margins_std": 1.0895916223526, |
|
"rewards/rejected": -2.4677042961120605, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 9.625, |
|
"learning_rate": 4.3615194554271483e-07, |
|
"logits/chosen": -3.2492318153381348, |
|
"logits/rejected": -3.042893886566162, |
|
"logps/chosen": -261.33001708984375, |
|
"logps/rejected": -447.9276428222656, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.3674191236495972, |
|
"rewards/margins": 1.4249876737594604, |
|
"rewards/margins_max": 2.208099842071533, |
|
"rewards/margins_min": 0.6418755054473877, |
|
"rewards/margins_std": 1.1074877977371216, |
|
"rewards/rejected": -2.7924067974090576, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 4.325753443705767e-07, |
|
"logits/chosen": -3.217289686203003, |
|
"logits/rejected": -3.0037760734558105, |
|
"logps/chosen": -238.28817749023438, |
|
"logps/rejected": -447.4773864746094, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.272761344909668, |
|
"rewards/margins": 1.5220317840576172, |
|
"rewards/margins_max": 2.1593213081359863, |
|
"rewards/margins_min": 0.8847425580024719, |
|
"rewards/margins_std": 0.9012632369995117, |
|
"rewards/rejected": -2.794793128967285, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 5.25, |
|
"learning_rate": 4.289168131275822e-07, |
|
"logits/chosen": -3.1981008052825928, |
|
"logits/rejected": -2.9687576293945312, |
|
"logps/chosen": -248.31906127929688, |
|
"logps/rejected": -526.4910888671875, |
|
"loss": 0.3017, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.4457924365997314, |
|
"rewards/margins": 1.7948505878448486, |
|
"rewards/margins_max": 2.742126226425171, |
|
"rewards/margins_min": 0.8475747108459473, |
|
"rewards/margins_std": 1.339650273323059, |
|
"rewards/rejected": -3.240643262863159, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 7.125, |
|
"learning_rate": 4.251779935673044e-07, |
|
"logits/chosen": -3.1895217895507812, |
|
"logits/rejected": -2.982194423675537, |
|
"logps/chosen": -287.61956787109375, |
|
"logps/rejected": -510.9176330566406, |
|
"loss": 0.3085, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.6795003414154053, |
|
"rewards/margins": 1.9267494678497314, |
|
"rewards/margins_max": 2.750401735305786, |
|
"rewards/margins_min": 1.1030967235565186, |
|
"rewards/margins_std": 1.1648204326629639, |
|
"rewards/rejected": -3.606250047683716, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 4.213605634724283e-07, |
|
"logits/chosen": -3.2357590198516846, |
|
"logits/rejected": -2.958969831466675, |
|
"logps/chosen": -260.2586975097656, |
|
"logps/rejected": -494.48944091796875, |
|
"loss": 0.2896, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.4327127933502197, |
|
"rewards/margins": 1.9400370121002197, |
|
"rewards/margins_max": 3.0310521125793457, |
|
"rewards/margins_min": 0.8490220308303833, |
|
"rewards/margins_std": 1.5429283380508423, |
|
"rewards/rejected": -3.3727500438690186, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 4.174662359018515e-07, |
|
"logits/chosen": -3.204619884490967, |
|
"logits/rejected": -2.958706855773926, |
|
"logps/chosen": -267.38641357421875, |
|
"logps/rejected": -474.48919677734375, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.5095288753509521, |
|
"rewards/margins": 1.707524299621582, |
|
"rewards/margins_max": 2.5599889755249023, |
|
"rewards/margins_min": 0.8550596237182617, |
|
"rewards/margins_std": 1.2055673599243164, |
|
"rewards/rejected": -3.217053174972534, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.134967584219549e-07, |
|
"logits/chosen": -3.152198314666748, |
|
"logits/rejected": -2.9612772464752197, |
|
"logps/chosen": -279.5748291015625, |
|
"logps/rejected": -507.6549377441406, |
|
"loss": 0.2771, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.6234207153320312, |
|
"rewards/margins": 1.8984496593475342, |
|
"rewards/margins_max": 2.928384780883789, |
|
"rewards/margins_min": 0.8685151934623718, |
|
"rewards/margins_std": 1.456547737121582, |
|
"rewards/rejected": -3.5218708515167236, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 4.09453912322388e-07, |
|
"logits/chosen": -3.1358606815338135, |
|
"logits/rejected": -2.9630966186523438, |
|
"logps/chosen": -279.37298583984375, |
|
"logps/rejected": -529.50732421875, |
|
"loss": 0.3097, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.6641263961791992, |
|
"rewards/margins": 2.1226673126220703, |
|
"rewards/margins_max": 3.121896266937256, |
|
"rewards/margins_min": 1.1234381198883057, |
|
"rewards/margins_std": 1.413123369216919, |
|
"rewards/rejected": -3.7867934703826904, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 4.0533951181672137e-07, |
|
"logits/chosen": -3.190006971359253, |
|
"logits/rejected": -3.0020487308502197, |
|
"logps/chosen": -259.03961181640625, |
|
"logps/rejected": -524.3358154296875, |
|
"loss": 0.2348, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.5405361652374268, |
|
"rewards/margins": 1.977190613746643, |
|
"rewards/margins_max": 2.800302028656006, |
|
"rewards/margins_min": 1.1540789604187012, |
|
"rewards/margins_std": 1.164055585861206, |
|
"rewards/rejected": -3.5177268981933594, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 13.6875, |
|
"learning_rate": 4.011554032283242e-07, |
|
"logits/chosen": -3.20314359664917, |
|
"logits/rejected": -2.951345682144165, |
|
"logps/chosen": -268.9664001464844, |
|
"logps/rejected": -507.90948486328125, |
|
"loss": 0.2691, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.6380701065063477, |
|
"rewards/margins": 2.148407459259033, |
|
"rewards/margins_max": 3.0667290687561035, |
|
"rewards/margins_min": 1.2300859689712524, |
|
"rewards/margins_std": 1.2987029552459717, |
|
"rewards/rejected": -3.7864773273468018, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 11.0, |
|
"learning_rate": 3.9690346416183314e-07, |
|
"logits/chosen": -3.1131813526153564, |
|
"logits/rejected": -2.9457013607025146, |
|
"logps/chosen": -289.3182373046875, |
|
"logps/rejected": -539.81298828125, |
|
"loss": 0.2696, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.7127681970596313, |
|
"rewards/margins": 2.1862919330596924, |
|
"rewards/margins_max": 3.286137104034424, |
|
"rewards/margins_min": 1.08644700050354, |
|
"rewards/margins_std": 1.5554157495498657, |
|
"rewards/rejected": -3.8990604877471924, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 13.125, |
|
"learning_rate": 3.9258560266058334e-07, |
|
"logits/chosen": -3.1740329265594482, |
|
"logits/rejected": -3.020383834838867, |
|
"logps/chosen": -305.628173828125, |
|
"logps/rejected": -612.4352416992188, |
|
"loss": 0.2626, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.030916690826416, |
|
"rewards/margins": 2.430964946746826, |
|
"rewards/margins_max": 3.6694366931915283, |
|
"rewards/margins_min": 1.1924933195114136, |
|
"rewards/margins_std": 1.7514636516571045, |
|
"rewards/rejected": -4.461881160736084, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 3.882037563503806e-07, |
|
"logits/chosen": -3.1754307746887207, |
|
"logits/rejected": -2.973268985748291, |
|
"logps/chosen": -300.1325988769531, |
|
"logps/rejected": -587.9697265625, |
|
"loss": 0.2976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.968949556350708, |
|
"rewards/margins": 2.2382559776306152, |
|
"rewards/margins_max": 3.457249402999878, |
|
"rewards/margins_min": 1.0192627906799316, |
|
"rewards/margins_std": 1.7239166498184204, |
|
"rewards/rejected": -4.207205295562744, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 3.8375989156999803e-07, |
|
"logits/chosen": -3.1942696571350098, |
|
"logits/rejected": -3.031660556793213, |
|
"logps/chosen": -277.3270263671875, |
|
"logps/rejected": -628.7125244140625, |
|
"loss": 0.2381, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.681290864944458, |
|
"rewards/margins": 2.714406967163086, |
|
"rewards/margins_max": 3.8859188556671143, |
|
"rewards/margins_min": 1.5428953170776367, |
|
"rewards/margins_std": 1.6567678451538086, |
|
"rewards/rejected": -4.395698070526123, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 11.0, |
|
"learning_rate": 3.7925600248878865e-07, |
|
"logits/chosen": -3.0972537994384766, |
|
"logits/rejected": -2.915043592453003, |
|
"logps/chosen": -309.4454650878906, |
|
"logps/rejected": -582.4273071289062, |
|
"loss": 0.2835, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.8756641149520874, |
|
"rewards/margins": 2.2718684673309326, |
|
"rewards/margins_max": 3.4796149730682373, |
|
"rewards/margins_min": 1.064121961593628, |
|
"rewards/margins_std": 1.7080116271972656, |
|
"rewards/rejected": -4.1475324630737305, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.746941102118081e-07, |
|
"logits/chosen": -3.1687800884246826, |
|
"logits/rejected": -2.932328224182129, |
|
"logps/chosen": -321.97003173828125, |
|
"logps/rejected": -614.5501708984375, |
|
"loss": 0.2362, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.9460325241088867, |
|
"rewards/margins": 2.4259374141693115, |
|
"rewards/margins_max": 3.647425413131714, |
|
"rewards/margins_min": 1.2044496536254883, |
|
"rewards/margins_std": 1.7274446487426758, |
|
"rewards/rejected": -4.371970176696777, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 3.700762618728508e-07, |
|
"logits/chosen": -3.105429172515869, |
|
"logits/rejected": -2.8866400718688965, |
|
"logps/chosen": -302.71575927734375, |
|
"logps/rejected": -675.2667236328125, |
|
"loss": 0.2554, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.801047682762146, |
|
"rewards/margins": 2.991947650909424, |
|
"rewards/margins_max": 4.486474990844727, |
|
"rewards/margins_min": 1.4974205493927002, |
|
"rewards/margins_std": 2.1135807037353516, |
|
"rewards/rejected": -4.792995452880859, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 3.654045297158057e-07, |
|
"logits/chosen": -3.164304256439209, |
|
"logits/rejected": -2.9912238121032715, |
|
"logps/chosen": -284.3798828125, |
|
"logps/rejected": -547.4119873046875, |
|
"loss": 0.2154, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7268387079238892, |
|
"rewards/margins": 2.4997963905334473, |
|
"rewards/margins_max": 3.5198917388916016, |
|
"rewards/margins_min": 1.4797013998031616, |
|
"rewards/margins_std": 1.4426321983337402, |
|
"rewards/rejected": -4.226634979248047, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 3.606810101647431e-07, |
|
"logits/chosen": -3.19686222076416, |
|
"logits/rejected": -2.9458096027374268, |
|
"logps/chosen": -318.38238525390625, |
|
"logps/rejected": -583.8440551757812, |
|
"loss": 0.2514, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.952331304550171, |
|
"rewards/margins": 2.3931100368499756, |
|
"rewards/margins_max": 3.5836410522460938, |
|
"rewards/margins_min": 1.2025787830352783, |
|
"rewards/margins_std": 1.6836650371551514, |
|
"rewards/rejected": -4.3454413414001465, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.25, |
|
"learning_rate": 3.559078228831526e-07, |
|
"logits/chosen": -3.1194119453430176, |
|
"logits/rejected": -2.977457046508789, |
|
"logps/chosen": -285.14794921875, |
|
"logps/rejected": -596.258056640625, |
|
"loss": 0.2487, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8175990581512451, |
|
"rewards/margins": 2.7383248805999756, |
|
"rewards/margins_max": 3.9793529510498047, |
|
"rewards/margins_min": 1.4972972869873047, |
|
"rewards/margins_std": 1.7550785541534424, |
|
"rewards/rejected": -4.555924415588379, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.510871098227503e-07, |
|
"logits/chosen": -3.2031445503234863, |
|
"logits/rejected": -2.9235167503356934, |
|
"logps/chosen": -335.8504638671875, |
|
"logps/rejected": -606.4368896484375, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.097665309906006, |
|
"rewards/margins": 2.564664125442505, |
|
"rewards/margins_max": 3.574242115020752, |
|
"rewards/margins_min": 1.5550854206085205, |
|
"rewards/margins_std": 1.4277592897415161, |
|
"rewards/rejected": -4.662329196929932, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 3.462210342622853e-07, |
|
"logits/chosen": -3.1175758838653564, |
|
"logits/rejected": -2.900524616241455, |
|
"logps/chosen": -316.82696533203125, |
|
"logps/rejected": -691.4205932617188, |
|
"loss": 0.2222, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.02522349357605, |
|
"rewards/margins": 3.1580607891082764, |
|
"rewards/margins_max": 4.554937839508057, |
|
"rewards/margins_min": 1.761183500289917, |
|
"rewards/margins_std": 1.9754825830459595, |
|
"rewards/rejected": -5.183283805847168, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 3.4131177983677614e-07, |
|
"logits/chosen": -3.1615021228790283, |
|
"logits/rejected": -2.9676241874694824, |
|
"logps/chosen": -309.53033447265625, |
|
"logps/rejected": -637.4588623046875, |
|
"loss": 0.3259, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.038564920425415, |
|
"rewards/margins": 2.944957971572876, |
|
"rewards/margins_max": 4.9082746505737305, |
|
"rewards/margins_min": 0.9816409349441528, |
|
"rewards/margins_std": 2.7765493392944336, |
|
"rewards/rejected": -4.983522891998291, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 16.375, |
|
"learning_rate": 3.363615495576114e-07, |
|
"logits/chosen": -3.172344207763672, |
|
"logits/rejected": -2.932992935180664, |
|
"logps/chosen": -322.94537353515625, |
|
"logps/rejected": -613.6011962890625, |
|
"loss": 0.2776, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8910267353057861, |
|
"rewards/margins": 2.67472243309021, |
|
"rewards/margins_max": 4.281624794006348, |
|
"rewards/margins_min": 1.0678198337554932, |
|
"rewards/margins_std": 2.272503614425659, |
|
"rewards/rejected": -4.565749168395996, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 3.31372564823956e-07, |
|
"logits/chosen": -3.1593432426452637, |
|
"logits/rejected": -2.9213168621063232, |
|
"logps/chosen": -300.8254089355469, |
|
"logps/rejected": -554.28173828125, |
|
"loss": 0.2749, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.9807758331298828, |
|
"rewards/margins": 2.3717269897460938, |
|
"rewards/margins_max": 3.653285503387451, |
|
"rewards/margins_min": 1.0901682376861572, |
|
"rewards/margins_std": 1.8123977184295654, |
|
"rewards/rejected": -4.352502346038818, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 3.2634706442590585e-07, |
|
"logits/chosen": -3.1167142391204834, |
|
"logits/rejected": -2.943542718887329, |
|
"logps/chosen": -320.70477294921875, |
|
"logps/rejected": -626.4170532226562, |
|
"loss": 0.2486, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1256468296051025, |
|
"rewards/margins": 2.6652743816375732, |
|
"rewards/margins_max": 4.235686302185059, |
|
"rewards/margins_min": 1.0948628187179565, |
|
"rewards/margins_std": 2.220897674560547, |
|
"rewards/rejected": -4.790921211242676, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 3.2128730353983824e-07, |
|
"logits/chosen": -3.12074875831604, |
|
"logits/rejected": -2.914388418197632, |
|
"logps/chosen": -299.7171936035156, |
|
"logps/rejected": -626.5369262695312, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.9603103399276733, |
|
"rewards/margins": 2.961648464202881, |
|
"rewards/margins_max": 4.249537467956543, |
|
"rewards/margins_min": 1.673760175704956, |
|
"rewards/margins_std": 1.8213493824005127, |
|
"rewards/rejected": -4.921958923339844, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 3.161955527164092e-07, |
|
"logits/chosen": -3.1619656085968018, |
|
"logits/rejected": -2.9911611080169678, |
|
"logps/chosen": -314.0920104980469, |
|
"logps/rejected": -611.2723999023438, |
|
"loss": 0.2768, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0593760013580322, |
|
"rewards/margins": 2.617433547973633, |
|
"rewards/margins_max": 3.8496804237365723, |
|
"rewards/margins_min": 1.3851864337921143, |
|
"rewards/margins_std": 1.7426605224609375, |
|
"rewards/rejected": -4.676808834075928, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 3.11074096861651e-07, |
|
"logits/chosen": -3.1253132820129395, |
|
"logits/rejected": -2.948439836502075, |
|
"logps/chosen": -307.06842041015625, |
|
"logps/rejected": -656.5684814453125, |
|
"loss": 0.2793, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.9304516315460205, |
|
"rewards/margins": 3.045482873916626, |
|
"rewards/margins_max": 4.538393974304199, |
|
"rewards/margins_min": 1.5525717735290527, |
|
"rewards/margins_std": 2.111295223236084, |
|
"rewards/rejected": -4.9759345054626465, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 3.0592523421162923e-07, |
|
"logits/chosen": -3.14684796333313, |
|
"logits/rejected": -2.9417788982391357, |
|
"logps/chosen": -311.675048828125, |
|
"logps/rejected": -673.3569946289062, |
|
"loss": 0.2156, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.02833890914917, |
|
"rewards/margins": 3.267068386077881, |
|
"rewards/margins_max": 4.807781219482422, |
|
"rewards/margins_min": 1.7263562679290771, |
|
"rewards/margins_std": 2.178896427154541, |
|
"rewards/rejected": -5.295407295227051, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.0, |
|
"learning_rate": 3.0075127530111604e-07, |
|
"logits/chosen": -3.143428325653076, |
|
"logits/rejected": -2.8957009315490723, |
|
"logps/chosen": -306.6546936035156, |
|
"logps/rejected": -662.9694213867188, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.7990596294403076, |
|
"rewards/margins": 3.0265026092529297, |
|
"rewards/margins_max": 4.350580215454102, |
|
"rewards/margins_min": 1.7024250030517578, |
|
"rewards/margins_std": 1.8725284337997437, |
|
"rewards/rejected": -4.825562000274658, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 2.9555454192674635e-07, |
|
"logits/chosen": -3.1340126991271973, |
|
"logits/rejected": -2.944532871246338, |
|
"logps/chosen": -296.8179016113281, |
|
"logps/rejected": -654.5142822265625, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.8869205713272095, |
|
"rewards/margins": 3.0365943908691406, |
|
"rewards/margins_max": 4.509632110595703, |
|
"rewards/margins_min": 1.563556432723999, |
|
"rewards/margins_std": 2.0831899642944336, |
|
"rewards/rejected": -4.9235148429870605, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 2.903373661051188e-07, |
|
"logits/chosen": -3.221536636352539, |
|
"logits/rejected": -3.0096938610076904, |
|
"logps/chosen": -318.65667724609375, |
|
"logps/rejected": -705.4375, |
|
"loss": 0.1578, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.9611215591430664, |
|
"rewards/margins": 3.1713013648986816, |
|
"rewards/margins_max": 4.483790874481201, |
|
"rewards/margins_min": 1.858811378479004, |
|
"rewards/margins_std": 1.8561407327651978, |
|
"rewards/rejected": -5.13242244720459, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 2.851020890263113e-07, |
|
"logits/chosen": -3.156846046447754, |
|
"logits/rejected": -2.902345657348633, |
|
"logps/chosen": -333.4150085449219, |
|
"logps/rejected": -682.9464111328125, |
|
"loss": 0.2228, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0641589164733887, |
|
"rewards/margins": 3.215531826019287, |
|
"rewards/margins_max": 4.6603498458862305, |
|
"rewards/margins_min": 1.7707140445709229, |
|
"rewards/margins_std": 2.043280839920044, |
|
"rewards/rejected": -5.279690742492676, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.798510600032803e-07, |
|
"logits/chosen": -3.1748039722442627, |
|
"logits/rejected": -2.9051835536956787, |
|
"logps/chosen": -336.9716796875, |
|
"logps/rejected": -674.93212890625, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.1552605628967285, |
|
"rewards/margins": 3.1116385459899902, |
|
"rewards/margins_max": 4.737573623657227, |
|
"rewards/margins_min": 1.4857032299041748, |
|
"rewards/margins_std": 2.29941987991333, |
|
"rewards/rejected": -5.266899108886719, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 2.745866354176137e-07, |
|
"logits/chosen": -3.108320713043213, |
|
"logits/rejected": -2.8731348514556885, |
|
"logps/chosen": -337.5401916503906, |
|
"logps/rejected": -751.5953369140625, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.291527509689331, |
|
"rewards/margins": 3.7230491638183594, |
|
"rewards/margins_max": 5.445635795593262, |
|
"rewards/margins_min": 2.000462770462036, |
|
"rewards/margins_std": 2.436105251312256, |
|
"rewards/rejected": -6.014577388763428, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 2.693111776621136e-07, |
|
"logits/chosen": -3.124844789505005, |
|
"logits/rejected": -2.866428852081299, |
|
"logps/chosen": -368.060302734375, |
|
"logps/rejected": -777.5119018554688, |
|
"loss": 0.2207, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.525402069091797, |
|
"rewards/margins": 3.681495189666748, |
|
"rewards/margins_max": 5.426387786865234, |
|
"rewards/margins_min": 1.936603307723999, |
|
"rewards/margins_std": 2.4676499366760254, |
|
"rewards/rejected": -6.206897735595703, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 7.625, |
|
"learning_rate": 2.640270540806793e-07, |
|
"logits/chosen": -3.0661511421203613, |
|
"logits/rejected": -2.8748667240142822, |
|
"logps/chosen": -348.7222595214844, |
|
"logps/rejected": -695.6060791015625, |
|
"loss": 0.2213, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.3182373046875, |
|
"rewards/margins": 3.1713945865631104, |
|
"rewards/margins_max": 4.741438388824463, |
|
"rewards/margins_min": 1.6013505458831787, |
|
"rewards/margins_std": 2.2203774452209473, |
|
"rewards/rejected": -5.489631652832031, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 5.75, |
|
"learning_rate": 2.5873663590597063e-07, |
|
"logits/chosen": -3.138188362121582, |
|
"logits/rejected": -2.8532166481018066, |
|
"logps/chosen": -332.19305419921875, |
|
"logps/rejected": -696.7973022460938, |
|
"loss": 0.2109, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0587775707244873, |
|
"rewards/margins": 3.378685474395752, |
|
"rewards/margins_max": 4.750607967376709, |
|
"rewards/margins_min": 2.0067625045776367, |
|
"rewards/margins_std": 1.9401918649673462, |
|
"rewards/rejected": -5.43746280670166, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 21.875, |
|
"learning_rate": 2.5344229719532484e-07, |
|
"logits/chosen": -3.1494667530059814, |
|
"logits/rejected": -2.9058408737182617, |
|
"logps/chosen": -326.4560241699219, |
|
"logps/rejected": -656.7224731445312, |
|
"loss": 0.2174, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.120781421661377, |
|
"rewards/margins": 3.120711088180542, |
|
"rewards/margins_max": 4.613424777984619, |
|
"rewards/margins_min": 1.627997636795044, |
|
"rewards/margins_std": 2.111015796661377, |
|
"rewards/rejected": -5.24149227142334, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 12.0, |
|
"learning_rate": 2.481464137654068e-07, |
|
"logits/chosen": -3.1502187252044678, |
|
"logits/rejected": -2.901831865310669, |
|
"logps/chosen": -336.9727783203125, |
|
"logps/rejected": -758.7743530273438, |
|
"loss": 0.2516, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.269904375076294, |
|
"rewards/margins": 3.7340915203094482, |
|
"rewards/margins_max": 5.41564416885376, |
|
"rewards/margins_min": 2.052539110183716, |
|
"rewards/margins_std": 2.3780744075775146, |
|
"rewards/rejected": -6.003995895385742, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 8.875, |
|
"learning_rate": 2.428513621260683e-07, |
|
"logits/chosen": -3.167316436767578, |
|
"logits/rejected": -2.951566457748413, |
|
"logps/chosen": -350.9910583496094, |
|
"logps/rejected": -678.45703125, |
|
"loss": 0.2035, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.3870468139648438, |
|
"rewards/margins": 3.059993267059326, |
|
"rewards/margins_max": 4.496026039123535, |
|
"rewards/margins_min": 1.6239604949951172, |
|
"rewards/margins_std": 2.0308570861816406, |
|
"rewards/rejected": -5.44704008102417, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 15.5625, |
|
"learning_rate": 2.375595184138986e-07, |
|
"logits/chosen": -3.1135799884796143, |
|
"logits/rejected": -2.903838872909546, |
|
"logps/chosen": -323.6688232421875, |
|
"logps/rejected": -727.5667114257812, |
|
"loss": 0.265, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.1313717365264893, |
|
"rewards/margins": 3.3087127208709717, |
|
"rewards/margins_max": 4.953644752502441, |
|
"rewards/margins_min": 1.6637804508209229, |
|
"rewards/margins_std": 2.3262856006622314, |
|
"rewards/rejected": -5.440084934234619, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 17.375, |
|
"learning_rate": 2.3227325732593993e-07, |
|
"logits/chosen": -3.1387646198272705, |
|
"logits/rejected": -2.882930278778076, |
|
"logps/chosen": -320.67132568359375, |
|
"logps/rejected": -736.220458984375, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9799764156341553, |
|
"rewards/margins": 3.8522841930389404, |
|
"rewards/margins_max": 5.505575180053711, |
|
"rewards/margins_min": 2.198992967605591, |
|
"rewards/margins_std": 2.338106870651245, |
|
"rewards/rejected": -5.832260608673096, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 2.2699495105405114e-07, |
|
"logits/chosen": -3.074521541595459, |
|
"logits/rejected": -2.8952858448028564, |
|
"logps/chosen": -324.089111328125, |
|
"logps/rejected": -743.9403076171875, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.197805881500244, |
|
"rewards/margins": 3.5057437419891357, |
|
"rewards/margins_max": 5.082973957061768, |
|
"rewards/margins_min": 1.928513526916504, |
|
"rewards/margins_std": 2.2305400371551514, |
|
"rewards/rejected": -5.703549385070801, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 13.5, |
|
"learning_rate": 2.217269682203937e-07, |
|
"logits/chosen": -3.0950160026550293, |
|
"logits/rejected": -2.8411691188812256, |
|
"logps/chosen": -309.6170959472656, |
|
"logps/rejected": -720.2401123046875, |
|
"loss": 0.23, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.961046814918518, |
|
"rewards/margins": 3.8151397705078125, |
|
"rewards/margins_max": 5.517041206359863, |
|
"rewards/margins_min": 2.113239049911499, |
|
"rewards/margins_std": 2.406851291656494, |
|
"rewards/rejected": -5.776186943054199, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 2.164716728145213e-07, |
|
"logits/chosen": -3.1319289207458496, |
|
"logits/rejected": -2.9833462238311768, |
|
"logps/chosen": -353.3096618652344, |
|
"logps/rejected": -787.9588623046875, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.2338433265686035, |
|
"rewards/margins": 3.6919732093811035, |
|
"rewards/margins_max": 5.161503791809082, |
|
"rewards/margins_min": 2.222442626953125, |
|
"rewards/margins_std": 2.078230381011963, |
|
"rewards/rejected": -5.925817012786865, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 2.1123142313254704e-07, |
|
"logits/chosen": -3.119903087615967, |
|
"logits/rejected": -2.9215176105499268, |
|
"logps/chosen": -327.38165283203125, |
|
"logps/rejected": -698.1976318359375, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0507330894470215, |
|
"rewards/margins": 3.221263885498047, |
|
"rewards/margins_max": 4.741235733032227, |
|
"rewards/margins_min": 1.7012920379638672, |
|
"rewards/margins_std": 2.1495652198791504, |
|
"rewards/rejected": -5.271997451782227, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 2.0600857071886596e-07, |
|
"logits/chosen": -3.111619234085083, |
|
"logits/rejected": -2.886859655380249, |
|
"logps/chosen": -348.9073181152344, |
|
"logps/rejected": -710.0587768554688, |
|
"loss": 0.2172, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.1807503700256348, |
|
"rewards/margins": 3.2503695487976074, |
|
"rewards/margins_max": 4.767851829528809, |
|
"rewards/margins_min": 1.7328875064849854, |
|
"rewards/margins_std": 2.1460437774658203, |
|
"rewards/rejected": -5.431119441986084, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 9.625, |
|
"learning_rate": 2.0080545931090784e-07, |
|
"logits/chosen": -3.1535375118255615, |
|
"logits/rejected": -2.965236186981201, |
|
"logps/chosen": -344.1661071777344, |
|
"logps/rejected": -811.3351440429688, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.2291502952575684, |
|
"rewards/margins": 4.049864768981934, |
|
"rewards/margins_max": 5.990394592285156, |
|
"rewards/margins_min": 2.1093358993530273, |
|
"rewards/margins_std": 2.744323253631592, |
|
"rewards/rejected": -6.27901554107666, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 17.375, |
|
"learning_rate": 1.9562442378739238e-07, |
|
"logits/chosen": -3.125776767730713, |
|
"logits/rejected": -2.9081075191497803, |
|
"logps/chosen": -299.2303161621094, |
|
"logps/rejected": -708.6509399414062, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.9368846416473389, |
|
"rewards/margins": 3.5816776752471924, |
|
"rewards/margins_max": 5.060166835784912, |
|
"rewards/margins_min": 2.1031877994537354, |
|
"rewards/margins_std": 2.09089994430542, |
|
"rewards/rejected": -5.518561840057373, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 1.9046778912056043e-07, |
|
"logits/chosen": -3.1317784786224365, |
|
"logits/rejected": -2.9367737770080566, |
|
"logps/chosen": -297.7860412597656, |
|
"logps/rejected": -669.5913696289062, |
|
"loss": 0.2341, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.9930238723754883, |
|
"rewards/margins": 3.1364190578460693, |
|
"rewards/margins_max": 4.621811866760254, |
|
"rewards/margins_min": 1.6510257720947266, |
|
"rewards/margins_std": 2.10066294670105, |
|
"rewards/rejected": -5.129443168640137, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 1.8533786933285106e-07, |
|
"logits/chosen": -3.1418776512145996, |
|
"logits/rejected": -2.9141292572021484, |
|
"logps/chosen": -344.0506286621094, |
|
"logps/rejected": -775.5567626953125, |
|
"loss": 0.2484, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0711874961853027, |
|
"rewards/margins": 3.7657127380371094, |
|
"rewards/margins_max": 5.59161901473999, |
|
"rewards/margins_min": 1.9398069381713867, |
|
"rewards/margins_std": 2.582221031188965, |
|
"rewards/rejected": -5.836900234222412, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 1.8023696645849063e-07, |
|
"logits/chosen": -3.1590495109558105, |
|
"logits/rejected": -2.9567525386810303, |
|
"logps/chosen": -320.74176025390625, |
|
"logps/rejected": -711.3518676757812, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.144990921020508, |
|
"rewards/margins": 3.530397891998291, |
|
"rewards/margins_max": 5.047208309173584, |
|
"rewards/margins_min": 2.0135867595672607, |
|
"rewards/margins_std": 2.145094394683838, |
|
"rewards/rejected": -5.675388336181641, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 28.25, |
|
"learning_rate": 1.7516736951046394e-07, |
|
"logits/chosen": -3.1330792903900146, |
|
"logits/rejected": -2.947277545928955, |
|
"logps/chosen": -344.958251953125, |
|
"logps/rejected": -704.8740234375, |
|
"loss": 0.2298, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.3996665477752686, |
|
"rewards/margins": 3.01820707321167, |
|
"rewards/margins_max": 4.695876598358154, |
|
"rewards/margins_min": 1.3405380249023438, |
|
"rewards/margins_std": 2.3725826740264893, |
|
"rewards/rejected": -5.417874336242676, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 17.125, |
|
"learning_rate": 1.7013135345332651e-07, |
|
"logits/chosen": -3.1549530029296875, |
|
"logits/rejected": -2.8647735118865967, |
|
"logps/chosen": -316.10333251953125, |
|
"logps/rejected": -794.4426879882812, |
|
"loss": 0.2395, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.046635627746582, |
|
"rewards/margins": 3.969254970550537, |
|
"rewards/margins_max": 5.835241794586182, |
|
"rewards/margins_min": 2.103269577026367, |
|
"rewards/margins_std": 2.6389026641845703, |
|
"rewards/rejected": -6.0158915519714355, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 6.25, |
|
"learning_rate": 1.6513117818232216e-07, |
|
"logits/chosen": -3.1065542697906494, |
|
"logits/rejected": -2.925407648086548, |
|
"logps/chosen": -311.7358093261719, |
|
"logps/rejected": -697.4700927734375, |
|
"loss": 0.2107, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.1502268314361572, |
|
"rewards/margins": 3.424119234085083, |
|
"rewards/margins_max": 5.143943786621094, |
|
"rewards/margins_min": 1.7042953968048096, |
|
"rewards/margins_std": 2.4321987628936768, |
|
"rewards/rejected": -5.574346542358398, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 1.6016908750926284e-07, |
|
"logits/chosen": -3.199125289916992, |
|
"logits/rejected": -2.9049952030181885, |
|
"logps/chosen": -323.9697265625, |
|
"logps/rejected": -733.3807373046875, |
|
"loss": 0.2562, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1301119327545166, |
|
"rewards/margins": 3.5133304595947266, |
|
"rewards/margins_max": 5.211588382720947, |
|
"rewards/margins_min": 1.8150726556777954, |
|
"rewards/margins_std": 2.4016995429992676, |
|
"rewards/rejected": -5.643442630767822, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 1.5524730815562517e-07, |
|
"logits/chosen": -3.093618631362915, |
|
"logits/rejected": -2.912240743637085, |
|
"logps/chosen": -318.77142333984375, |
|
"logps/rejected": -740.55126953125, |
|
"loss": 0.1697, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.103801727294922, |
|
"rewards/margins": 3.8205482959747314, |
|
"rewards/margins_max": 5.348752975463867, |
|
"rewards/margins_min": 2.2923431396484375, |
|
"rewards/margins_std": 2.161208152770996, |
|
"rewards/rejected": -5.924350261688232, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.5036804875331733e-07, |
|
"logits/chosen": -3.138913869857788, |
|
"logits/rejected": -2.938734292984009, |
|
"logps/chosen": -347.51422119140625, |
|
"logps/rejected": -744.4155883789062, |
|
"loss": 0.2236, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.3483636379241943, |
|
"rewards/margins": 3.551708936691284, |
|
"rewards/margins_max": 5.2547502517700195, |
|
"rewards/margins_min": 1.8486677408218384, |
|
"rewards/margins_std": 2.408463954925537, |
|
"rewards/rejected": -5.900073051452637, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 1.455334988535621e-07, |
|
"logits/chosen": -3.1878058910369873, |
|
"logits/rejected": -2.8721015453338623, |
|
"logps/chosen": -320.9921875, |
|
"logps/rejected": -697.1282958984375, |
|
"loss": 0.2256, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.09895658493042, |
|
"rewards/margins": 3.339977264404297, |
|
"rewards/margins_max": 4.8780035972595215, |
|
"rewards/margins_min": 1.8019511699676514, |
|
"rewards/margins_std": 2.1750974655151367, |
|
"rewards/rejected": -5.438933849334717, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.4074582794434387e-07, |
|
"logits/chosen": -3.1153368949890137, |
|
"logits/rejected": -2.913144826889038, |
|
"logps/chosen": -312.5743103027344, |
|
"logps/rejected": -745.4815673828125, |
|
"loss": 0.1932, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.063194751739502, |
|
"rewards/margins": 3.72094988822937, |
|
"rewards/margins_max": 5.494509696960449, |
|
"rewards/margins_min": 1.9473907947540283, |
|
"rewards/margins_std": 2.5081920623779297, |
|
"rewards/rejected": -5.784144878387451, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 10.0, |
|
"learning_rate": 1.36007184476858e-07, |
|
"logits/chosen": -3.132167339324951, |
|
"logits/rejected": -2.922752857208252, |
|
"logps/chosen": -326.2919921875, |
|
"logps/rejected": -773.3209228515625, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.1564865112304688, |
|
"rewards/margins": 3.9871773719787598, |
|
"rewards/margins_max": 6.1475114822387695, |
|
"rewards/margins_min": 1.8268429040908813, |
|
"rewards/margins_std": 3.0551745891571045, |
|
"rewards/rejected": -6.143664360046387, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 17.5, |
|
"learning_rate": 1.313196949014001e-07, |
|
"logits/chosen": -3.1706137657165527, |
|
"logits/rejected": -2.879790782928467, |
|
"logps/chosen": -335.986328125, |
|
"logps/rejected": -716.2806396484375, |
|
"loss": 0.251, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1077635288238525, |
|
"rewards/margins": 3.5597450733184814, |
|
"rewards/margins_max": 5.525472164154053, |
|
"rewards/margins_min": 1.5940181016921997, |
|
"rewards/margins_std": 2.7799577713012695, |
|
"rewards/rejected": -5.667508125305176, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 22.0, |
|
"learning_rate": 1.266854627131295e-07, |
|
"logits/chosen": -3.135575771331787, |
|
"logits/rejected": -2.9847023487091064, |
|
"logps/chosen": -302.3170471191406, |
|
"logps/rejected": -696.525390625, |
|
"loss": 0.2497, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.0112688541412354, |
|
"rewards/margins": 3.4218602180480957, |
|
"rewards/margins_max": 4.963052749633789, |
|
"rewards/margins_min": 1.8806670904159546, |
|
"rewards/margins_std": 2.1795761585235596, |
|
"rewards/rejected": -5.433128833770752, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 1.2210656750813203e-07, |
|
"logits/chosen": -3.0703582763671875, |
|
"logits/rejected": -2.871122121810913, |
|
"logps/chosen": -362.7223205566406, |
|
"logps/rejected": -770.84912109375, |
|
"loss": 0.2566, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.4078450202941895, |
|
"rewards/margins": 3.343618392944336, |
|
"rewards/margins_max": 5.4634623527526855, |
|
"rewards/margins_min": 1.2237741947174072, |
|
"rewards/margins_std": 2.9979124069213867, |
|
"rewards/rejected": -5.751462936401367, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 8.0, |
|
"learning_rate": 1.1758506405020885e-07, |
|
"logits/chosen": -3.186342716217041, |
|
"logits/rejected": -2.8949544429779053, |
|
"logps/chosen": -336.4505920410156, |
|
"logps/rejected": -697.22412109375, |
|
"loss": 0.1831, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.248119592666626, |
|
"rewards/margins": 3.425248384475708, |
|
"rewards/margins_max": 5.014912128448486, |
|
"rewards/margins_min": 1.8355858325958252, |
|
"rewards/margins_std": 2.2481231689453125, |
|
"rewards/rejected": -5.673368453979492, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 1.1312298134880799e-07, |
|
"logits/chosen": -3.218524217605591, |
|
"logits/rejected": -2.952272891998291, |
|
"logps/chosen": -343.3241882324219, |
|
"logps/rejected": -660.6552734375, |
|
"loss": 0.2477, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.2785308361053467, |
|
"rewards/margins": 2.914747714996338, |
|
"rewards/margins_max": 4.317625999450684, |
|
"rewards/margins_min": 1.511869192123413, |
|
"rewards/margins_std": 1.983970046043396, |
|
"rewards/rejected": -5.193279266357422, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 1.0872232174851281e-07, |
|
"logits/chosen": -3.1693577766418457, |
|
"logits/rejected": -2.916531562805176, |
|
"logps/chosen": -357.85003662109375, |
|
"logps/rejected": -758.5368041992188, |
|
"loss": 0.2052, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.358463764190674, |
|
"rewards/margins": 3.578223705291748, |
|
"rewards/margins_max": 5.278543472290039, |
|
"rewards/margins_min": 1.8779041767120361, |
|
"rewards/margins_std": 2.4046151638031006, |
|
"rewards/rejected": -5.936688423156738, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 1.0438506003049735e-07, |
|
"logits/chosen": -3.1248936653137207, |
|
"logits/rejected": -2.8823959827423096, |
|
"logps/chosen": -327.49066162109375, |
|
"logps/rejected": -708.4923095703125, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.1310365200042725, |
|
"rewards/margins": 3.5814547538757324, |
|
"rewards/margins_max": 5.441410064697266, |
|
"rewards/margins_min": 1.7214996814727783, |
|
"rewards/margins_std": 2.630373954772949, |
|
"rewards/rejected": -5.712491035461426, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 17.125, |
|
"learning_rate": 1.0011314252634908e-07, |
|
"logits/chosen": -3.1236038208007812, |
|
"logits/rejected": -2.9256346225738525, |
|
"logps/chosen": -326.3320617675781, |
|
"logps/rejected": -626.0516357421875, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.1023125648498535, |
|
"rewards/margins": 2.722761631011963, |
|
"rewards/margins_max": 3.6524386405944824, |
|
"rewards/margins_min": 1.793083906173706, |
|
"rewards/margins_std": 1.3147621154785156, |
|
"rewards/rejected": -4.825074195861816, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 8.25, |
|
"learning_rate": 9.590848624465989e-08, |
|
"logits/chosen": -3.152843475341797, |
|
"logits/rejected": -2.9420909881591797, |
|
"logps/chosen": -330.8764343261719, |
|
"logps/rejected": -701.1426391601562, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0373191833496094, |
|
"rewards/margins": 3.338742733001709, |
|
"rewards/margins_max": 4.737041473388672, |
|
"rewards/margins_min": 1.9404443502426147, |
|
"rewards/margins_std": 1.9774929285049438, |
|
"rewards/rejected": -5.376061916351318, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 6.78125, |
|
"learning_rate": 9.17729780107746e-08, |
|
"logits/chosen": -3.130746603012085, |
|
"logits/rejected": -2.9680044651031494, |
|
"logps/chosen": -304.0748596191406, |
|
"logps/rejected": -809.8206787109375, |
|
"loss": 0.1795, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.9301007986068726, |
|
"rewards/margins": 4.500051021575928, |
|
"rewards/margins_max": 6.72606897354126, |
|
"rewards/margins_min": 2.274033308029175, |
|
"rewards/margins_std": 3.1480648517608643, |
|
"rewards/rejected": -6.430152893066406, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 8.770847362008426e-08, |
|
"logits/chosen": -3.1518819332122803, |
|
"logits/rejected": -2.929631233215332, |
|
"logps/chosen": -314.208251953125, |
|
"logps/rejected": -736.5687255859375, |
|
"loss": 0.1798, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0510923862457275, |
|
"rewards/margins": 3.5633749961853027, |
|
"rewards/margins_max": 5.141615867614746, |
|
"rewards/margins_min": 1.9851341247558594, |
|
"rewards/margins_std": 2.2319698333740234, |
|
"rewards/rejected": -5.614466667175293, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 8.371679700524476e-08, |
|
"logits/chosen": -3.0984597206115723, |
|
"logits/rejected": -2.8767600059509277, |
|
"logps/chosen": -379.36767578125, |
|
"logps/rejected": -761.9058837890625, |
|
"loss": 0.3184, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.6515004634857178, |
|
"rewards/margins": 3.444476366043091, |
|
"rewards/margins_max": 5.3620100021362305, |
|
"rewards/margins_min": 1.526942491531372, |
|
"rewards/margins_std": 2.7118022441864014, |
|
"rewards/rejected": -6.095976829528809, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 8.5, |
|
"learning_rate": 7.979973941769255e-08, |
|
"logits/chosen": -3.072702646255493, |
|
"logits/rejected": -2.9324452877044678, |
|
"logps/chosen": -315.5204162597656, |
|
"logps/rejected": -710.9278564453125, |
|
"loss": 0.1895, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.1958985328674316, |
|
"rewards/margins": 3.221574068069458, |
|
"rewards/margins_max": 4.918590068817139, |
|
"rewards/margins_min": 1.5245568752288818, |
|
"rewards/margins_std": 2.399944305419922, |
|
"rewards/rejected": -5.417471885681152, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 7.595905862382704e-08, |
|
"logits/chosen": -3.1583075523376465, |
|
"logits/rejected": -2.929879665374756, |
|
"logps/chosen": -333.9653015136719, |
|
"logps/rejected": -782.7625732421875, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.2348623275756836, |
|
"rewards/margins": 3.9564566612243652, |
|
"rewards/margins_max": 5.616944313049316, |
|
"rewards/margins_min": 2.295968532562256, |
|
"rewards/margins_std": 2.3482847213745117, |
|
"rewards/rejected": -6.191318988800049, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 8.5, |
|
"learning_rate": 7.219647811621874e-08, |
|
"logits/chosen": -3.1165127754211426, |
|
"logits/rejected": -3.0133025646209717, |
|
"logps/chosen": -289.7486877441406, |
|
"logps/rejected": -685.6094970703125, |
|
"loss": 0.2154, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0258007049560547, |
|
"rewards/margins": 3.255826234817505, |
|
"rewards/margins_max": 4.866274833679199, |
|
"rewards/margins_min": 1.6453778743743896, |
|
"rewards/margins_std": 2.277517795562744, |
|
"rewards/rejected": -5.2816267013549805, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 6.851368634019777e-08, |
|
"logits/chosen": -3.133932590484619, |
|
"logits/rejected": -2.8580222129821777, |
|
"logps/chosen": -356.24627685546875, |
|
"logps/rejected": -722.9967041015625, |
|
"loss": 0.2697, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.307548999786377, |
|
"rewards/margins": 3.4757111072540283, |
|
"rewards/margins_max": 5.241189479827881, |
|
"rewards/margins_min": 1.7102329730987549, |
|
"rewards/margins_std": 2.4967634677886963, |
|
"rewards/rejected": -5.783260822296143, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 6.491233593616971e-08, |
|
"logits/chosen": -3.173882484436035, |
|
"logits/rejected": -2.9467577934265137, |
|
"logps/chosen": -351.42913818359375, |
|
"logps/rejected": -698.7190551757812, |
|
"loss": 0.2373, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.2619590759277344, |
|
"rewards/margins": 3.2130227088928223, |
|
"rewards/margins_max": 4.786438941955566, |
|
"rewards/margins_min": 1.6396061182022095, |
|
"rewards/margins_std": 2.225146770477295, |
|
"rewards/rejected": -5.474982261657715, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 6.139404299799863e-08, |
|
"logits/chosen": -3.14751935005188, |
|
"logits/rejected": -2.9100093841552734, |
|
"logps/chosen": -293.3255920410156, |
|
"logps/rejected": -721.6419677734375, |
|
"loss": 0.2033, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9109737873077393, |
|
"rewards/margins": 4.052936553955078, |
|
"rewards/margins_max": 6.207828044891357, |
|
"rewards/margins_min": 1.8980449438095093, |
|
"rewards/margins_std": 3.0474772453308105, |
|
"rewards/rejected": -5.963910102844238, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 5.796038634779057e-08, |
|
"logits/chosen": -3.1226589679718018, |
|
"logits/rejected": -2.879516363143921, |
|
"logps/chosen": -336.1898498535156, |
|
"logps/rejected": -733.499267578125, |
|
"loss": 0.2087, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.25373911857605, |
|
"rewards/margins": 3.4867053031921387, |
|
"rewards/margins_max": 5.092923164367676, |
|
"rewards/margins_min": 1.8804876804351807, |
|
"rewards/margins_std": 2.2715346813201904, |
|
"rewards/rejected": -5.740444660186768, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 9.875, |
|
"learning_rate": 5.4612906827402466e-08, |
|
"logits/chosen": -3.1812329292297363, |
|
"logits/rejected": -2.9600119590759277, |
|
"logps/chosen": -335.65435791015625, |
|
"logps/rejected": -723.2822265625, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -2.1229610443115234, |
|
"rewards/margins": 3.5574288368225098, |
|
"rewards/margins_max": 4.972989082336426, |
|
"rewards/margins_min": 2.141868829727173, |
|
"rewards/margins_std": 2.001904249191284, |
|
"rewards/rejected": -5.680389404296875, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 15.75, |
|
"learning_rate": 5.1353106606994514e-08, |
|
"logits/chosen": -3.166288375854492, |
|
"logits/rejected": -2.9235687255859375, |
|
"logps/chosen": -365.612060546875, |
|
"logps/rejected": -702.6682739257812, |
|
"loss": 0.2482, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.405470371246338, |
|
"rewards/margins": 3.1533291339874268, |
|
"rewards/margins_max": 5.086661338806152, |
|
"rewards/margins_min": 1.2199971675872803, |
|
"rewards/margins_std": 2.7341442108154297, |
|
"rewards/rejected": -5.558799743652344, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 7.75, |
|
"learning_rate": 4.818244851093642e-08, |
|
"logits/chosen": -3.1529836654663086, |
|
"logits/rejected": -2.902099609375, |
|
"logps/chosen": -335.6700439453125, |
|
"logps/rejected": -789.0332641601562, |
|
"loss": 0.183, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0865893363952637, |
|
"rewards/margins": 3.7543671131134033, |
|
"rewards/margins_max": 5.634464740753174, |
|
"rewards/margins_min": 1.874269723892212, |
|
"rewards/margins_std": 2.6588597297668457, |
|
"rewards/rejected": -5.840956211090088, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 4.5102355361369607e-08, |
|
"logits/chosen": -3.128056764602661, |
|
"logits/rejected": -2.8609917163848877, |
|
"logps/chosen": -304.3298034667969, |
|
"logps/rejected": -660.6272583007812, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.9923508167266846, |
|
"rewards/margins": 3.305687665939331, |
|
"rewards/margins_max": 4.834566593170166, |
|
"rewards/margins_min": 1.7768090963363647, |
|
"rewards/margins_std": 2.162160873413086, |
|
"rewards/rejected": -5.298038959503174, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 14.0, |
|
"learning_rate": 4.21142093397209e-08, |
|
"logits/chosen": -3.1291439533233643, |
|
"logits/rejected": -2.9123919010162354, |
|
"logps/chosen": -331.235107421875, |
|
"logps/rejected": -673.1749877929688, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.2359235286712646, |
|
"rewards/margins": 3.1155121326446533, |
|
"rewards/margins_max": 4.511401176452637, |
|
"rewards/margins_min": 1.7196223735809326, |
|
"rewards/margins_std": 1.974085807800293, |
|
"rewards/rejected": -5.351435661315918, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 5.25, |
|
"learning_rate": 3.921935136645327e-08, |
|
"logits/chosen": -3.1134796142578125, |
|
"logits/rejected": -2.8933892250061035, |
|
"logps/chosen": -319.20086669921875, |
|
"logps/rejected": -797.2918701171875, |
|
"loss": 0.1232, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -2.06329607963562, |
|
"rewards/margins": 4.198972225189209, |
|
"rewards/margins_max": 6.017431259155273, |
|
"rewards/margins_min": 2.3805129528045654, |
|
"rewards/margins_std": 2.5716898441314697, |
|
"rewards/rejected": -6.26226806640625, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 3.6419080499331986e-08, |
|
"logits/chosen": -3.1291165351867676, |
|
"logits/rejected": -2.9171788692474365, |
|
"logps/chosen": -314.25543212890625, |
|
"logps/rejected": -668.8634643554688, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.037966251373291, |
|
"rewards/margins": 3.3993752002716064, |
|
"rewards/margins_max": 4.708580017089844, |
|
"rewards/margins_min": 2.090170383453369, |
|
"rewards/margins_std": 1.8514951467514038, |
|
"rewards/rejected": -5.437341213226318, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 3.371465335047713e-08, |
|
"logits/chosen": -3.1593098640441895, |
|
"logits/rejected": -2.941859722137451, |
|
"logps/chosen": -320.30194091796875, |
|
"logps/rejected": -844.5147705078125, |
|
"loss": 0.1994, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.0999464988708496, |
|
"rewards/margins": 4.604376316070557, |
|
"rewards/margins_max": 6.516266822814941, |
|
"rewards/margins_min": 2.6924843788146973, |
|
"rewards/margins_std": 2.7038230895996094, |
|
"rewards/rejected": -6.70432186126709, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 3.110728352246311e-08, |
|
"logits/chosen": -3.1501519680023193, |
|
"logits/rejected": -2.896111011505127, |
|
"logps/chosen": -315.76397705078125, |
|
"logps/rejected": -660.5819702148438, |
|
"loss": 0.2063, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.091414213180542, |
|
"rewards/margins": 3.229884624481201, |
|
"rewards/margins_max": 4.599147796630859, |
|
"rewards/margins_min": 1.8606210947036743, |
|
"rewards/margins_std": 1.9364306926727295, |
|
"rewards/rejected": -5.321299076080322, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 4.0, |
|
"learning_rate": 2.8598141063718217e-08, |
|
"logits/chosen": -3.1871862411499023, |
|
"logits/rejected": -2.935353994369507, |
|
"logps/chosen": -326.63946533203125, |
|
"logps/rejected": -735.0399780273438, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.211333990097046, |
|
"rewards/margins": 3.781127452850342, |
|
"rewards/margins_max": 5.652678489685059, |
|
"rewards/margins_min": 1.9095767736434937, |
|
"rewards/margins_std": 2.646772861480713, |
|
"rewards/rejected": -5.992461681365967, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.8125, |
|
"learning_rate": 2.6188351943469966e-08, |
|
"logits/chosen": -3.1684048175811768, |
|
"logits/rejected": -2.9018070697784424, |
|
"logps/chosen": -379.37646484375, |
|
"logps/rejected": -710.8772583007812, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.436944007873535, |
|
"rewards/margins": 3.187828540802002, |
|
"rewards/margins_max": 4.914183139801025, |
|
"rewards/margins_min": 1.461474061012268, |
|
"rewards/margins_std": 2.4414334297180176, |
|
"rewards/rejected": -5.624772071838379, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 2.3878997546469577e-08, |
|
"logits/chosen": -3.173720121383667, |
|
"logits/rejected": -2.9183566570281982, |
|
"logps/chosen": -352.3770446777344, |
|
"logps/rejected": -733.684814453125, |
|
"loss": 0.1905, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2058091163635254, |
|
"rewards/margins": 3.5168144702911377, |
|
"rewards/margins_max": 4.97069787979126, |
|
"rewards/margins_min": 2.062930107116699, |
|
"rewards/margins_std": 2.056102752685547, |
|
"rewards/rejected": -5.722623348236084, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 2.1671114187724603e-08, |
|
"logits/chosen": -3.183567523956299, |
|
"logits/rejected": -2.9641623497009277, |
|
"logps/chosen": -312.87066650390625, |
|
"logps/rejected": -769.3661499023438, |
|
"loss": 0.2043, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.0123302936553955, |
|
"rewards/margins": 4.006140232086182, |
|
"rewards/margins_max": 5.548596382141113, |
|
"rewards/margins_min": 2.4636826515197754, |
|
"rewards/margins_std": 2.181363582611084, |
|
"rewards/rejected": -6.018470287322998, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 1.9565692647456e-08, |
|
"logits/chosen": -3.114928722381592, |
|
"logits/rejected": -2.891458749771118, |
|
"logps/chosen": -323.8619384765625, |
|
"logps/rejected": -699.9288940429688, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.1312808990478516, |
|
"rewards/margins": 3.579425096511841, |
|
"rewards/margins_max": 5.033545017242432, |
|
"rewards/margins_min": 2.125305414199829, |
|
"rewards/margins_std": 2.0564355850219727, |
|
"rewards/rejected": -5.710705757141113, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 21.625, |
|
"learning_rate": 1.7563677726488645e-08, |
|
"logits/chosen": -3.192821502685547, |
|
"logits/rejected": -2.9360315799713135, |
|
"logps/chosen": -325.11724853515625, |
|
"logps/rejected": -732.0574340820312, |
|
"loss": 0.1636, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.197850465774536, |
|
"rewards/margins": 3.762909412384033, |
|
"rewards/margins_max": 5.297985553741455, |
|
"rewards/margins_min": 2.2278337478637695, |
|
"rewards/margins_std": 2.170924663543701, |
|
"rewards/rejected": -5.96075963973999, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 16.625, |
|
"learning_rate": 1.5665967822275417e-08, |
|
"logits/chosen": -3.124788999557495, |
|
"logits/rejected": -2.9426681995391846, |
|
"logps/chosen": -326.2402648925781, |
|
"logps/rejected": -790.9951782226562, |
|
"loss": 0.2929, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.1549830436706543, |
|
"rewards/margins": 3.9306769371032715, |
|
"rewards/margins_max": 5.507823944091797, |
|
"rewards/margins_min": 2.353529691696167, |
|
"rewards/margins_std": 2.2304234504699707, |
|
"rewards/rejected": -6.085659980773926, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 1.3873414525744115e-08, |
|
"logits/chosen": -3.157076120376587, |
|
"logits/rejected": -2.8868565559387207, |
|
"logps/chosen": -340.90924072265625, |
|
"logps/rejected": -718.08740234375, |
|
"loss": 0.1802, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0659420490264893, |
|
"rewards/margins": 3.228773593902588, |
|
"rewards/margins_max": 4.96280574798584, |
|
"rewards/margins_min": 1.4947407245635986, |
|
"rewards/margins_std": 2.4522926807403564, |
|
"rewards/rejected": -5.29471492767334, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 1.2186822239149158e-08, |
|
"logits/chosen": -3.1129629611968994, |
|
"logits/rejected": -2.8585550785064697, |
|
"logps/chosen": -352.6842346191406, |
|
"logps/rejected": -787.0264892578125, |
|
"loss": 0.2004, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.368502616882324, |
|
"rewards/margins": 3.9364609718322754, |
|
"rewards/margins_max": 5.6117143630981445, |
|
"rewards/margins_min": 2.2612078189849854, |
|
"rewards/margins_std": 2.369166135787964, |
|
"rewards/rejected": -6.304963111877441, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.875, |
|
"learning_rate": 1.0606947815098467e-08, |
|
"logits/chosen": -3.1238672733306885, |
|
"logits/rejected": -2.8686344623565674, |
|
"logps/chosen": -307.52020263671875, |
|
"logps/rejected": -777.89306640625, |
|
"loss": 0.217, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.9263112545013428, |
|
"rewards/margins": 4.194777488708496, |
|
"rewards/margins_max": 6.070175647735596, |
|
"rewards/margins_min": 2.3193793296813965, |
|
"rewards/margins_std": 2.6522135734558105, |
|
"rewards/rejected": -6.121088981628418, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 37.0, |
|
"learning_rate": 9.134500216918722e-09, |
|
"logits/chosen": -3.1342949867248535, |
|
"logits/rejected": -2.919426441192627, |
|
"logps/chosen": -335.2420959472656, |
|
"logps/rejected": -697.2754516601562, |
|
"loss": 0.2387, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.338940143585205, |
|
"rewards/margins": 3.315152645111084, |
|
"rewards/margins_max": 4.826213836669922, |
|
"rewards/margins_min": 1.8040918111801147, |
|
"rewards/margins_std": 2.136962652206421, |
|
"rewards/rejected": -5.654092311859131, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 7.770140200510338e-09, |
|
"logits/chosen": -3.099794626235962, |
|
"logits/rejected": -2.881441593170166, |
|
"logps/chosen": -366.3176574707031, |
|
"logps/rejected": -856.1891479492188, |
|
"loss": 0.1464, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.4581868648529053, |
|
"rewards/margins": 4.456292152404785, |
|
"rewards/margins_max": 6.471911430358887, |
|
"rewards/margins_min": 2.440671682357788, |
|
"rewards/margins_std": 2.8505172729492188, |
|
"rewards/rejected": -6.9144792556762695, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 18.75, |
|
"learning_rate": 6.5144800178352776e-09, |
|
"logits/chosen": -3.127145290374756, |
|
"logits/rejected": -2.9245965480804443, |
|
"logps/chosen": -357.00323486328125, |
|
"logps/rejected": -728.6698608398438, |
|
"loss": 0.2302, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.4451308250427246, |
|
"rewards/margins": 3.3258328437805176, |
|
"rewards/margins_max": 4.757521152496338, |
|
"rewards/margins_min": 1.894144058227539, |
|
"rewards/margins_std": 2.0247135162353516, |
|
"rewards/rejected": -5.7709641456604, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 5.368083142171409e-09, |
|
"logits/chosen": -3.1004772186279297, |
|
"logits/rejected": -2.87990403175354, |
|
"logps/chosen": -353.71478271484375, |
|
"logps/rejected": -771.7055053710938, |
|
"loss": 0.2079, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.3986122608184814, |
|
"rewards/margins": 3.5793299674987793, |
|
"rewards/margins_max": 5.495848178863525, |
|
"rewards/margins_min": 1.6628128290176392, |
|
"rewards/margins_std": 2.710364580154419, |
|
"rewards/rejected": -5.97794246673584, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 4.331464015255526e-09, |
|
"logits/chosen": -3.149285078048706, |
|
"logits/rejected": -2.8654189109802246, |
|
"logps/chosen": -333.75341796875, |
|
"logps/rejected": -872.8679809570312, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.111713409423828, |
|
"rewards/margins": 4.595676422119141, |
|
"rewards/margins_max": 6.780759334564209, |
|
"rewards/margins_min": 2.410592555999756, |
|
"rewards/margins_std": 3.090175151824951, |
|
"rewards/rejected": -6.707389831542969, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 15.375, |
|
"learning_rate": 3.4050878164293695e-09, |
|
"logits/chosen": -3.1380608081817627, |
|
"logits/rejected": -2.8875515460968018, |
|
"logps/chosen": -356.3845520019531, |
|
"logps/rejected": -684.7333984375, |
|
"loss": 0.2406, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.311772108078003, |
|
"rewards/margins": 2.945913553237915, |
|
"rewards/margins_max": 4.341352462768555, |
|
"rewards/margins_min": 1.5504741668701172, |
|
"rewards/margins_std": 1.9734489917755127, |
|
"rewards/rejected": -5.257685661315918, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.375, |
|
"learning_rate": 2.5893702538920537e-09, |
|
"logits/chosen": -3.112511396408081, |
|
"logits/rejected": -2.9152560234069824, |
|
"logps/chosen": -335.6734924316406, |
|
"logps/rejected": -702.3567504882812, |
|
"loss": 0.2425, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2349658012390137, |
|
"rewards/margins": 3.2430100440979004, |
|
"rewards/margins_max": 4.741458892822266, |
|
"rewards/margins_min": 1.7445614337921143, |
|
"rewards/margins_std": 2.119126081466675, |
|
"rewards/rejected": -5.477975845336914, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 1.884677378152372e-09, |
|
"logits/chosen": -3.1265830993652344, |
|
"logits/rejected": -2.887659788131714, |
|
"logps/chosen": -346.2403869628906, |
|
"logps/rejected": -709.3991088867188, |
|
"loss": 0.2288, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.28578782081604, |
|
"rewards/margins": 3.251765727996826, |
|
"rewards/margins_max": 5.136622428894043, |
|
"rewards/margins_min": 1.3669096231460571, |
|
"rewards/margins_std": 2.6655895709991455, |
|
"rewards/rejected": -5.5375542640686035, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 1.2913254177648325e-09, |
|
"logits/chosen": -3.1185574531555176, |
|
"logits/rejected": -2.887942314147949, |
|
"logps/chosen": -304.56109619140625, |
|
"logps/rejected": -669.3530883789062, |
|
"loss": 0.2095, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -2.0278687477111816, |
|
"rewards/margins": 3.3642592430114746, |
|
"rewards/margins_max": 5.217440128326416, |
|
"rewards/margins_min": 1.511077880859375, |
|
"rewards/margins_std": 2.6207940578460693, |
|
"rewards/rejected": -5.39212703704834, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 8.095806374232295e-10, |
|
"logits/chosen": -3.1527209281921387, |
|
"logits/rejected": -2.837277889251709, |
|
"logps/chosen": -360.6504211425781, |
|
"logps/rejected": -759.2733154296875, |
|
"loss": 0.2634, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.213390827178955, |
|
"rewards/margins": 3.6566321849823, |
|
"rewards/margins_max": 5.329567909240723, |
|
"rewards/margins_min": 1.9836972951889038, |
|
"rewards/margins_std": 2.3658881187438965, |
|
"rewards/rejected": -5.870023250579834, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 15.4375, |
|
"learning_rate": 4.3965921847513576e-10, |
|
"logits/chosen": -3.155099391937256, |
|
"logits/rejected": -2.9255661964416504, |
|
"logps/chosen": -323.4555358886719, |
|
"logps/rejected": -777.19677734375, |
|
"loss": 0.1381, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.105621337890625, |
|
"rewards/margins": 4.154127597808838, |
|
"rewards/margins_max": 5.744351387023926, |
|
"rewards/margins_min": 2.563903570175171, |
|
"rewards/margins_std": 2.2489163875579834, |
|
"rewards/rejected": -6.259748935699463, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 1.8172716191142134e-10, |
|
"logits/chosen": -3.1000237464904785, |
|
"logits/rejected": -2.900533676147461, |
|
"logps/chosen": -336.09130859375, |
|
"logps/rejected": -730.9088745117188, |
|
"loss": 0.2025, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.1460416316986084, |
|
"rewards/margins": 3.563811779022217, |
|
"rewards/margins_max": 5.688532829284668, |
|
"rewards/margins_min": 1.4390910863876343, |
|
"rewards/margins_std": 3.0048089027404785, |
|
"rewards/rejected": -5.709853172302246, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 3.59002138737019e-11, |
|
"logits/chosen": -3.079613208770752, |
|
"logits/rejected": -2.889169931411743, |
|
"logps/chosen": -328.8927917480469, |
|
"logps/rejected": -725.4284057617188, |
|
"loss": 0.1653, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.0904576778411865, |
|
"rewards/margins": 3.3967292308807373, |
|
"rewards/margins_max": 5.103400230407715, |
|
"rewards/margins_min": 1.6900584697723389, |
|
"rewards/margins_std": 2.413597583770752, |
|
"rewards/rejected": -5.487187385559082, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.2152950763702393, |
|
"eval_logits/rejected": -2.1189112663269043, |
|
"eval_logps/chosen": -352.6255798339844, |
|
"eval_logps/rejected": -347.4267272949219, |
|
"eval_loss": 0.6940016150474548, |
|
"eval_rewards/accuracies": 0.5379999876022339, |
|
"eval_rewards/chosen": -0.7885112166404724, |
|
"eval_rewards/margins": 0.058900706470012665, |
|
"eval_rewards/margins_max": 0.7288501262664795, |
|
"eval_rewards/margins_min": -0.5646029114723206, |
|
"eval_rewards/margins_std": 0.42255058884620667, |
|
"eval_rewards/rejected": -0.8474118709564209, |
|
"eval_runtime": 884.8339, |
|
"eval_samples_per_second": 4.521, |
|
"eval_steps_per_second": 0.283, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1648, |
|
"total_flos": 0.0, |
|
"train_loss": 0.33046212517520757, |
|
"train_runtime": 17297.4072, |
|
"train_samples_per_second": 1.525, |
|
"train_steps_per_second": 0.095 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1648, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|