|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010468463752944255, |
|
"grad_norm": 13.816486358642578, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.9122443199157715, |
|
"logits/rejected": -2.8823766708374023, |
|
"logps/chosen": -276.3387451171875, |
|
"logps/rejected": -242.270751953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 14.41542911529541, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.8143603801727295, |
|
"logits/rejected": -2.7806081771850586, |
|
"logps/chosen": -269.4888610839844, |
|
"logps/rejected": -283.96014404296875, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0007840646430850029, |
|
"rewards/margins": -0.0032457474153488874, |
|
"rewards/rejected": 0.00246168184094131, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 14.011621475219727, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.8383383750915527, |
|
"logits/rejected": -2.778038740158081, |
|
"logps/chosen": -289.6752624511719, |
|
"logps/rejected": -246.4437255859375, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0002006460854317993, |
|
"rewards/margins": 0.0035436502657830715, |
|
"rewards/rejected": -0.0033430042676627636, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 13.931632995605469, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -2.8146023750305176, |
|
"logits/rejected": -2.8039023876190186, |
|
"logps/chosen": -259.8967590332031, |
|
"logps/rejected": -239.6516571044922, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.011000868864357471, |
|
"rewards/margins": 0.01744804158806801, |
|
"rewards/rejected": -0.006447173655033112, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 13.794343948364258, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.823273181915283, |
|
"logits/rejected": -2.7753758430480957, |
|
"logps/chosen": -267.2137451171875, |
|
"logps/rejected": -260.3147277832031, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.014138467609882355, |
|
"rewards/margins": 0.059490323066711426, |
|
"rewards/rejected": -0.04535185918211937, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 13.499931335449219, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": -2.756686210632324, |
|
"logits/rejected": -2.7536370754241943, |
|
"logps/chosen": -224.6168670654297, |
|
"logps/rejected": -232.76455688476562, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.026402924209833145, |
|
"rewards/margins": 0.13416479527950287, |
|
"rewards/rejected": -0.10776187479496002, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 11.923255920410156, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.7912814617156982, |
|
"logits/rejected": -2.7667124271392822, |
|
"logps/chosen": -244.9199676513672, |
|
"logps/rejected": -246.4986114501953, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.030213266611099243, |
|
"rewards/margins": 0.2403673380613327, |
|
"rewards/rejected": -0.21015405654907227, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 12.119612693786621, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": -2.8213372230529785, |
|
"logits/rejected": -2.7878777980804443, |
|
"logps/chosen": -299.50091552734375, |
|
"logps/rejected": -263.40069580078125, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.06017110496759415, |
|
"rewards/margins": 0.3069015145301819, |
|
"rewards/rejected": -0.24673044681549072, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 15.060250282287598, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.776968479156494, |
|
"logits/rejected": -2.747732639312744, |
|
"logps/chosen": -274.90594482421875, |
|
"logps/rejected": -258.62103271484375, |
|
"loss": 0.6066, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.034456029534339905, |
|
"rewards/margins": 0.39508965611457825, |
|
"rewards/rejected": -0.42954570055007935, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 12.425506591796875, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.781564950942993, |
|
"logits/rejected": -2.756075620651245, |
|
"logps/chosen": -250.3762664794922, |
|
"logps/rejected": -221.0802459716797, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.05863947793841362, |
|
"rewards/margins": 0.37909096479415894, |
|
"rewards/rejected": -0.43773046135902405, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 12.826536178588867, |
|
"learning_rate": 4.9997324926814375e-06, |
|
"logits/chosen": -2.775822401046753, |
|
"logits/rejected": -2.7732839584350586, |
|
"logps/chosen": -267.44232177734375, |
|
"logps/rejected": -250.16455078125, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.06354306638240814, |
|
"rewards/margins": 0.5622893571853638, |
|
"rewards/rejected": -0.4987463057041168, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -2.7899813652038574, |
|
"eval_logits/rejected": -2.768789529800415, |
|
"eval_logps/chosen": -271.6063232421875, |
|
"eval_logps/rejected": -271.3592529296875, |
|
"eval_loss": 0.5875207781791687, |
|
"eval_rewards/accuracies": 0.682539701461792, |
|
"eval_rewards/chosen": 0.0264796894043684, |
|
"eval_rewards/margins": 0.3985615074634552, |
|
"eval_rewards/rejected": -0.37208184599876404, |
|
"eval_runtime": 354.4273, |
|
"eval_samples_per_second": 5.643, |
|
"eval_steps_per_second": 0.178, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 11.797159194946289, |
|
"learning_rate": 4.996723692767927e-06, |
|
"logits/chosen": -2.7566726207733154, |
|
"logits/rejected": -2.727776527404785, |
|
"logps/chosen": -275.2736511230469, |
|
"logps/rejected": -252.49301147460938, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.06772824376821518, |
|
"rewards/margins": 0.48222237825393677, |
|
"rewards/rejected": -0.414494127035141, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 13.538057327270508, |
|
"learning_rate": 4.9903757462135984e-06, |
|
"logits/chosen": -2.80414080619812, |
|
"logits/rejected": -2.7789015769958496, |
|
"logps/chosen": -260.9825744628906, |
|
"logps/rejected": -256.66082763671875, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.07279430329799652, |
|
"rewards/margins": 0.5211631655693054, |
|
"rewards/rejected": -0.4483688771724701, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 17.772846221923828, |
|
"learning_rate": 4.980697142834315e-06, |
|
"logits/chosen": -2.7555739879608154, |
|
"logits/rejected": -2.7485146522521973, |
|
"logps/chosen": -259.66021728515625, |
|
"logps/rejected": -250.82589721679688, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04345005005598068, |
|
"rewards/margins": 0.595272958278656, |
|
"rewards/rejected": -0.5518229603767395, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 14.183941841125488, |
|
"learning_rate": 4.967700826904229e-06, |
|
"logits/chosen": -2.789041042327881, |
|
"logits/rejected": -2.7646267414093018, |
|
"logps/chosen": -251.831298828125, |
|
"logps/rejected": -259.8060607910156, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.012771936133503914, |
|
"rewards/margins": 0.5387409925460815, |
|
"rewards/rejected": -0.5515128970146179, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 13.352206230163574, |
|
"learning_rate": 4.951404179843963e-06, |
|
"logits/chosen": -2.7706031799316406, |
|
"logits/rejected": -2.777296543121338, |
|
"logps/chosen": -260.20172119140625, |
|
"logps/rejected": -244.833984375, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.30398789048194885, |
|
"rewards/margins": 0.5834565162658691, |
|
"rewards/rejected": -0.2794686257839203, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 14.198497772216797, |
|
"learning_rate": 4.931828996974498e-06, |
|
"logits/chosen": -2.8031933307647705, |
|
"logits/rejected": -2.7727789878845215, |
|
"logps/chosen": -282.6216735839844, |
|
"logps/rejected": -249.09408569335938, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.7594842314720154, |
|
"rewards/margins": 0.7169784903526306, |
|
"rewards/rejected": 0.04250572994351387, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 14.190126419067383, |
|
"learning_rate": 4.909001458367867e-06, |
|
"logits/chosen": -2.821536064147949, |
|
"logits/rejected": -2.7903571128845215, |
|
"logps/chosen": -268.6175537109375, |
|
"logps/rejected": -254.8057098388672, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.7267504930496216, |
|
"rewards/margins": 0.6977671980857849, |
|
"rewards/rejected": 0.028983300551772118, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 14.373480796813965, |
|
"learning_rate": 4.882952093833628e-06, |
|
"logits/chosen": -2.828423023223877, |
|
"logits/rejected": -2.818983554840088, |
|
"logps/chosen": -246.63040161132812, |
|
"logps/rejected": -241.3114471435547, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4481576979160309, |
|
"rewards/margins": 0.6290773153305054, |
|
"rewards/rejected": -0.18091967701911926, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 12.725995063781738, |
|
"learning_rate": 4.853715742087947e-06, |
|
"logits/chosen": -2.8280279636383057, |
|
"logits/rejected": -2.793407917022705, |
|
"logps/chosen": -296.0823669433594, |
|
"logps/rejected": -275.6176452636719, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4707435667514801, |
|
"rewards/margins": 0.7004532814025879, |
|
"rewards/rejected": -0.22970974445343018, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 16.12042236328125, |
|
"learning_rate": 4.821331504159906e-06, |
|
"logits/chosen": -2.7835636138916016, |
|
"logits/rejected": -2.7702224254608154, |
|
"logps/chosen": -249.65725708007812, |
|
"logps/rejected": -258.82049560546875, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.3579314351081848, |
|
"rewards/margins": 0.6976887583732605, |
|
"rewards/rejected": -0.3397572934627533, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -2.7981207370758057, |
|
"eval_logits/rejected": -2.779163122177124, |
|
"eval_logps/chosen": -271.27044677734375, |
|
"eval_logps/rejected": -273.3644714355469, |
|
"eval_loss": 0.5519838333129883, |
|
"eval_rewards/accuracies": 0.7103174328804016, |
|
"eval_rewards/chosen": 0.0600733757019043, |
|
"eval_rewards/margins": 0.6326771974563599, |
|
"eval_rewards/rejected": -0.5726038217544556, |
|
"eval_runtime": 353.4661, |
|
"eval_samples_per_second": 5.658, |
|
"eval_steps_per_second": 0.178, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 15.229165077209473, |
|
"learning_rate": 4.7858426910973435e-06, |
|
"logits/chosen": -2.8110532760620117, |
|
"logits/rejected": -2.785770893096924, |
|
"logps/chosen": -290.8441467285156, |
|
"logps/rejected": -283.64971923828125, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.17869320511817932, |
|
"rewards/margins": 0.780803382396698, |
|
"rewards/rejected": -0.6021102070808411, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 11.427948951721191, |
|
"learning_rate": 4.747296766042161e-06, |
|
"logits/chosen": -2.761380434036255, |
|
"logits/rejected": -2.7642910480499268, |
|
"logps/chosen": -280.3062744140625, |
|
"logps/rejected": -276.4909973144531, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.01649661734700203, |
|
"rewards/margins": 0.7951753735542297, |
|
"rewards/rejected": -0.7786787748336792, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 15.31595230102539, |
|
"learning_rate": 4.705745280752586e-06, |
|
"logits/chosen": -2.8096513748168945, |
|
"logits/rejected": -2.7831692695617676, |
|
"logps/chosen": -272.4754943847656, |
|
"logps/rejected": -266.40240478515625, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.3214190900325775, |
|
"rewards/margins": 0.831048846244812, |
|
"rewards/rejected": -0.5096298456192017, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 16.855459213256836, |
|
"learning_rate": 4.661243806657256e-06, |
|
"logits/chosen": -2.790187120437622, |
|
"logits/rejected": -2.751081705093384, |
|
"logps/chosen": -297.11236572265625, |
|
"logps/rejected": -265.01123046875, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.5351754426956177, |
|
"rewards/margins": 0.7664622664451599, |
|
"rewards/rejected": -0.23128685355186462, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 13.651627540588379, |
|
"learning_rate": 4.613851860533367e-06, |
|
"logits/chosen": -2.788170576095581, |
|
"logits/rejected": -2.7668204307556152, |
|
"logps/chosen": -260.80865478515625, |
|
"logps/rejected": -255.9954376220703, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.4738442301750183, |
|
"rewards/margins": 1.0687077045440674, |
|
"rewards/rejected": -0.5948633551597595, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 15.22368049621582, |
|
"learning_rate": 4.563632824908252e-06, |
|
"logits/chosen": -2.8234317302703857, |
|
"logits/rejected": -2.7849972248077393, |
|
"logps/chosen": -297.3622741699219, |
|
"logps/rejected": -288.5556945800781, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.3837077021598816, |
|
"rewards/margins": 0.7442808747291565, |
|
"rewards/rejected": -0.3605732023715973, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 13.450421333312988, |
|
"learning_rate": 4.510653863290871e-06, |
|
"logits/chosen": -2.7682323455810547, |
|
"logits/rejected": -2.7710976600646973, |
|
"logps/chosen": -264.67218017578125, |
|
"logps/rejected": -262.8077697753906, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5086392164230347, |
|
"rewards/margins": 0.8164563179016113, |
|
"rewards/rejected": -0.3078171908855438, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 13.034257888793945, |
|
"learning_rate": 4.454985830346574e-06, |
|
"logits/chosen": -2.7635130882263184, |
|
"logits/rejected": -2.730128049850464, |
|
"logps/chosen": -263.2830810546875, |
|
"logps/rejected": -227.2650604248047, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.6867417693138123, |
|
"rewards/margins": 0.9631470441818237, |
|
"rewards/rejected": -0.2764051854610443, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 11.6800537109375, |
|
"learning_rate": 4.396703177135262e-06, |
|
"logits/chosen": -2.741833209991455, |
|
"logits/rejected": -2.7163052558898926, |
|
"logps/chosen": -271.12530517578125, |
|
"logps/rejected": -264.3458557128906, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.34893399477005005, |
|
"rewards/margins": 0.8408756256103516, |
|
"rewards/rejected": -0.49194154143333435, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 15.404385566711426, |
|
"learning_rate": 4.335883851539693e-06, |
|
"logits/chosen": -2.7640957832336426, |
|
"logits/rejected": -2.7771594524383545, |
|
"logps/chosen": -268.81573486328125, |
|
"logps/rejected": -250.0668182373047, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04025740176439285, |
|
"rewards/margins": 0.6916046738624573, |
|
"rewards/rejected": -0.651347279548645, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -2.780329942703247, |
|
"eval_logits/rejected": -2.7615909576416016, |
|
"eval_logps/chosen": -272.0685729980469, |
|
"eval_logps/rejected": -275.2751159667969, |
|
"eval_loss": 0.5320433378219604, |
|
"eval_rewards/accuracies": 0.704365074634552, |
|
"eval_rewards/chosen": -0.019741566851735115, |
|
"eval_rewards/margins": 0.7439272999763489, |
|
"eval_rewards/rejected": -0.7636688351631165, |
|
"eval_runtime": 353.3156, |
|
"eval_samples_per_second": 5.661, |
|
"eval_steps_per_second": 0.178, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 15.563763618469238, |
|
"learning_rate": 4.2726091940171055e-06, |
|
"logits/chosen": -2.829540729522705, |
|
"logits/rejected": -2.7938830852508545, |
|
"logps/chosen": -332.7828674316406, |
|
"logps/rejected": -280.2691955566406, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.09140340238809586, |
|
"rewards/margins": 0.7919255495071411, |
|
"rewards/rejected": -0.7005220651626587, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 16.867467880249023, |
|
"learning_rate": 4.206963828813555e-06, |
|
"logits/chosen": -2.788259983062744, |
|
"logits/rejected": -2.7637429237365723, |
|
"logps/chosen": -271.119140625, |
|
"logps/rejected": -276.2804260253906, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10287600755691528, |
|
"rewards/margins": 0.8704935908317566, |
|
"rewards/rejected": -0.9733695983886719, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 14.005449295043945, |
|
"learning_rate": 4.139035550786495e-06, |
|
"logits/chosen": -2.7821526527404785, |
|
"logits/rejected": -2.754249095916748, |
|
"logps/chosen": -268.0732421875, |
|
"logps/rejected": -225.94900512695312, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.022264836356043816, |
|
"rewards/margins": 0.8124101758003235, |
|
"rewards/rejected": -0.7901453971862793, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 17.381444931030273, |
|
"learning_rate": 4.068915207986931e-06, |
|
"logits/chosen": -2.771470069885254, |
|
"logits/rejected": -2.7284042835235596, |
|
"logps/chosen": -257.30120849609375, |
|
"logps/rejected": -253.9716339111328, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1510663479566574, |
|
"rewards/margins": 0.8377777338027954, |
|
"rewards/rejected": -0.686711311340332, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 13.891199111938477, |
|
"learning_rate": 3.996696580158211e-06, |
|
"logits/chosen": -2.793546676635742, |
|
"logits/rejected": -2.7672958374023438, |
|
"logps/chosen": -294.7528076171875, |
|
"logps/rejected": -245.27969360351562, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.3805600702762604, |
|
"rewards/margins": 0.7021139860153198, |
|
"rewards/rejected": -0.32155394554138184, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 14.259988784790039, |
|
"learning_rate": 3.922476253313921e-06, |
|
"logits/chosen": -2.741086483001709, |
|
"logits/rejected": -2.7274608612060547, |
|
"logps/chosen": -274.81207275390625, |
|
"logps/rejected": -242.241943359375, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2537211775779724, |
|
"rewards/margins": 0.7949485778808594, |
|
"rewards/rejected": -0.5412274599075317, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 19.930835723876953, |
|
"learning_rate": 3.846353490562664e-06, |
|
"logits/chosen": -2.715400218963623, |
|
"logits/rejected": -2.6755480766296387, |
|
"logps/chosen": -266.83074951171875, |
|
"logps/rejected": -271.99041748046875, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2603386640548706, |
|
"rewards/margins": 1.0200598239898682, |
|
"rewards/rejected": -1.2803986072540283, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 15.25166130065918, |
|
"learning_rate": 3.768430099352445e-06, |
|
"logits/chosen": -2.769425868988037, |
|
"logits/rejected": -2.723906993865967, |
|
"logps/chosen": -267.3630065917969, |
|
"logps/rejected": -261.64910888671875, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.059519242495298386, |
|
"rewards/margins": 0.8974593877792358, |
|
"rewards/rejected": -0.9569786787033081, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 13.465699195861816, |
|
"learning_rate": 3.6888102953122307e-06, |
|
"logits/chosen": -2.772050619125366, |
|
"logits/rejected": -2.7556827068328857, |
|
"logps/chosen": -254.5989532470703, |
|
"logps/rejected": -253.4079132080078, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.2735213041305542, |
|
"rewards/margins": 0.8997882008552551, |
|
"rewards/rejected": -0.6262668967247009, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 17.0921573638916, |
|
"learning_rate": 3.607600562872785e-06, |
|
"logits/chosen": -2.7273311614990234, |
|
"logits/rejected": -2.7061333656311035, |
|
"logps/chosen": -275.65447998046875, |
|
"logps/rejected": -268.2806396484375, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.17162422835826874, |
|
"rewards/margins": 1.1053993701934814, |
|
"rewards/rejected": -0.9337752461433411, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -2.7731950283050537, |
|
"eval_logits/rejected": -2.753185510635376, |
|
"eval_logps/chosen": -273.5996398925781, |
|
"eval_logps/rejected": -277.1651306152344, |
|
"eval_loss": 0.5228143334388733, |
|
"eval_rewards/accuracies": 0.7003968358039856, |
|
"eval_rewards/chosen": -0.1728479415178299, |
|
"eval_rewards/margins": 0.7798227071762085, |
|
"eval_rewards/rejected": -0.952670693397522, |
|
"eval_runtime": 353.2859, |
|
"eval_samples_per_second": 5.661, |
|
"eval_steps_per_second": 0.178, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 14.339557647705078, |
|
"learning_rate": 3.5249095128531863e-06, |
|
"logits/chosen": -2.664485454559326, |
|
"logits/rejected": -2.6545228958129883, |
|
"logps/chosen": -247.5968780517578, |
|
"logps/rejected": -247.5493621826172, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.0953296571969986, |
|
"rewards/margins": 1.0826025009155273, |
|
"rewards/rejected": -1.1779320240020752, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 17.868024826049805, |
|
"learning_rate": 3.4408477372034743e-06, |
|
"logits/chosen": -2.716871738433838, |
|
"logits/rejected": -2.7016210556030273, |
|
"logps/chosen": -246.01736450195312, |
|
"logps/rejected": -259.8745422363281, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.052067507058382034, |
|
"rewards/margins": 1.0310100317001343, |
|
"rewards/rejected": -1.0830775499343872, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 16.81452178955078, |
|
"learning_rate": 3.355527661097728e-06, |
|
"logits/chosen": -2.75317645072937, |
|
"logits/rejected": -2.7509617805480957, |
|
"logps/chosen": -250.8342742919922, |
|
"logps/rejected": -248.368408203125, |
|
"loss": 0.529, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0890086218714714, |
|
"rewards/margins": 1.041926622390747, |
|
"rewards/rejected": -0.9529180526733398, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 14.235637664794922, |
|
"learning_rate": 3.269063392575352e-06, |
|
"logits/chosen": -2.807570695877075, |
|
"logits/rejected": -2.744551420211792, |
|
"logps/chosen": -296.353759765625, |
|
"logps/rejected": -257.65374755859375, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.2943809926509857, |
|
"rewards/margins": 1.0381492376327515, |
|
"rewards/rejected": -0.7437682747840881, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 11.64111042022705, |
|
"learning_rate": 3.181570569931697e-06, |
|
"logits/chosen": -2.8272039890289307, |
|
"logits/rejected": -2.7998225688934326, |
|
"logps/chosen": -298.7302551269531, |
|
"logps/rejected": -261.15606689453125, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.18123969435691833, |
|
"rewards/margins": 0.7455132007598877, |
|
"rewards/rejected": -0.564273476600647, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 14.514245986938477, |
|
"learning_rate": 3.09316620706208e-06, |
|
"logits/chosen": -2.803687334060669, |
|
"logits/rejected": -2.788764715194702, |
|
"logps/chosen": -294.7230224609375, |
|
"logps/rejected": -290.67449951171875, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.22936221957206726, |
|
"rewards/margins": 1.060276985168457, |
|
"rewards/rejected": -0.8309147953987122, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 19.26064682006836, |
|
"learning_rate": 3.0039685369660785e-06, |
|
"logits/chosen": -2.745448589324951, |
|
"logits/rejected": -2.7057714462280273, |
|
"logps/chosen": -275.9302978515625, |
|
"logps/rejected": -271.31964111328125, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.40928229689598083, |
|
"rewards/margins": 1.3523207902908325, |
|
"rewards/rejected": -0.9430384635925293, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 15.569584846496582, |
|
"learning_rate": 2.91409685362137e-06, |
|
"logits/chosen": -2.728668689727783, |
|
"logits/rejected": -2.7024617195129395, |
|
"logps/chosen": -280.735595703125, |
|
"logps/rejected": -284.4299621582031, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.218542218208313, |
|
"rewards/margins": 0.9496285319328308, |
|
"rewards/rejected": -0.7310863137245178, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 13.441338539123535, |
|
"learning_rate": 2.8236713524386085e-06, |
|
"logits/chosen": -2.780989408493042, |
|
"logits/rejected": -2.7283778190612793, |
|
"logps/chosen": -251.0142364501953, |
|
"logps/rejected": -233.73703002929688, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.3006051182746887, |
|
"rewards/margins": 1.0492786169052124, |
|
"rewards/rejected": -0.7486735582351685, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 15.913731575012207, |
|
"learning_rate": 2.7328129695107205e-06, |
|
"logits/chosen": -2.6493608951568604, |
|
"logits/rejected": -2.6593687534332275, |
|
"logps/chosen": -285.948974609375, |
|
"logps/rejected": -264.4158630371094, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08400087058544159, |
|
"rewards/margins": 0.9130845069885254, |
|
"rewards/rejected": -0.997085452079773, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -2.753965139389038, |
|
"eval_logits/rejected": -2.733893632888794, |
|
"eval_logps/chosen": -274.0134582519531, |
|
"eval_logps/rejected": -278.07373046875, |
|
"eval_loss": 0.5174898505210876, |
|
"eval_rewards/accuracies": 0.7142857313156128, |
|
"eval_rewards/chosen": -0.21423013508319855, |
|
"eval_rewards/margins": 0.8292967677116394, |
|
"eval_rewards/rejected": -1.0435270071029663, |
|
"eval_runtime": 353.1034, |
|
"eval_samples_per_second": 5.664, |
|
"eval_steps_per_second": 0.178, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 15.676944732666016, |
|
"learning_rate": 2.641643219871597e-06, |
|
"logits/chosen": -2.719494581222534, |
|
"logits/rejected": -2.7037150859832764, |
|
"logps/chosen": -294.9654235839844, |
|
"logps/rejected": -266.7233581542969, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.23952999711036682, |
|
"rewards/margins": 0.886762261390686, |
|
"rewards/rejected": -1.126292109489441, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 13.938344955444336, |
|
"learning_rate": 2.5502840349805074e-06, |
|
"logits/chosen": -2.696988821029663, |
|
"logits/rejected": -2.683870792388916, |
|
"logps/chosen": -269.9395446777344, |
|
"logps/rejected": -262.0766906738281, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.01678340695798397, |
|
"rewards/margins": 0.9905561208724976, |
|
"rewards/rejected": -1.0073394775390625, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 15.948507308959961, |
|
"learning_rate": 2.4588575996495797e-06, |
|
"logits/chosen": -2.7424683570861816, |
|
"logits/rejected": -2.694587230682373, |
|
"logps/chosen": -284.1721496582031, |
|
"logps/rejected": -278.7875671386719, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.3610132336616516, |
|
"rewards/margins": 0.8961461186408997, |
|
"rewards/rejected": -0.5351330041885376, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 12.358839988708496, |
|
"learning_rate": 2.367486188632446e-06, |
|
"logits/chosen": -2.6885437965393066, |
|
"logits/rejected": -2.6670284271240234, |
|
"logps/chosen": -256.28900146484375, |
|
"logps/rejected": -221.53140258789062, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.3476331830024719, |
|
"rewards/margins": 0.8925381898880005, |
|
"rewards/rejected": -0.5449050068855286, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 15.99252986907959, |
|
"learning_rate": 2.276292003092593e-06, |
|
"logits/chosen": -2.733283281326294, |
|
"logits/rejected": -2.7206578254699707, |
|
"logps/chosen": -258.4396667480469, |
|
"logps/rejected": -267.3887634277344, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.19142115116119385, |
|
"rewards/margins": 0.8478537797927856, |
|
"rewards/rejected": -0.656432569026947, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 17.259794235229492, |
|
"learning_rate": 2.1853970071701415e-06, |
|
"logits/chosen": -2.7236075401306152, |
|
"logits/rejected": -2.697274923324585, |
|
"logps/chosen": -275.6216735839844, |
|
"logps/rejected": -264.6971740722656, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0764341875910759, |
|
"rewards/margins": 0.8573528528213501, |
|
"rewards/rejected": -0.7809187173843384, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 15.168867111206055, |
|
"learning_rate": 2.0949227648656194e-06, |
|
"logits/chosen": -2.74639630317688, |
|
"logits/rejected": -2.7061123847961426, |
|
"logps/chosen": -275.2660217285156, |
|
"logps/rejected": -243.61441040039062, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.16716930270195007, |
|
"rewards/margins": 1.0621994733810425, |
|
"rewards/rejected": -0.89503014087677, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 16.580690383911133, |
|
"learning_rate": 2.00499027745888e-06, |
|
"logits/chosen": -2.6987080574035645, |
|
"logits/rejected": -2.6702322959899902, |
|
"logps/chosen": -251.2617950439453, |
|
"logps/rejected": -249.33554077148438, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.08427687734365463, |
|
"rewards/margins": 1.002518653869629, |
|
"rewards/rejected": -0.9182417988777161, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 13.715599060058594, |
|
"learning_rate": 1.915719821680624e-06, |
|
"logits/chosen": -2.7376370429992676, |
|
"logits/rejected": -2.695758819580078, |
|
"logps/chosen": -288.4046325683594, |
|
"logps/rejected": -244.7723388671875, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.006659612059593201, |
|
"rewards/margins": 0.9512457847595215, |
|
"rewards/rejected": -0.9579054117202759, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 19.28061294555664, |
|
"learning_rate": 1.8272307888529276e-06, |
|
"logits/chosen": -2.720449924468994, |
|
"logits/rejected": -2.7100579738616943, |
|
"logps/chosen": -256.0718994140625, |
|
"logps/rejected": -238.8487548828125, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2969541549682617, |
|
"rewards/margins": 0.761928915977478, |
|
"rewards/rejected": -1.0588830709457397, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -2.726771831512451, |
|
"eval_logits/rejected": -2.7070980072021484, |
|
"eval_logps/chosen": -274.8104553222656, |
|
"eval_logps/rejected": -278.967041015625, |
|
"eval_loss": 0.513893723487854, |
|
"eval_rewards/accuracies": 0.7023809552192688, |
|
"eval_rewards/chosen": -0.2939308285713196, |
|
"eval_rewards/margins": 0.8389276266098022, |
|
"eval_rewards/rejected": -1.132858395576477, |
|
"eval_runtime": 353.2373, |
|
"eval_samples_per_second": 5.662, |
|
"eval_steps_per_second": 0.178, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 15.231009483337402, |
|
"learning_rate": 1.739641525213929e-06, |
|
"logits/chosen": -2.688774347305298, |
|
"logits/rejected": -2.6713051795959473, |
|
"logps/chosen": -264.80462646484375, |
|
"logps/rejected": -323.32769775390625, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.2533731758594513, |
|
"rewards/margins": 1.022083044052124, |
|
"rewards/rejected": -1.275456190109253, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 13.739964485168457, |
|
"learning_rate": 1.6530691736402317e-06, |
|
"logits/chosen": -2.752707004547119, |
|
"logits/rejected": -2.7096076011657715, |
|
"logps/chosen": -272.6566467285156, |
|
"logps/rejected": -246.8385467529297, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.008082455024123192, |
|
"rewards/margins": 0.9637606739997864, |
|
"rewards/rejected": -0.9718431234359741, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 15.237521171569824, |
|
"learning_rate": 1.5676295169786864e-06, |
|
"logits/chosen": -2.6328330039978027, |
|
"logits/rejected": -2.6349148750305176, |
|
"logps/chosen": -243.0568389892578, |
|
"logps/rejected": -257.5806884765625, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.11530411243438721, |
|
"rewards/margins": 1.1866023540496826, |
|
"rewards/rejected": -1.071298360824585, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 14.393755912780762, |
|
"learning_rate": 1.4834368231970922e-06, |
|
"logits/chosen": -2.745518922805786, |
|
"logits/rejected": -2.7395710945129395, |
|
"logps/chosen": -261.69219970703125, |
|
"logps/rejected": -273.5394592285156, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.13604871928691864, |
|
"rewards/margins": 1.1850334405899048, |
|
"rewards/rejected": -1.0489846467971802, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 16.45931625366211, |
|
"learning_rate": 1.4006036925609245e-06, |
|
"logits/chosen": -2.7178597450256348, |
|
"logits/rejected": -2.701998472213745, |
|
"logps/chosen": -257.53363037109375, |
|
"logps/rejected": -260.20428466796875, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.378938764333725, |
|
"rewards/margins": 1.3896540403366089, |
|
"rewards/rejected": -1.0107152462005615, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 16.44599151611328, |
|
"learning_rate": 1.3192409070404582e-06, |
|
"logits/chosen": -2.668501615524292, |
|
"logits/rejected": -2.657517671585083, |
|
"logps/chosen": -266.4493408203125, |
|
"logps/rejected": -272.10491943359375, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.3282933533191681, |
|
"rewards/margins": 1.1049590110778809, |
|
"rewards/rejected": -0.7766658067703247, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 16.747392654418945, |
|
"learning_rate": 1.2394572821496953e-06, |
|
"logits/chosen": -2.720128059387207, |
|
"logits/rejected": -2.704305648803711, |
|
"logps/chosen": -274.4294738769531, |
|
"logps/rejected": -278.8267822265625, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.37905776500701904, |
|
"rewards/margins": 1.1339893341064453, |
|
"rewards/rejected": -0.7549317479133606, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 14.092031478881836, |
|
"learning_rate": 1.1613595214152713e-06, |
|
"logits/chosen": -2.725722074508667, |
|
"logits/rejected": -2.7022783756256104, |
|
"logps/chosen": -272.71630859375, |
|
"logps/rejected": -235.8934783935547, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.28024324774742126, |
|
"rewards/margins": 1.039801001548767, |
|
"rewards/rejected": -0.7595577836036682, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 12.211508750915527, |
|
"learning_rate": 1.0850520736699362e-06, |
|
"logits/chosen": -2.758209705352783, |
|
"logits/rejected": -2.7124698162078857, |
|
"logps/chosen": -298.98974609375, |
|
"logps/rejected": -265.95648193359375, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.3048846125602722, |
|
"rewards/margins": 1.1615703105926514, |
|
"rewards/rejected": -0.8566857576370239, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 17.241064071655273, |
|
"learning_rate": 1.0106369933615043e-06, |
|
"logits/chosen": -2.682668924331665, |
|
"logits/rejected": -2.677841901779175, |
|
"logps/chosen": -243.40737915039062, |
|
"logps/rejected": -249.0398406982422, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.09061723947525024, |
|
"rewards/margins": 0.9352877736091614, |
|
"rewards/rejected": -0.8446704745292664, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -2.740434408187866, |
|
"eval_logits/rejected": -2.7206947803497314, |
|
"eval_logps/chosen": -271.9793701171875, |
|
"eval_logps/rejected": -276.6876220703125, |
|
"eval_loss": 0.5084052085876465, |
|
"eval_rewards/accuracies": 0.7202380895614624, |
|
"eval_rewards/chosen": -0.010822229087352753, |
|
"eval_rewards/margins": 0.8940958976745605, |
|
"eval_rewards/rejected": -0.9049180150032043, |
|
"eval_runtime": 353.4167, |
|
"eval_samples_per_second": 5.659, |
|
"eval_steps_per_second": 0.178, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 14.434347152709961, |
|
"learning_rate": 9.382138040640714e-07, |
|
"logits/chosen": -2.7248692512512207, |
|
"logits/rejected": -2.7028727531433105, |
|
"logps/chosen": -248.70083618164062, |
|
"logps/rejected": -249.9818878173828, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.020344754680991173, |
|
"rewards/margins": 0.7711048722267151, |
|
"rewards/rejected": -0.7914497256278992, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 18.781707763671875, |
|
"learning_rate": 8.678793653740633e-07, |
|
"logits/chosen": -2.6842763423919678, |
|
"logits/rejected": -2.655897378921509, |
|
"logps/chosen": -227.59765625, |
|
"logps/rejected": -242.20462036132812, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.029393959790468216, |
|
"rewards/margins": 1.0940216779708862, |
|
"rewards/rejected": -1.1234157085418701, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 14.272910118103027, |
|
"learning_rate": 7.997277433690984e-07, |
|
"logits/chosen": -2.763821601867676, |
|
"logits/rejected": -2.7315735816955566, |
|
"logps/chosen": -260.8923034667969, |
|
"logps/rejected": -242.97708129882812, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.01939038559794426, |
|
"rewards/margins": 1.0031776428222656, |
|
"rewards/rejected": -0.9837873578071594, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 14.98263931274414, |
|
"learning_rate": 7.338500848029603e-07, |
|
"logits/chosen": -2.6752095222473145, |
|
"logits/rejected": -2.657548427581787, |
|
"logps/chosen": -277.3648376464844, |
|
"logps/rejected": -270.6456604003906, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.2628101706504822, |
|
"rewards/margins": 1.2527306079864502, |
|
"rewards/rejected": -0.9899206161499023, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 13.564722061157227, |
|
"learning_rate": 6.70334495204884e-07, |
|
"logits/chosen": -2.6619467735290527, |
|
"logits/rejected": -2.678673267364502, |
|
"logps/chosen": -261.7742919921875, |
|
"logps/rejected": -279.1748046875, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.09365497529506683, |
|
"rewards/margins": 0.9422794580459595, |
|
"rewards/rejected": -0.8486245274543762, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 15.380537986755371, |
|
"learning_rate": 6.092659210462232e-07, |
|
"logits/chosen": -2.7603864669799805, |
|
"logits/rejected": -2.740291118621826, |
|
"logps/chosen": -277.1561279296875, |
|
"logps/rejected": -247.03018188476562, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.11306103318929672, |
|
"rewards/margins": 1.0758002996444702, |
|
"rewards/rejected": -0.9627392888069153, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 12.876290321350098, |
|
"learning_rate": 5.507260361320738e-07, |
|
"logits/chosen": -2.766484498977661, |
|
"logits/rejected": -2.7405753135681152, |
|
"logps/chosen": -275.5859069824219, |
|
"logps/rejected": -261.8198547363281, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.04547576978802681, |
|
"rewards/margins": 0.9839900732040405, |
|
"rewards/rejected": -0.9385143518447876, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 14.665273666381836, |
|
"learning_rate": 4.947931323697983e-07, |
|
"logits/chosen": -2.7336275577545166, |
|
"logits/rejected": -2.702479362487793, |
|
"logps/chosen": -280.28045654296875, |
|
"logps/rejected": -260.28778076171875, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.07684344798326492, |
|
"rewards/margins": 0.9824104309082031, |
|
"rewards/rejected": -1.0592539310455322, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 13.474715232849121, |
|
"learning_rate": 4.4154201506053985e-07, |
|
"logits/chosen": -2.698451519012451, |
|
"logits/rejected": -2.6745803356170654, |
|
"logps/chosen": -273.3094482421875, |
|
"logps/rejected": -273.69232177734375, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16311880946159363, |
|
"rewards/margins": 0.8299474716186523, |
|
"rewards/rejected": -0.9930663108825684, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 18.379581451416016, |
|
"learning_rate": 3.910439028537638e-07, |
|
"logits/chosen": -2.688326358795166, |
|
"logits/rejected": -2.650494337081909, |
|
"logps/chosen": -279.04498291015625, |
|
"logps/rejected": -262.01470947265625, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.11311912536621094, |
|
"rewards/margins": 1.0069270133972168, |
|
"rewards/rejected": -0.8938078880310059, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -2.7371084690093994, |
|
"eval_logits/rejected": -2.717339038848877, |
|
"eval_logps/chosen": -272.1710510253906, |
|
"eval_logps/rejected": -276.8697204589844, |
|
"eval_loss": 0.5089952945709229, |
|
"eval_rewards/accuracies": 0.7182539701461792, |
|
"eval_rewards/chosen": -0.02998838946223259, |
|
"eval_rewards/margins": 0.8931422829627991, |
|
"eval_rewards/rejected": -0.9231306910514832, |
|
"eval_runtime": 353.2424, |
|
"eval_samples_per_second": 5.662, |
|
"eval_steps_per_second": 0.178, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 14.40329647064209, |
|
"learning_rate": 3.4336633249862084e-07, |
|
"logits/chosen": -2.6875698566436768, |
|
"logits/rejected": -2.688908100128174, |
|
"logps/chosen": -262.572998046875, |
|
"logps/rejected": -263.0613708496094, |
|
"loss": 0.5058, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.08515429496765137, |
|
"rewards/margins": 0.9841421246528625, |
|
"rewards/rejected": -0.8989877700805664, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 13.108388900756836, |
|
"learning_rate": 2.98573068519539e-07, |
|
"logits/chosen": -2.733366012573242, |
|
"logits/rejected": -2.6849071979522705, |
|
"logps/chosen": -281.99212646484375, |
|
"logps/rejected": -253.0430145263672, |
|
"loss": 0.4671, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.06205648183822632, |
|
"rewards/margins": 1.0704829692840576, |
|
"rewards/rejected": -1.0084264278411865, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 12.790390014648438, |
|
"learning_rate": 2.5672401793681854e-07, |
|
"logits/chosen": -2.754135847091675, |
|
"logits/rejected": -2.7304673194885254, |
|
"logps/chosen": -255.24227905273438, |
|
"logps/rejected": -233.9311981201172, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.1760888546705246, |
|
"rewards/margins": 1.146909236907959, |
|
"rewards/rejected": -0.970820426940918, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 12.2880220413208, |
|
"learning_rate": 2.178751501463036e-07, |
|
"logits/chosen": -2.703233480453491, |
|
"logits/rejected": -2.676057815551758, |
|
"logps/chosen": -288.25238037109375, |
|
"logps/rejected": -258.7244567871094, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.12756434082984924, |
|
"rewards/margins": 1.043333649635315, |
|
"rewards/rejected": -0.9157692193984985, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 18.457061767578125, |
|
"learning_rate": 1.820784220652766e-07, |
|
"logits/chosen": -2.769766330718994, |
|
"logits/rejected": -2.749628782272339, |
|
"logps/chosen": -275.16302490234375, |
|
"logps/rejected": -264.35260009765625, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.1516454517841339, |
|
"rewards/margins": 1.0686285495758057, |
|
"rewards/rejected": -0.9169832468032837, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 20.659849166870117, |
|
"learning_rate": 1.4938170864468636e-07, |
|
"logits/chosen": -2.7188377380371094, |
|
"logits/rejected": -2.7116434574127197, |
|
"logps/chosen": -265.40045166015625, |
|
"logps/rejected": -279.65716552734375, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.013210540637373924, |
|
"rewards/margins": 1.0902684926986694, |
|
"rewards/rejected": -1.0770580768585205, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 15.20347785949707, |
|
"learning_rate": 1.1982873884064466e-07, |
|
"logits/chosen": -2.6989123821258545, |
|
"logits/rejected": -2.668877124786377, |
|
"logps/chosen": -265.70562744140625, |
|
"logps/rejected": -227.8814239501953, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.006173081696033478, |
|
"rewards/margins": 0.8927028775215149, |
|
"rewards/rejected": -0.8988760113716125, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 13.230890274047852, |
|
"learning_rate": 9.345903713082305e-08, |
|
"logits/chosen": -2.7578580379486084, |
|
"logits/rejected": -2.700892210006714, |
|
"logps/chosen": -257.47833251953125, |
|
"logps/rejected": -217.13882446289062, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.035571496933698654, |
|
"rewards/margins": 1.0173650979995728, |
|
"rewards/rejected": -1.0529365539550781, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 13.502944946289062, |
|
"learning_rate": 7.030787065396866e-08, |
|
"logits/chosen": -2.709721088409424, |
|
"logits/rejected": -2.6791884899139404, |
|
"logps/chosen": -248.15966796875, |
|
"logps/rejected": -225.49337768554688, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.09806522727012634, |
|
"rewards/margins": 0.9047859907150269, |
|
"rewards/rejected": -0.8067208528518677, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 13.781939506530762, |
|
"learning_rate": 5.0406202043228604e-08, |
|
"logits/chosen": -2.698310375213623, |
|
"logits/rejected": -2.6684579849243164, |
|
"logps/chosen": -279.3674621582031, |
|
"logps/rejected": -252.4850311279297, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.18393948674201965, |
|
"rewards/margins": 0.9908119440078735, |
|
"rewards/rejected": -0.8068723678588867, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -2.737166404724121, |
|
"eval_logits/rejected": -2.7174251079559326, |
|
"eval_logps/chosen": -271.87890625, |
|
"eval_logps/rejected": -276.7181396484375, |
|
"eval_loss": 0.5083790421485901, |
|
"eval_rewards/accuracies": 0.7222222089767456, |
|
"eval_rewards/chosen": -0.0007740448345430195, |
|
"eval_rewards/margins": 0.9071968197822571, |
|
"eval_rewards/rejected": -0.9079708456993103, |
|
"eval_runtime": 353.3783, |
|
"eval_samples_per_second": 5.66, |
|
"eval_steps_per_second": 0.178, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 12.07172679901123, |
|
"learning_rate": 3.378064801637687e-08, |
|
"logits/chosen": -2.7228991985321045, |
|
"logits/rejected": -2.7141544818878174, |
|
"logps/chosen": -273.98095703125, |
|
"logps/rejected": -259.7334899902344, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.030942853540182114, |
|
"rewards/margins": 1.1606462001800537, |
|
"rewards/rejected": -1.1297032833099365, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 13.161744117736816, |
|
"learning_rate": 2.0453443778310766e-08, |
|
"logits/chosen": -2.7474637031555176, |
|
"logits/rejected": -2.731945753097534, |
|
"logps/chosen": -264.48785400390625, |
|
"logps/rejected": -259.0505065917969, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.009974095039069653, |
|
"rewards/margins": 1.0056250095367432, |
|
"rewards/rejected": -0.9956509470939636, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 13.763269424438477, |
|
"learning_rate": 1.0442413283435759e-08, |
|
"logits/chosen": -2.752173900604248, |
|
"logits/rejected": -2.7249786853790283, |
|
"logps/chosen": -276.56597900390625, |
|
"logps/rejected": -276.8932800292969, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.10977672040462494, |
|
"rewards/margins": 0.9989693760871887, |
|
"rewards/rejected": -0.8891927003860474, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 13.344006538391113, |
|
"learning_rate": 3.760945397705828e-09, |
|
"logits/chosen": -2.750119209289551, |
|
"logits/rejected": -2.7376656532287598, |
|
"logps/chosen": -282.4700012207031, |
|
"logps/rejected": -263.9110107421875, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.0567372627556324, |
|
"rewards/margins": 1.0393383502960205, |
|
"rewards/rejected": -0.9826010465621948, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 16.06287956237793, |
|
"learning_rate": 4.1797599220405605e-10, |
|
"logits/chosen": -2.705085515975952, |
|
"logits/rejected": -2.6794345378875732, |
|
"logps/chosen": -257.38623046875, |
|
"logps/rejected": -263.4278259277344, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.005693820305168629, |
|
"rewards/margins": 0.9306586384773254, |
|
"rewards/rejected": -0.936352550983429, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5278735879828168, |
|
"train_runtime": 28474.7268, |
|
"train_samples_per_second": 2.147, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|