|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 49.891043665102934, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.7660439014434814, |
|
"logits/rejected": -2.717564582824707, |
|
"logps/chosen": -269.8568420410156, |
|
"logps/rejected": -360.52459716796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 46.946091297352105, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.592543125152588, |
|
"logits/rejected": -2.56319522857666, |
|
"logps/chosen": -264.7040100097656, |
|
"logps/rejected": -251.515625, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.004693002440035343, |
|
"rewards/margins": 0.0028277651872485876, |
|
"rewards/rejected": 0.0018652371363714337, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 41.817724108185395, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.65449595451355, |
|
"logits/rejected": -2.6068952083587646, |
|
"logps/chosen": -280.5221252441406, |
|
"logps/rejected": -295.92376708984375, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.05156273767352104, |
|
"rewards/margins": 0.00740828737616539, |
|
"rewards/rejected": 0.04415445029735565, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 39.81553425430633, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.6671488285064697, |
|
"logits/rejected": -2.5955922603607178, |
|
"logps/chosen": -296.41644287109375, |
|
"logps/rejected": -260.6401672363281, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.2127685844898224, |
|
"rewards/margins": 0.04726782441139221, |
|
"rewards/rejected": 0.16550076007843018, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 38.58454153774096, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.5658886432647705, |
|
"logits/rejected": -2.5324325561523438, |
|
"logps/chosen": -259.78594970703125, |
|
"logps/rejected": -241.00991821289062, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3669721484184265, |
|
"rewards/margins": 0.19786901772022247, |
|
"rewards/rejected": 0.16910310089588165, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 37.351752662935816, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.5195257663726807, |
|
"logits/rejected": -2.4827651977539062, |
|
"logps/chosen": -273.65081787109375, |
|
"logps/rejected": -290.78680419921875, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.304054319858551, |
|
"rewards/margins": 0.2041884958744049, |
|
"rewards/rejected": 0.09986577928066254, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 39.61129660699584, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.567991018295288, |
|
"logits/rejected": -2.5036864280700684, |
|
"logps/chosen": -260.38055419921875, |
|
"logps/rejected": -294.011474609375, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.5612996220588684, |
|
"rewards/margins": 0.3578048348426819, |
|
"rewards/rejected": 0.20349478721618652, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 41.460696281749556, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.460195302963257, |
|
"logits/rejected": -2.46120023727417, |
|
"logps/chosen": -253.1399383544922, |
|
"logps/rejected": -253.4242706298828, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.6238263845443726, |
|
"rewards/margins": 0.4591788649559021, |
|
"rewards/rejected": 0.16464750468730927, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 61.37030849441711, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.615948438644409, |
|
"logits/rejected": -2.5394978523254395, |
|
"logps/chosen": -311.7240295410156, |
|
"logps/rejected": -263.1805725097656, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.5546952486038208, |
|
"rewards/margins": 0.5107932686805725, |
|
"rewards/rejected": 0.04390193149447441, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 39.59727717104598, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.5085294246673584, |
|
"logits/rejected": -2.4543616771698, |
|
"logps/chosen": -251.203369140625, |
|
"logps/rejected": -259.8647766113281, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.3953971564769745, |
|
"rewards/margins": 0.7687323689460754, |
|
"rewards/rejected": -0.37333518266677856, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 36.57841721590594, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.499514102935791, |
|
"logits/rejected": -2.4649369716644287, |
|
"logps/chosen": -248.44363403320312, |
|
"logps/rejected": -257.64776611328125, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.5957446694374084, |
|
"rewards/margins": 0.6267115473747253, |
|
"rewards/rejected": -0.03096688725054264, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.550398111343384, |
|
"eval_logits/rejected": -2.5104503631591797, |
|
"eval_logps/chosen": -250.69297790527344, |
|
"eval_logps/rejected": -262.7791748046875, |
|
"eval_loss": 0.5717624425888062, |
|
"eval_rewards/accuracies": 0.73828125, |
|
"eval_rewards/chosen": 0.5950239300727844, |
|
"eval_rewards/margins": 0.6006231904029846, |
|
"eval_rewards/rejected": -0.005599223077297211, |
|
"eval_runtime": 96.9486, |
|
"eval_samples_per_second": 20.629, |
|
"eval_steps_per_second": 0.33, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 51.91494841998397, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.55851149559021, |
|
"logits/rejected": -2.4656014442443848, |
|
"logps/chosen": -292.62615966796875, |
|
"logps/rejected": -258.59661865234375, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5976964831352234, |
|
"rewards/margins": 0.6357330083847046, |
|
"rewards/rejected": -0.0380365327000618, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 70.69069363258822, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.4600424766540527, |
|
"logits/rejected": -2.4324684143066406, |
|
"logps/chosen": -270.7308349609375, |
|
"logps/rejected": -260.5433349609375, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5218156576156616, |
|
"rewards/margins": 0.5561539530754089, |
|
"rewards/rejected": -0.03433822840452194, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 42.312370253489476, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.464791774749756, |
|
"logits/rejected": -2.439894676208496, |
|
"logps/chosen": -268.9382019042969, |
|
"logps/rejected": -269.9627685546875, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.6532469987869263, |
|
"rewards/margins": 0.7295945882797241, |
|
"rewards/rejected": -0.07634757459163666, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 40.45859260542855, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.5265681743621826, |
|
"logits/rejected": -2.485774517059326, |
|
"logps/chosen": -303.1440124511719, |
|
"logps/rejected": -301.68914794921875, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.6410696506500244, |
|
"rewards/margins": 0.6916864514350891, |
|
"rewards/rejected": -0.0506168007850647, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 41.1747855806655, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.5189616680145264, |
|
"logits/rejected": -2.4531705379486084, |
|
"logps/chosen": -272.0736999511719, |
|
"logps/rejected": -276.2969055175781, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5575242042541504, |
|
"rewards/margins": 0.5619192719459534, |
|
"rewards/rejected": -0.004395070485770702, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 48.726180323544725, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.5174994468688965, |
|
"logits/rejected": -2.43558406829834, |
|
"logps/chosen": -260.0892028808594, |
|
"logps/rejected": -260.7149658203125, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6489425897598267, |
|
"rewards/margins": 0.8134964108467102, |
|
"rewards/rejected": -0.16455380618572235, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 49.53825953706789, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.4509148597717285, |
|
"logits/rejected": -2.3897039890289307, |
|
"logps/chosen": -239.52261352539062, |
|
"logps/rejected": -233.6277618408203, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.49063143134117126, |
|
"rewards/margins": 0.6157802939414978, |
|
"rewards/rejected": -0.12514881789684296, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 51.08561061111303, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.3483898639678955, |
|
"logits/rejected": -2.306784152984619, |
|
"logps/chosen": -247.6396026611328, |
|
"logps/rejected": -231.8523406982422, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.43596941232681274, |
|
"rewards/margins": 0.6500319242477417, |
|
"rewards/rejected": -0.21406252682209015, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 42.31027201995276, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.41634464263916, |
|
"logits/rejected": -2.378105878829956, |
|
"logps/chosen": -260.20306396484375, |
|
"logps/rejected": -261.46502685546875, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5519876480102539, |
|
"rewards/margins": 0.6375012993812561, |
|
"rewards/rejected": -0.08551368862390518, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 102.86207924802177, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.392763614654541, |
|
"logits/rejected": -2.381993532180786, |
|
"logps/chosen": -249.912109375, |
|
"logps/rejected": -256.75439453125, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.49922746419906616, |
|
"rewards/margins": 0.7344967126846313, |
|
"rewards/rejected": -0.235269233584404, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.517864942550659, |
|
"eval_logits/rejected": -2.4783387184143066, |
|
"eval_logps/chosen": -249.6370849609375, |
|
"eval_logps/rejected": -264.89788818359375, |
|
"eval_loss": 0.5432960391044617, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": 0.6478186845779419, |
|
"eval_rewards/margins": 0.759353518486023, |
|
"eval_rewards/rejected": -0.11153475195169449, |
|
"eval_runtime": 96.4207, |
|
"eval_samples_per_second": 20.742, |
|
"eval_steps_per_second": 0.332, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 45.308290366409736, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.4460113048553467, |
|
"logits/rejected": -2.391810894012451, |
|
"logps/chosen": -278.56781005859375, |
|
"logps/rejected": -257.2254943847656, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.5562152862548828, |
|
"rewards/margins": 0.8551079034805298, |
|
"rewards/rejected": -0.29889267683029175, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 50.1182470431882, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.4605488777160645, |
|
"logits/rejected": -2.42708683013916, |
|
"logps/chosen": -257.90826416015625, |
|
"logps/rejected": -253.3182830810547, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.3734419643878937, |
|
"rewards/margins": 0.7561658024787903, |
|
"rewards/rejected": -0.382723867893219, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 43.71024962971359, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.490509510040283, |
|
"logits/rejected": -2.4491913318634033, |
|
"logps/chosen": -237.17898559570312, |
|
"logps/rejected": -251.81686401367188, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.5843235850334167, |
|
"rewards/margins": 0.7882751226425171, |
|
"rewards/rejected": -0.20395155251026154, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 44.93616969967234, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.533695697784424, |
|
"logits/rejected": -2.500807285308838, |
|
"logps/chosen": -253.635498046875, |
|
"logps/rejected": -253.0944061279297, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5032340884208679, |
|
"rewards/margins": 0.7045356035232544, |
|
"rewards/rejected": -0.2013014256954193, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 42.68904256130122, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.483025074005127, |
|
"logits/rejected": -2.4015185832977295, |
|
"logps/chosen": -285.0963439941406, |
|
"logps/rejected": -263.43560791015625, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6547069549560547, |
|
"rewards/margins": 0.850358784198761, |
|
"rewards/rejected": -0.19565197825431824, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 45.43502171857602, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.516913890838623, |
|
"logits/rejected": -2.478473424911499, |
|
"logps/chosen": -282.80230712890625, |
|
"logps/rejected": -258.77288818359375, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.5754625797271729, |
|
"rewards/margins": 0.8150871396064758, |
|
"rewards/rejected": -0.23962458968162537, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 41.73526734917468, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.5381340980529785, |
|
"logits/rejected": -2.4941086769104004, |
|
"logps/chosen": -267.4333190917969, |
|
"logps/rejected": -260.50677490234375, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4781308174133301, |
|
"rewards/margins": 0.6212563514709473, |
|
"rewards/rejected": -0.14312560856342316, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 48.561323508433155, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.510133743286133, |
|
"logits/rejected": -2.4422435760498047, |
|
"logps/chosen": -250.73855590820312, |
|
"logps/rejected": -247.487060546875, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5469261407852173, |
|
"rewards/margins": 0.8119627833366394, |
|
"rewards/rejected": -0.2650366425514221, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 44.504093632124075, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.5589568614959717, |
|
"logits/rejected": -2.5217483043670654, |
|
"logps/chosen": -240.7520751953125, |
|
"logps/rejected": -244.8422088623047, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5167636871337891, |
|
"rewards/margins": 0.661081075668335, |
|
"rewards/rejected": -0.14431743323802948, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 52.52022669231452, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.5429511070251465, |
|
"logits/rejected": -2.472679376602173, |
|
"logps/chosen": -292.2240905761719, |
|
"logps/rejected": -266.68658447265625, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.4385985732078552, |
|
"rewards/margins": 0.7676541209220886, |
|
"rewards/rejected": -0.329055517911911, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.5622596740722656, |
|
"eval_logits/rejected": -2.520256280899048, |
|
"eval_logps/chosen": -251.2219696044922, |
|
"eval_logps/rejected": -268.04449462890625, |
|
"eval_loss": 0.53697669506073, |
|
"eval_rewards/accuracies": 0.76953125, |
|
"eval_rewards/chosen": 0.5685745477676392, |
|
"eval_rewards/margins": 0.8374388217926025, |
|
"eval_rewards/rejected": -0.2688642740249634, |
|
"eval_runtime": 96.3678, |
|
"eval_samples_per_second": 20.754, |
|
"eval_steps_per_second": 0.332, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 43.866661437938184, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -2.4593474864959717, |
|
"logits/rejected": -2.443233013153076, |
|
"logps/chosen": -285.5498046875, |
|
"logps/rejected": -263.8379821777344, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.4893050193786621, |
|
"rewards/margins": 0.7553777098655701, |
|
"rewards/rejected": -0.26607269048690796, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 43.407860217947494, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.4746253490448, |
|
"logits/rejected": -2.4388270378112793, |
|
"logps/chosen": -283.4583740234375, |
|
"logps/rejected": -250.38204956054688, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.5105848908424377, |
|
"rewards/margins": 0.788524329662323, |
|
"rewards/rejected": -0.2779393792152405, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 40.302692173545196, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.4712371826171875, |
|
"logits/rejected": -2.4099698066711426, |
|
"logps/chosen": -252.349853515625, |
|
"logps/rejected": -264.03912353515625, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4930170178413391, |
|
"rewards/margins": 0.7200408577919006, |
|
"rewards/rejected": -0.2270239144563675, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 50.168955016672676, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -2.467729091644287, |
|
"logits/rejected": -2.4046943187713623, |
|
"logps/chosen": -278.697509765625, |
|
"logps/rejected": -285.4507141113281, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.5648467540740967, |
|
"rewards/margins": 0.8352931141853333, |
|
"rewards/rejected": -0.2704463601112366, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 46.15971070553052, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -2.445666790008545, |
|
"logits/rejected": -2.377004384994507, |
|
"logps/chosen": -248.63241577148438, |
|
"logps/rejected": -248.23904418945312, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.47894006967544556, |
|
"rewards/margins": 0.8554509878158569, |
|
"rewards/rejected": -0.37651100754737854, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 54.17198760484943, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -2.4745380878448486, |
|
"logits/rejected": -2.376185178756714, |
|
"logps/chosen": -292.89483642578125, |
|
"logps/rejected": -269.1952209472656, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.4898607134819031, |
|
"rewards/margins": 0.8843740224838257, |
|
"rewards/rejected": -0.3945133090019226, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 44.15468601338237, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -2.409170150756836, |
|
"logits/rejected": -2.367518186569214, |
|
"logps/chosen": -266.35430908203125, |
|
"logps/rejected": -242.5480194091797, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.44893550872802734, |
|
"rewards/margins": 0.6646324992179871, |
|
"rewards/rejected": -0.21569697558879852, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 39.47383320898196, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -2.3523006439208984, |
|
"logits/rejected": -2.3420968055725098, |
|
"logps/chosen": -230.9795379638672, |
|
"logps/rejected": -267.8912658691406, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4252557158470154, |
|
"rewards/margins": 0.8839966058731079, |
|
"rewards/rejected": -0.45874080061912537, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 48.64961363299689, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -2.4286305904388428, |
|
"logits/rejected": -2.394604206085205, |
|
"logps/chosen": -293.20440673828125, |
|
"logps/rejected": -287.0997009277344, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.6216251850128174, |
|
"rewards/margins": 0.7780872583389282, |
|
"rewards/rejected": -0.15646204352378845, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 44.38080817079193, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.4102301597595215, |
|
"logits/rejected": -2.3357295989990234, |
|
"logps/chosen": -259.7147521972656, |
|
"logps/rejected": -273.10699462890625, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.541024386882782, |
|
"rewards/margins": 0.8457515835762024, |
|
"rewards/rejected": -0.30472710728645325, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.4731171131134033, |
|
"eval_logits/rejected": -2.4323782920837402, |
|
"eval_logps/chosen": -250.02178955078125, |
|
"eval_logps/rejected": -267.0915222167969, |
|
"eval_loss": 0.5348160862922668, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": 0.6285843849182129, |
|
"eval_rewards/margins": 0.8498014211654663, |
|
"eval_rewards/rejected": -0.22121697664260864, |
|
"eval_runtime": 96.4764, |
|
"eval_samples_per_second": 20.73, |
|
"eval_steps_per_second": 0.332, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 48.15981350653104, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -2.444577932357788, |
|
"logits/rejected": -2.3699073791503906, |
|
"logps/chosen": -286.5138854980469, |
|
"logps/rejected": -274.3666687011719, |
|
"loss": 0.5226, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.6201778650283813, |
|
"rewards/margins": 0.8976529240608215, |
|
"rewards/rejected": -0.2774750590324402, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 48.573506313099124, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.429912805557251, |
|
"logits/rejected": -2.3931796550750732, |
|
"logps/chosen": -287.13067626953125, |
|
"logps/rejected": -279.46844482421875, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4971562325954437, |
|
"rewards/margins": 0.7474662065505981, |
|
"rewards/rejected": -0.25030994415283203, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 41.98038749926915, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.3573684692382812, |
|
"logits/rejected": -2.3092567920684814, |
|
"logps/chosen": -269.9436950683594, |
|
"logps/rejected": -265.4564514160156, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.45472264289855957, |
|
"rewards/margins": 0.838543713092804, |
|
"rewards/rejected": -0.38382115960121155, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 44.22650678163462, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -2.4332785606384277, |
|
"logits/rejected": -2.3776473999023438, |
|
"logps/chosen": -272.65960693359375, |
|
"logps/rejected": -271.44329833984375, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5838757157325745, |
|
"rewards/margins": 0.700454831123352, |
|
"rewards/rejected": -0.1165790781378746, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 43.00589727019739, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.3836779594421387, |
|
"logits/rejected": -2.360665798187256, |
|
"logps/chosen": -284.2134704589844, |
|
"logps/rejected": -312.9830627441406, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.4735330641269684, |
|
"rewards/margins": 0.7689631581306458, |
|
"rewards/rejected": -0.29543009400367737, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 46.86754726240038, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -2.417273998260498, |
|
"logits/rejected": -2.377349376678467, |
|
"logps/chosen": -262.1804504394531, |
|
"logps/rejected": -247.7431182861328, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.48920711874961853, |
|
"rewards/margins": 0.7419728636741638, |
|
"rewards/rejected": -0.2527657151222229, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 45.055740606082026, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.4338390827178955, |
|
"logits/rejected": -2.3758208751678467, |
|
"logps/chosen": -268.4836730957031, |
|
"logps/rejected": -289.60205078125, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5107508897781372, |
|
"rewards/margins": 0.8732994794845581, |
|
"rewards/rejected": -0.3625485301017761, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5478911828795238, |
|
"train_runtime": 7553.9268, |
|
"train_samples_per_second": 8.093, |
|
"train_steps_per_second": 0.063 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|