zephyr-7b-dpo-full / trainer_state.json
wzhouad's picture
Model save
a773a48 verified
raw
history blame
No virus
25.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 478,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -2.847970962524414,
"logits/rejected": -2.79160213470459,
"logps/chosen": -284.9612731933594,
"logps/rejected": -276.45928955078125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -2.754901647567749,
"logits/rejected": -2.7529661655426025,
"logps/chosen": -249.956298828125,
"logps/rejected": -223.05245971679688,
"loss": 0.6931,
"rewards/accuracies": 0.3958333432674408,
"rewards/chosen": -8.542059367755428e-05,
"rewards/margins": -4.0294162317877635e-05,
"rewards/rejected": -4.512643499765545e-05,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -2.7449066638946533,
"logits/rejected": -2.745481014251709,
"logps/chosen": -257.4268493652344,
"logps/rejected": -247.520751953125,
"loss": 0.6925,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": 0.00028673160704784095,
"rewards/margins": 0.0011877163778990507,
"rewards/rejected": -0.0009009848581627011,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.8009085655212402,
"logits/rejected": -2.7534918785095215,
"logps/chosen": -300.4103088378906,
"logps/rejected": -261.89532470703125,
"loss": 0.6882,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.0016673363279551268,
"rewards/margins": 0.009702490642666817,
"rewards/rejected": -0.008035155013203621,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -2.7635364532470703,
"logits/rejected": -2.751422882080078,
"logps/chosen": -256.6298522949219,
"logps/rejected": -274.86297607421875,
"loss": 0.6805,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.0019601243548095226,
"rewards/margins": 0.025836413726210594,
"rewards/rejected": -0.027796542271971703,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 4.999733114418725e-07,
"logits/chosen": -2.7672626972198486,
"logits/rejected": -2.7396867275238037,
"logps/chosen": -284.4268798828125,
"logps/rejected": -256.52667236328125,
"loss": 0.6675,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.023474793881177902,
"rewards/margins": 0.06475953012704849,
"rewards/rejected": -0.0882343202829361,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 4.990398100856366e-07,
"logits/chosen": -2.7358150482177734,
"logits/rejected": -2.724313259124756,
"logps/chosen": -281.9308166503906,
"logps/rejected": -256.6224670410156,
"loss": 0.6443,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.060463108122348785,
"rewards/margins": 0.1052827388048172,
"rewards/rejected": -0.1657458394765854,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 4.967775735898179e-07,
"logits/chosen": -2.781935453414917,
"logits/rejected": -2.739537000656128,
"logps/chosen": -291.1555480957031,
"logps/rejected": -273.9505920410156,
"loss": 0.6246,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.24020154774188995,
"rewards/margins": 0.17989788949489594,
"rewards/rejected": -0.4200994074344635,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 4.931986719649298e-07,
"logits/chosen": -2.782163143157959,
"logits/rejected": -2.7544727325439453,
"logps/chosen": -290.7063903808594,
"logps/rejected": -333.33160400390625,
"loss": 0.5953,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.30353400111198425,
"rewards/margins": 0.3068069517612457,
"rewards/rejected": -0.61034095287323,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 4.883222001996351e-07,
"logits/chosen": -2.8103935718536377,
"logits/rejected": -2.7860381603240967,
"logps/chosen": -309.4369201660156,
"logps/rejected": -328.04937744140625,
"loss": 0.5871,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.34070074558258057,
"rewards/margins": 0.4278062880039215,
"rewards/rejected": -0.7685070037841797,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 4.821741763807186e-07,
"logits/chosen": -2.775650978088379,
"logits/rejected": -2.742344379425049,
"logps/chosen": -354.2271423339844,
"logps/rejected": -372.828369140625,
"loss": 0.5691,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.6526215672492981,
"rewards/margins": 0.4535134732723236,
"rewards/rejected": -1.1061351299285889,
"step": 100
},
{
"epoch": 0.21,
"eval_logits/chosen": -2.7190756797790527,
"eval_logits/rejected": -2.702101707458496,
"eval_logps/chosen": -322.6109924316406,
"eval_logps/rejected": -376.20880126953125,
"eval_loss": 0.5829024910926819,
"eval_rewards/accuracies": 0.7421875,
"eval_rewards/chosen": -0.6557134985923767,
"eval_rewards/margins": 0.5328419208526611,
"eval_rewards/rejected": -1.188555359840393,
"eval_runtime": 53.0851,
"eval_samples_per_second": 37.675,
"eval_steps_per_second": 0.603,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 4.747874028753375e-07,
"logits/chosen": -2.667227268218994,
"logits/rejected": -2.6603758335113525,
"logps/chosen": -321.42108154296875,
"logps/rejected": -396.7526550292969,
"loss": 0.5384,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.5715780258178711,
"rewards/margins": 0.6688358187675476,
"rewards/rejected": -1.2404139041900635,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 4.662012913161997e-07,
"logits/chosen": -2.622821807861328,
"logits/rejected": -2.583700656890869,
"logps/chosen": -340.69219970703125,
"logps/rejected": -375.4017333984375,
"loss": 0.5579,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.7315243482589722,
"rewards/margins": 0.5486994981765747,
"rewards/rejected": -1.2802238464355469,
"step": 120
},
{
"epoch": 0.27,
"learning_rate": 4.5646165232345103e-07,
"logits/chosen": -2.5822339057922363,
"logits/rejected": -2.547309398651123,
"logps/chosen": -359.7410583496094,
"logps/rejected": -351.17999267578125,
"loss": 0.5523,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.6760958433151245,
"rewards/margins": 0.4332718849182129,
"rewards/rejected": -1.1093676090240479,
"step": 130
},
{
"epoch": 0.29,
"learning_rate": 4.456204510851956e-07,
"logits/chosen": -2.458064079284668,
"logits/rejected": -2.434985637664795,
"logps/chosen": -344.94622802734375,
"logps/rejected": -373.15277099609375,
"loss": 0.5431,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.9658713340759277,
"rewards/margins": 0.568038821220398,
"rewards/rejected": -1.5339101552963257,
"step": 140
},
{
"epoch": 0.31,
"learning_rate": 4.337355301007335e-07,
"logits/chosen": -2.430382490158081,
"logits/rejected": -2.411181926727295,
"logps/chosen": -362.24664306640625,
"logps/rejected": -394.7173767089844,
"loss": 0.541,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.970133900642395,
"rewards/margins": 0.5773912668228149,
"rewards/rejected": -1.5475252866744995,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 4.2087030056579986e-07,
"logits/chosen": -2.3705012798309326,
"logits/rejected": -2.3451476097106934,
"logps/chosen": -340.9483947753906,
"logps/rejected": -381.2392883300781,
"loss": 0.5488,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.7996856570243835,
"rewards/margins": 0.6973718404769897,
"rewards/rejected": -1.497057557106018,
"step": 160
},
{
"epoch": 0.36,
"learning_rate": 4.070934040463998e-07,
"logits/chosen": -2.298063278198242,
"logits/rejected": -2.2643802165985107,
"logps/chosen": -356.18292236328125,
"logps/rejected": -401.3460998535156,
"loss": 0.5395,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.8752641677856445,
"rewards/margins": 0.6319175958633423,
"rewards/rejected": -1.5071817636489868,
"step": 170
},
{
"epoch": 0.38,
"learning_rate": 3.9247834624635404e-07,
"logits/chosen": -2.3489673137664795,
"logits/rejected": -2.294405937194824,
"logps/chosen": -366.259765625,
"logps/rejected": -413.059326171875,
"loss": 0.5228,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.8981040716171265,
"rewards/margins": 0.7530413866043091,
"rewards/rejected": -1.651145339012146,
"step": 180
},
{
"epoch": 0.4,
"learning_rate": 3.7710310482256523e-07,
"logits/chosen": -2.22472882270813,
"logits/rejected": -2.1942319869995117,
"logps/chosen": -390.96893310546875,
"logps/rejected": -435.68634033203125,
"loss": 0.5221,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.3375661373138428,
"rewards/margins": 0.6510864496231079,
"rewards/rejected": -1.9886524677276611,
"step": 190
},
{
"epoch": 0.42,
"learning_rate": 3.610497133404795e-07,
"logits/chosen": -2.330658197402954,
"logits/rejected": -2.253397226333618,
"logps/chosen": -424.68511962890625,
"logps/rejected": -460.4125061035156,
"loss": 0.5446,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.9010859727859497,
"rewards/margins": 0.9040181040763855,
"rewards/rejected": -1.8051040172576904,
"step": 200
},
{
"epoch": 0.42,
"eval_logits/chosen": -2.273806571960449,
"eval_logits/rejected": -2.2433524131774902,
"eval_logps/chosen": -338.0599365234375,
"eval_logps/rejected": -420.1078186035156,
"eval_loss": 0.5300609469413757,
"eval_rewards/accuracies": 0.78125,
"eval_rewards/chosen": -0.810202956199646,
"eval_rewards/margins": 0.8173429369926453,
"eval_rewards/rejected": -1.6275460720062256,
"eval_runtime": 53.0552,
"eval_samples_per_second": 37.697,
"eval_steps_per_second": 0.603,
"step": 200
},
{
"epoch": 0.44,
"learning_rate": 3.4440382358952115e-07,
"logits/chosen": -2.26928448677063,
"logits/rejected": -2.201911449432373,
"logps/chosen": -353.4331970214844,
"logps/rejected": -383.96044921875,
"loss": 0.5455,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.8622655868530273,
"rewards/margins": 0.5730525255203247,
"rewards/rejected": -1.4353179931640625,
"step": 210
},
{
"epoch": 0.46,
"learning_rate": 3.272542485937368e-07,
"logits/chosen": -2.2439053058624268,
"logits/rejected": -2.206618070602417,
"logps/chosen": -370.7458190917969,
"logps/rejected": -391.848388671875,
"loss": 0.5253,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.7618538737297058,
"rewards/margins": 0.7462855577468872,
"rewards/rejected": -1.5081393718719482,
"step": 220
},
{
"epoch": 0.48,
"learning_rate": 3.096924887558854e-07,
"logits/chosen": -2.1762518882751465,
"logits/rejected": -2.1476693153381348,
"logps/chosen": -382.38946533203125,
"logps/rejected": -465.69561767578125,
"loss": 0.5132,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.005793571472168,
"rewards/margins": 0.7425030469894409,
"rewards/rejected": -1.7482967376708984,
"step": 230
},
{
"epoch": 0.5,
"learning_rate": 2.9181224366319943e-07,
"logits/chosen": -2.1192374229431152,
"logits/rejected": -2.0674259662628174,
"logps/chosen": -391.3011474609375,
"logps/rejected": -484.4254455566406,
"loss": 0.5263,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.375982642173767,
"rewards/margins": 0.8829982876777649,
"rewards/rejected": -2.2589809894561768,
"step": 240
},
{
"epoch": 0.52,
"learning_rate": 2.7370891215954565e-07,
"logits/chosen": -2.1064059734344482,
"logits/rejected": -2.0222904682159424,
"logps/chosen": -397.3945007324219,
"logps/rejected": -454.42340087890625,
"loss": 0.5111,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.3846924304962158,
"rewards/margins": 0.8052938580513,
"rewards/rejected": -2.18998646736145,
"step": 250
},
{
"epoch": 0.54,
"learning_rate": 2.55479083351317e-07,
"logits/chosen": -2.0734519958496094,
"logits/rejected": -2.041645050048828,
"logps/chosen": -403.8518371582031,
"logps/rejected": -443.9764099121094,
"loss": 0.5362,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.352430820465088,
"rewards/margins": 0.6026407480239868,
"rewards/rejected": -1.9550716876983643,
"step": 260
},
{
"epoch": 0.56,
"learning_rate": 2.3722002126275822e-07,
"logits/chosen": -2.0378193855285645,
"logits/rejected": -2.006934881210327,
"logps/chosen": -402.4918518066406,
"logps/rejected": -457.62811279296875,
"loss": 0.5152,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.2763839960098267,
"rewards/margins": 0.744287371635437,
"rewards/rejected": -2.0206713676452637,
"step": 270
},
{
"epoch": 0.59,
"learning_rate": 2.19029145890313e-07,
"logits/chosen": -2.033855438232422,
"logits/rejected": -1.9725334644317627,
"logps/chosen": -366.2498474121094,
"logps/rejected": -433.2369079589844,
"loss": 0.5284,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -1.2484426498413086,
"rewards/margins": 0.7818558812141418,
"rewards/rejected": -2.0302984714508057,
"step": 280
},
{
"epoch": 0.61,
"learning_rate": 2.0100351342479216e-07,
"logits/chosen": -2.018475294113159,
"logits/rejected": -1.949302077293396,
"logps/chosen": -367.6812438964844,
"logps/rejected": -429.4832458496094,
"loss": 0.5041,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -1.210700273513794,
"rewards/margins": 0.7417057752609253,
"rewards/rejected": -1.9524061679840088,
"step": 290
},
{
"epoch": 0.63,
"learning_rate": 1.8323929841460178e-07,
"logits/chosen": -2.01090145111084,
"logits/rejected": -1.9497419595718384,
"logps/chosen": -396.8717956542969,
"logps/rejected": -473.7056579589844,
"loss": 0.5094,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.406031847000122,
"rewards/margins": 0.7575126886367798,
"rewards/rejected": -2.1635446548461914,
"step": 300
},
{
"epoch": 0.63,
"eval_logits/chosen": -2.0371742248535156,
"eval_logits/rejected": -1.9920138120651245,
"eval_logps/chosen": -394.5289611816406,
"eval_logps/rejected": -488.7168884277344,
"eval_loss": 0.514569878578186,
"eval_rewards/accuracies": 0.765625,
"eval_rewards/chosen": -1.374893307685852,
"eval_rewards/margins": 0.9387427568435669,
"eval_rewards/rejected": -2.313636064529419,
"eval_runtime": 53.0256,
"eval_samples_per_second": 37.718,
"eval_steps_per_second": 0.603,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 1.6583128063291573e-07,
"logits/chosen": -1.9743964672088623,
"logits/rejected": -1.8795156478881836,
"logps/chosen": -401.95098876953125,
"logps/rejected": -473.07586669921875,
"loss": 0.4934,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -1.2877211570739746,
"rewards/margins": 0.9713341593742371,
"rewards/rejected": -2.2590553760528564,
"step": 310
},
{
"epoch": 0.67,
"learning_rate": 1.488723393865766e-07,
"logits/chosen": -2.009753704071045,
"logits/rejected": -1.9591827392578125,
"logps/chosen": -424.99468994140625,
"logps/rejected": -447.1941833496094,
"loss": 0.5096,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -1.286123275756836,
"rewards/margins": 0.8317530751228333,
"rewards/rejected": -2.1178765296936035,
"step": 320
},
{
"epoch": 0.69,
"learning_rate": 1.3245295796480788e-07,
"logits/chosen": -2.021080493927002,
"logits/rejected": -1.9558074474334717,
"logps/chosen": -386.18670654296875,
"logps/rejected": -441.7825622558594,
"loss": 0.5108,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.3397352695465088,
"rewards/margins": 0.7417815923690796,
"rewards/rejected": -2.081516742706299,
"step": 330
},
{
"epoch": 0.71,
"learning_rate": 1.1666074087171627e-07,
"logits/chosen": -1.9722802639007568,
"logits/rejected": -1.9194387197494507,
"logps/chosen": -390.5426330566406,
"logps/rejected": -470.82958984375,
"loss": 0.5234,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.3929929733276367,
"rewards/margins": 0.8960745930671692,
"rewards/rejected": -2.2890677452087402,
"step": 340
},
{
"epoch": 0.73,
"learning_rate": 1.0157994641835734e-07,
"logits/chosen": -1.9723567962646484,
"logits/rejected": -1.9255586862564087,
"logps/chosen": -353.8846740722656,
"logps/rejected": -450.743408203125,
"loss": 0.4932,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.279344916343689,
"rewards/margins": 0.8211178779602051,
"rewards/rejected": -2.1004626750946045,
"step": 350
},
{
"epoch": 0.75,
"learning_rate": 8.729103716819111e-08,
"logits/chosen": -1.9666541814804077,
"logits/rejected": -1.8845767974853516,
"logps/chosen": -398.8426818847656,
"logps/rejected": -476.284912109375,
"loss": 0.4746,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.3157447576522827,
"rewards/margins": 1.0537182092666626,
"rewards/rejected": -2.3694632053375244,
"step": 360
},
{
"epoch": 0.77,
"learning_rate": 7.387025063449081e-08,
"logits/chosen": -1.922550916671753,
"logits/rejected": -1.8920552730560303,
"logps/chosen": -385.36676025390625,
"logps/rejected": -481.94219970703125,
"loss": 0.4884,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.3011926412582397,
"rewards/margins": 0.9755498766899109,
"rewards/rejected": -2.2767422199249268,
"step": 370
},
{
"epoch": 0.79,
"learning_rate": 6.138919252022435e-08,
"logits/chosen": -1.964270830154419,
"logits/rejected": -1.9201478958129883,
"logps/chosen": -422.5608825683594,
"logps/rejected": -470.6983337402344,
"loss": 0.4982,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -1.5673155784606934,
"rewards/margins": 0.7714017629623413,
"rewards/rejected": -2.338717460632324,
"step": 380
},
{
"epoch": 0.82,
"learning_rate": 4.991445467064689e-08,
"logits/chosen": -1.9478000402450562,
"logits/rejected": -1.9133468866348267,
"logps/chosen": -396.41827392578125,
"logps/rejected": -474.91168212890625,
"loss": 0.4906,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.392188549041748,
"rewards/margins": 0.9372695684432983,
"rewards/rejected": -2.329457998275757,
"step": 390
},
{
"epoch": 0.84,
"learning_rate": 3.9507259776993954e-08,
"logits/chosen": -2.014727830886841,
"logits/rejected": -1.9725955724716187,
"logps/chosen": -433.93402099609375,
"logps/rejected": -496.03948974609375,
"loss": 0.5086,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.337740182876587,
"rewards/margins": 0.89354008436203,
"rewards/rejected": -2.231280565261841,
"step": 400
},
{
"epoch": 0.84,
"eval_logits/chosen": -2.0268406867980957,
"eval_logits/rejected": -1.9826929569244385,
"eval_logps/chosen": -388.0500183105469,
"eval_logps/rejected": -484.0532531738281,
"eval_loss": 0.5034094452857971,
"eval_rewards/accuracies": 0.76953125,
"eval_rewards/chosen": -1.3101037740707397,
"eval_rewards/margins": 0.9568960070610046,
"eval_rewards/rejected": -2.2669999599456787,
"eval_runtime": 53.0612,
"eval_samples_per_second": 37.692,
"eval_steps_per_second": 0.603,
"step": 400
},
{
"epoch": 0.86,
"learning_rate": 3.022313472693447e-08,
"logits/chosen": -1.9932317733764648,
"logits/rejected": -1.9669653177261353,
"logps/chosen": -391.12274169921875,
"logps/rejected": -434.02191162109375,
"loss": 0.5097,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.3643336296081543,
"rewards/margins": 0.6512311100959778,
"rewards/rejected": -2.0155646800994873,
"step": 410
},
{
"epoch": 0.88,
"learning_rate": 2.2111614344599684e-08,
"logits/chosen": -2.064518928527832,
"logits/rejected": -1.9801286458969116,
"logps/chosen": -398.71868896484375,
"logps/rejected": -479.0596618652344,
"loss": 0.4848,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -1.2102010250091553,
"rewards/margins": 1.0837668180465698,
"rewards/rejected": -2.2939677238464355,
"step": 420
},
{
"epoch": 0.9,
"learning_rate": 1.521597710086439e-08,
"logits/chosen": -2.049975633621216,
"logits/rejected": -1.996206521987915,
"logps/chosen": -411.322509765625,
"logps/rejected": -459.893798828125,
"loss": 0.492,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.363693356513977,
"rewards/margins": 0.7787196636199951,
"rewards/rejected": -2.1424131393432617,
"step": 430
},
{
"epoch": 0.92,
"learning_rate": 9.57301420397924e-09,
"logits/chosen": -1.9763036966323853,
"logits/rejected": -1.950627326965332,
"logps/chosen": -419.8603515625,
"logps/rejected": -458.17822265625,
"loss": 0.4956,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.4159471988677979,
"rewards/margins": 0.7630717158317566,
"rewards/rejected": -2.179018974304199,
"step": 440
},
{
"epoch": 0.94,
"learning_rate": 5.212833302556258e-09,
"logits/chosen": -2.0032382011413574,
"logits/rejected": -1.9466326236724854,
"logps/chosen": -413.5555114746094,
"logps/rejected": -492.5790100097656,
"loss": 0.4873,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -1.3693794012069702,
"rewards/margins": 0.907731831073761,
"rewards/rejected": -2.277111291885376,
"step": 450
},
{
"epoch": 0.96,
"learning_rate": 2.158697848236607e-09,
"logits/chosen": -1.964643120765686,
"logits/rejected": -1.9253301620483398,
"logps/chosen": -396.19683837890625,
"logps/rejected": -466.6449279785156,
"loss": 0.4853,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.3089487552642822,
"rewards/margins": 0.8880389332771301,
"rewards/rejected": -2.1969876289367676,
"step": 460
},
{
"epoch": 0.98,
"learning_rate": 4.269029751107489e-10,
"logits/chosen": -2.0099263191223145,
"logits/rejected": -1.9355924129486084,
"logps/chosen": -420.68408203125,
"logps/rejected": -471.353515625,
"loss": 0.4977,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.3482139110565186,
"rewards/margins": 1.0080922842025757,
"rewards/rejected": -2.356306314468384,
"step": 470
},
{
"epoch": 1.0,
"step": 478,
"total_flos": 0.0,
"train_loss": 0.5420855548092511,
"train_runtime": 4282.9885,
"train_samples_per_second": 14.274,
"train_steps_per_second": 0.112
}
],
"logging_steps": 10,
"max_steps": 478,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}