zephyr-7b-dpo-full / trainer_state.json
wzhouad's picture
Model save
1edbb80 verified
raw
history blame
No virus
25.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 478,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -2.8386030197143555,
"logits/rejected": -2.823939323425293,
"logps/chosen": -324.3727722167969,
"logps/rejected": -231.64634704589844,
"loss": 0.2826,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -2.8247194290161133,
"logits/rejected": -2.750765800476074,
"logps/chosen": -275.7482604980469,
"logps/rejected": -253.39404296875,
"loss": 0.2847,
"rewards/accuracies": 0.4513888955116272,
"rewards/chosen": 0.00012852638610638678,
"rewards/margins": -0.0004244056181050837,
"rewards/rejected": 0.0005529320333153009,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -2.7973198890686035,
"logits/rejected": -2.779845714569092,
"logps/chosen": -261.89483642578125,
"logps/rejected": -257.04736328125,
"loss": 0.2856,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.0005934558575972915,
"rewards/margins": 0.0017298649763688445,
"rewards/rejected": -0.001136409118771553,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.783583164215088,
"logits/rejected": -2.777108907699585,
"logps/chosen": -294.8003234863281,
"logps/rejected": -259.10296630859375,
"loss": 0.2889,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.0027175676077604294,
"rewards/margins": 0.011478239670395851,
"rewards/rejected": -0.008760671131312847,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -2.802429676055908,
"logits/rejected": -2.7715487480163574,
"logps/chosen": -284.63958740234375,
"logps/rejected": -264.9128112792969,
"loss": 0.2823,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.007285858038812876,
"rewards/margins": 0.022248882800340652,
"rewards/rejected": -0.029534736648201942,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 4.999733114418725e-07,
"logits/chosen": -2.781130790710449,
"logits/rejected": -2.718773126602173,
"logps/chosen": -284.725341796875,
"logps/rejected": -255.60073852539062,
"loss": 0.2671,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.023446276783943176,
"rewards/margins": 0.06585647165775299,
"rewards/rejected": -0.08930274099111557,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 4.990398100856366e-07,
"logits/chosen": -2.8104348182678223,
"logits/rejected": -2.788311243057251,
"logps/chosen": -297.0313720703125,
"logps/rejected": -266.0052795410156,
"loss": 0.2428,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.10381942987442017,
"rewards/margins": 0.084610715508461,
"rewards/rejected": -0.18843016028404236,
"step": 60
},
{
"epoch": 0.15,
"learning_rate": 4.967775735898179e-07,
"logits/chosen": -2.704342842102051,
"logits/rejected": -2.6683297157287598,
"logps/chosen": -276.36395263671875,
"logps/rejected": -271.9848327636719,
"loss": 0.2192,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.16314834356307983,
"rewards/margins": 0.17039458453655243,
"rewards/rejected": -0.33354294300079346,
"step": 70
},
{
"epoch": 0.17,
"learning_rate": 4.931986719649298e-07,
"logits/chosen": -2.7222819328308105,
"logits/rejected": -2.7045040130615234,
"logps/chosen": -298.33831787109375,
"logps/rejected": -293.718017578125,
"loss": 0.1999,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.30510228872299194,
"rewards/margins": 0.1686253696680069,
"rewards/rejected": -0.47372761368751526,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 4.883222001996351e-07,
"logits/chosen": -2.759632110595703,
"logits/rejected": -2.734144449234009,
"logps/chosen": -331.0855712890625,
"logps/rejected": -346.59991455078125,
"loss": 0.1682,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.45922285318374634,
"rewards/margins": 0.3295659124851227,
"rewards/rejected": -0.7887887954711914,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 4.821741763807186e-07,
"logits/chosen": -2.6711103916168213,
"logits/rejected": -2.664060115814209,
"logps/chosen": -336.68927001953125,
"logps/rejected": -331.12799072265625,
"loss": 0.1643,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.4235810339450836,
"rewards/margins": 0.24597103893756866,
"rewards/rejected": -0.6695520281791687,
"step": 100
},
{
"epoch": 0.21,
"eval_logits/chosen": -2.790248394012451,
"eval_logits/rejected": -2.7691245079040527,
"eval_logps/chosen": -297.79962158203125,
"eval_logps/rejected": -337.0708923339844,
"eval_loss": 0.15584461390972137,
"eval_rewards/accuracies": 0.74609375,
"eval_rewards/chosen": -0.4075998365879059,
"eval_rewards/margins": 0.38957637548446655,
"eval_rewards/rejected": -0.79717618227005,
"eval_runtime": 53.5413,
"eval_samples_per_second": 37.354,
"eval_steps_per_second": 0.598,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 4.747874028753375e-07,
"logits/chosen": -2.7711846828460693,
"logits/rejected": -2.7162532806396484,
"logps/chosen": -322.896484375,
"logps/rejected": -321.31158447265625,
"loss": 0.1423,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.4842161536216736,
"rewards/margins": 0.4429897367954254,
"rewards/rejected": -0.9272058606147766,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 4.662012913161997e-07,
"logits/chosen": -2.6857857704162598,
"logits/rejected": -2.664361000061035,
"logps/chosen": -340.3297119140625,
"logps/rejected": -381.2372741699219,
"loss": 0.1325,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.7687980532646179,
"rewards/margins": 0.4345701336860657,
"rewards/rejected": -1.203368067741394,
"step": 120
},
{
"epoch": 0.27,
"learning_rate": 4.5646165232345103e-07,
"logits/chosen": -2.679908037185669,
"logits/rejected": -2.661154270172119,
"logps/chosen": -350.47247314453125,
"logps/rejected": -386.91656494140625,
"loss": 0.1191,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.8761329650878906,
"rewards/margins": 0.5328775644302368,
"rewards/rejected": -1.4090105295181274,
"step": 130
},
{
"epoch": 0.29,
"learning_rate": 4.456204510851956e-07,
"logits/chosen": -2.622180461883545,
"logits/rejected": -2.604306697845459,
"logps/chosen": -338.3455505371094,
"logps/rejected": -356.08990478515625,
"loss": 0.1244,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.8404749035835266,
"rewards/margins": 0.39392346143722534,
"rewards/rejected": -1.2343984842300415,
"step": 140
},
{
"epoch": 0.31,
"learning_rate": 4.337355301007335e-07,
"logits/chosen": -2.541025400161743,
"logits/rejected": -2.5166730880737305,
"logps/chosen": -345.60760498046875,
"logps/rejected": -372.7431335449219,
"loss": 0.1258,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.7515507936477661,
"rewards/margins": 0.42334675788879395,
"rewards/rejected": -1.17489755153656,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 4.2087030056579986e-07,
"logits/chosen": -2.5379557609558105,
"logits/rejected": -2.528388261795044,
"logps/chosen": -345.44384765625,
"logps/rejected": -388.0000915527344,
"loss": 0.1209,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.8115363121032715,
"rewards/margins": 0.39176443219184875,
"rewards/rejected": -1.2033007144927979,
"step": 160
},
{
"epoch": 0.36,
"learning_rate": 4.070934040463998e-07,
"logits/chosen": -2.508551836013794,
"logits/rejected": -2.4616193771362305,
"logps/chosen": -371.34246826171875,
"logps/rejected": -380.660888671875,
"loss": 0.1105,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.9994179606437683,
"rewards/margins": 0.4626193940639496,
"rewards/rejected": -1.4620373249053955,
"step": 170
},
{
"epoch": 0.38,
"learning_rate": 3.9247834624635404e-07,
"logits/chosen": -2.51965594291687,
"logits/rejected": -2.5132761001586914,
"logps/chosen": -332.5484924316406,
"logps/rejected": -384.0250549316406,
"loss": 0.1124,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.9845203161239624,
"rewards/margins": 0.4795452654361725,
"rewards/rejected": -1.4640657901763916,
"step": 180
},
{
"epoch": 0.4,
"learning_rate": 3.7710310482256523e-07,
"logits/chosen": -2.55594539642334,
"logits/rejected": -2.5516602993011475,
"logps/chosen": -353.2313537597656,
"logps/rejected": -384.13861083984375,
"loss": 0.1058,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.9792869687080383,
"rewards/margins": 0.40680208802223206,
"rewards/rejected": -1.3860890865325928,
"step": 190
},
{
"epoch": 0.42,
"learning_rate": 3.610497133404795e-07,
"logits/chosen": -2.5069775581359863,
"logits/rejected": -2.5189363956451416,
"logps/chosen": -398.85382080078125,
"logps/rejected": -431.91455078125,
"loss": 0.1003,
"rewards/accuracies": 0.65625,
"rewards/chosen": -1.2246520519256592,
"rewards/margins": 0.3960326015949249,
"rewards/rejected": -1.6206846237182617,
"step": 200
},
{
"epoch": 0.42,
"eval_logits/chosen": -2.5340371131896973,
"eval_logits/rejected": -2.513735294342041,
"eval_logps/chosen": -384.15533447265625,
"eval_logps/rejected": -450.7552185058594,
"eval_loss": 0.0996941402554512,
"eval_rewards/accuracies": 0.703125,
"eval_rewards/chosen": -1.2711572647094727,
"eval_rewards/margins": 0.6628624200820923,
"eval_rewards/rejected": -1.934019684791565,
"eval_runtime": 53.511,
"eval_samples_per_second": 37.375,
"eval_steps_per_second": 0.598,
"step": 200
},
{
"epoch": 0.44,
"learning_rate": 3.4440382358952115e-07,
"logits/chosen": -2.455578565597534,
"logits/rejected": -2.446720838546753,
"logps/chosen": -391.07830810546875,
"logps/rejected": -428.397705078125,
"loss": 0.1038,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -1.3349438905715942,
"rewards/margins": 0.5562185645103455,
"rewards/rejected": -1.8911622762680054,
"step": 210
},
{
"epoch": 0.46,
"learning_rate": 3.272542485937368e-07,
"logits/chosen": -2.551090955734253,
"logits/rejected": -2.529384136199951,
"logps/chosen": -385.6699523925781,
"logps/rejected": -405.87615966796875,
"loss": 0.1138,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -1.1878398656845093,
"rewards/margins": 0.44445449113845825,
"rewards/rejected": -1.6322942972183228,
"step": 220
},
{
"epoch": 0.48,
"learning_rate": 3.096924887558854e-07,
"logits/chosen": -2.5678157806396484,
"logits/rejected": -2.5255255699157715,
"logps/chosen": -411.07745361328125,
"logps/rejected": -404.2816467285156,
"loss": 0.1149,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.9748584628105164,
"rewards/margins": 0.47213855385780334,
"rewards/rejected": -1.446997046470642,
"step": 230
},
{
"epoch": 0.5,
"learning_rate": 2.9181224366319943e-07,
"logits/chosen": -2.4429595470428467,
"logits/rejected": -2.4049136638641357,
"logps/chosen": -386.62530517578125,
"logps/rejected": -397.7767028808594,
"loss": 0.1092,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.1572192907333374,
"rewards/margins": 0.4687051773071289,
"rewards/rejected": -1.6259244680404663,
"step": 240
},
{
"epoch": 0.52,
"learning_rate": 2.7370891215954565e-07,
"logits/chosen": -2.400578260421753,
"logits/rejected": -2.3846592903137207,
"logps/chosen": -413.29266357421875,
"logps/rejected": -441.35748291015625,
"loss": 0.0928,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -1.3619310855865479,
"rewards/margins": 0.6331827044487,
"rewards/rejected": -1.9951136112213135,
"step": 250
},
{
"epoch": 0.54,
"learning_rate": 2.55479083351317e-07,
"logits/chosen": -2.435859203338623,
"logits/rejected": -2.4128081798553467,
"logps/chosen": -418.8388671875,
"logps/rejected": -462.96282958984375,
"loss": 0.097,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -1.2928632497787476,
"rewards/margins": 0.7572471499443054,
"rewards/rejected": -2.050110340118408,
"step": 260
},
{
"epoch": 0.56,
"learning_rate": 2.3722002126275822e-07,
"logits/chosen": -2.3607535362243652,
"logits/rejected": -2.3512327671051025,
"logps/chosen": -393.47845458984375,
"logps/rejected": -424.65692138671875,
"loss": 0.0942,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -1.2448090314865112,
"rewards/margins": 0.5817195177078247,
"rewards/rejected": -1.8265281915664673,
"step": 270
},
{
"epoch": 0.59,
"learning_rate": 2.19029145890313e-07,
"logits/chosen": -2.384596586227417,
"logits/rejected": -2.357322931289673,
"logps/chosen": -401.50152587890625,
"logps/rejected": -447.069580078125,
"loss": 0.0894,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.463189721107483,
"rewards/margins": 0.627885103225708,
"rewards/rejected": -2.0910747051239014,
"step": 280
},
{
"epoch": 0.61,
"learning_rate": 2.0100351342479216e-07,
"logits/chosen": -2.3855137825012207,
"logits/rejected": -2.334260940551758,
"logps/chosen": -441.15118408203125,
"logps/rejected": -456.8433532714844,
"loss": 0.0895,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.5582000017166138,
"rewards/margins": 0.6062092185020447,
"rewards/rejected": -2.1644091606140137,
"step": 290
},
{
"epoch": 0.63,
"learning_rate": 1.8323929841460178e-07,
"logits/chosen": -2.4264094829559326,
"logits/rejected": -2.403550624847412,
"logps/chosen": -412.9310607910156,
"logps/rejected": -471.4112854003906,
"loss": 0.0953,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.2313965559005737,
"rewards/margins": 0.6434706449508667,
"rewards/rejected": -1.8748672008514404,
"step": 300
},
{
"epoch": 0.63,
"eval_logits/chosen": -2.4030282497406006,
"eval_logits/rejected": -2.3836517333984375,
"eval_logps/chosen": -377.3980712890625,
"eval_logps/rejected": -449.78228759765625,
"eval_loss": 0.10235561430454254,
"eval_rewards/accuracies": 0.75390625,
"eval_rewards/chosen": -1.2035841941833496,
"eval_rewards/margins": 0.7207058072090149,
"eval_rewards/rejected": -1.9242901802062988,
"eval_runtime": 53.5723,
"eval_samples_per_second": 37.333,
"eval_steps_per_second": 0.597,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 1.6583128063291573e-07,
"logits/chosen": -2.3959908485412598,
"logits/rejected": -2.366027593612671,
"logps/chosen": -389.87841796875,
"logps/rejected": -428.79150390625,
"loss": 0.0967,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.2791574001312256,
"rewards/margins": 0.5353385806083679,
"rewards/rejected": -1.8144958019256592,
"step": 310
},
{
"epoch": 0.67,
"learning_rate": 1.488723393865766e-07,
"logits/chosen": -2.315176010131836,
"logits/rejected": -2.303180694580078,
"logps/chosen": -419.81304931640625,
"logps/rejected": -451.9205627441406,
"loss": 0.0913,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -1.3704838752746582,
"rewards/margins": 0.4932515621185303,
"rewards/rejected": -1.8637354373931885,
"step": 320
},
{
"epoch": 0.69,
"learning_rate": 1.3245295796480788e-07,
"logits/chosen": -2.3155629634857178,
"logits/rejected": -2.306206226348877,
"logps/chosen": -373.34173583984375,
"logps/rejected": -451.43304443359375,
"loss": 0.094,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.2377197742462158,
"rewards/margins": 0.7202552556991577,
"rewards/rejected": -1.957975149154663,
"step": 330
},
{
"epoch": 0.71,
"learning_rate": 1.1666074087171627e-07,
"logits/chosen": -2.3178515434265137,
"logits/rejected": -2.317112684249878,
"logps/chosen": -421.288330078125,
"logps/rejected": -464.2798767089844,
"loss": 0.1012,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.3072739839553833,
"rewards/margins": 0.6341418027877808,
"rewards/rejected": -1.941415786743164,
"step": 340
},
{
"epoch": 0.73,
"learning_rate": 1.0157994641835734e-07,
"logits/chosen": -2.352154016494751,
"logits/rejected": -2.310459852218628,
"logps/chosen": -371.04180908203125,
"logps/rejected": -418.411376953125,
"loss": 0.0964,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.2016589641571045,
"rewards/margins": 0.6332089900970459,
"rewards/rejected": -1.8348678350448608,
"step": 350
},
{
"epoch": 0.75,
"learning_rate": 8.729103716819111e-08,
"logits/chosen": -2.3340022563934326,
"logits/rejected": -2.2888753414154053,
"logps/chosen": -399.73870849609375,
"logps/rejected": -433.62939453125,
"loss": 0.103,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -1.3542587757110596,
"rewards/margins": 0.6527735590934753,
"rewards/rejected": -2.0070323944091797,
"step": 360
},
{
"epoch": 0.77,
"learning_rate": 7.387025063449081e-08,
"logits/chosen": -2.305725574493408,
"logits/rejected": -2.2590928077697754,
"logps/chosen": -424.70269775390625,
"logps/rejected": -478.83160400390625,
"loss": 0.0832,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.3823884725570679,
"rewards/margins": 0.7607783079147339,
"rewards/rejected": -2.143167018890381,
"step": 370
},
{
"epoch": 0.79,
"learning_rate": 6.138919252022435e-08,
"logits/chosen": -2.3276476860046387,
"logits/rejected": -2.3130292892456055,
"logps/chosen": -431.13568115234375,
"logps/rejected": -477.88824462890625,
"loss": 0.0903,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.4548090696334839,
"rewards/margins": 0.671941876411438,
"rewards/rejected": -2.126750946044922,
"step": 380
},
{
"epoch": 0.82,
"learning_rate": 4.991445467064689e-08,
"logits/chosen": -2.2263472080230713,
"logits/rejected": -2.1942696571350098,
"logps/chosen": -418.37335205078125,
"logps/rejected": -485.0545349121094,
"loss": 0.0883,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.488586187362671,
"rewards/margins": 0.7860161662101746,
"rewards/rejected": -2.2746024131774902,
"step": 390
},
{
"epoch": 0.84,
"learning_rate": 3.9507259776993954e-08,
"logits/chosen": -2.319228410720825,
"logits/rejected": -2.2877087593078613,
"logps/chosen": -417.96875,
"logps/rejected": -461.0101623535156,
"loss": 0.0811,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.4231641292572021,
"rewards/margins": 0.647831916809082,
"rewards/rejected": -2.0709962844848633,
"step": 400
},
{
"epoch": 0.84,
"eval_logits/chosen": -2.3254384994506836,
"eval_logits/rejected": -2.301893472671509,
"eval_logps/chosen": -393.03472900390625,
"eval_logps/rejected": -475.715087890625,
"eval_loss": 0.09447792172431946,
"eval_rewards/accuracies": 0.765625,
"eval_rewards/chosen": -1.3599507808685303,
"eval_rewards/margins": 0.8236675262451172,
"eval_rewards/rejected": -2.1836180686950684,
"eval_runtime": 53.5742,
"eval_samples_per_second": 37.331,
"eval_steps_per_second": 0.597,
"step": 400
},
{
"epoch": 0.86,
"learning_rate": 3.022313472693447e-08,
"logits/chosen": -2.3134891986846924,
"logits/rejected": -2.2576441764831543,
"logps/chosen": -405.07867431640625,
"logps/rejected": -426.08770751953125,
"loss": 0.088,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.304164171218872,
"rewards/margins": 0.7416768074035645,
"rewards/rejected": -2.0458409786224365,
"step": 410
},
{
"epoch": 0.88,
"learning_rate": 2.2111614344599684e-08,
"logits/chosen": -2.3239588737487793,
"logits/rejected": -2.2752654552459717,
"logps/chosen": -434.28118896484375,
"logps/rejected": -482.84234619140625,
"loss": 0.0896,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.4340513944625854,
"rewards/margins": 0.8941879272460938,
"rewards/rejected": -2.3282394409179688,
"step": 420
},
{
"epoch": 0.9,
"learning_rate": 1.521597710086439e-08,
"logits/chosen": -2.278296947479248,
"logits/rejected": -2.2763679027557373,
"logps/chosen": -423.744384765625,
"logps/rejected": -485.7794494628906,
"loss": 0.0868,
"rewards/accuracies": 0.6875,
"rewards/chosen": -1.4879920482635498,
"rewards/margins": 0.6670708656311035,
"rewards/rejected": -2.1550629138946533,
"step": 430
},
{
"epoch": 0.92,
"learning_rate": 9.57301420397924e-09,
"logits/chosen": -2.26120924949646,
"logits/rejected": -2.2485973834991455,
"logps/chosen": -404.76959228515625,
"logps/rejected": -461.03448486328125,
"loss": 0.0892,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -1.4389055967330933,
"rewards/margins": 0.6930050253868103,
"rewards/rejected": -2.131910800933838,
"step": 440
},
{
"epoch": 0.94,
"learning_rate": 5.212833302556258e-09,
"logits/chosen": -2.2681469917297363,
"logits/rejected": -2.275200366973877,
"logps/chosen": -404.1940612792969,
"logps/rejected": -463.80401611328125,
"loss": 0.0902,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.5075231790542603,
"rewards/margins": 0.6551094055175781,
"rewards/rejected": -2.162632703781128,
"step": 450
},
{
"epoch": 0.96,
"learning_rate": 2.158697848236607e-09,
"logits/chosen": -2.2567198276519775,
"logits/rejected": -2.215657949447632,
"logps/chosen": -404.21527099609375,
"logps/rejected": -441.24945068359375,
"loss": 0.0867,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.5105773210525513,
"rewards/margins": 0.5308315753936768,
"rewards/rejected": -2.0414090156555176,
"step": 460
},
{
"epoch": 0.98,
"learning_rate": 4.269029751107489e-10,
"logits/chosen": -2.2435102462768555,
"logits/rejected": -2.2021100521087646,
"logps/chosen": -399.60418701171875,
"logps/rejected": -474.943359375,
"loss": 0.0902,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -1.4095227718353271,
"rewards/margins": 0.8036805391311646,
"rewards/rejected": -2.213203191757202,
"step": 470
},
{
"epoch": 1.0,
"step": 478,
"total_flos": 0.0,
"train_loss": 0.13007899894375183,
"train_runtime": 3956.3918,
"train_samples_per_second": 15.452,
"train_steps_per_second": 0.121
}
],
"logging_steps": 10,
"max_steps": 478,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}