|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": -2.8386030197143555, |
|
"logits/rejected": -2.823939323425293, |
|
"logps/chosen": -324.3727722167969, |
|
"logps/rejected": -231.64634704589844, |
|
"loss": 0.2826, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.8247194290161133, |
|
"logits/rejected": -2.750765800476074, |
|
"logps/chosen": -275.7482604980469, |
|
"logps/rejected": -253.39404296875, |
|
"loss": 0.2847, |
|
"rewards/accuracies": 0.4513888955116272, |
|
"rewards/chosen": 0.00012852638610638678, |
|
"rewards/margins": -0.0004244056181050837, |
|
"rewards/rejected": 0.0005529320333153009, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.7973198890686035, |
|
"logits/rejected": -2.779845714569092, |
|
"logps/chosen": -261.89483642578125, |
|
"logps/rejected": -257.04736328125, |
|
"loss": 0.2856, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0005934558575972915, |
|
"rewards/margins": 0.0017298649763688445, |
|
"rewards/rejected": -0.001136409118771553, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.783583164215088, |
|
"logits/rejected": -2.777108907699585, |
|
"logps/chosen": -294.8003234863281, |
|
"logps/rejected": -259.10296630859375, |
|
"loss": 0.2889, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.0027175676077604294, |
|
"rewards/margins": 0.011478239670395851, |
|
"rewards/rejected": -0.008760671131312847, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.802429676055908, |
|
"logits/rejected": -2.7715487480163574, |
|
"logps/chosen": -284.63958740234375, |
|
"logps/rejected": -264.9128112792969, |
|
"loss": 0.2823, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.007285858038812876, |
|
"rewards/margins": 0.022248882800340652, |
|
"rewards/rejected": -0.029534736648201942, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999733114418725e-07, |
|
"logits/chosen": -2.781130790710449, |
|
"logits/rejected": -2.718773126602173, |
|
"logps/chosen": -284.725341796875, |
|
"logps/rejected": -255.60073852539062, |
|
"loss": 0.2671, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.023446276783943176, |
|
"rewards/margins": 0.06585647165775299, |
|
"rewards/rejected": -0.08930274099111557, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990398100856366e-07, |
|
"logits/chosen": -2.8104348182678223, |
|
"logits/rejected": -2.788311243057251, |
|
"logps/chosen": -297.0313720703125, |
|
"logps/rejected": -266.0052795410156, |
|
"loss": 0.2428, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.10381942987442017, |
|
"rewards/margins": 0.084610715508461, |
|
"rewards/rejected": -0.18843016028404236, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967775735898179e-07, |
|
"logits/chosen": -2.704342842102051, |
|
"logits/rejected": -2.6683297157287598, |
|
"logps/chosen": -276.36395263671875, |
|
"logps/rejected": -271.9848327636719, |
|
"loss": 0.2192, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.16314834356307983, |
|
"rewards/margins": 0.17039458453655243, |
|
"rewards/rejected": -0.33354294300079346, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931986719649298e-07, |
|
"logits/chosen": -2.7222819328308105, |
|
"logits/rejected": -2.7045040130615234, |
|
"logps/chosen": -298.33831787109375, |
|
"logps/rejected": -293.718017578125, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.30510228872299194, |
|
"rewards/margins": 0.1686253696680069, |
|
"rewards/rejected": -0.47372761368751526, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.883222001996351e-07, |
|
"logits/chosen": -2.759632110595703, |
|
"logits/rejected": -2.734144449234009, |
|
"logps/chosen": -331.0855712890625, |
|
"logps/rejected": -346.59991455078125, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.45922285318374634, |
|
"rewards/margins": 0.3295659124851227, |
|
"rewards/rejected": -0.7887887954711914, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821741763807186e-07, |
|
"logits/chosen": -2.6711103916168213, |
|
"logits/rejected": -2.664060115814209, |
|
"logps/chosen": -336.68927001953125, |
|
"logps/rejected": -331.12799072265625, |
|
"loss": 0.1643, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.4235810339450836, |
|
"rewards/margins": 0.24597103893756866, |
|
"rewards/rejected": -0.6695520281791687, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.790248394012451, |
|
"eval_logits/rejected": -2.7691245079040527, |
|
"eval_logps/chosen": -297.79962158203125, |
|
"eval_logps/rejected": -337.0708923339844, |
|
"eval_loss": 0.15584461390972137, |
|
"eval_rewards/accuracies": 0.74609375, |
|
"eval_rewards/chosen": -0.4075998365879059, |
|
"eval_rewards/margins": 0.38957637548446655, |
|
"eval_rewards/rejected": -0.79717618227005, |
|
"eval_runtime": 53.5413, |
|
"eval_samples_per_second": 37.354, |
|
"eval_steps_per_second": 0.598, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747874028753375e-07, |
|
"logits/chosen": -2.7711846828460693, |
|
"logits/rejected": -2.7162532806396484, |
|
"logps/chosen": -322.896484375, |
|
"logps/rejected": -321.31158447265625, |
|
"loss": 0.1423, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4842161536216736, |
|
"rewards/margins": 0.4429897367954254, |
|
"rewards/rejected": -0.9272058606147766, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.662012913161997e-07, |
|
"logits/chosen": -2.6857857704162598, |
|
"logits/rejected": -2.664361000061035, |
|
"logps/chosen": -340.3297119140625, |
|
"logps/rejected": -381.2372741699219, |
|
"loss": 0.1325, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7687980532646179, |
|
"rewards/margins": 0.4345701336860657, |
|
"rewards/rejected": -1.203368067741394, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5646165232345103e-07, |
|
"logits/chosen": -2.679908037185669, |
|
"logits/rejected": -2.661154270172119, |
|
"logps/chosen": -350.47247314453125, |
|
"logps/rejected": -386.91656494140625, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8761329650878906, |
|
"rewards/margins": 0.5328775644302368, |
|
"rewards/rejected": -1.4090105295181274, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.456204510851956e-07, |
|
"logits/chosen": -2.622180461883545, |
|
"logits/rejected": -2.604306697845459, |
|
"logps/chosen": -338.3455505371094, |
|
"logps/rejected": -356.08990478515625, |
|
"loss": 0.1244, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8404749035835266, |
|
"rewards/margins": 0.39392346143722534, |
|
"rewards/rejected": -1.2343984842300415, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.337355301007335e-07, |
|
"logits/chosen": -2.541025400161743, |
|
"logits/rejected": -2.5166730880737305, |
|
"logps/chosen": -345.60760498046875, |
|
"logps/rejected": -372.7431335449219, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7515507936477661, |
|
"rewards/margins": 0.42334675788879395, |
|
"rewards/rejected": -1.17489755153656, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2087030056579986e-07, |
|
"logits/chosen": -2.5379557609558105, |
|
"logits/rejected": -2.528388261795044, |
|
"logps/chosen": -345.44384765625, |
|
"logps/rejected": -388.0000915527344, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8115363121032715, |
|
"rewards/margins": 0.39176443219184875, |
|
"rewards/rejected": -1.2033007144927979, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.070934040463998e-07, |
|
"logits/chosen": -2.508551836013794, |
|
"logits/rejected": -2.4616193771362305, |
|
"logps/chosen": -371.34246826171875, |
|
"logps/rejected": -380.660888671875, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9994179606437683, |
|
"rewards/margins": 0.4626193940639496, |
|
"rewards/rejected": -1.4620373249053955, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9247834624635404e-07, |
|
"logits/chosen": -2.51965594291687, |
|
"logits/rejected": -2.5132761001586914, |
|
"logps/chosen": -332.5484924316406, |
|
"logps/rejected": -384.0250549316406, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9845203161239624, |
|
"rewards/margins": 0.4795452654361725, |
|
"rewards/rejected": -1.4640657901763916, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.7710310482256523e-07, |
|
"logits/chosen": -2.55594539642334, |
|
"logits/rejected": -2.5516602993011475, |
|
"logps/chosen": -353.2313537597656, |
|
"logps/rejected": -384.13861083984375, |
|
"loss": 0.1058, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9792869687080383, |
|
"rewards/margins": 0.40680208802223206, |
|
"rewards/rejected": -1.3860890865325928, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.610497133404795e-07, |
|
"logits/chosen": -2.5069775581359863, |
|
"logits/rejected": -2.5189363956451416, |
|
"logps/chosen": -398.85382080078125, |
|
"logps/rejected": -431.91455078125, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2246520519256592, |
|
"rewards/margins": 0.3960326015949249, |
|
"rewards/rejected": -1.6206846237182617, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -2.5340371131896973, |
|
"eval_logits/rejected": -2.513735294342041, |
|
"eval_logps/chosen": -384.15533447265625, |
|
"eval_logps/rejected": -450.7552185058594, |
|
"eval_loss": 0.0996941402554512, |
|
"eval_rewards/accuracies": 0.703125, |
|
"eval_rewards/chosen": -1.2711572647094727, |
|
"eval_rewards/margins": 0.6628624200820923, |
|
"eval_rewards/rejected": -1.934019684791565, |
|
"eval_runtime": 53.511, |
|
"eval_samples_per_second": 37.375, |
|
"eval_steps_per_second": 0.598, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4440382358952115e-07, |
|
"logits/chosen": -2.455578565597534, |
|
"logits/rejected": -2.446720838546753, |
|
"logps/chosen": -391.07830810546875, |
|
"logps/rejected": -428.397705078125, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3349438905715942, |
|
"rewards/margins": 0.5562185645103455, |
|
"rewards/rejected": -1.8911622762680054, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logits/chosen": -2.551090955734253, |
|
"logits/rejected": -2.529384136199951, |
|
"logps/chosen": -385.6699523925781, |
|
"logps/rejected": -405.87615966796875, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1878398656845093, |
|
"rewards/margins": 0.44445449113845825, |
|
"rewards/rejected": -1.6322942972183228, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.096924887558854e-07, |
|
"logits/chosen": -2.5678157806396484, |
|
"logits/rejected": -2.5255255699157715, |
|
"logps/chosen": -411.07745361328125, |
|
"logps/rejected": -404.2816467285156, |
|
"loss": 0.1149, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.9748584628105164, |
|
"rewards/margins": 0.47213855385780334, |
|
"rewards/rejected": -1.446997046470642, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9181224366319943e-07, |
|
"logits/chosen": -2.4429595470428467, |
|
"logits/rejected": -2.4049136638641357, |
|
"logps/chosen": -386.62530517578125, |
|
"logps/rejected": -397.7767028808594, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1572192907333374, |
|
"rewards/margins": 0.4687051773071289, |
|
"rewards/rejected": -1.6259244680404663, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7370891215954565e-07, |
|
"logits/chosen": -2.400578260421753, |
|
"logits/rejected": -2.3846592903137207, |
|
"logps/chosen": -413.29266357421875, |
|
"logps/rejected": -441.35748291015625, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3619310855865479, |
|
"rewards/margins": 0.6331827044487, |
|
"rewards/rejected": -1.9951136112213135, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.55479083351317e-07, |
|
"logits/chosen": -2.435859203338623, |
|
"logits/rejected": -2.4128081798553467, |
|
"logps/chosen": -418.8388671875, |
|
"logps/rejected": -462.96282958984375, |
|
"loss": 0.097, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.2928632497787476, |
|
"rewards/margins": 0.7572471499443054, |
|
"rewards/rejected": -2.050110340118408, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.3722002126275822e-07, |
|
"logits/chosen": -2.3607535362243652, |
|
"logits/rejected": -2.3512327671051025, |
|
"logps/chosen": -393.47845458984375, |
|
"logps/rejected": -424.65692138671875, |
|
"loss": 0.0942, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2448090314865112, |
|
"rewards/margins": 0.5817195177078247, |
|
"rewards/rejected": -1.8265281915664673, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.19029145890313e-07, |
|
"logits/chosen": -2.384596586227417, |
|
"logits/rejected": -2.357322931289673, |
|
"logps/chosen": -401.50152587890625, |
|
"logps/rejected": -447.069580078125, |
|
"loss": 0.0894, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.463189721107483, |
|
"rewards/margins": 0.627885103225708, |
|
"rewards/rejected": -2.0910747051239014, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0100351342479216e-07, |
|
"logits/chosen": -2.3855137825012207, |
|
"logits/rejected": -2.334260940551758, |
|
"logps/chosen": -441.15118408203125, |
|
"logps/rejected": -456.8433532714844, |
|
"loss": 0.0895, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.5582000017166138, |
|
"rewards/margins": 0.6062092185020447, |
|
"rewards/rejected": -2.1644091606140137, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8323929841460178e-07, |
|
"logits/chosen": -2.4264094829559326, |
|
"logits/rejected": -2.403550624847412, |
|
"logps/chosen": -412.9310607910156, |
|
"logps/rejected": -471.4112854003906, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2313965559005737, |
|
"rewards/margins": 0.6434706449508667, |
|
"rewards/rejected": -1.8748672008514404, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.4030282497406006, |
|
"eval_logits/rejected": -2.3836517333984375, |
|
"eval_logps/chosen": -377.3980712890625, |
|
"eval_logps/rejected": -449.78228759765625, |
|
"eval_loss": 0.10235561430454254, |
|
"eval_rewards/accuracies": 0.75390625, |
|
"eval_rewards/chosen": -1.2035841941833496, |
|
"eval_rewards/margins": 0.7207058072090149, |
|
"eval_rewards/rejected": -1.9242901802062988, |
|
"eval_runtime": 53.5723, |
|
"eval_samples_per_second": 37.333, |
|
"eval_steps_per_second": 0.597, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6583128063291573e-07, |
|
"logits/chosen": -2.3959908485412598, |
|
"logits/rejected": -2.366027593612671, |
|
"logps/chosen": -389.87841796875, |
|
"logps/rejected": -428.79150390625, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2791574001312256, |
|
"rewards/margins": 0.5353385806083679, |
|
"rewards/rejected": -1.8144958019256592, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.488723393865766e-07, |
|
"logits/chosen": -2.315176010131836, |
|
"logits/rejected": -2.303180694580078, |
|
"logps/chosen": -419.81304931640625, |
|
"logps/rejected": -451.9205627441406, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3704838752746582, |
|
"rewards/margins": 0.4932515621185303, |
|
"rewards/rejected": -1.8637354373931885, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3245295796480788e-07, |
|
"logits/chosen": -2.3155629634857178, |
|
"logits/rejected": -2.306206226348877, |
|
"logps/chosen": -373.34173583984375, |
|
"logps/rejected": -451.43304443359375, |
|
"loss": 0.094, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2377197742462158, |
|
"rewards/margins": 0.7202552556991577, |
|
"rewards/rejected": -1.957975149154663, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1666074087171627e-07, |
|
"logits/chosen": -2.3178515434265137, |
|
"logits/rejected": -2.317112684249878, |
|
"logps/chosen": -421.288330078125, |
|
"logps/rejected": -464.2798767089844, |
|
"loss": 0.1012, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3072739839553833, |
|
"rewards/margins": 0.6341418027877808, |
|
"rewards/rejected": -1.941415786743164, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0157994641835734e-07, |
|
"logits/chosen": -2.352154016494751, |
|
"logits/rejected": -2.310459852218628, |
|
"logps/chosen": -371.04180908203125, |
|
"logps/rejected": -418.411376953125, |
|
"loss": 0.0964, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2016589641571045, |
|
"rewards/margins": 0.6332089900970459, |
|
"rewards/rejected": -1.8348678350448608, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.729103716819111e-08, |
|
"logits/chosen": -2.3340022563934326, |
|
"logits/rejected": -2.2888753414154053, |
|
"logps/chosen": -399.73870849609375, |
|
"logps/rejected": -433.62939453125, |
|
"loss": 0.103, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3542587757110596, |
|
"rewards/margins": 0.6527735590934753, |
|
"rewards/rejected": -2.0070323944091797, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.387025063449081e-08, |
|
"logits/chosen": -2.305725574493408, |
|
"logits/rejected": -2.2590928077697754, |
|
"logps/chosen": -424.70269775390625, |
|
"logps/rejected": -478.83160400390625, |
|
"loss": 0.0832, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3823884725570679, |
|
"rewards/margins": 0.7607783079147339, |
|
"rewards/rejected": -2.143167018890381, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.138919252022435e-08, |
|
"logits/chosen": -2.3276476860046387, |
|
"logits/rejected": -2.3130292892456055, |
|
"logps/chosen": -431.13568115234375, |
|
"logps/rejected": -477.88824462890625, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4548090696334839, |
|
"rewards/margins": 0.671941876411438, |
|
"rewards/rejected": -2.126750946044922, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.991445467064689e-08, |
|
"logits/chosen": -2.2263472080230713, |
|
"logits/rejected": -2.1942696571350098, |
|
"logps/chosen": -418.37335205078125, |
|
"logps/rejected": -485.0545349121094, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.488586187362671, |
|
"rewards/margins": 0.7860161662101746, |
|
"rewards/rejected": -2.2746024131774902, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9507259776993954e-08, |
|
"logits/chosen": -2.319228410720825, |
|
"logits/rejected": -2.2877087593078613, |
|
"logps/chosen": -417.96875, |
|
"logps/rejected": -461.0101623535156, |
|
"loss": 0.0811, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4231641292572021, |
|
"rewards/margins": 0.647831916809082, |
|
"rewards/rejected": -2.0709962844848633, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -2.3254384994506836, |
|
"eval_logits/rejected": -2.301893472671509, |
|
"eval_logps/chosen": -393.03472900390625, |
|
"eval_logps/rejected": -475.715087890625, |
|
"eval_loss": 0.09447792172431946, |
|
"eval_rewards/accuracies": 0.765625, |
|
"eval_rewards/chosen": -1.3599507808685303, |
|
"eval_rewards/margins": 0.8236675262451172, |
|
"eval_rewards/rejected": -2.1836180686950684, |
|
"eval_runtime": 53.5742, |
|
"eval_samples_per_second": 37.331, |
|
"eval_steps_per_second": 0.597, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.022313472693447e-08, |
|
"logits/chosen": -2.3134891986846924, |
|
"logits/rejected": -2.2576441764831543, |
|
"logps/chosen": -405.07867431640625, |
|
"logps/rejected": -426.08770751953125, |
|
"loss": 0.088, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.304164171218872, |
|
"rewards/margins": 0.7416768074035645, |
|
"rewards/rejected": -2.0458409786224365, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2111614344599684e-08, |
|
"logits/chosen": -2.3239588737487793, |
|
"logits/rejected": -2.2752654552459717, |
|
"logps/chosen": -434.28118896484375, |
|
"logps/rejected": -482.84234619140625, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4340513944625854, |
|
"rewards/margins": 0.8941879272460938, |
|
"rewards/rejected": -2.3282394409179688, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.521597710086439e-08, |
|
"logits/chosen": -2.278296947479248, |
|
"logits/rejected": -2.2763679027557373, |
|
"logps/chosen": -423.744384765625, |
|
"logps/rejected": -485.7794494628906, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4879920482635498, |
|
"rewards/margins": 0.6670708656311035, |
|
"rewards/rejected": -2.1550629138946533, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.57301420397924e-09, |
|
"logits/chosen": -2.26120924949646, |
|
"logits/rejected": -2.2485973834991455, |
|
"logps/chosen": -404.76959228515625, |
|
"logps/rejected": -461.03448486328125, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.4389055967330933, |
|
"rewards/margins": 0.6930050253868103, |
|
"rewards/rejected": -2.131910800933838, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.212833302556258e-09, |
|
"logits/chosen": -2.2681469917297363, |
|
"logits/rejected": -2.275200366973877, |
|
"logps/chosen": -404.1940612792969, |
|
"logps/rejected": -463.80401611328125, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.5075231790542603, |
|
"rewards/margins": 0.6551094055175781, |
|
"rewards/rejected": -2.162632703781128, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.158697848236607e-09, |
|
"logits/chosen": -2.2567198276519775, |
|
"logits/rejected": -2.215657949447632, |
|
"logps/chosen": -404.21527099609375, |
|
"logps/rejected": -441.24945068359375, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.5105773210525513, |
|
"rewards/margins": 0.5308315753936768, |
|
"rewards/rejected": -2.0414090156555176, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.269029751107489e-10, |
|
"logits/chosen": -2.2435102462768555, |
|
"logits/rejected": -2.2021100521087646, |
|
"logps/chosen": -399.60418701171875, |
|
"logps/rejected": -474.943359375, |
|
"loss": 0.0902, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4095227718353271, |
|
"rewards/margins": 0.8036805391311646, |
|
"rewards/rejected": -2.213203191757202, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 478, |
|
"total_flos": 0.0, |
|
"train_loss": 0.13007899894375183, |
|
"train_runtime": 3956.3918, |
|
"train_samples_per_second": 15.452, |
|
"train_steps_per_second": 0.121 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|