{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 5811, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.591065292096219e-10, "logits/chosen": -2.810119152069092, "logits/rejected": -2.8539578914642334, "logps/chosen": -108.88716125488281, "logps/rejected": -104.7931137084961, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.59106529209622e-09, "logits/chosen": -3.077805995941162, "logits/rejected": -3.05576753616333, "logps/chosen": -324.1370849609375, "logps/rejected": -248.8675994873047, "loss": 0.6941, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": -0.004619969520717859, "rewards/margins": -0.00470061507076025, "rewards/rejected": 8.064573194133118e-05, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -3.031592845916748, "logits/rejected": -2.9924607276916504, "logps/chosen": -246.78054809570312, "logps/rejected": -176.89419555664062, "loss": 0.6935, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.0062343222089111805, "rewards/margins": -0.0077113681472837925, "rewards/rejected": 0.0014770453562960029, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.5773195876288656e-08, "logits/chosen": -3.0332155227661133, "logits/rejected": -3.0279202461242676, "logps/chosen": -308.8547668457031, "logps/rejected": -265.58990478515625, "loss": 0.6915, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.004843696020543575, "rewards/margins": -0.00607115076854825, "rewards/rejected": 0.0012274538166821003, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.9431536197662354, "logits/rejected": -2.9705748558044434, "logps/chosen": -315.57232666015625, "logps/rejected": -228.0748291015625, "loss": 0.6859, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.002184107434004545, "rewards/margins": 0.008781509473919868, "rewards/rejected": -0.0065974025055766106, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.29553264604811e-08, "logits/chosen": -3.1191954612731934, "logits/rejected": -3.098832368850708, "logps/chosen": -262.530029296875, "logps/rejected": -206.8711395263672, "loss": 0.6792, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.021147452294826508, "rewards/margins": 0.03999967873096466, "rewards/rejected": -0.01885223016142845, "step": 50 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -3.00576114654541, "logits/rejected": -2.969909191131592, "logps/chosen": -257.22900390625, "logps/rejected": -242.36923217773438, "loss": 0.6678, "rewards/accuracies": 0.6875, "rewards/chosen": 0.028875883668661118, "rewards/margins": 0.05295173078775406, "rewards/rejected": -0.024075839668512344, "step": 60 }, { "epoch": 0.04, "learning_rate": 6.013745704467354e-08, "logits/chosen": -3.0738158226013184, "logits/rejected": -3.0244216918945312, "logps/chosen": -308.69781494140625, "logps/rejected": -248.3504638671875, "loss": 0.6604, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03902146592736244, "rewards/margins": 0.08460570871829987, "rewards/rejected": -0.04558424651622772, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -3.0613749027252197, "logits/rejected": -3.033377170562744, "logps/chosen": -289.31512451171875, "logps/rejected": -253.02688598632812, "loss": 0.639, "rewards/accuracies": 0.625, "rewards/chosen": 0.0463881716132164, "rewards/margins": 0.10045802593231201, "rewards/rejected": -0.054069846868515015, "step": 80 }, { "epoch": 0.05, "learning_rate": 7.731958762886598e-08, "logits/chosen": -3.079406261444092, "logits/rejected": -3.0861239433288574, "logps/chosen": -309.35357666015625, "logps/rejected": -254.6897430419922, "loss": 0.6328, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.044357143342494965, "rewards/margins": 0.14239642024040222, "rewards/rejected": -0.09803926199674606, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.939406633377075, "logits/rejected": -2.9405882358551025, "logps/chosen": -268.3811340332031, "logps/rejected": -196.1370849609375, "loss": 0.6224, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.03316980227828026, "rewards/margins": 0.16405239701271057, "rewards/rejected": -0.13088259100914001, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -3.0500237941741943, "eval_logits/rejected": -3.028080463409424, "eval_logps/chosen": -282.3603210449219, "eval_logps/rejected": -241.34710693359375, "eval_loss": 0.6036545038223267, "eval_rewards/accuracies": 0.712890625, "eval_rewards/chosen": 0.05369603633880615, "eval_rewards/margins": 0.2317994236946106, "eval_rewards/rejected": -0.17810338735580444, "eval_runtime": 323.3837, "eval_samples_per_second": 6.185, "eval_steps_per_second": 0.099, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.450171821305841e-08, "logits/chosen": -3.009620428085327, "logits/rejected": -2.9950802326202393, "logps/chosen": -268.7337646484375, "logps/rejected": -245.73104858398438, "loss": 0.6129, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.004001384135335684, "rewards/margins": 0.24445387721061707, "rewards/rejected": -0.24045248329639435, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -3.0452167987823486, "logits/rejected": -3.016136646270752, "logps/chosen": -226.81088256835938, "logps/rejected": -213.1511688232422, "loss": 0.5806, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.030819633975625038, "rewards/margins": 0.21938776969909668, "rewards/rejected": -0.25020742416381836, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.1168384879725086e-07, "logits/chosen": -3.087947368621826, "logits/rejected": -3.1047377586364746, "logps/chosen": -315.256103515625, "logps/rejected": -240.69979858398438, "loss": 0.5887, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.10795386135578156, "rewards/margins": 0.39908522367477417, "rewards/rejected": -0.2911313772201538, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -3.0309665203094482, "logits/rejected": -2.9723358154296875, "logps/chosen": -294.95440673828125, "logps/rejected": -272.88055419921875, "loss": 0.5633, "rewards/accuracies": 0.75, "rewards/chosen": 0.001623488962650299, "rewards/margins": 0.5517271161079407, "rewards/rejected": -0.5501035451889038, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.2886597938144328e-07, "logits/chosen": -3.0824379920959473, "logits/rejected": -3.0265815258026123, "logps/chosen": -287.56927490234375, "logps/rejected": -247.44351196289062, "loss": 0.5508, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.10439233481884003, "rewards/margins": 0.6577320098876953, "rewards/rejected": -0.5533396005630493, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.9724361896514893, "logits/rejected": -2.9667444229125977, "logps/chosen": -297.40399169921875, "logps/rejected": -245.5622100830078, "loss": 0.5402, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.07300714403390884, "rewards/margins": 0.5651272535324097, "rewards/rejected": -0.4921201169490814, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.4604810996563573e-07, "logits/chosen": -3.055687427520752, "logits/rejected": -3.0171315670013428, "logps/chosen": -282.4520568847656, "logps/rejected": -239.1197509765625, "loss": 0.491, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.10492371022701263, "rewards/margins": 0.9229158163070679, "rewards/rejected": -0.817992091178894, "step": 170 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -3.0365664958953857, "logits/rejected": -2.9825618267059326, "logps/chosen": -234.84115600585938, "logps/rejected": -211.46438598632812, "loss": 0.513, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.017088109627366066, "rewards/margins": 0.6811542510986328, "rewards/rejected": -0.6640661954879761, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.6323024054982818e-07, "logits/chosen": -2.981078863143921, "logits/rejected": -2.967215061187744, "logps/chosen": -264.2391357421875, "logps/rejected": -267.962158203125, "loss": 0.5673, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.010824739933013916, "rewards/margins": 0.6467477679252625, "rewards/rejected": -0.6575725674629211, "step": 190 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.9363226890563965, "logits/rejected": -2.9228029251098633, "logps/chosen": -269.818115234375, "logps/rejected": -194.14317321777344, "loss": 0.4992, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.14461569488048553, "rewards/margins": 0.8342581987380981, "rewards/rejected": -0.6896424889564514, "step": 200 }, { "epoch": 0.1, "eval_logits/chosen": -3.0038464069366455, "eval_logits/rejected": -2.9767627716064453, "eval_logps/chosen": -282.980224609375, "eval_logps/rejected": -248.3236083984375, "eval_loss": 0.5157163143157959, "eval_rewards/accuracies": 0.751953125, "eval_rewards/chosen": 0.0022485731169581413, "eval_rewards/margins": 0.7594023942947388, "eval_rewards/rejected": -0.7571538090705872, "eval_runtime": 321.6043, "eval_samples_per_second": 6.219, "eval_steps_per_second": 0.1, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.804123711340206e-07, "logits/chosen": -2.928514003753662, "logits/rejected": -2.8743972778320312, "logps/chosen": -257.0296325683594, "logps/rejected": -241.7471160888672, "loss": 0.5681, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2513473927974701, "rewards/margins": 0.5627373456954956, "rewards/rejected": -0.8140847086906433, "step": 210 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -3.0199873447418213, "logits/rejected": -2.975822925567627, "logps/chosen": -286.3315124511719, "logps/rejected": -240.0165557861328, "loss": 0.5206, "rewards/accuracies": 0.75, "rewards/chosen": -0.13750353455543518, "rewards/margins": 0.7021521925926208, "rewards/rejected": -0.8396557569503784, "step": 220 }, { "epoch": 0.12, "learning_rate": 1.9759450171821303e-07, "logits/chosen": -3.0140998363494873, "logits/rejected": -2.968956708908081, "logps/chosen": -291.6414489746094, "logps/rejected": -255.5946044921875, "loss": 0.515, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1091725081205368, "rewards/margins": 0.6621167659759521, "rewards/rejected": -0.7712893486022949, "step": 230 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -3.0256845951080322, "logits/rejected": -2.9893667697906494, "logps/chosen": -333.3338317871094, "logps/rejected": -229.1930389404297, "loss": 0.5377, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.13958527147769928, "rewards/margins": 0.6952698826789856, "rewards/rejected": -0.8348551988601685, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.1477663230240549e-07, "logits/chosen": -3.1012818813323975, "logits/rejected": -3.0699336528778076, "logps/chosen": -273.399169921875, "logps/rejected": -256.04022216796875, "loss": 0.5168, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.006724365055561066, "rewards/margins": 0.8931355476379395, "rewards/rejected": -0.8864110708236694, "step": 250 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -3.089816093444824, "logits/rejected": -3.0267081260681152, "logps/chosen": -298.5501403808594, "logps/rejected": -248.4580535888672, "loss": 0.5228, "rewards/accuracies": 0.75, "rewards/chosen": 0.1098913699388504, "rewards/margins": 0.9398354291915894, "rewards/rejected": -0.8299440145492554, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.3195876288659794e-07, "logits/chosen": -3.030869483947754, "logits/rejected": -3.0321927070617676, "logps/chosen": -275.00250244140625, "logps/rejected": -237.8729705810547, "loss": 0.4865, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.04042524844408035, "rewards/margins": 0.7864399552345276, "rewards/rejected": -0.8268651962280273, "step": 270 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.989088535308838, "logits/rejected": -2.9642486572265625, "logps/chosen": -310.1044006347656, "logps/rejected": -253.1246795654297, "loss": 0.5111, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.023750295862555504, "rewards/margins": 0.991936981678009, "rewards/rejected": -0.9681867361068726, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.4914089347079036e-07, "logits/chosen": -2.9553210735321045, "logits/rejected": -2.938413143157959, "logps/chosen": -291.2104797363281, "logps/rejected": -284.74664306640625, "loss": 0.5164, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.018911823630332947, "rewards/margins": 0.84661465883255, "rewards/rejected": -0.8277028203010559, "step": 290 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -3.0006637573242188, "logits/rejected": -3.0096077919006348, "logps/chosen": -261.3973388671875, "logps/rejected": -251.36203002929688, "loss": 0.5334, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.13136225938796997, "rewards/margins": 0.9697853922843933, "rewards/rejected": -1.1011477708816528, "step": 300 }, { "epoch": 0.15, "eval_logits/chosen": -3.027198314666748, "eval_logits/rejected": -3.003648281097412, "eval_logps/chosen": -284.21612548828125, "eval_logps/rejected": -251.9915771484375, "eval_loss": 0.4982943534851074, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -0.10033171623945236, "eval_rewards/margins": 0.9612622261047363, "eval_rewards/rejected": -1.0615938901901245, "eval_runtime": 317.7678, "eval_samples_per_second": 6.294, "eval_steps_per_second": 0.101, "step": 300 }, { "epoch": 0.16, "learning_rate": 2.663230240549828e-07, "logits/chosen": -3.0613951683044434, "logits/rejected": -3.061393976211548, "logps/chosen": -264.8827819824219, "logps/rejected": -229.23611450195312, "loss": 0.5103, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.18704332411289215, "rewards/margins": 0.870599091053009, "rewards/rejected": -1.0576423406600952, "step": 310 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -3.0413594245910645, "logits/rejected": -3.0220789909362793, "logps/chosen": -297.75079345703125, "logps/rejected": -239.26602172851562, "loss": 0.4581, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20401251316070557, "rewards/margins": 1.3027021884918213, "rewards/rejected": -1.5067145824432373, "step": 320 }, { "epoch": 0.17, "learning_rate": 2.835051546391752e-07, "logits/chosen": -3.030508041381836, "logits/rejected": -3.0119926929473877, "logps/chosen": -304.0564270019531, "logps/rejected": -252.899658203125, "loss": 0.4816, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24729672074317932, "rewards/margins": 1.1143649816513062, "rewards/rejected": -1.3616619110107422, "step": 330 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.9939687252044678, "logits/rejected": -2.989213705062866, "logps/chosen": -286.02130126953125, "logps/rejected": -249.25900268554688, "loss": 0.5316, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.21859419345855713, "rewards/margins": 0.7743669152259827, "rewards/rejected": -0.9929611086845398, "step": 340 }, { "epoch": 0.18, "learning_rate": 3.006872852233677e-07, "logits/chosen": -3.0797624588012695, "logits/rejected": -3.0391476154327393, "logps/chosen": -233.7308349609375, "logps/rejected": -231.7999267578125, "loss": 0.4624, "rewards/accuracies": 0.8125, "rewards/chosen": -0.20324818789958954, "rewards/margins": 1.153113842010498, "rewards/rejected": -1.3563621044158936, "step": 350 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -3.0688464641571045, "logits/rejected": -3.072322368621826, "logps/chosen": -265.6463317871094, "logps/rejected": -221.05331420898438, "loss": 0.4777, "rewards/accuracies": 0.8125, "rewards/chosen": 0.04622482880949974, "rewards/margins": 1.1380517482757568, "rewards/rejected": -1.0918270349502563, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.178694158075601e-07, "logits/chosen": -3.0716795921325684, "logits/rejected": -3.0277438163757324, "logps/chosen": -252.778076171875, "logps/rejected": -208.10794067382812, "loss": 0.4727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.022056875750422478, "rewards/margins": 1.2446290254592896, "rewards/rejected": -1.2225720882415771, "step": 370 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -3.022399425506592, "logits/rejected": -3.0119998455047607, "logps/chosen": -240.9415740966797, "logps/rejected": -221.38894653320312, "loss": 0.5524, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14949551224708557, "rewards/margins": 0.9345547556877136, "rewards/rejected": -1.0840502977371216, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.3505154639175255e-07, "logits/chosen": -3.1336541175842285, "logits/rejected": -3.111215114593506, "logps/chosen": -256.5663757324219, "logps/rejected": -217.5249786376953, "loss": 0.4331, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.16669534146785736, "rewards/margins": 1.0762660503387451, "rewards/rejected": -1.2429615259170532, "step": 390 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -3.1615653038024902, "logits/rejected": -3.136552333831787, "logps/chosen": -255.0474853515625, "logps/rejected": -195.88241577148438, "loss": 0.5479, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41153281927108765, "rewards/margins": 0.7630301713943481, "rewards/rejected": -1.1745630502700806, "step": 400 }, { "epoch": 0.21, "eval_logits/chosen": -3.1487302780151367, "eval_logits/rejected": -3.126405715942383, "eval_logps/chosen": -285.24884033203125, "eval_logps/rejected": -253.95034790039062, "eval_loss": 0.4918481409549713, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -0.18604746460914612, "eval_rewards/margins": 1.0381252765655518, "eval_rewards/rejected": -1.224172830581665, "eval_runtime": 317.4383, "eval_samples_per_second": 6.3, "eval_steps_per_second": 0.101, "step": 400 }, { "epoch": 0.21, "learning_rate": 3.5223367697594503e-07, "logits/chosen": -3.0263237953186035, "logits/rejected": -2.9650163650512695, "logps/chosen": -307.6156311035156, "logps/rejected": -224.01260375976562, "loss": 0.4129, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14582698047161102, "rewards/margins": 1.1535571813583374, "rewards/rejected": -1.2993842363357544, "step": 410 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -3.0302376747131348, "logits/rejected": -2.976388454437256, "logps/chosen": -282.3056945800781, "logps/rejected": -241.4582977294922, "loss": 0.4857, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.33549290895462036, "rewards/margins": 1.263900637626648, "rewards/rejected": -1.599393606185913, "step": 420 }, { "epoch": 0.22, "learning_rate": 3.6941580756013745e-07, "logits/chosen": -3.090383529663086, "logits/rejected": -3.039210796356201, "logps/chosen": -245.99832153320312, "logps/rejected": -227.4595947265625, "loss": 0.475, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7581796646118164, "rewards/margins": 1.3016096353530884, "rewards/rejected": -2.0597891807556152, "step": 430 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -3.0211751461029053, "logits/rejected": -3.0732388496398926, "logps/chosen": -288.61004638671875, "logps/rejected": -288.28326416015625, "loss": 0.5412, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4035927355289459, "rewards/margins": 0.9907367825508118, "rewards/rejected": -1.3943296670913696, "step": 440 }, { "epoch": 0.23, "learning_rate": 3.865979381443299e-07, "logits/chosen": -3.093149185180664, "logits/rejected": -3.0894787311553955, "logps/chosen": -263.6800537109375, "logps/rejected": -252.18533325195312, "loss": 0.4876, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.17855218052864075, "rewards/margins": 0.8364956974983215, "rewards/rejected": -1.0150480270385742, "step": 450 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -3.0586328506469727, "logits/rejected": -3.0303149223327637, "logps/chosen": -270.06689453125, "logps/rejected": -258.2165222167969, "loss": 0.5399, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38541078567504883, "rewards/margins": 1.0738279819488525, "rewards/rejected": -1.4592386484146118, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.037800687285223e-07, "logits/chosen": -3.1244683265686035, "logits/rejected": -3.092893362045288, "logps/chosen": -319.0964660644531, "logps/rejected": -210.59323120117188, "loss": 0.4481, "rewards/accuracies": 0.75, "rewards/chosen": -0.35953083634376526, "rewards/margins": 1.2343127727508545, "rewards/rejected": -1.5938435792922974, "step": 470 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -3.1930689811706543, "logits/rejected": -3.13870906829834, "logps/chosen": -286.3772888183594, "logps/rejected": -252.65731811523438, "loss": 0.4779, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.22101116180419922, "rewards/margins": 1.0905197858810425, "rewards/rejected": -1.3115311861038208, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.209621993127148e-07, "logits/chosen": -2.997666120529175, "logits/rejected": -2.9871137142181396, "logps/chosen": -264.60845947265625, "logps/rejected": -244.0891571044922, "loss": 0.418, "rewards/accuracies": 0.75, "rewards/chosen": -0.4232266843318939, "rewards/margins": 1.2868436574935913, "rewards/rejected": -1.710070252418518, "step": 490 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -3.1927952766418457, "logits/rejected": -3.164834976196289, "logps/chosen": -276.08343505859375, "logps/rejected": -261.13067626953125, "loss": 0.531, "rewards/accuracies": 0.6875, "rewards/chosen": -0.574891209602356, "rewards/margins": 0.9274374842643738, "rewards/rejected": -1.502328634262085, "step": 500 }, { "epoch": 0.26, "eval_logits/chosen": -3.1247048377990723, "eval_logits/rejected": -3.0859904289245605, "eval_logps/chosen": -288.6731262207031, "eval_logps/rejected": -258.18206787109375, "eval_loss": 0.4929064214229584, "eval_rewards/accuracies": 0.755859375, "eval_rewards/chosen": -0.47026461362838745, "eval_rewards/margins": 1.1051421165466309, "eval_rewards/rejected": -1.5754066705703735, "eval_runtime": 318.1686, "eval_samples_per_second": 6.286, "eval_steps_per_second": 0.101, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.381443298969072e-07, "logits/chosen": -3.0263919830322266, "logits/rejected": -3.0432791709899902, "logps/chosen": -290.43975830078125, "logps/rejected": -252.24258422851562, "loss": 0.5209, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.648147463798523, "rewards/margins": 0.7450908422470093, "rewards/rejected": -1.3932383060455322, "step": 510 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.995274782180786, "logits/rejected": -2.980912685394287, "logps/chosen": -252.3384246826172, "logps/rejected": -227.31539916992188, "loss": 0.5035, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5721083879470825, "rewards/margins": 1.4294297695159912, "rewards/rejected": -2.001537799835205, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.5532646048109964e-07, "logits/chosen": -3.0251669883728027, "logits/rejected": -2.9927849769592285, "logps/chosen": -282.0303649902344, "logps/rejected": -237.615966796875, "loss": 0.5056, "rewards/accuracies": 0.75, "rewards/chosen": -0.5890978574752808, "rewards/margins": 1.003430962562561, "rewards/rejected": -1.5925285816192627, "step": 530 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -3.061938762664795, "logits/rejected": -3.0511374473571777, "logps/chosen": -281.97113037109375, "logps/rejected": -258.9970397949219, "loss": 0.5447, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6903527975082397, "rewards/margins": 0.8404518365859985, "rewards/rejected": -1.5308043956756592, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.7250859106529206e-07, "logits/chosen": -3.045581340789795, "logits/rejected": -3.0202322006225586, "logps/chosen": -270.1367492675781, "logps/rejected": -244.7872314453125, "loss": 0.5352, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5443307757377625, "rewards/margins": 1.179702639579773, "rewards/rejected": -1.7240333557128906, "step": 550 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -3.0227630138397217, "logits/rejected": -2.979196071624756, "logps/chosen": -309.9779968261719, "logps/rejected": -265.4678955078125, "loss": 0.5059, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4732925295829773, "rewards/margins": 1.1587791442871094, "rewards/rejected": -1.632071852684021, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.896907216494845e-07, "logits/chosen": -3.0945346355438232, "logits/rejected": -3.033567428588867, "logps/chosen": -279.26904296875, "logps/rejected": -266.6229553222656, "loss": 0.4818, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7089546918869019, "rewards/margins": 0.9176638722419739, "rewards/rejected": -1.6266186237335205, "step": 570 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -3.1003482341766357, "logits/rejected": -3.012528657913208, "logps/chosen": -273.9862976074219, "logps/rejected": -218.8135223388672, "loss": 0.549, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3407500982284546, "rewards/margins": 1.2094409465789795, "rewards/rejected": -1.550191044807434, "step": 580 }, { "epoch": 0.3, "learning_rate": 4.992350353796136e-07, "logits/chosen": -3.000741481781006, "logits/rejected": -2.9954724311828613, "logps/chosen": -248.0009307861328, "logps/rejected": -250.56332397460938, "loss": 0.4856, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.35843077301979065, "rewards/margins": 1.3551334142684937, "rewards/rejected": -1.7135642766952515, "step": 590 }, { "epoch": 0.31, "learning_rate": 4.982788296041308e-07, "logits/chosen": -3.097600221633911, "logits/rejected": -3.019806385040283, "logps/chosen": -252.8380584716797, "logps/rejected": -228.98291015625, "loss": 0.486, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6423465609550476, "rewards/margins": 1.1674960851669312, "rewards/rejected": -1.8098424673080444, "step": 600 }, { "epoch": 0.31, "eval_logits/chosen": -3.0920188426971436, "eval_logits/rejected": -3.051931619644165, "eval_logps/chosen": -287.2957763671875, "eval_logps/rejected": -256.58001708984375, "eval_loss": 0.5095893740653992, "eval_rewards/accuracies": 0.728515625, "eval_rewards/chosen": -0.35594525933265686, "eval_rewards/margins": 1.0864894390106201, "eval_rewards/rejected": -1.442434549331665, "eval_runtime": 328.4161, "eval_samples_per_second": 6.09, "eval_steps_per_second": 0.097, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.973226238286479e-07, "logits/chosen": -3.039318799972534, "logits/rejected": -3.0004897117614746, "logps/chosen": -324.72064208984375, "logps/rejected": -273.87554931640625, "loss": 0.5269, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.317125141620636, "rewards/margins": 1.1922056674957275, "rewards/rejected": -1.5093307495117188, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.96366418053165e-07, "logits/chosen": -3.1272237300872803, "logits/rejected": -3.090803861618042, "logps/chosen": -281.8341064453125, "logps/rejected": -271.6834716796875, "loss": 0.555, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.26511287689208984, "rewards/margins": 0.7951613068580627, "rewards/rejected": -1.0602742433547974, "step": 620 }, { "epoch": 0.33, "learning_rate": 4.954102122776821e-07, "logits/chosen": -3.1253395080566406, "logits/rejected": -3.0239481925964355, "logps/chosen": -253.9168243408203, "logps/rejected": -203.9703369140625, "loss": 0.4661, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.22944042086601257, "rewards/margins": 1.2094790935516357, "rewards/rejected": -1.4389193058013916, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.944540065021993e-07, "logits/chosen": -2.9290738105773926, "logits/rejected": -2.94616961479187, "logps/chosen": -240.6305389404297, "logps/rejected": -210.3444061279297, "loss": 0.4798, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5323778390884399, "rewards/margins": 1.3140183687210083, "rewards/rejected": -1.8463960886001587, "step": 640 }, { "epoch": 0.34, "learning_rate": 4.934978007267163e-07, "logits/chosen": -3.044485330581665, "logits/rejected": -3.0301990509033203, "logps/chosen": -279.6080322265625, "logps/rejected": -256.0472412109375, "loss": 0.5052, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05965696647763252, "rewards/margins": 1.3026609420776367, "rewards/rejected": -1.3623180389404297, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.925415949512335e-07, "logits/chosen": -3.0065619945526123, "logits/rejected": -2.941624641418457, "logps/chosen": -331.6128845214844, "logps/rejected": -255.57754516601562, "loss": 0.4996, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18121977150440216, "rewards/margins": 1.374424695968628, "rewards/rejected": -1.5556445121765137, "step": 660 }, { "epoch": 0.35, "learning_rate": 4.915853891757506e-07, "logits/chosen": -2.91098690032959, "logits/rejected": -2.91432523727417, "logps/chosen": -197.34231567382812, "logps/rejected": -242.2581329345703, "loss": 0.5585, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1997532695531845, "rewards/margins": 0.7331693172454834, "rewards/rejected": -0.9329225420951843, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.906291834002677e-07, "logits/chosen": -2.969010829925537, "logits/rejected": -2.9175844192504883, "logps/chosen": -282.77374267578125, "logps/rejected": -260.5110168457031, "loss": 0.5112, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6085377931594849, "rewards/margins": 0.9930826425552368, "rewards/rejected": -1.6016204357147217, "step": 680 }, { "epoch": 0.36, "learning_rate": 4.896729776247848e-07, "logits/chosen": -2.983025074005127, "logits/rejected": -2.9608101844787598, "logps/chosen": -291.65081787109375, "logps/rejected": -255.21841430664062, "loss": 0.465, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.46502479910850525, "rewards/margins": 1.2626570463180542, "rewards/rejected": -1.7276817560195923, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.88716771849302e-07, "logits/chosen": -2.986703395843506, "logits/rejected": -2.9134316444396973, "logps/chosen": -330.8349304199219, "logps/rejected": -278.9441223144531, "loss": 0.4858, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35095247626304626, "rewards/margins": 1.597726583480835, "rewards/rejected": -1.948678970336914, "step": 700 }, { "epoch": 0.36, "eval_logits/chosen": -2.990648031234741, "eval_logits/rejected": -2.9452342987060547, "eval_logps/chosen": -290.5132751464844, "eval_logps/rejected": -262.8216552734375, "eval_loss": 0.5078591704368591, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.6229965686798096, "eval_rewards/margins": 1.3374930620193481, "eval_rewards/rejected": -1.9604897499084473, "eval_runtime": 316.1081, "eval_samples_per_second": 6.327, "eval_steps_per_second": 0.101, "step": 700 }, { "epoch": 0.37, "learning_rate": 4.87760566073819e-07, "logits/chosen": -2.902663230895996, "logits/rejected": -2.90675687789917, "logps/chosen": -303.98944091796875, "logps/rejected": -247.2509307861328, "loss": 0.5063, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7577051520347595, "rewards/margins": 1.2916464805603027, "rewards/rejected": -2.049351453781128, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.868043602983362e-07, "logits/chosen": -3.0257387161254883, "logits/rejected": -2.9773671627044678, "logps/chosen": -304.38763427734375, "logps/rejected": -303.9205017089844, "loss": 0.4917, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9141196012496948, "rewards/margins": 1.5308420658111572, "rewards/rejected": -2.4449617862701416, "step": 720 }, { "epoch": 0.38, "learning_rate": 4.858481545228533e-07, "logits/chosen": -3.053493022918701, "logits/rejected": -2.971351146697998, "logps/chosen": -326.4499206542969, "logps/rejected": -290.7962646484375, "loss": 0.4751, "rewards/accuracies": 0.75, "rewards/chosen": -0.8385788202285767, "rewards/margins": 1.379955530166626, "rewards/rejected": -2.218534469604492, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.848919487473704e-07, "logits/chosen": -2.985682964324951, "logits/rejected": -2.9546995162963867, "logps/chosen": -303.14288330078125, "logps/rejected": -300.7163391113281, "loss": 0.5696, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.9265249371528625, "rewards/margins": 1.297126054763794, "rewards/rejected": -2.2236509323120117, "step": 740 }, { "epoch": 0.39, "learning_rate": 4.839357429718875e-07, "logits/chosen": -3.0034239292144775, "logits/rejected": -3.02946400642395, "logps/chosen": -276.158935546875, "logps/rejected": -249.05520629882812, "loss": 0.5449, "rewards/accuracies": 0.75, "rewards/chosen": -0.8627828359603882, "rewards/margins": 1.1859722137451172, "rewards/rejected": -2.048755168914795, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.829795371964047e-07, "logits/chosen": -3.018502712249756, "logits/rejected": -2.972832441329956, "logps/chosen": -307.4183044433594, "logps/rejected": -272.1914978027344, "loss": 0.5195, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6957966089248657, "rewards/margins": 1.277281403541565, "rewards/rejected": -1.9730780124664307, "step": 760 }, { "epoch": 0.4, "learning_rate": 4.820233314209217e-07, "logits/chosen": -2.824676275253296, "logits/rejected": -2.803086280822754, "logps/chosen": -258.3660888671875, "logps/rejected": -231.0617218017578, "loss": 0.5578, "rewards/accuracies": 0.75, "rewards/chosen": -0.7795838117599487, "rewards/margins": 1.1370725631713867, "rewards/rejected": -1.916656494140625, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.810671256454389e-07, "logits/chosen": -2.802565813064575, "logits/rejected": -2.81516432762146, "logps/chosen": -304.1695251464844, "logps/rejected": -290.732177734375, "loss": 0.4932, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.7143360376358032, "rewards/margins": 1.5994818210601807, "rewards/rejected": -2.3138179779052734, "step": 780 }, { "epoch": 0.41, "learning_rate": 4.80110919869956e-07, "logits/chosen": -2.8336338996887207, "logits/rejected": -2.786895513534546, "logps/chosen": -300.0252685546875, "logps/rejected": -234.04409790039062, "loss": 0.5294, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.6363843679428101, "rewards/margins": 1.3050638437271118, "rewards/rejected": -1.9414482116699219, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.791547140944731e-07, "logits/chosen": -2.70941162109375, "logits/rejected": -2.7157716751098633, "logps/chosen": -224.75210571289062, "logps/rejected": -233.43685913085938, "loss": 0.4844, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6023855209350586, "rewards/margins": 1.4085330963134766, "rewards/rejected": -2.010918617248535, "step": 800 }, { "epoch": 0.41, "eval_logits/chosen": -2.790893077850342, "eval_logits/rejected": -2.7574403285980225, "eval_logps/chosen": -291.6787414550781, "eval_logps/rejected": -265.0712585449219, "eval_loss": 0.4998268783092499, "eval_rewards/accuracies": 0.755859375, "eval_rewards/chosen": -0.7197288274765015, "eval_rewards/margins": 1.4274795055389404, "eval_rewards/rejected": -2.1472084522247314, "eval_runtime": 316.6822, "eval_samples_per_second": 6.315, "eval_steps_per_second": 0.101, "step": 800 }, { "epoch": 0.42, "learning_rate": 4.781985083189902e-07, "logits/chosen": -2.7896525859832764, "logits/rejected": -2.74312162399292, "logps/chosen": -258.0677795410156, "logps/rejected": -267.8040466308594, "loss": 0.5101, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6541247367858887, "rewards/margins": 1.296798586845398, "rewards/rejected": -1.9509233236312866, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.772423025435074e-07, "logits/chosen": -2.806490898132324, "logits/rejected": -2.779108762741089, "logps/chosen": -280.8498229980469, "logps/rejected": -274.19976806640625, "loss": 0.5199, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5879713892936707, "rewards/margins": 0.8193132281303406, "rewards/rejected": -1.4072844982147217, "step": 820 }, { "epoch": 0.43, "learning_rate": 4.762860967680244e-07, "logits/chosen": -2.8954238891601562, "logits/rejected": -2.891010046005249, "logps/chosen": -243.6877899169922, "logps/rejected": -203.91360473632812, "loss": 0.5433, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.538101851940155, "rewards/margins": 1.0817303657531738, "rewards/rejected": -1.6198322772979736, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.7532989099254154e-07, "logits/chosen": -2.8991799354553223, "logits/rejected": -2.804013252258301, "logps/chosen": -264.1739501953125, "logps/rejected": -238.92105102539062, "loss": 0.5658, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.7942388653755188, "rewards/margins": 0.818154513835907, "rewards/rejected": -1.6123933792114258, "step": 840 }, { "epoch": 0.44, "learning_rate": 4.7437368521705866e-07, "logits/chosen": -2.9615111351013184, "logits/rejected": -2.9156970977783203, "logps/chosen": -255.1885223388672, "logps/rejected": -266.92242431640625, "loss": 0.5423, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5804500579833984, "rewards/margins": 1.492943286895752, "rewards/rejected": -2.0733933448791504, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.7341747944157577e-07, "logits/chosen": -2.9630184173583984, "logits/rejected": -2.9477226734161377, "logps/chosen": -277.333984375, "logps/rejected": -256.2931213378906, "loss": 0.5494, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7760841250419617, "rewards/margins": 0.9907199144363403, "rewards/rejected": -1.7668039798736572, "step": 860 }, { "epoch": 0.45, "learning_rate": 4.724612736660929e-07, "logits/chosen": -2.8701772689819336, "logits/rejected": -2.8017899990081787, "logps/chosen": -278.9487609863281, "logps/rejected": -250.276611328125, "loss": 0.4508, "rewards/accuracies": 0.8125, "rewards/chosen": -0.47556501626968384, "rewards/margins": 1.75156569480896, "rewards/rejected": -2.227130889892578, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.7150506789061006e-07, "logits/chosen": -2.962653636932373, "logits/rejected": -2.913095712661743, "logps/chosen": -293.9844055175781, "logps/rejected": -297.45758056640625, "loss": 0.4961, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8863751292228699, "rewards/margins": 1.492598295211792, "rewards/rejected": -2.3789734840393066, "step": 880 }, { "epoch": 0.46, "learning_rate": 4.7054886211512717e-07, "logits/chosen": -2.9517550468444824, "logits/rejected": -2.943208694458008, "logps/chosen": -282.61322021484375, "logps/rejected": -254.03759765625, "loss": 0.5139, "rewards/accuracies": 0.75, "rewards/chosen": -0.49182477593421936, "rewards/margins": 1.1340211629867554, "rewards/rejected": -1.6258461475372314, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.695926563396443e-07, "logits/chosen": -3.0224180221557617, "logits/rejected": -2.9857900142669678, "logps/chosen": -278.03973388671875, "logps/rejected": -240.48263549804688, "loss": 0.4999, "rewards/accuracies": 0.75, "rewards/chosen": -0.8061272501945496, "rewards/margins": 0.933633029460907, "rewards/rejected": -1.739760398864746, "step": 900 }, { "epoch": 0.46, "eval_logits/chosen": -2.9806318283081055, "eval_logits/rejected": -2.945358991622925, "eval_logps/chosen": -290.1770324707031, "eval_logps/rejected": -261.89630126953125, "eval_loss": 0.49826258420944214, "eval_rewards/accuracies": 0.763671875, "eval_rewards/chosen": -0.5950886011123657, "eval_rewards/margins": 1.2885997295379639, "eval_rewards/rejected": -1.88368821144104, "eval_runtime": 316.0875, "eval_samples_per_second": 6.327, "eval_steps_per_second": 0.101, "step": 900 }, { "epoch": 0.47, "learning_rate": 4.686364505641614e-07, "logits/chosen": -2.983365535736084, "logits/rejected": -2.934338092803955, "logps/chosen": -277.3138732910156, "logps/rejected": -242.1064453125, "loss": 0.474, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6188751459121704, "rewards/margins": 1.4204778671264648, "rewards/rejected": -2.0393528938293457, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.676802447886785e-07, "logits/chosen": -2.8876590728759766, "logits/rejected": -2.8793785572052, "logps/chosen": -266.5966796875, "logps/rejected": -245.0421600341797, "loss": 0.5145, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6580719947814941, "rewards/margins": 1.34773588180542, "rewards/rejected": -2.005807876586914, "step": 920 }, { "epoch": 0.48, "learning_rate": 4.6672403901319564e-07, "logits/chosen": -2.9086079597473145, "logits/rejected": -2.8821098804473877, "logps/chosen": -259.8950500488281, "logps/rejected": -246.66268920898438, "loss": 0.4504, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.32651153206825256, "rewards/margins": 1.7572702169418335, "rewards/rejected": -2.0837817192077637, "step": 930 }, { "epoch": 0.49, "learning_rate": 4.6576783323771275e-07, "logits/chosen": -2.818511486053467, "logits/rejected": -2.8014657497406006, "logps/chosen": -231.5043487548828, "logps/rejected": -227.52957153320312, "loss": 0.4717, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.7696239352226257, "rewards/margins": 1.2795578241348267, "rewards/rejected": -2.0491816997528076, "step": 940 }, { "epoch": 0.49, "learning_rate": 4.6481162746222987e-07, "logits/chosen": -2.832824945449829, "logits/rejected": -2.8191285133361816, "logps/chosen": -300.3023986816406, "logps/rejected": -259.49481201171875, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7233734130859375, "rewards/margins": 0.7480804324150085, "rewards/rejected": -2.47145414352417, "step": 950 }, { "epoch": 0.5, "learning_rate": 4.63855421686747e-07, "logits/chosen": -2.8832907676696777, "logits/rejected": -2.841891050338745, "logps/chosen": -293.1336669921875, "logps/rejected": -253.708984375, "loss": 0.5306, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5854119062423706, "rewards/margins": 1.5504266023635864, "rewards/rejected": -2.135838747024536, "step": 960 }, { "epoch": 0.5, "learning_rate": 4.628992159112641e-07, "logits/chosen": -2.8098645210266113, "logits/rejected": -2.8350157737731934, "logps/chosen": -268.37091064453125, "logps/rejected": -274.38330078125, "loss": 0.5073, "rewards/accuracies": 0.75, "rewards/chosen": -0.3112182319164276, "rewards/margins": 1.5667588710784912, "rewards/rejected": -1.8779771327972412, "step": 970 }, { "epoch": 0.51, "learning_rate": 4.6194301013578116e-07, "logits/chosen": -2.865346670150757, "logits/rejected": -2.875011444091797, "logps/chosen": -322.71319580078125, "logps/rejected": -258.764892578125, "loss": 0.519, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5713719129562378, "rewards/margins": 1.4547207355499268, "rewards/rejected": -2.026092529296875, "step": 980 }, { "epoch": 0.51, "learning_rate": 4.609868043602983e-07, "logits/chosen": -2.937608003616333, "logits/rejected": -2.890714406967163, "logps/chosen": -255.7438507080078, "logps/rejected": -237.1357879638672, "loss": 0.4716, "rewards/accuracies": 0.75, "rewards/chosen": -0.5775920152664185, "rewards/margins": 1.088714838027954, "rewards/rejected": -1.6663068532943726, "step": 990 }, { "epoch": 0.52, "learning_rate": 4.600305985848154e-07, "logits/chosen": -2.901493787765503, "logits/rejected": -2.863281488418579, "logps/chosen": -247.2804412841797, "logps/rejected": -243.9705047607422, "loss": 0.45, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5680816173553467, "rewards/margins": 1.0015087127685547, "rewards/rejected": -1.5695903301239014, "step": 1000 }, { "epoch": 0.52, "eval_logits/chosen": -2.944443464279175, "eval_logits/rejected": -2.915790557861328, "eval_logps/chosen": -291.0830078125, "eval_logps/rejected": -265.7383117675781, "eval_loss": 0.49156078696250916, "eval_rewards/accuracies": 0.767578125, "eval_rewards/chosen": -0.670283317565918, "eval_rewards/margins": 1.5322901010513306, "eval_rewards/rejected": -2.202573537826538, "eval_runtime": 316.7143, "eval_samples_per_second": 6.315, "eval_steps_per_second": 0.101, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.590743928093325e-07, "logits/chosen": -2.7421276569366455, "logits/rejected": -2.71517276763916, "logps/chosen": -307.6241760253906, "logps/rejected": -264.940185546875, "loss": 0.5709, "rewards/accuracies": 0.75, "rewards/chosen": -0.8950144052505493, "rewards/margins": 1.0978505611419678, "rewards/rejected": -1.9928648471832275, "step": 1010 }, { "epoch": 0.53, "learning_rate": 4.581181870338497e-07, "logits/chosen": -2.79264235496521, "logits/rejected": -2.7919936180114746, "logps/chosen": -318.16876220703125, "logps/rejected": -289.8711853027344, "loss": 0.4914, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4478762745857239, "rewards/margins": 1.5937278270721436, "rewards/rejected": -2.0416042804718018, "step": 1020 }, { "epoch": 0.53, "learning_rate": 4.571619812583668e-07, "logits/chosen": -2.838495969772339, "logits/rejected": -2.8164424896240234, "logps/chosen": -271.66400146484375, "logps/rejected": -279.39678955078125, "loss": 0.4703, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7276671528816223, "rewards/margins": 1.3479902744293213, "rewards/rejected": -2.075657367706299, "step": 1030 }, { "epoch": 0.54, "learning_rate": 4.562057754828839e-07, "logits/chosen": -2.8075003623962402, "logits/rejected": -2.7784311771392822, "logps/chosen": -286.3624267578125, "logps/rejected": -255.8455810546875, "loss": 0.5342, "rewards/accuracies": 0.75, "rewards/chosen": -0.6813332438468933, "rewards/margins": 1.5885751247406006, "rewards/rejected": -2.2699084281921387, "step": 1040 }, { "epoch": 0.54, "learning_rate": 4.55249569707401e-07, "logits/chosen": -2.8811793327331543, "logits/rejected": -2.8664612770080566, "logps/chosen": -238.18032836914062, "logps/rejected": -259.69635009765625, "loss": 0.4901, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9409188032150269, "rewards/margins": 1.1440527439117432, "rewards/rejected": -2.0849716663360596, "step": 1050 }, { "epoch": 0.55, "learning_rate": 4.5429336393191814e-07, "logits/chosen": -2.8247945308685303, "logits/rejected": -2.8488352298736572, "logps/chosen": -253.0706024169922, "logps/rejected": -228.4701690673828, "loss": 0.5447, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7817291617393494, "rewards/margins": 1.0004041194915771, "rewards/rejected": -1.7821333408355713, "step": 1060 }, { "epoch": 0.55, "learning_rate": 4.5333715815643525e-07, "logits/chosen": -2.9571573734283447, "logits/rejected": -2.964838743209839, "logps/chosen": -302.10919189453125, "logps/rejected": -274.91094970703125, "loss": 0.5441, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0221765041351318, "rewards/margins": 1.3178188800811768, "rewards/rejected": -2.3399956226348877, "step": 1070 }, { "epoch": 0.56, "learning_rate": 4.5238095238095237e-07, "logits/chosen": -2.956000804901123, "logits/rejected": -2.910071611404419, "logps/chosen": -285.3445129394531, "logps/rejected": -254.9427947998047, "loss": 0.4861, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0887224674224854, "rewards/margins": 1.3039524555206299, "rewards/rejected": -2.3926749229431152, "step": 1080 }, { "epoch": 0.56, "learning_rate": 4.514247466054695e-07, "logits/chosen": -2.8950510025024414, "logits/rejected": -2.8666024208068848, "logps/chosen": -232.2127685546875, "logps/rejected": -229.81631469726562, "loss": 0.5095, "rewards/accuracies": 0.75, "rewards/chosen": -0.9966214299201965, "rewards/margins": 1.4529788494110107, "rewards/rejected": -2.4496002197265625, "step": 1090 }, { "epoch": 0.57, "learning_rate": 4.504685408299866e-07, "logits/chosen": -2.8119843006134033, "logits/rejected": -2.7979767322540283, "logps/chosen": -305.0646667480469, "logps/rejected": -284.5693664550781, "loss": 0.5239, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9634173512458801, "rewards/margins": 1.3650161027908325, "rewards/rejected": -2.3284332752227783, "step": 1100 }, { "epoch": 0.57, "eval_logits/chosen": -2.878817081451416, "eval_logits/rejected": -2.845369577407837, "eval_logps/chosen": -292.7281494140625, "eval_logps/rejected": -265.22552490234375, "eval_loss": 0.48481130599975586, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -0.8068309426307678, "eval_rewards/margins": 1.3531807661056519, "eval_rewards/rejected": -2.1600115299224854, "eval_runtime": 318.4361, "eval_samples_per_second": 6.281, "eval_steps_per_second": 0.1, "step": 1100 }, { "epoch": 0.57, "learning_rate": 4.495123350545037e-07, "logits/chosen": -2.880981683731079, "logits/rejected": -2.8679022789001465, "logps/chosen": -303.65814208984375, "logps/rejected": -290.8370361328125, "loss": 0.481, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6773524284362793, "rewards/margins": 1.214261770248413, "rewards/rejected": -1.8916141986846924, "step": 1110 }, { "epoch": 0.58, "learning_rate": 4.4855612927902083e-07, "logits/chosen": -2.8071045875549316, "logits/rejected": -2.780510902404785, "logps/chosen": -307.7220764160156, "logps/rejected": -260.41473388671875, "loss": 0.5594, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2152903079986572, "rewards/margins": 1.1422783136367798, "rewards/rejected": -2.3575685024261475, "step": 1120 }, { "epoch": 0.58, "learning_rate": 4.4759992350353795e-07, "logits/chosen": -2.817838668823242, "logits/rejected": -2.7243850231170654, "logps/chosen": -292.699462890625, "logps/rejected": -237.35189819335938, "loss": 0.4047, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9119682312011719, "rewards/margins": 1.669274091720581, "rewards/rejected": -2.581242322921753, "step": 1130 }, { "epoch": 0.59, "learning_rate": 4.46643717728055e-07, "logits/chosen": -2.7423298358917236, "logits/rejected": -2.692660331726074, "logps/chosen": -251.9851837158203, "logps/rejected": -262.4775390625, "loss": 0.4745, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0000646114349365, "rewards/margins": 1.3926888704299927, "rewards/rejected": -2.392753839492798, "step": 1140 }, { "epoch": 0.59, "learning_rate": 4.4568751195257213e-07, "logits/chosen": -2.768141508102417, "logits/rejected": -2.7031548023223877, "logps/chosen": -331.9037170410156, "logps/rejected": -262.0032653808594, "loss": 0.5157, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6028135418891907, "rewards/margins": 1.720782995223999, "rewards/rejected": -2.323596715927124, "step": 1150 }, { "epoch": 0.6, "learning_rate": 4.447313061770893e-07, "logits/chosen": -2.7821106910705566, "logits/rejected": -2.718907356262207, "logps/chosen": -265.594970703125, "logps/rejected": -271.9844055175781, "loss": 0.5171, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.6262668371200562, "rewards/margins": 1.543013334274292, "rewards/rejected": -2.1692802906036377, "step": 1160 }, { "epoch": 0.6, "learning_rate": 4.437751004016064e-07, "logits/chosen": -2.6798043251037598, "logits/rejected": -2.6406688690185547, "logps/chosen": -232.1375274658203, "logps/rejected": -245.87734985351562, "loss": 0.4895, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.7926133871078491, "rewards/margins": 1.1563670635223389, "rewards/rejected": -1.9489803314208984, "step": 1170 }, { "epoch": 0.61, "learning_rate": 4.4281889462612353e-07, "logits/chosen": -2.829881191253662, "logits/rejected": -2.8291101455688477, "logps/chosen": -271.8867492675781, "logps/rejected": -233.0546875, "loss": 0.4677, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6890048980712891, "rewards/margins": 1.305870771408081, "rewards/rejected": -1.9948759078979492, "step": 1180 }, { "epoch": 0.61, "learning_rate": 4.4186268885064064e-07, "logits/chosen": -2.8454790115356445, "logits/rejected": -2.8463377952575684, "logps/chosen": -299.15740966796875, "logps/rejected": -256.55517578125, "loss": 0.5241, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.860299289226532, "rewards/margins": 1.0273938179016113, "rewards/rejected": -1.8876930475234985, "step": 1190 }, { "epoch": 0.62, "learning_rate": 4.4090648307515776e-07, "logits/chosen": -2.8461060523986816, "logits/rejected": -2.8194382190704346, "logps/chosen": -218.40353393554688, "logps/rejected": -193.2891387939453, "loss": 0.4766, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.8421468734741211, "rewards/margins": 1.0289016962051392, "rewards/rejected": -1.8710485696792603, "step": 1200 }, { "epoch": 0.62, "eval_logits/chosen": -2.867302179336548, "eval_logits/rejected": -2.8188960552215576, "eval_logps/chosen": -290.20074462890625, "eval_logps/rejected": -261.778564453125, "eval_loss": 0.49736690521240234, "eval_rewards/accuracies": 0.744140625, "eval_rewards/chosen": -0.597055196762085, "eval_rewards/margins": 1.2768592834472656, "eval_rewards/rejected": -1.8739144802093506, "eval_runtime": 321.7279, "eval_samples_per_second": 6.216, "eval_steps_per_second": 0.099, "step": 1200 }, { "epoch": 0.62, "learning_rate": 4.399502772996749e-07, "logits/chosen": -2.855459690093994, "logits/rejected": -2.8342843055725098, "logps/chosen": -276.38092041015625, "logps/rejected": -260.7767639160156, "loss": 0.4844, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8932267427444458, "rewards/margins": 1.357304334640503, "rewards/rejected": -2.2505311965942383, "step": 1210 }, { "epoch": 0.63, "learning_rate": 4.38994071524192e-07, "logits/chosen": -2.8395485877990723, "logits/rejected": -2.8473269939422607, "logps/chosen": -257.31781005859375, "logps/rejected": -227.14266967773438, "loss": 0.4853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9152318239212036, "rewards/margins": 1.1170425415039062, "rewards/rejected": -2.0322744846343994, "step": 1220 }, { "epoch": 0.64, "learning_rate": 4.380378657487091e-07, "logits/chosen": -2.8990378379821777, "logits/rejected": -2.837754249572754, "logps/chosen": -318.90997314453125, "logps/rejected": -308.8431701660156, "loss": 0.4755, "rewards/accuracies": 0.75, "rewards/chosen": -0.5132166743278503, "rewards/margins": 1.5900919437408447, "rewards/rejected": -2.103308916091919, "step": 1230 }, { "epoch": 0.64, "learning_rate": 4.370816599732262e-07, "logits/chosen": -2.881894588470459, "logits/rejected": -2.847029447555542, "logps/chosen": -312.7770080566406, "logps/rejected": -241.28427124023438, "loss": 0.4762, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1661875247955322, "rewards/margins": 1.2864001989364624, "rewards/rejected": -2.452587842941284, "step": 1240 }, { "epoch": 0.65, "learning_rate": 4.3612545419774334e-07, "logits/chosen": -2.804856777191162, "logits/rejected": -2.8128418922424316, "logps/chosen": -240.44216918945312, "logps/rejected": -273.0758972167969, "loss": 0.5413, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2008466720581055, "rewards/margins": 0.9144786596298218, "rewards/rejected": -2.115325450897217, "step": 1250 }, { "epoch": 0.65, "learning_rate": 4.3516924842226045e-07, "logits/chosen": -2.791196823120117, "logits/rejected": -2.7827858924865723, "logps/chosen": -287.58038330078125, "logps/rejected": -254.9193572998047, "loss": 0.4981, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9099253416061401, "rewards/margins": 1.3614977598190308, "rewards/rejected": -2.271422863006592, "step": 1260 }, { "epoch": 0.66, "learning_rate": 4.3421304264677757e-07, "logits/chosen": -2.8488688468933105, "logits/rejected": -2.823370933532715, "logps/chosen": -260.3859558105469, "logps/rejected": -228.0181121826172, "loss": 0.4665, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8980686068534851, "rewards/margins": 1.4441890716552734, "rewards/rejected": -2.3422577381134033, "step": 1270 }, { "epoch": 0.66, "learning_rate": 4.332568368712947e-07, "logits/chosen": -2.9369869232177734, "logits/rejected": -2.901026725769043, "logps/chosen": -275.18603515625, "logps/rejected": -255.6267852783203, "loss": 0.5541, "rewards/accuracies": 0.75, "rewards/chosen": -0.9746459126472473, "rewards/margins": 1.1340761184692383, "rewards/rejected": -2.10872220993042, "step": 1280 }, { "epoch": 0.67, "learning_rate": 4.323006310958118e-07, "logits/chosen": -2.8876943588256836, "logits/rejected": -2.863734483718872, "logps/chosen": -318.10064697265625, "logps/rejected": -273.443115234375, "loss": 0.4573, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8325966000556946, "rewards/margins": 1.513689637184143, "rewards/rejected": -2.3462862968444824, "step": 1290 }, { "epoch": 0.67, "learning_rate": 4.313444253203289e-07, "logits/chosen": -2.849710702896118, "logits/rejected": -2.8413021564483643, "logps/chosen": -259.23809814453125, "logps/rejected": -245.3144989013672, "loss": 0.497, "rewards/accuracies": 0.75, "rewards/chosen": -0.9290975332260132, "rewards/margins": 1.4907515048980713, "rewards/rejected": -2.419848918914795, "step": 1300 }, { "epoch": 0.67, "eval_logits/chosen": -2.850832462310791, "eval_logits/rejected": -2.8080756664276123, "eval_logps/chosen": -295.5160827636719, "eval_logps/rejected": -268.895263671875, "eval_loss": 0.5048100352287292, "eval_rewards/accuracies": 0.7265625, "eval_rewards/chosen": -1.038227915763855, "eval_rewards/margins": 1.4263715744018555, "eval_rewards/rejected": -2.464599370956421, "eval_runtime": 317.3633, "eval_samples_per_second": 6.302, "eval_steps_per_second": 0.101, "step": 1300 }, { "epoch": 0.68, "learning_rate": 4.3038821954484603e-07, "logits/chosen": -2.8496804237365723, "logits/rejected": -2.787351369857788, "logps/chosen": -286.7266845703125, "logps/rejected": -259.89300537109375, "loss": 0.4511, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1912802457809448, "rewards/margins": 1.1682765483856201, "rewards/rejected": -2.3595566749572754, "step": 1310 }, { "epoch": 0.68, "learning_rate": 4.2943201376936315e-07, "logits/chosen": -2.792931079864502, "logits/rejected": -2.7611405849456787, "logps/chosen": -284.4120178222656, "logps/rejected": -262.6029052734375, "loss": 0.5266, "rewards/accuracies": 0.8125, "rewards/chosen": -1.140195369720459, "rewards/margins": 1.461016058921814, "rewards/rejected": -2.6012113094329834, "step": 1320 }, { "epoch": 0.69, "learning_rate": 4.2847580799388026e-07, "logits/chosen": -2.812748432159424, "logits/rejected": -2.7818191051483154, "logps/chosen": -300.1193542480469, "logps/rejected": -289.3134460449219, "loss": 0.5295, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2611351013183594, "rewards/margins": 1.0888680219650269, "rewards/rejected": -2.350003480911255, "step": 1330 }, { "epoch": 0.69, "learning_rate": 4.275196022183974e-07, "logits/chosen": -2.777885913848877, "logits/rejected": -2.72430682182312, "logps/chosen": -304.4727478027344, "logps/rejected": -240.90951538085938, "loss": 0.4819, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.178075909614563, "rewards/margins": 1.2842957973480225, "rewards/rejected": -2.462371826171875, "step": 1340 }, { "epoch": 0.7, "learning_rate": 4.265633964429145e-07, "logits/chosen": -2.8341128826141357, "logits/rejected": -2.8277735710144043, "logps/chosen": -271.2368469238281, "logps/rejected": -219.6669921875, "loss": 0.5483, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.102269172668457, "rewards/margins": 0.9782671928405762, "rewards/rejected": -2.080536365509033, "step": 1350 }, { "epoch": 0.7, "learning_rate": 4.256071906674316e-07, "logits/chosen": -2.839200735092163, "logits/rejected": -2.8269591331481934, "logps/chosen": -316.34442138671875, "logps/rejected": -284.6401672363281, "loss": 0.5924, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9669753909111023, "rewards/margins": 1.2528489828109741, "rewards/rejected": -2.2198243141174316, "step": 1360 }, { "epoch": 0.71, "learning_rate": 4.246509848919487e-07, "logits/chosen": -2.8589673042297363, "logits/rejected": -2.862018585205078, "logps/chosen": -264.96063232421875, "logps/rejected": -270.63311767578125, "loss": 0.7776, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9053139686584473, "rewards/margins": 1.4050822257995605, "rewards/rejected": -2.310396194458008, "step": 1370 }, { "epoch": 0.71, "learning_rate": 4.2369477911646584e-07, "logits/chosen": -2.873610258102417, "logits/rejected": -2.8400185108184814, "logps/chosen": -277.764404296875, "logps/rejected": -241.86380004882812, "loss": 0.5508, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.2043688297271729, "rewards/margins": 1.1356245279312134, "rewards/rejected": -2.339993476867676, "step": 1380 }, { "epoch": 0.72, "learning_rate": 4.2273857334098296e-07, "logits/chosen": -2.81402850151062, "logits/rejected": -2.7660045623779297, "logps/chosen": -281.29833984375, "logps/rejected": -243.02853393554688, "loss": 0.6673, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8850394487380981, "rewards/margins": 1.2282497882843018, "rewards/rejected": -2.1132893562316895, "step": 1390 }, { "epoch": 0.72, "learning_rate": 4.2178236756550007e-07, "logits/chosen": -2.7675588130950928, "logits/rejected": -2.7476298809051514, "logps/chosen": -293.7554626464844, "logps/rejected": -243.5753936767578, "loss": 0.5281, "rewards/accuracies": 0.75, "rewards/chosen": -0.986733615398407, "rewards/margins": 1.1079729795455933, "rewards/rejected": -2.0947067737579346, "step": 1400 }, { "epoch": 0.72, "eval_logits/chosen": -2.8254780769348145, "eval_logits/rejected": -2.7945051193237305, "eval_logps/chosen": -295.2208251953125, "eval_logps/rejected": -265.64361572265625, "eval_loss": 0.500278115272522, "eval_rewards/accuracies": 0.755859375, "eval_rewards/chosen": -1.0137208700180054, "eval_rewards/margins": 1.1809906959533691, "eval_rewards/rejected": -2.194711685180664, "eval_runtime": 318.4776, "eval_samples_per_second": 6.28, "eval_steps_per_second": 0.1, "step": 1400 }, { "epoch": 0.73, "learning_rate": 4.208261617900172e-07, "logits/chosen": -2.8300578594207764, "logits/rejected": -2.8291683197021484, "logps/chosen": -285.86212158203125, "logps/rejected": -218.8897705078125, "loss": 0.4851, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.991295337677002, "rewards/margins": 1.3615312576293945, "rewards/rejected": -2.3528265953063965, "step": 1410 }, { "epoch": 0.73, "learning_rate": 4.198699560145343e-07, "logits/chosen": -2.684546947479248, "logits/rejected": -2.677889585494995, "logps/chosen": -263.8229675292969, "logps/rejected": -246.3050994873047, "loss": 0.5848, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9759442210197449, "rewards/margins": 1.2241525650024414, "rewards/rejected": -2.200096845626831, "step": 1420 }, { "epoch": 0.74, "learning_rate": 4.189137502390514e-07, "logits/chosen": -2.7766776084899902, "logits/rejected": -2.7259960174560547, "logps/chosen": -278.78668212890625, "logps/rejected": -276.542724609375, "loss": 0.5414, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8481017351150513, "rewards/margins": 1.2137280702590942, "rewards/rejected": -2.0618300437927246, "step": 1430 }, { "epoch": 0.74, "learning_rate": 4.179575444635686e-07, "logits/chosen": -2.7790379524230957, "logits/rejected": -2.7324721813201904, "logps/chosen": -332.954345703125, "logps/rejected": -281.37396240234375, "loss": 0.5612, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7206013798713684, "rewards/margins": 1.2431817054748535, "rewards/rejected": -1.9637832641601562, "step": 1440 }, { "epoch": 0.75, "learning_rate": 4.170013386880857e-07, "logits/chosen": -2.7227365970611572, "logits/rejected": -2.702394962310791, "logps/chosen": -275.46258544921875, "logps/rejected": -290.63922119140625, "loss": 0.4629, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.756820797920227, "rewards/margins": 1.5169973373413086, "rewards/rejected": -2.273818254470825, "step": 1450 }, { "epoch": 0.75, "learning_rate": 4.1604513291260277e-07, "logits/chosen": -2.740973711013794, "logits/rejected": -2.708120584487915, "logps/chosen": -265.1321105957031, "logps/rejected": -263.557373046875, "loss": 0.512, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7814218401908875, "rewards/margins": 1.434728741645813, "rewards/rejected": -2.2161507606506348, "step": 1460 }, { "epoch": 0.76, "learning_rate": 4.150889271371199e-07, "logits/chosen": -2.6875643730163574, "logits/rejected": -2.6750056743621826, "logps/chosen": -286.6531066894531, "logps/rejected": -242.7853546142578, "loss": 0.5317, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9189741015434265, "rewards/margins": 1.4883990287780762, "rewards/rejected": -2.4073729515075684, "step": 1470 }, { "epoch": 0.76, "learning_rate": 4.14132721361637e-07, "logits/chosen": -2.7173943519592285, "logits/rejected": -2.6566505432128906, "logps/chosen": -277.0287170410156, "logps/rejected": -211.9427490234375, "loss": 0.4223, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0475391149520874, "rewards/margins": 1.6872495412826538, "rewards/rejected": -2.734788417816162, "step": 1480 }, { "epoch": 0.77, "learning_rate": 4.131765155861541e-07, "logits/chosen": -2.7067253589630127, "logits/rejected": -2.684105396270752, "logps/chosen": -240.4397430419922, "logps/rejected": -219.2634735107422, "loss": 0.504, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6751075387001038, "rewards/margins": 1.6110649108886719, "rewards/rejected": -2.286172389984131, "step": 1490 }, { "epoch": 0.77, "learning_rate": 4.1222030981067123e-07, "logits/chosen": -2.7754743099212646, "logits/rejected": -2.7444348335266113, "logps/chosen": -305.04144287109375, "logps/rejected": -273.15802001953125, "loss": 0.4428, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.9992974400520325, "rewards/margins": 1.479531168937683, "rewards/rejected": -2.4788289070129395, "step": 1500 }, { "epoch": 0.77, "eval_logits/chosen": -2.8138766288757324, "eval_logits/rejected": -2.781527042388916, "eval_logps/chosen": -293.6202392578125, "eval_logps/rejected": -266.918212890625, "eval_loss": 0.4851285517215729, "eval_rewards/accuracies": 0.759765625, "eval_rewards/chosen": -0.8808744549751282, "eval_rewards/margins": 1.4196304082870483, "eval_rewards/rejected": -2.3005049228668213, "eval_runtime": 319.4215, "eval_samples_per_second": 6.261, "eval_steps_per_second": 0.1, "step": 1500 }, { "epoch": 0.78, "learning_rate": 4.1126410403518835e-07, "logits/chosen": -2.7427940368652344, "logits/rejected": -2.729235887527466, "logps/chosen": -251.69873046875, "logps/rejected": -273.91204833984375, "loss": 0.5308, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1604094505310059, "rewards/margins": 1.4270920753479004, "rewards/rejected": -2.587501049041748, "step": 1510 }, { "epoch": 0.78, "learning_rate": 4.1030789825970546e-07, "logits/chosen": -2.7286899089813232, "logits/rejected": -2.7324271202087402, "logps/chosen": -295.30816650390625, "logps/rejected": -276.5433349609375, "loss": 0.5479, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7870533466339111, "rewards/margins": 1.2467817068099976, "rewards/rejected": -2.033834934234619, "step": 1520 }, { "epoch": 0.79, "learning_rate": 4.093516924842226e-07, "logits/chosen": -2.7449193000793457, "logits/rejected": -2.7396390438079834, "logps/chosen": -286.4870910644531, "logps/rejected": -264.2047424316406, "loss": 0.4302, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6260234117507935, "rewards/margins": 1.3172390460968018, "rewards/rejected": -1.9432624578475952, "step": 1530 }, { "epoch": 0.8, "learning_rate": 4.083954867087397e-07, "logits/chosen": -2.880856990814209, "logits/rejected": -2.8310086727142334, "logps/chosen": -264.1263732910156, "logps/rejected": -254.74954223632812, "loss": 0.4815, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7684402465820312, "rewards/margins": 1.2261098623275757, "rewards/rejected": -1.994550108909607, "step": 1540 }, { "epoch": 0.8, "learning_rate": 4.074392809332568e-07, "logits/chosen": -2.7877519130706787, "logits/rejected": -2.7584595680236816, "logps/chosen": -320.35577392578125, "logps/rejected": -277.05474853515625, "loss": 0.5136, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7774165868759155, "rewards/margins": 1.9509509801864624, "rewards/rejected": -2.728367805480957, "step": 1550 }, { "epoch": 0.81, "learning_rate": 4.064830751577739e-07, "logits/chosen": -2.8028507232666016, "logits/rejected": -2.768125057220459, "logps/chosen": -264.3491516113281, "logps/rejected": -252.72451782226562, "loss": 0.4797, "rewards/accuracies": 0.875, "rewards/chosen": -0.6554406881332397, "rewards/margins": 1.6132471561431885, "rewards/rejected": -2.2686874866485596, "step": 1560 }, { "epoch": 0.81, "learning_rate": 4.0552686938229104e-07, "logits/chosen": -2.8114914894104004, "logits/rejected": -2.8133301734924316, "logps/chosen": -263.5157775878906, "logps/rejected": -240.23287963867188, "loss": 0.4487, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6728530526161194, "rewards/margins": 1.3916168212890625, "rewards/rejected": -2.064469814300537, "step": 1570 }, { "epoch": 0.82, "learning_rate": 4.045706636068082e-07, "logits/chosen": -2.784010171890259, "logits/rejected": -2.7423911094665527, "logps/chosen": -277.7910461425781, "logps/rejected": -253.73117065429688, "loss": 0.4494, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8233027458190918, "rewards/margins": 1.6080726385116577, "rewards/rejected": -2.431375026702881, "step": 1580 }, { "epoch": 0.82, "learning_rate": 4.036144578313253e-07, "logits/chosen": -2.7516722679138184, "logits/rejected": -2.728902816772461, "logps/chosen": -281.9308776855469, "logps/rejected": -262.9776611328125, "loss": 0.5688, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9304908514022827, "rewards/margins": 1.3224947452545166, "rewards/rejected": -2.2529854774475098, "step": 1590 }, { "epoch": 0.83, "learning_rate": 4.0265825205584244e-07, "logits/chosen": -2.7793705463409424, "logits/rejected": -2.741088390350342, "logps/chosen": -297.0889587402344, "logps/rejected": -271.71759033203125, "loss": 0.5192, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.000962734222412, "rewards/margins": 1.3558018207550049, "rewards/rejected": -2.356764554977417, "step": 1600 }, { "epoch": 0.83, "eval_logits/chosen": -2.772773265838623, "eval_logits/rejected": -2.7394397258758545, "eval_logps/chosen": -293.9598083496094, "eval_logps/rejected": -267.9066162109375, "eval_loss": 0.4758269786834717, "eval_rewards/accuracies": 0.75390625, "eval_rewards/chosen": -0.9090580940246582, "eval_rewards/margins": 1.4734832048416138, "eval_rewards/rejected": -2.3825411796569824, "eval_runtime": 320.799, "eval_samples_per_second": 6.234, "eval_steps_per_second": 0.1, "step": 1600 }, { "epoch": 0.83, "learning_rate": 4.0170204628035956e-07, "logits/chosen": -2.7495179176330566, "logits/rejected": -2.7530386447906494, "logps/chosen": -228.5417938232422, "logps/rejected": -225.79443359375, "loss": 0.4594, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7358361482620239, "rewards/margins": 1.6655571460723877, "rewards/rejected": -2.401392936706543, "step": 1610 }, { "epoch": 0.84, "learning_rate": 4.007458405048766e-07, "logits/chosen": -2.769211530685425, "logits/rejected": -2.7297558784484863, "logps/chosen": -315.2281799316406, "logps/rejected": -290.23358154296875, "loss": 0.4533, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0057151317596436, "rewards/margins": 1.6007373332977295, "rewards/rejected": -2.606452465057373, "step": 1620 }, { "epoch": 0.84, "learning_rate": 3.9978963472939373e-07, "logits/chosen": -2.7459876537323, "logits/rejected": -2.718555212020874, "logps/chosen": -284.618896484375, "logps/rejected": -252.7120819091797, "loss": 0.4716, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.816493034362793, "rewards/margins": 1.718118667602539, "rewards/rejected": -2.534611463546753, "step": 1630 }, { "epoch": 0.85, "learning_rate": 3.9883342895391085e-07, "logits/chosen": -2.76477313041687, "logits/rejected": -2.7111496925354004, "logps/chosen": -326.5727844238281, "logps/rejected": -261.5386657714844, "loss": 0.4638, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9594038128852844, "rewards/margins": 1.6950759887695312, "rewards/rejected": -2.654479503631592, "step": 1640 }, { "epoch": 0.85, "learning_rate": 3.9787722317842796e-07, "logits/chosen": -2.81067156791687, "logits/rejected": -2.769451141357422, "logps/chosen": -298.5956115722656, "logps/rejected": -220.0414276123047, "loss": 0.4967, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.6601829528808594, "rewards/margins": 1.7225878238677979, "rewards/rejected": -2.3827710151672363, "step": 1650 }, { "epoch": 0.86, "learning_rate": 3.969210174029451e-07, "logits/chosen": -2.755230188369751, "logits/rejected": -2.6929092407226562, "logps/chosen": -285.7733154296875, "logps/rejected": -282.2913818359375, "loss": 0.4814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2028340101242065, "rewards/margins": 1.4422218799591064, "rewards/rejected": -2.6450560092926025, "step": 1660 }, { "epoch": 0.86, "learning_rate": 3.959648116274622e-07, "logits/chosen": -2.7809128761291504, "logits/rejected": -2.734466075897217, "logps/chosen": -276.9807434082031, "logps/rejected": -242.58352661132812, "loss": 0.5961, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2188154458999634, "rewards/margins": 1.1295832395553589, "rewards/rejected": -2.3483986854553223, "step": 1670 }, { "epoch": 0.87, "learning_rate": 3.950086058519793e-07, "logits/chosen": -2.823768138885498, "logits/rejected": -2.806485176086426, "logps/chosen": -252.9328155517578, "logps/rejected": -242.95828247070312, "loss": 0.5396, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.0010833740234375, "rewards/margins": 1.2032005786895752, "rewards/rejected": -2.2042839527130127, "step": 1680 }, { "epoch": 0.87, "learning_rate": 3.9405240007649643e-07, "logits/chosen": -2.911090135574341, "logits/rejected": -2.896416187286377, "logps/chosen": -268.2712707519531, "logps/rejected": -259.96990966796875, "loss": 0.4689, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.0347354412078857, "rewards/margins": 1.1820820569992065, "rewards/rejected": -2.2168173789978027, "step": 1690 }, { "epoch": 0.88, "learning_rate": 3.9309619430101354e-07, "logits/chosen": -2.7931480407714844, "logits/rejected": -2.7691762447357178, "logps/chosen": -264.2796325683594, "logps/rejected": -249.5575714111328, "loss": 0.533, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9981765747070312, "rewards/margins": 1.411323070526123, "rewards/rejected": -2.4094996452331543, "step": 1700 }, { "epoch": 0.88, "eval_logits/chosen": -2.833019733428955, "eval_logits/rejected": -2.8005237579345703, "eval_logps/chosen": -292.82659912109375, "eval_logps/rejected": -265.5081787109375, "eval_loss": 0.4752504229545593, "eval_rewards/accuracies": 0.767578125, "eval_rewards/chosen": -0.8150023221969604, "eval_rewards/margins": 1.3684699535369873, "eval_rewards/rejected": -2.183472156524658, "eval_runtime": 317.8946, "eval_samples_per_second": 6.291, "eval_steps_per_second": 0.101, "step": 1700 }, { "epoch": 0.88, "learning_rate": 3.9213998852553066e-07, "logits/chosen": -2.844062566757202, "logits/rejected": -2.8267855644226074, "logps/chosen": -328.4850769042969, "logps/rejected": -273.3662414550781, "loss": 0.4976, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.9768746495246887, "rewards/margins": 1.2568423748016357, "rewards/rejected": -2.233717203140259, "step": 1710 }, { "epoch": 0.89, "learning_rate": 3.9118378275004783e-07, "logits/chosen": -2.870741605758667, "logits/rejected": -2.8543131351470947, "logps/chosen": -276.6694030761719, "logps/rejected": -319.7810363769531, "loss": 0.5615, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0811164379119873, "rewards/margins": 1.0238618850708008, "rewards/rejected": -2.104978322982788, "step": 1720 }, { "epoch": 0.89, "learning_rate": 3.9022757697456494e-07, "logits/chosen": -2.7498209476470947, "logits/rejected": -2.763112783432007, "logps/chosen": -332.51806640625, "logps/rejected": -284.61541748046875, "loss": 0.4673, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7268058061599731, "rewards/margins": 1.2242915630340576, "rewards/rejected": -1.9510971307754517, "step": 1730 }, { "epoch": 0.9, "learning_rate": 3.8927137119908206e-07, "logits/chosen": -2.7959511280059814, "logits/rejected": -2.7661242485046387, "logps/chosen": -302.2712707519531, "logps/rejected": -233.9701690673828, "loss": 0.5276, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9777994155883789, "rewards/margins": 1.1548362970352173, "rewards/rejected": -2.1326355934143066, "step": 1740 }, { "epoch": 0.9, "learning_rate": 3.883151654235992e-07, "logits/chosen": -2.7878224849700928, "logits/rejected": -2.7468504905700684, "logps/chosen": -297.1304016113281, "logps/rejected": -268.62579345703125, "loss": 0.4869, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.8458808660507202, "rewards/margins": 1.6886154413223267, "rewards/rejected": -2.5344960689544678, "step": 1750 }, { "epoch": 0.91, "learning_rate": 3.873589596481163e-07, "logits/chosen": -2.8348701000213623, "logits/rejected": -2.829029083251953, "logps/chosen": -296.2809143066406, "logps/rejected": -270.854736328125, "loss": 0.5471, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2697277069091797, "rewards/margins": 1.2317073345184326, "rewards/rejected": -2.501434803009033, "step": 1760 }, { "epoch": 0.91, "learning_rate": 3.864027538726334e-07, "logits/chosen": -2.761077880859375, "logits/rejected": -2.762988567352295, "logps/chosen": -279.43878173828125, "logps/rejected": -259.74066162109375, "loss": 0.5687, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.8637536764144897, "rewards/margins": 1.701249122619629, "rewards/rejected": -2.565002918243408, "step": 1770 }, { "epoch": 0.92, "learning_rate": 3.8544654809715047e-07, "logits/chosen": -2.781764268875122, "logits/rejected": -2.755702495574951, "logps/chosen": -294.3699645996094, "logps/rejected": -265.453369140625, "loss": 0.5175, "rewards/accuracies": 0.75, "rewards/chosen": -1.1851778030395508, "rewards/margins": 1.3336509466171265, "rewards/rejected": -2.5188283920288086, "step": 1780 }, { "epoch": 0.92, "learning_rate": 3.844903423216676e-07, "logits/chosen": -2.7623558044433594, "logits/rejected": -2.7166218757629395, "logps/chosen": -271.791748046875, "logps/rejected": -240.17330932617188, "loss": 0.5106, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.8020727038383484, "rewards/margins": 1.2217572927474976, "rewards/rejected": -2.023829936981201, "step": 1790 }, { "epoch": 0.93, "learning_rate": 3.835341365461847e-07, "logits/chosen": -2.7119574546813965, "logits/rejected": -2.6809260845184326, "logps/chosen": -274.4230651855469, "logps/rejected": -226.7615966796875, "loss": 0.5803, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.660488486289978, "rewards/margins": 1.4312580823898315, "rewards/rejected": -2.0917468070983887, "step": 1800 }, { "epoch": 0.93, "eval_logits/chosen": -2.7542262077331543, "eval_logits/rejected": -2.7118141651153564, "eval_logps/chosen": -291.21661376953125, "eval_logps/rejected": -263.7262268066406, "eval_loss": 0.48542389273643494, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.6813722252845764, "eval_rewards/margins": 1.3541966676712036, "eval_rewards/rejected": -2.0355687141418457, "eval_runtime": 319.1659, "eval_samples_per_second": 6.266, "eval_steps_per_second": 0.1, "step": 1800 }, { "epoch": 0.93, "learning_rate": 3.825779307707018e-07, "logits/chosen": -2.757596731185913, "logits/rejected": -2.7049384117126465, "logps/chosen": -211.03396606445312, "logps/rejected": -233.15756225585938, "loss": 0.5198, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7588549256324768, "rewards/margins": 1.0998008251190186, "rewards/rejected": -1.8586556911468506, "step": 1810 }, { "epoch": 0.94, "learning_rate": 3.8162172499521893e-07, "logits/chosen": -2.781250238418579, "logits/rejected": -2.7046427726745605, "logps/chosen": -267.51214599609375, "logps/rejected": -222.8110809326172, "loss": 0.5098, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7936843037605286, "rewards/margins": 1.3773696422576904, "rewards/rejected": -2.1710543632507324, "step": 1820 }, { "epoch": 0.94, "learning_rate": 3.8066551921973605e-07, "logits/chosen": -2.728585958480835, "logits/rejected": -2.724705219268799, "logps/chosen": -263.5246276855469, "logps/rejected": -240.3393096923828, "loss": 0.4847, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.6747117042541504, "rewards/margins": 1.237976312637329, "rewards/rejected": -1.9126880168914795, "step": 1830 }, { "epoch": 0.95, "learning_rate": 3.7970931344425316e-07, "logits/chosen": -2.733170986175537, "logits/rejected": -2.6717562675476074, "logps/chosen": -300.1498107910156, "logps/rejected": -243.97872924804688, "loss": 0.4631, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7505453824996948, "rewards/margins": 1.109626054763794, "rewards/rejected": -1.8601710796356201, "step": 1840 }, { "epoch": 0.96, "learning_rate": 3.787531076687703e-07, "logits/chosen": -2.6076982021331787, "logits/rejected": -2.603797435760498, "logps/chosen": -251.7250518798828, "logps/rejected": -210.78012084960938, "loss": 0.5239, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.7790793180465698, "rewards/margins": 1.4422688484191895, "rewards/rejected": -2.221348285675049, "step": 1850 }, { "epoch": 0.96, "learning_rate": 3.7779690189328745e-07, "logits/chosen": -2.749746084213257, "logits/rejected": -2.7368478775024414, "logps/chosen": -262.80609130859375, "logps/rejected": -242.1180419921875, "loss": 0.4598, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.8659567832946777, "rewards/margins": 1.2928107976913452, "rewards/rejected": -2.1587674617767334, "step": 1860 }, { "epoch": 0.97, "learning_rate": 3.7684069611780456e-07, "logits/chosen": -2.7655014991760254, "logits/rejected": -2.7061004638671875, "logps/chosen": -285.18267822265625, "logps/rejected": -254.34707641601562, "loss": 0.5085, "rewards/accuracies": 0.75, "rewards/chosen": -0.5289172530174255, "rewards/margins": 1.4916884899139404, "rewards/rejected": -2.0206058025360107, "step": 1870 }, { "epoch": 0.97, "learning_rate": 3.758844903423217e-07, "logits/chosen": -2.80228853225708, "logits/rejected": -2.744138240814209, "logps/chosen": -257.478271484375, "logps/rejected": -252.5127410888672, "loss": 0.4789, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.7231428027153015, "rewards/margins": 1.5403425693511963, "rewards/rejected": -2.2634854316711426, "step": 1880 }, { "epoch": 0.98, "learning_rate": 3.749282845668388e-07, "logits/chosen": -2.8067102432250977, "logits/rejected": -2.7753746509552, "logps/chosen": -305.51080322265625, "logps/rejected": -274.6072082519531, "loss": 0.4907, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.912000298500061, "rewards/margins": 1.2635942697525024, "rewards/rejected": -2.1755943298339844, "step": 1890 }, { "epoch": 0.98, "learning_rate": 3.739720787913559e-07, "logits/chosen": -2.7168214321136475, "logits/rejected": -2.68996262550354, "logps/chosen": -267.233642578125, "logps/rejected": -236.4512176513672, "loss": 0.4714, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8374724388122559, "rewards/margins": 1.4095791578292847, "rewards/rejected": -2.24705171585083, "step": 1900 }, { "epoch": 0.98, "eval_logits/chosen": -2.7287309169769287, "eval_logits/rejected": -2.686368703842163, "eval_logps/chosen": -292.27044677734375, "eval_logps/rejected": -264.8912353515625, "eval_loss": 0.4854743778705597, "eval_rewards/accuracies": 0.755859375, "eval_rewards/chosen": -0.7688434720039368, "eval_rewards/margins": 1.363422155380249, "eval_rewards/rejected": -2.132265567779541, "eval_runtime": 319.5662, "eval_samples_per_second": 6.258, "eval_steps_per_second": 0.1, "step": 1900 }, { "epoch": 0.99, "learning_rate": 3.73015873015873e-07, "logits/chosen": -2.717925548553467, "logits/rejected": -2.733980655670166, "logps/chosen": -279.5800476074219, "logps/rejected": -266.599609375, "loss": 0.4845, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.7073032259941101, "rewards/margins": 1.0223753452301025, "rewards/rejected": -1.729678750038147, "step": 1910 }, { "epoch": 0.99, "learning_rate": 3.7205966724039014e-07, "logits/chosen": -2.630387783050537, "logits/rejected": -2.628966808319092, "logps/chosen": -291.262451171875, "logps/rejected": -254.486328125, "loss": 0.4876, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.9451972246170044, "rewards/margins": 1.1159427165985107, "rewards/rejected": -2.0611400604248047, "step": 1920 }, { "epoch": 1.0, "learning_rate": 3.711034614649072e-07, "logits/chosen": -2.751884937286377, "logits/rejected": -2.6744704246520996, "logps/chosen": -310.27105712890625, "logps/rejected": -250.6178741455078, "loss": 0.4338, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.8743211030960083, "rewards/margins": 1.6535139083862305, "rewards/rejected": -2.5278351306915283, "step": 1930 }, { "epoch": 1.0, "learning_rate": 3.701472556894243e-07, "logits/chosen": -2.6217920780181885, "logits/rejected": -2.6342613697052, "logps/chosen": -254.4636688232422, "logps/rejected": -291.636474609375, "loss": 0.3546, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37152260541915894, "rewards/margins": 2.311744213104248, "rewards/rejected": -2.683267116546631, "step": 1940 }, { "epoch": 1.01, "learning_rate": 3.6919104991394144e-07, "logits/chosen": -2.7525599002838135, "logits/rejected": -2.721869945526123, "logps/chosen": -264.6305847167969, "logps/rejected": -271.7926940917969, "loss": 0.0935, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7642915844917297, "rewards/margins": 5.123612403869629, "rewards/rejected": -4.359320640563965, "step": 1950 }, { "epoch": 1.01, "learning_rate": 3.6823484413845855e-07, "logits/chosen": -2.6347012519836426, "logits/rejected": -2.6132309436798096, "logps/chosen": -266.58013916015625, "logps/rejected": -297.7238464355469, "loss": 0.0793, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.11046914756298065, "rewards/margins": 4.930331707000732, "rewards/rejected": -4.819861888885498, "step": 1960 }, { "epoch": 1.02, "learning_rate": 3.6727863836297567e-07, "logits/chosen": -2.644935131072998, "logits/rejected": -2.632234811782837, "logps/chosen": -247.8032684326172, "logps/rejected": -276.6401062011719, "loss": 0.0932, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6025737524032593, "rewards/margins": 5.628508567810059, "rewards/rejected": -5.025933742523193, "step": 1970 }, { "epoch": 1.02, "learning_rate": 3.663224325874928e-07, "logits/chosen": -2.6542904376983643, "logits/rejected": -2.612003803253174, "logps/chosen": -245.9752655029297, "logps/rejected": -289.7812805175781, "loss": 0.1119, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.11580270528793335, "rewards/margins": 4.786902904510498, "rewards/rejected": -4.671099662780762, "step": 1980 }, { "epoch": 1.03, "learning_rate": 3.653662268120099e-07, "logits/chosen": -2.6202826499938965, "logits/rejected": -2.613140344619751, "logps/chosen": -245.0804901123047, "logps/rejected": -325.1968994140625, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": 0.588118851184845, "rewards/margins": 5.362673759460449, "rewards/rejected": -4.77455472946167, "step": 1990 }, { "epoch": 1.03, "learning_rate": 3.6441002103652707e-07, "logits/chosen": -2.6428706645965576, "logits/rejected": -2.59324312210083, "logps/chosen": -271.5627136230469, "logps/rejected": -304.9239501953125, "loss": 0.0702, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.08451937139034271, "rewards/margins": 5.415391445159912, "rewards/rejected": -5.499910354614258, "step": 2000 }, { "epoch": 1.03, "eval_logits/chosen": -2.6669654846191406, "eval_logits/rejected": -2.6171834468841553, "eval_logps/chosen": -300.97821044921875, "eval_logps/rejected": -281.7778625488281, "eval_loss": 0.4988020062446594, "eval_rewards/accuracies": 0.779296875, "eval_rewards/chosen": -1.4915847778320312, "eval_rewards/margins": 2.042271375656128, "eval_rewards/rejected": -3.53385591506958, "eval_runtime": 319.7077, "eval_samples_per_second": 6.256, "eval_steps_per_second": 0.1, "step": 2000 }, { "epoch": 1.04, "learning_rate": 3.634538152610442e-07, "logits/chosen": -2.641677141189575, "logits/rejected": -2.643462657928467, "logps/chosen": -276.3083801269531, "logps/rejected": -314.2549133300781, "loss": 0.0711, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16625761985778809, "rewards/margins": 5.539187431335449, "rewards/rejected": -5.372929573059082, "step": 2010 }, { "epoch": 1.04, "learning_rate": 3.624976094855613e-07, "logits/chosen": -2.5828781127929688, "logits/rejected": -2.5809438228607178, "logps/chosen": -260.15313720703125, "logps/rejected": -280.32427978515625, "loss": 0.0729, "rewards/accuracies": 1.0, "rewards/chosen": 0.1327195167541504, "rewards/margins": 5.46164608001709, "rewards/rejected": -5.328926086425781, "step": 2020 }, { "epoch": 1.05, "learning_rate": 3.615414037100784e-07, "logits/chosen": -2.6018245220184326, "logits/rejected": -2.528472423553467, "logps/chosen": -287.52838134765625, "logps/rejected": -281.5432434082031, "loss": 0.0876, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2448788434267044, "rewards/margins": 4.978910446166992, "rewards/rejected": -5.223789691925049, "step": 2030 }, { "epoch": 1.05, "learning_rate": 3.6058519793459553e-07, "logits/chosen": -2.563922166824341, "logits/rejected": -2.570225954055786, "logps/chosen": -246.0321807861328, "logps/rejected": -261.4310607910156, "loss": 0.1004, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17234891653060913, "rewards/margins": 5.0647807121276855, "rewards/rejected": -4.892431735992432, "step": 2040 }, { "epoch": 1.06, "learning_rate": 3.5962899215911265e-07, "logits/chosen": -2.644136428833008, "logits/rejected": -2.5843114852905273, "logps/chosen": -242.87496948242188, "logps/rejected": -288.273193359375, "loss": 0.05, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.05976637452840805, "rewards/margins": 4.80216121673584, "rewards/rejected": -4.861927509307861, "step": 2050 }, { "epoch": 1.06, "learning_rate": 3.5867278638362976e-07, "logits/chosen": -2.6612026691436768, "logits/rejected": -2.602583408355713, "logps/chosen": -312.2911071777344, "logps/rejected": -329.08880615234375, "loss": 0.0794, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.025797352194786072, "rewards/margins": 5.537256717681885, "rewards/rejected": -5.5114593505859375, "step": 2060 }, { "epoch": 1.07, "learning_rate": 3.577165806081469e-07, "logits/chosen": -2.683077335357666, "logits/rejected": -2.6511878967285156, "logps/chosen": -259.8304138183594, "logps/rejected": -273.88134765625, "loss": 0.1119, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3746635913848877, "rewards/margins": 4.719629287719727, "rewards/rejected": -5.094293117523193, "step": 2070 }, { "epoch": 1.07, "learning_rate": 3.56760374832664e-07, "logits/chosen": -2.743640661239624, "logits/rejected": -2.6378026008605957, "logps/chosen": -285.29217529296875, "logps/rejected": -325.79730224609375, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 0.7642688751220703, "rewards/margins": 6.9339423179626465, "rewards/rejected": -6.169673442840576, "step": 2080 }, { "epoch": 1.08, "learning_rate": 3.5580416905718106e-07, "logits/chosen": -2.640382766723633, "logits/rejected": -2.603083372116089, "logps/chosen": -305.37701416015625, "logps/rejected": -279.64251708984375, "loss": 0.0999, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.019237805157899857, "rewards/margins": 5.373513221740723, "rewards/rejected": -5.354275703430176, "step": 2090 }, { "epoch": 1.08, "learning_rate": 3.5484796328169817e-07, "logits/chosen": -2.625525712966919, "logits/rejected": -2.5699048042297363, "logps/chosen": -265.05230712890625, "logps/rejected": -266.5211181640625, "loss": 0.0732, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2632926106452942, "rewards/margins": 5.108711242675781, "rewards/rejected": -5.37200403213501, "step": 2100 }, { "epoch": 1.08, "eval_logits/chosen": -2.6880927085876465, "eval_logits/rejected": -2.635967254638672, "eval_logps/chosen": -302.6147155761719, "eval_logps/rejected": -285.49981689453125, "eval_loss": 0.518837034702301, "eval_rewards/accuracies": 0.779296875, "eval_rewards/chosen": -1.6274150609970093, "eval_rewards/margins": 2.215365171432495, "eval_rewards/rejected": -3.842780113220215, "eval_runtime": 320.088, "eval_samples_per_second": 6.248, "eval_steps_per_second": 0.1, "step": 2100 }, { "epoch": 1.09, "learning_rate": 3.538917575062153e-07, "logits/chosen": -2.5916850566864014, "logits/rejected": -2.5758230686187744, "logps/chosen": -263.6676940917969, "logps/rejected": -322.29315185546875, "loss": 0.0513, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.11733119189739227, "rewards/margins": 6.150258541107178, "rewards/rejected": -6.267590045928955, "step": 2110 }, { "epoch": 1.09, "learning_rate": 3.529355517307324e-07, "logits/chosen": -2.6319103240966797, "logits/rejected": -2.5478832721710205, "logps/chosen": -250.47476196289062, "logps/rejected": -309.0978088378906, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": 0.16580773890018463, "rewards/margins": 6.501988410949707, "rewards/rejected": -6.336180210113525, "step": 2120 }, { "epoch": 1.1, "learning_rate": 3.519793459552495e-07, "logits/chosen": -2.641134738922119, "logits/rejected": -2.6294798851013184, "logps/chosen": -277.805419921875, "logps/rejected": -312.05352783203125, "loss": 0.2798, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -1.4639580249786377, "rewards/margins": 4.2110114097595215, "rewards/rejected": -5.674968719482422, "step": 2130 }, { "epoch": 1.1, "learning_rate": 3.510231401797667e-07, "logits/chosen": -2.6621997356414795, "logits/rejected": -2.632620334625244, "logps/chosen": -306.9314270019531, "logps/rejected": -307.2564392089844, "loss": 0.1166, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7471309900283813, "rewards/margins": 5.462823867797852, "rewards/rejected": -6.209954738616943, "step": 2140 }, { "epoch": 1.11, "learning_rate": 3.500669344042838e-07, "logits/chosen": -2.725174903869629, "logits/rejected": -2.6272196769714355, "logps/chosen": -258.8153991699219, "logps/rejected": -298.2309875488281, "loss": 0.0757, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6687513589859009, "rewards/margins": 5.353775978088379, "rewards/rejected": -6.022526741027832, "step": 2150 }, { "epoch": 1.12, "learning_rate": 3.491107286288009e-07, "logits/chosen": -2.711127281188965, "logits/rejected": -2.647641658782959, "logps/chosen": -270.5455017089844, "logps/rejected": -314.5936279296875, "loss": 0.1358, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3531016707420349, "rewards/margins": 5.820456027984619, "rewards/rejected": -6.173557281494141, "step": 2160 }, { "epoch": 1.12, "learning_rate": 3.4815452285331803e-07, "logits/chosen": -2.6257951259613037, "logits/rejected": -2.640259265899658, "logps/chosen": -288.4144592285156, "logps/rejected": -287.8852233886719, "loss": 0.1067, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6735815405845642, "rewards/margins": 5.013296604156494, "rewards/rejected": -5.686878204345703, "step": 2170 }, { "epoch": 1.13, "learning_rate": 3.4719831707783515e-07, "logits/chosen": -2.65114426612854, "logits/rejected": -2.6000962257385254, "logps/chosen": -301.15411376953125, "logps/rejected": -335.2748718261719, "loss": 0.085, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.37887340784072876, "rewards/margins": 6.436954498291016, "rewards/rejected": -6.815828800201416, "step": 2180 }, { "epoch": 1.13, "learning_rate": 3.4624211130235227e-07, "logits/chosen": -2.753479480743408, "logits/rejected": -2.6782631874084473, "logps/chosen": -252.6403350830078, "logps/rejected": -279.285888671875, "loss": 0.085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6888843774795532, "rewards/margins": 5.546933174133301, "rewards/rejected": -6.235817909240723, "step": 2190 }, { "epoch": 1.14, "learning_rate": 3.452859055268694e-07, "logits/chosen": -2.743636131286621, "logits/rejected": -2.6467411518096924, "logps/chosen": -258.35882568359375, "logps/rejected": -283.0083923339844, "loss": 0.077, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5067641139030457, "rewards/margins": 5.8026299476623535, "rewards/rejected": -6.309393405914307, "step": 2200 }, { "epoch": 1.14, "eval_logits/chosen": -2.7822632789611816, "eval_logits/rejected": -2.728759527206421, "eval_logps/chosen": -308.92279052734375, "eval_logps/rejected": -290.8334045410156, "eval_loss": 0.527400016784668, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -2.1509876251220703, "eval_rewards/margins": 2.1344761848449707, "eval_rewards/rejected": -4.285463809967041, "eval_runtime": 317.8558, "eval_samples_per_second": 6.292, "eval_steps_per_second": 0.101, "step": 2200 }, { "epoch": 1.14, "learning_rate": 3.443296997513865e-07, "logits/chosen": -2.7795581817626953, "logits/rejected": -2.773555278778076, "logps/chosen": -300.0327453613281, "logps/rejected": -339.4162292480469, "loss": 0.0703, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.04485178366303444, "rewards/margins": 6.257258415222168, "rewards/rejected": -6.212407112121582, "step": 2210 }, { "epoch": 1.15, "learning_rate": 3.433734939759036e-07, "logits/chosen": -2.7311389446258545, "logits/rejected": -2.6696159839630127, "logps/chosen": -290.277587890625, "logps/rejected": -348.5881652832031, "loss": 0.0978, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.20712128281593323, "rewards/margins": 7.117246150970459, "rewards/rejected": -6.910124778747559, "step": 2220 }, { "epoch": 1.15, "learning_rate": 3.4241728820042073e-07, "logits/chosen": -2.651031494140625, "logits/rejected": -2.6046855449676514, "logps/chosen": -246.6273956298828, "logps/rejected": -317.081787109375, "loss": 0.09, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7147589921951294, "rewards/margins": 5.336766719818115, "rewards/rejected": -6.051525115966797, "step": 2230 }, { "epoch": 1.16, "learning_rate": 3.4146108242493784e-07, "logits/chosen": -2.721897840499878, "logits/rejected": -2.73583984375, "logps/chosen": -239.13265991210938, "logps/rejected": -290.4288024902344, "loss": 0.0936, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1963367760181427, "rewards/margins": 5.392816543579102, "rewards/rejected": -5.58915376663208, "step": 2240 }, { "epoch": 1.16, "learning_rate": 3.405048766494549e-07, "logits/chosen": -2.705955982208252, "logits/rejected": -2.6627707481384277, "logps/chosen": -294.12884521484375, "logps/rejected": -286.95172119140625, "loss": 0.1018, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2477845847606659, "rewards/margins": 5.5062785148620605, "rewards/rejected": -5.754064083099365, "step": 2250 }, { "epoch": 1.17, "learning_rate": 3.39548670873972e-07, "logits/chosen": -2.666400194168091, "logits/rejected": -2.637899875640869, "logps/chosen": -314.47991943359375, "logps/rejected": -355.02020263671875, "loss": 0.113, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0485207624733448, "rewards/margins": 6.785955905914307, "rewards/rejected": -6.737435340881348, "step": 2260 }, { "epoch": 1.17, "learning_rate": 3.3859246509848914e-07, "logits/chosen": -2.6963794231414795, "logits/rejected": -2.6791512966156006, "logps/chosen": -271.19989013671875, "logps/rejected": -315.05706787109375, "loss": 0.0935, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3176961839199066, "rewards/margins": 5.508792877197266, "rewards/rejected": -5.826489448547363, "step": 2270 }, { "epoch": 1.18, "learning_rate": 3.376362593230063e-07, "logits/chosen": -2.6799559593200684, "logits/rejected": -2.6982581615448, "logps/chosen": -253.5721893310547, "logps/rejected": -322.83612060546875, "loss": 0.0764, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3253689408302307, "rewards/margins": 5.896140098571777, "rewards/rejected": -6.221508979797363, "step": 2280 }, { "epoch": 1.18, "learning_rate": 3.366800535475234e-07, "logits/chosen": -2.69290828704834, "logits/rejected": -2.6353368759155273, "logps/chosen": -290.2775573730469, "logps/rejected": -291.1913757324219, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.17918790876865387, "rewards/margins": 6.185643672943115, "rewards/rejected": -6.006455898284912, "step": 2290 }, { "epoch": 1.19, "learning_rate": 3.3572384777204054e-07, "logits/chosen": -2.7753913402557373, "logits/rejected": -2.732677936553955, "logps/chosen": -295.47418212890625, "logps/rejected": -291.96478271484375, "loss": 0.0673, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.04849424213171005, "rewards/margins": 5.900446891784668, "rewards/rejected": -5.851952075958252, "step": 2300 }, { "epoch": 1.19, "eval_logits/chosen": -2.7568819522857666, "eval_logits/rejected": -2.6971426010131836, "eval_logps/chosen": -303.86004638671875, "eval_logps/rejected": -286.6026306152344, "eval_loss": 0.5168585181236267, "eval_rewards/accuracies": 0.783203125, "eval_rewards/chosen": -1.7307777404785156, "eval_rewards/margins": 2.2035346031188965, "eval_rewards/rejected": -3.934312343597412, "eval_runtime": 319.3064, "eval_samples_per_second": 6.264, "eval_steps_per_second": 0.1, "step": 2300 }, { "epoch": 1.19, "learning_rate": 3.3476764199655765e-07, "logits/chosen": -2.624662160873413, "logits/rejected": -2.5989861488342285, "logps/chosen": -214.7354278564453, "logps/rejected": -288.50347900390625, "loss": 0.083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1969306915998459, "rewards/margins": 6.174539566040039, "rewards/rejected": -6.3714704513549805, "step": 2310 }, { "epoch": 1.2, "learning_rate": 3.3381143622107477e-07, "logits/chosen": -2.6197948455810547, "logits/rejected": -2.6149678230285645, "logps/chosen": -308.3079528808594, "logps/rejected": -327.30682373046875, "loss": 0.0538, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.060916267335414886, "rewards/margins": 6.648951053619385, "rewards/rejected": -6.70986795425415, "step": 2320 }, { "epoch": 1.2, "learning_rate": 3.328552304455919e-07, "logits/chosen": -2.640090227127075, "logits/rejected": -2.631544589996338, "logps/chosen": -264.54058837890625, "logps/rejected": -299.2560729980469, "loss": 0.0648, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4170001447200775, "rewards/margins": 5.685815334320068, "rewards/rejected": -6.102816104888916, "step": 2330 }, { "epoch": 1.21, "learning_rate": 3.31899024670109e-07, "logits/chosen": -2.6624598503112793, "logits/rejected": -2.5809929370880127, "logps/chosen": -286.1086730957031, "logps/rejected": -305.8424072265625, "loss": 0.0729, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6273858547210693, "rewards/margins": 5.90106725692749, "rewards/rejected": -6.5284528732299805, "step": 2340 }, { "epoch": 1.21, "learning_rate": 3.309428188946261e-07, "logits/chosen": -2.61332631111145, "logits/rejected": -2.5772688388824463, "logps/chosen": -253.9518585205078, "logps/rejected": -288.00860595703125, "loss": 0.0936, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6646236777305603, "rewards/margins": 5.378995895385742, "rewards/rejected": -6.043619632720947, "step": 2350 }, { "epoch": 1.22, "learning_rate": 3.2998661311914323e-07, "logits/chosen": -2.70505428314209, "logits/rejected": -2.6530563831329346, "logps/chosen": -266.2115783691406, "logps/rejected": -311.61199951171875, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -0.1992596685886383, "rewards/margins": 6.321752548217773, "rewards/rejected": -6.521012783050537, "step": 2360 }, { "epoch": 1.22, "learning_rate": 3.2903040734366035e-07, "logits/chosen": -2.7303738594055176, "logits/rejected": -2.689063549041748, "logps/chosen": -254.94515991210938, "logps/rejected": -318.1282043457031, "loss": 0.0872, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2987816333770752, "rewards/margins": 6.046111106872559, "rewards/rejected": -6.344893455505371, "step": 2370 }, { "epoch": 1.23, "learning_rate": 3.2807420156817746e-07, "logits/chosen": -2.6064562797546387, "logits/rejected": -2.6192920207977295, "logps/chosen": -249.04708862304688, "logps/rejected": -286.11407470703125, "loss": 0.0995, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.31438302993774414, "rewards/margins": 5.681126594543457, "rewards/rejected": -5.995509624481201, "step": 2380 }, { "epoch": 1.23, "learning_rate": 3.271179957926946e-07, "logits/chosen": -2.7069311141967773, "logits/rejected": -2.6731648445129395, "logps/chosen": -285.67620849609375, "logps/rejected": -305.9593200683594, "loss": 0.0917, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6521123647689819, "rewards/margins": 5.830193519592285, "rewards/rejected": -6.48230504989624, "step": 2390 }, { "epoch": 1.24, "learning_rate": 3.261617900172117e-07, "logits/chosen": -2.6710569858551025, "logits/rejected": -2.6521377563476562, "logps/chosen": -242.3379364013672, "logps/rejected": -299.2978210449219, "loss": 0.1039, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.03360246866941452, "rewards/margins": 5.616101264953613, "rewards/rejected": -5.582499027252197, "step": 2400 }, { "epoch": 1.24, "eval_logits/chosen": -2.7419803142547607, "eval_logits/rejected": -2.6974315643310547, "eval_logps/chosen": -303.6773376464844, "eval_logps/rejected": -284.75732421875, "eval_loss": 0.5114707350730896, "eval_rewards/accuracies": 0.771484375, "eval_rewards/chosen": -1.7156122922897339, "eval_rewards/margins": 2.065539836883545, "eval_rewards/rejected": -3.7811522483825684, "eval_runtime": 315.6377, "eval_samples_per_second": 6.336, "eval_steps_per_second": 0.101, "step": 2400 }, { "epoch": 1.24, "learning_rate": 3.2520558424172876e-07, "logits/chosen": -2.7516894340515137, "logits/rejected": -2.6764895915985107, "logps/chosen": -271.0102844238281, "logps/rejected": -286.83404541015625, "loss": 0.0686, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.02064484916627407, "rewards/margins": 5.755192279815674, "rewards/rejected": -5.775836944580078, "step": 2410 }, { "epoch": 1.25, "learning_rate": 3.242493784662459e-07, "logits/chosen": -2.700606107711792, "logits/rejected": -2.680436611175537, "logps/chosen": -261.15338134765625, "logps/rejected": -296.3388671875, "loss": 0.0768, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5877324342727661, "rewards/margins": 5.474600791931152, "rewards/rejected": -6.062333106994629, "step": 2420 }, { "epoch": 1.25, "learning_rate": 3.2329317269076304e-07, "logits/chosen": -2.709926128387451, "logits/rejected": -2.6798205375671387, "logps/chosen": -285.6766357421875, "logps/rejected": -344.2245788574219, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": -0.2649703919887543, "rewards/margins": 6.29079008102417, "rewards/rejected": -6.555760860443115, "step": 2430 }, { "epoch": 1.26, "learning_rate": 3.2233696691528016e-07, "logits/chosen": -2.652538299560547, "logits/rejected": -2.655233383178711, "logps/chosen": -256.19219970703125, "logps/rejected": -319.622802734375, "loss": 0.0946, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.19920220971107483, "rewards/margins": 5.758728981018066, "rewards/rejected": -5.957931041717529, "step": 2440 }, { "epoch": 1.26, "learning_rate": 3.2138076113979727e-07, "logits/chosen": -2.6438474655151367, "logits/rejected": -2.6383254528045654, "logps/chosen": -277.60943603515625, "logps/rejected": -327.45025634765625, "loss": 0.1061, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5103539228439331, "rewards/margins": 5.954094409942627, "rewards/rejected": -6.46444845199585, "step": 2450 }, { "epoch": 1.27, "learning_rate": 3.204245553643144e-07, "logits/chosen": -2.647308349609375, "logits/rejected": -2.6240108013153076, "logps/chosen": -310.7269592285156, "logps/rejected": -310.34515380859375, "loss": 0.0789, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.25060057640075684, "rewards/margins": 5.379478454589844, "rewards/rejected": -5.6300787925720215, "step": 2460 }, { "epoch": 1.28, "learning_rate": 3.194683495888315e-07, "logits/chosen": -2.732145309448242, "logits/rejected": -2.7091526985168457, "logps/chosen": -301.0093994140625, "logps/rejected": -285.6148681640625, "loss": 0.0873, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2798474133014679, "rewards/margins": 5.049505710601807, "rewards/rejected": -5.3293538093566895, "step": 2470 }, { "epoch": 1.28, "learning_rate": 3.185121438133486e-07, "logits/chosen": -2.698073148727417, "logits/rejected": -2.678560972213745, "logps/chosen": -283.1154479980469, "logps/rejected": -364.0557556152344, "loss": 0.0909, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.09830506145954132, "rewards/margins": 6.078310489654541, "rewards/rejected": -5.980005741119385, "step": 2480 }, { "epoch": 1.29, "learning_rate": 3.1755593803786574e-07, "logits/chosen": -2.727752447128296, "logits/rejected": -2.706210136413574, "logps/chosen": -231.37643432617188, "logps/rejected": -287.2300109863281, "loss": 0.0879, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5455104112625122, "rewards/margins": 5.321480751037598, "rewards/rejected": -5.8669915199279785, "step": 2490 }, { "epoch": 1.29, "learning_rate": 3.1659973226238285e-07, "logits/chosen": -2.715097427368164, "logits/rejected": -2.688115358352661, "logps/chosen": -273.99884033203125, "logps/rejected": -290.21014404296875, "loss": 0.0961, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2534043490886688, "rewards/margins": 5.720442771911621, "rewards/rejected": -5.973847389221191, "step": 2500 }, { "epoch": 1.29, "eval_logits/chosen": -2.7484543323516846, "eval_logits/rejected": -2.7070529460906982, "eval_logps/chosen": -311.0831604003906, "eval_logps/rejected": -293.74456787109375, "eval_loss": 0.5289760231971741, "eval_rewards/accuracies": 0.7734375, "eval_rewards/chosen": -2.3302979469299316, "eval_rewards/margins": 2.1967928409576416, "eval_rewards/rejected": -4.527090549468994, "eval_runtime": 316.6229, "eval_samples_per_second": 6.317, "eval_steps_per_second": 0.101, "step": 2500 }, { "epoch": 1.3, "learning_rate": 3.1564352648689997e-07, "logits/chosen": -2.7701210975646973, "logits/rejected": -2.7461390495300293, "logps/chosen": -304.7459411621094, "logps/rejected": -301.94281005859375, "loss": 0.1114, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.31933537125587463, "rewards/margins": 5.555022239685059, "rewards/rejected": -5.874357223510742, "step": 2510 }, { "epoch": 1.3, "learning_rate": 3.146873207114171e-07, "logits/chosen": -2.7222537994384766, "logits/rejected": -2.68473482131958, "logps/chosen": -244.7481231689453, "logps/rejected": -302.4150695800781, "loss": 0.0857, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.19391921162605286, "rewards/margins": 6.480815887451172, "rewards/rejected": -6.674736022949219, "step": 2520 }, { "epoch": 1.31, "learning_rate": 3.137311149359342e-07, "logits/chosen": -2.74364972114563, "logits/rejected": -2.7010247707366943, "logps/chosen": -315.34161376953125, "logps/rejected": -348.8326110839844, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.43541082739830017, "rewards/margins": 5.95950984954834, "rewards/rejected": -6.3949198722839355, "step": 2530 }, { "epoch": 1.31, "learning_rate": 3.127749091604513e-07, "logits/chosen": -2.734626293182373, "logits/rejected": -2.718654155731201, "logps/chosen": -308.5, "logps/rejected": -352.7704772949219, "loss": 0.0817, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3196238875389099, "rewards/margins": 6.986210823059082, "rewards/rejected": -7.3058342933654785, "step": 2540 }, { "epoch": 1.32, "learning_rate": 3.1181870338496843e-07, "logits/chosen": -2.6749234199523926, "logits/rejected": -2.66908597946167, "logps/chosen": -213.5609588623047, "logps/rejected": -308.34637451171875, "loss": 0.1008, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6157265901565552, "rewards/margins": 5.665709495544434, "rewards/rejected": -6.281436920166016, "step": 2550 }, { "epoch": 1.32, "learning_rate": 3.108624976094856e-07, "logits/chosen": -2.6879990100860596, "logits/rejected": -2.6776421070098877, "logps/chosen": -264.83575439453125, "logps/rejected": -277.2493896484375, "loss": 0.0817, "rewards/accuracies": 1.0, "rewards/chosen": 0.23239612579345703, "rewards/margins": 5.752984046936035, "rewards/rejected": -5.520586967468262, "step": 2560 }, { "epoch": 1.33, "learning_rate": 3.0990629183400266e-07, "logits/chosen": -2.6211886405944824, "logits/rejected": -2.6052334308624268, "logps/chosen": -291.84844970703125, "logps/rejected": -324.58770751953125, "loss": 0.0826, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3146992623806, "rewards/margins": 6.526357173919678, "rewards/rejected": -6.841056823730469, "step": 2570 }, { "epoch": 1.33, "learning_rate": 3.089500860585198e-07, "logits/chosen": -2.6255393028259277, "logits/rejected": -2.6229114532470703, "logps/chosen": -238.12771606445312, "logps/rejected": -276.9713134765625, "loss": 0.1132, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.1312475204467773, "rewards/margins": 4.8700103759765625, "rewards/rejected": -6.001257419586182, "step": 2580 }, { "epoch": 1.34, "learning_rate": 3.079938802830369e-07, "logits/chosen": -2.710979700088501, "logits/rejected": -2.6540303230285645, "logps/chosen": -261.181640625, "logps/rejected": -244.72726440429688, "loss": 0.0791, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6768450140953064, "rewards/margins": 4.950498580932617, "rewards/rejected": -5.627344131469727, "step": 2590 }, { "epoch": 1.34, "learning_rate": 3.07037674507554e-07, "logits/chosen": -2.6508870124816895, "logits/rejected": -2.6480696201324463, "logps/chosen": -301.396484375, "logps/rejected": -358.33624267578125, "loss": 0.1269, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1376478672027588, "rewards/margins": 6.12053918838501, "rewards/rejected": -6.258187294006348, "step": 2600 }, { "epoch": 1.34, "eval_logits/chosen": -2.7431821823120117, "eval_logits/rejected": -2.7065858840942383, "eval_logps/chosen": -304.9791259765625, "eval_logps/rejected": -284.65460205078125, "eval_loss": 0.506081759929657, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -1.823662519454956, "eval_rewards/margins": 1.9489631652832031, "eval_rewards/rejected": -3.772625684738159, "eval_runtime": 317.9435, "eval_samples_per_second": 6.29, "eval_steps_per_second": 0.101, "step": 2600 }, { "epoch": 1.35, "learning_rate": 3.060814687320711e-07, "logits/chosen": -2.687321186065674, "logits/rejected": -2.635239839553833, "logps/chosen": -284.09307861328125, "logps/rejected": -288.2312927246094, "loss": 0.0985, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.06770718097686768, "rewards/margins": 5.459515571594238, "rewards/rejected": -5.527222156524658, "step": 2610 }, { "epoch": 1.35, "learning_rate": 3.0512526295658824e-07, "logits/chosen": -2.6925947666168213, "logits/rejected": -2.6380653381347656, "logps/chosen": -288.567626953125, "logps/rejected": -304.4593200683594, "loss": 0.105, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.09371298551559448, "rewards/margins": 5.579823017120361, "rewards/rejected": -5.67353630065918, "step": 2620 }, { "epoch": 1.36, "learning_rate": 3.0416905718110536e-07, "logits/chosen": -2.7244067192077637, "logits/rejected": -2.722740411758423, "logps/chosen": -250.6427001953125, "logps/rejected": -275.47381591796875, "loss": 0.0899, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4610835909843445, "rewards/margins": 5.498654365539551, "rewards/rejected": -5.959738254547119, "step": 2630 }, { "epoch": 1.36, "learning_rate": 3.0321285140562247e-07, "logits/chosen": -2.7533857822418213, "logits/rejected": -2.6935248374938965, "logps/chosen": -254.89892578125, "logps/rejected": -282.41119384765625, "loss": 0.1449, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2677046060562134, "rewards/margins": 4.776516437530518, "rewards/rejected": -6.044220924377441, "step": 2640 }, { "epoch": 1.37, "learning_rate": 3.022566456301396e-07, "logits/chosen": -2.8177356719970703, "logits/rejected": -2.7771425247192383, "logps/chosen": -265.8627014160156, "logps/rejected": -302.5569152832031, "loss": 0.0778, "rewards/accuracies": 1.0, "rewards/chosen": -0.5320996046066284, "rewards/margins": 5.644290447235107, "rewards/rejected": -6.176390647888184, "step": 2650 }, { "epoch": 1.37, "learning_rate": 3.013004398546567e-07, "logits/chosen": -2.7223782539367676, "logits/rejected": -2.6292693614959717, "logps/chosen": -275.7391662597656, "logps/rejected": -305.2659912109375, "loss": 0.0721, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8571189045906067, "rewards/margins": 5.543956279754639, "rewards/rejected": -6.401075839996338, "step": 2660 }, { "epoch": 1.38, "learning_rate": 3.003442340791738e-07, "logits/chosen": -2.8157827854156494, "logits/rejected": -2.7974352836608887, "logps/chosen": -269.65850830078125, "logps/rejected": -304.28826904296875, "loss": 0.0963, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.9287446737289429, "rewards/margins": 6.168341636657715, "rewards/rejected": -7.097086429595947, "step": 2670 }, { "epoch": 1.38, "learning_rate": 2.9938802830369093e-07, "logits/chosen": -2.7907795906066895, "logits/rejected": -2.793023109436035, "logps/chosen": -269.62432861328125, "logps/rejected": -313.7629699707031, "loss": 0.1116, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8878449201583862, "rewards/margins": 5.750227928161621, "rewards/rejected": -6.638073921203613, "step": 2680 }, { "epoch": 1.39, "learning_rate": 2.9843182252820805e-07, "logits/chosen": -2.7264926433563232, "logits/rejected": -2.7053160667419434, "logps/chosen": -255.37881469726562, "logps/rejected": -300.7747802734375, "loss": 0.0832, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5666817426681519, "rewards/margins": 6.178980350494385, "rewards/rejected": -6.745661735534668, "step": 2690 }, { "epoch": 1.39, "learning_rate": 2.974756167527252e-07, "logits/chosen": -2.804187059402466, "logits/rejected": -2.799229383468628, "logps/chosen": -247.48110961914062, "logps/rejected": -296.6025085449219, "loss": 0.0959, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7765015363693237, "rewards/margins": 4.88554048538208, "rewards/rejected": -5.662042617797852, "step": 2700 }, { "epoch": 1.39, "eval_logits/chosen": -2.758382797241211, "eval_logits/rejected": -2.7061219215393066, "eval_logps/chosen": -305.2204895019531, "eval_logps/rejected": -286.34173583984375, "eval_loss": 0.506572425365448, "eval_rewards/accuracies": 0.779296875, "eval_rewards/chosen": -1.8436938524246216, "eval_rewards/margins": 2.068965435028076, "eval_rewards/rejected": -3.9126596450805664, "eval_runtime": 318.7156, "eval_samples_per_second": 6.275, "eval_steps_per_second": 0.1, "step": 2700 }, { "epoch": 1.4, "learning_rate": 2.9651941097724233e-07, "logits/chosen": -2.758733034133911, "logits/rejected": -2.7200570106506348, "logps/chosen": -284.4052734375, "logps/rejected": -278.2381896972656, "loss": 0.0904, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3705640733242035, "rewards/margins": 5.155032157897949, "rewards/rejected": -5.525596618652344, "step": 2710 }, { "epoch": 1.4, "learning_rate": 2.9556320520175945e-07, "logits/chosen": -2.661815881729126, "logits/rejected": -2.619009494781494, "logps/chosen": -270.3291320800781, "logps/rejected": -288.2655334472656, "loss": 0.1031, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5597130060195923, "rewards/margins": 5.728754043579102, "rewards/rejected": -6.288466930389404, "step": 2720 }, { "epoch": 1.41, "learning_rate": 2.946069994262765e-07, "logits/chosen": -2.7923271656036377, "logits/rejected": -2.7072436809539795, "logps/chosen": -297.76824951171875, "logps/rejected": -331.4146728515625, "loss": 0.076, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.3209906220436096, "rewards/margins": 6.075893878936768, "rewards/rejected": -6.396883964538574, "step": 2730 }, { "epoch": 1.41, "learning_rate": 2.9365079365079363e-07, "logits/chosen": -2.758159875869751, "logits/rejected": -2.664613962173462, "logps/chosen": -285.35345458984375, "logps/rejected": -272.0963134765625, "loss": 0.1049, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7277927994728088, "rewards/margins": 5.51666259765625, "rewards/rejected": -6.244454860687256, "step": 2740 }, { "epoch": 1.42, "learning_rate": 2.9269458787531074e-07, "logits/chosen": -2.692495822906494, "logits/rejected": -2.6483452320098877, "logps/chosen": -246.64199829101562, "logps/rejected": -318.22442626953125, "loss": 0.0897, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7362026572227478, "rewards/margins": 5.328550338745117, "rewards/rejected": -6.064752578735352, "step": 2750 }, { "epoch": 1.42, "learning_rate": 2.9173838209982786e-07, "logits/chosen": -2.758169174194336, "logits/rejected": -2.678088426589966, "logps/chosen": -270.147705078125, "logps/rejected": -257.0202331542969, "loss": 0.0946, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7274396419525146, "rewards/margins": 5.241420745849609, "rewards/rejected": -5.968860626220703, "step": 2760 }, { "epoch": 1.43, "learning_rate": 2.90782176324345e-07, "logits/chosen": -2.698112726211548, "logits/rejected": -2.648719310760498, "logps/chosen": -329.0129089355469, "logps/rejected": -301.7458801269531, "loss": 0.0759, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2308160960674286, "rewards/margins": 6.0896806716918945, "rewards/rejected": -6.320497035980225, "step": 2770 }, { "epoch": 1.44, "learning_rate": 2.898259705488621e-07, "logits/chosen": -2.8107573986053467, "logits/rejected": -2.805783271789551, "logps/chosen": -271.0702819824219, "logps/rejected": -283.05157470703125, "loss": 0.084, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6828057765960693, "rewards/margins": 5.364167213439941, "rewards/rejected": -6.046972751617432, "step": 2780 }, { "epoch": 1.44, "learning_rate": 2.888697647733792e-07, "logits/chosen": -2.7616028785705566, "logits/rejected": -2.719783306121826, "logps/chosen": -328.262939453125, "logps/rejected": -335.8316955566406, "loss": 0.086, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.14453238248825073, "rewards/margins": 6.687165260314941, "rewards/rejected": -6.8316969871521, "step": 2790 }, { "epoch": 1.45, "learning_rate": 2.879135589978963e-07, "logits/chosen": -2.6463236808776855, "logits/rejected": -2.6956329345703125, "logps/chosen": -245.4470672607422, "logps/rejected": -309.3108215332031, "loss": 0.1009, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.45763087272644043, "rewards/margins": 5.422387599945068, "rewards/rejected": -5.880019187927246, "step": 2800 }, { "epoch": 1.45, "eval_logits/chosen": -2.7338132858276367, "eval_logits/rejected": -2.6835834980010986, "eval_logps/chosen": -312.49072265625, "eval_logps/rejected": -294.735595703125, "eval_loss": 0.5241068601608276, "eval_rewards/accuracies": 0.78515625, "eval_rewards/chosen": -2.447122573852539, "eval_rewards/margins": 2.162221908569336, "eval_rewards/rejected": -4.609344482421875, "eval_runtime": 319.0341, "eval_samples_per_second": 6.269, "eval_steps_per_second": 0.1, "step": 2800 }, { "epoch": 1.45, "learning_rate": 2.8695735322241344e-07, "logits/chosen": -2.7229807376861572, "logits/rejected": -2.7083780765533447, "logps/chosen": -286.12255859375, "logps/rejected": -310.3410949707031, "loss": 0.0911, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.34616807103157043, "rewards/margins": 5.964114189147949, "rewards/rejected": -6.3102827072143555, "step": 2810 }, { "epoch": 1.46, "learning_rate": 2.8600114744693055e-07, "logits/chosen": -2.6550228595733643, "logits/rejected": -2.6088643074035645, "logps/chosen": -251.9344940185547, "logps/rejected": -317.67352294921875, "loss": 0.0809, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1923748403787613, "rewards/margins": 6.3998823165893555, "rewards/rejected": -6.592257022857666, "step": 2820 }, { "epoch": 1.46, "learning_rate": 2.8504494167144767e-07, "logits/chosen": -2.719397783279419, "logits/rejected": -2.696880340576172, "logps/chosen": -271.78887939453125, "logps/rejected": -316.2119140625, "loss": 0.0784, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6223933100700378, "rewards/margins": 5.7708539962768555, "rewards/rejected": -6.393246650695801, "step": 2830 }, { "epoch": 1.47, "learning_rate": 2.8408873589596484e-07, "logits/chosen": -2.7376041412353516, "logits/rejected": -2.6873397827148438, "logps/chosen": -270.13287353515625, "logps/rejected": -294.29364013671875, "loss": 0.0786, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6966304779052734, "rewards/margins": 5.888973712921143, "rewards/rejected": -6.585604190826416, "step": 2840 }, { "epoch": 1.47, "learning_rate": 2.8313253012048195e-07, "logits/chosen": -2.605156421661377, "logits/rejected": -2.610856294631958, "logps/chosen": -234.8222198486328, "logps/rejected": -336.3276672363281, "loss": 0.0954, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.2880403399467468, "rewards/margins": 6.164098739624023, "rewards/rejected": -6.452138423919678, "step": 2850 }, { "epoch": 1.48, "learning_rate": 2.8217632434499907e-07, "logits/chosen": -2.768667459487915, "logits/rejected": -2.7183034420013428, "logps/chosen": -295.6549377441406, "logps/rejected": -328.62579345703125, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": -0.7613356709480286, "rewards/margins": 6.143101215362549, "rewards/rejected": -6.9044365882873535, "step": 2860 }, { "epoch": 1.48, "learning_rate": 2.812201185695162e-07, "logits/chosen": -2.6880557537078857, "logits/rejected": -2.6741247177124023, "logps/chosen": -244.7305145263672, "logps/rejected": -312.7955627441406, "loss": 0.0737, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.6571290493011475, "rewards/margins": 5.4620184898376465, "rewards/rejected": -6.119148254394531, "step": 2870 }, { "epoch": 1.49, "learning_rate": 2.802639127940333e-07, "logits/chosen": -2.735438346862793, "logits/rejected": -2.656383991241455, "logps/chosen": -271.6971130371094, "logps/rejected": -268.0536804199219, "loss": 0.1122, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.796291708946228, "rewards/margins": 5.884654998779297, "rewards/rejected": -6.680947303771973, "step": 2880 }, { "epoch": 1.49, "learning_rate": 2.7930770701855036e-07, "logits/chosen": -2.7465033531188965, "logits/rejected": -2.68650484085083, "logps/chosen": -275.4809265136719, "logps/rejected": -296.68341064453125, "loss": 0.1213, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.316895991563797, "rewards/margins": 6.447287082672119, "rewards/rejected": -6.764183044433594, "step": 2890 }, { "epoch": 1.5, "learning_rate": 2.783515012430675e-07, "logits/chosen": -2.689809560775757, "logits/rejected": -2.6326003074645996, "logps/chosen": -276.33892822265625, "logps/rejected": -288.660400390625, "loss": 0.0917, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7795993089675903, "rewards/margins": 5.5033416748046875, "rewards/rejected": -6.282940864562988, "step": 2900 }, { "epoch": 1.5, "eval_logits/chosen": -2.758772134780884, "eval_logits/rejected": -2.7068967819213867, "eval_logps/chosen": -312.622802734375, "eval_logps/rejected": -293.7532043457031, "eval_loss": 0.5349659323692322, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -2.45808744430542, "eval_rewards/margins": 2.0697200298309326, "eval_rewards/rejected": -4.527807235717773, "eval_runtime": 318.7237, "eval_samples_per_second": 6.275, "eval_steps_per_second": 0.1, "step": 2900 }, { "epoch": 1.5, "learning_rate": 2.773952954675846e-07, "logits/chosen": -2.702435255050659, "logits/rejected": -2.686267852783203, "logps/chosen": -273.24639892578125, "logps/rejected": -268.05352783203125, "loss": 0.0771, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7544882893562317, "rewards/margins": 5.377245903015137, "rewards/rejected": -6.131734371185303, "step": 2910 }, { "epoch": 1.51, "learning_rate": 2.764390896921017e-07, "logits/chosen": -2.7505154609680176, "logits/rejected": -2.6563503742218018, "logps/chosen": -303.974853515625, "logps/rejected": -297.67767333984375, "loss": 0.0979, "rewards/accuracies": 0.9375, "rewards/chosen": -0.39709168672561646, "rewards/margins": 5.832863807678223, "rewards/rejected": -6.229954719543457, "step": 2920 }, { "epoch": 1.51, "learning_rate": 2.754828839166188e-07, "logits/chosen": -2.6120545864105225, "logits/rejected": -2.6222383975982666, "logps/chosen": -263.12353515625, "logps/rejected": -283.98468017578125, "loss": 0.0822, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5255668759346008, "rewards/margins": 6.533519744873047, "rewards/rejected": -7.059086799621582, "step": 2930 }, { "epoch": 1.52, "learning_rate": 2.7452667814113594e-07, "logits/chosen": -2.7562460899353027, "logits/rejected": -2.725320816040039, "logps/chosen": -296.91766357421875, "logps/rejected": -314.5440368652344, "loss": 0.0989, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7135354280471802, "rewards/margins": 5.722177028656006, "rewards/rejected": -6.4357123374938965, "step": 2940 }, { "epoch": 1.52, "learning_rate": 2.7357047236565306e-07, "logits/chosen": -2.666968584060669, "logits/rejected": -2.679483413696289, "logps/chosen": -262.410400390625, "logps/rejected": -303.61175537109375, "loss": 0.0698, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.4541957378387451, "rewards/margins": 5.653780937194824, "rewards/rejected": -6.107976913452148, "step": 2950 }, { "epoch": 1.53, "learning_rate": 2.7261426659017017e-07, "logits/chosen": -2.7422938346862793, "logits/rejected": -2.7402257919311523, "logps/chosen": -267.5207214355469, "logps/rejected": -301.51959228515625, "loss": 0.0786, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8701791763305664, "rewards/margins": 5.560877799987793, "rewards/rejected": -6.431056976318359, "step": 2960 }, { "epoch": 1.53, "learning_rate": 2.716580608146873e-07, "logits/chosen": -2.7655584812164307, "logits/rejected": -2.7117350101470947, "logps/chosen": -331.04718017578125, "logps/rejected": -326.4444885253906, "loss": 0.0688, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.38417384028434753, "rewards/margins": 6.654132843017578, "rewards/rejected": -7.038305759429932, "step": 2970 }, { "epoch": 1.54, "learning_rate": 2.7070185503920446e-07, "logits/chosen": -2.6233134269714355, "logits/rejected": -2.5897209644317627, "logps/chosen": -254.3827362060547, "logps/rejected": -318.66937255859375, "loss": 0.0739, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7413161396980286, "rewards/margins": 6.332542896270752, "rewards/rejected": -7.073859214782715, "step": 2980 }, { "epoch": 1.54, "learning_rate": 2.6974564926372157e-07, "logits/chosen": -2.8048453330993652, "logits/rejected": -2.7334775924682617, "logps/chosen": -291.77313232421875, "logps/rejected": -322.15960693359375, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -0.28778010606765747, "rewards/margins": 6.62146520614624, "rewards/rejected": -6.90924596786499, "step": 2990 }, { "epoch": 1.55, "learning_rate": 2.687894434882387e-07, "logits/chosen": -2.7648658752441406, "logits/rejected": -2.657827377319336, "logps/chosen": -247.3353729248047, "logps/rejected": -267.6654357910156, "loss": 0.0693, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8535417318344116, "rewards/margins": 5.313368797302246, "rewards/rejected": -6.1669111251831055, "step": 3000 }, { "epoch": 1.55, "eval_logits/chosen": -2.7642407417297363, "eval_logits/rejected": -2.7178869247436523, "eval_logps/chosen": -311.40460205078125, "eval_logps/rejected": -294.0999755859375, "eval_loss": 0.5371096134185791, "eval_rewards/accuracies": 0.7578125, "eval_rewards/chosen": -2.3569798469543457, "eval_rewards/margins": 2.199610471725464, "eval_rewards/rejected": -4.556590557098389, "eval_runtime": 319.208, "eval_samples_per_second": 6.266, "eval_steps_per_second": 0.1, "step": 3000 }, { "epoch": 1.55, "learning_rate": 2.678332377127558e-07, "logits/chosen": -2.776304244995117, "logits/rejected": -2.6788744926452637, "logps/chosen": -305.3674011230469, "logps/rejected": -318.96295166015625, "loss": 0.0866, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.17442083358764648, "rewards/margins": 6.954986572265625, "rewards/rejected": -7.129406929016113, "step": 3010 }, { "epoch": 1.56, "learning_rate": 2.668770319372729e-07, "logits/chosen": -2.7773995399475098, "logits/rejected": -2.7525391578674316, "logps/chosen": -273.67510986328125, "logps/rejected": -265.53631591796875, "loss": 0.0842, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4548625349998474, "rewards/margins": 5.007421970367432, "rewards/rejected": -5.462284564971924, "step": 3020 }, { "epoch": 1.56, "learning_rate": 2.6592082616179004e-07, "logits/chosen": -2.585448980331421, "logits/rejected": -2.5813400745391846, "logps/chosen": -200.90914916992188, "logps/rejected": -277.71881103515625, "loss": 0.0974, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.36157527565956116, "rewards/margins": 5.650599479675293, "rewards/rejected": -6.0121750831604, "step": 3030 }, { "epoch": 1.57, "learning_rate": 2.649646203863071e-07, "logits/chosen": -2.6424880027770996, "logits/rejected": -2.6704490184783936, "logps/chosen": -256.95245361328125, "logps/rejected": -313.34674072265625, "loss": 0.0681, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.20644274353981018, "rewards/margins": 5.7752885818481445, "rewards/rejected": -5.981731414794922, "step": 3040 }, { "epoch": 1.57, "learning_rate": 2.640084146108242e-07, "logits/chosen": -2.7053580284118652, "logits/rejected": -2.688702344894409, "logps/chosen": -309.57733154296875, "logps/rejected": -309.84478759765625, "loss": 0.0746, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5918713212013245, "rewards/margins": 6.199500560760498, "rewards/rejected": -6.791372776031494, "step": 3050 }, { "epoch": 1.58, "learning_rate": 2.6305220883534133e-07, "logits/chosen": -2.720906972885132, "logits/rejected": -2.604559898376465, "logps/chosen": -241.77334594726562, "logps/rejected": -287.62457275390625, "loss": 0.0794, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5095193982124329, "rewards/margins": 5.459133148193359, "rewards/rejected": -5.968652248382568, "step": 3060 }, { "epoch": 1.58, "learning_rate": 2.6209600305985845e-07, "logits/chosen": -2.724483013153076, "logits/rejected": -2.681415319442749, "logps/chosen": -280.43377685546875, "logps/rejected": -313.9124450683594, "loss": 0.1251, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.08990747481584549, "rewards/margins": 6.539010524749756, "rewards/rejected": -6.628918647766113, "step": 3070 }, { "epoch": 1.59, "learning_rate": 2.6113979728437556e-07, "logits/chosen": -2.808774471282959, "logits/rejected": -2.783923387527466, "logps/chosen": -305.9368591308594, "logps/rejected": -303.0685119628906, "loss": 0.0877, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.38404324650764465, "rewards/margins": 5.625106334686279, "rewards/rejected": -6.009149551391602, "step": 3080 }, { "epoch": 1.6, "learning_rate": 2.601835915088927e-07, "logits/chosen": -2.6188642978668213, "logits/rejected": -2.6304309368133545, "logps/chosen": -238.93826293945312, "logps/rejected": -292.0005187988281, "loss": 0.1037, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2586580216884613, "rewards/margins": 5.472790718078613, "rewards/rejected": -5.731448173522949, "step": 3090 }, { "epoch": 1.6, "learning_rate": 2.592273857334098e-07, "logits/chosen": -2.69022798538208, "logits/rejected": -2.687714099884033, "logps/chosen": -263.8324279785156, "logps/rejected": -302.5688171386719, "loss": 0.0861, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2078767567873001, "rewards/margins": 6.1142754554748535, "rewards/rejected": -6.322152137756348, "step": 3100 }, { "epoch": 1.6, "eval_logits/chosen": -2.810418128967285, "eval_logits/rejected": -2.7428994178771973, "eval_logps/chosen": -308.62701416015625, "eval_logps/rejected": -289.9939880371094, "eval_loss": 0.5141046643257141, "eval_rewards/accuracies": 0.775390625, "eval_rewards/chosen": -2.126436233520508, "eval_rewards/margins": 2.0893585681915283, "eval_rewards/rejected": -4.215795040130615, "eval_runtime": 317.8532, "eval_samples_per_second": 6.292, "eval_steps_per_second": 0.101, "step": 3100 }, { "epoch": 1.61, "learning_rate": 2.582711799579269e-07, "logits/chosen": -2.782196044921875, "logits/rejected": -2.7835662364959717, "logps/chosen": -297.5877990722656, "logps/rejected": -319.4659118652344, "loss": 0.0875, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5400816798210144, "rewards/margins": 5.985971450805664, "rewards/rejected": -6.526052951812744, "step": 3110 }, { "epoch": 1.61, "learning_rate": 2.573149741824441e-07, "logits/chosen": -2.797783374786377, "logits/rejected": -2.6672234535217285, "logps/chosen": -307.6380920410156, "logps/rejected": -301.02227783203125, "loss": 0.0924, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2821415960788727, "rewards/margins": 5.907652378082275, "rewards/rejected": -6.189794063568115, "step": 3120 }, { "epoch": 1.62, "learning_rate": 2.563587684069612e-07, "logits/chosen": -2.748999834060669, "logits/rejected": -2.7170605659484863, "logps/chosen": -303.27020263671875, "logps/rejected": -312.60003662109375, "loss": 0.0721, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7495017051696777, "rewards/margins": 5.746244430541992, "rewards/rejected": -6.495745658874512, "step": 3130 }, { "epoch": 1.62, "learning_rate": 2.554025626314783e-07, "logits/chosen": -2.809506893157959, "logits/rejected": -2.6735405921936035, "logps/chosen": -279.3205261230469, "logps/rejected": -325.3266296386719, "loss": 0.0953, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9223713874816895, "rewards/margins": 6.016545295715332, "rewards/rejected": -6.938917636871338, "step": 3140 }, { "epoch": 1.63, "learning_rate": 2.544463568559954e-07, "logits/chosen": -2.8387951850891113, "logits/rejected": -2.6980299949645996, "logps/chosen": -315.58544921875, "logps/rejected": -335.14825439453125, "loss": 0.0815, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4985596537590027, "rewards/margins": 6.318514823913574, "rewards/rejected": -6.817073822021484, "step": 3150 }, { "epoch": 1.63, "learning_rate": 2.5349015108051254e-07, "logits/chosen": -2.7535789012908936, "logits/rejected": -2.714042901992798, "logps/chosen": -257.7418212890625, "logps/rejected": -288.21722412109375, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4672360420227051, "rewards/margins": 6.251719951629639, "rewards/rejected": -6.718955993652344, "step": 3160 }, { "epoch": 1.64, "learning_rate": 2.5253394530502966e-07, "logits/chosen": -2.652970552444458, "logits/rejected": -2.6133012771606445, "logps/chosen": -330.0263671875, "logps/rejected": -333.5130615234375, "loss": 0.0617, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.19771215319633484, "rewards/margins": 6.702794551849365, "rewards/rejected": -6.900506496429443, "step": 3170 }, { "epoch": 1.64, "learning_rate": 2.5157773952954677e-07, "logits/chosen": -2.8288116455078125, "logits/rejected": -2.7837817668914795, "logps/chosen": -312.13885498046875, "logps/rejected": -314.2359924316406, "loss": 0.1038, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5361202359199524, "rewards/margins": 5.746079921722412, "rewards/rejected": -6.282199859619141, "step": 3180 }, { "epoch": 1.65, "learning_rate": 2.506215337540639e-07, "logits/chosen": -2.722975254058838, "logits/rejected": -2.6670470237731934, "logps/chosen": -262.28387451171875, "logps/rejected": -311.4022521972656, "loss": 0.0934, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4326835572719574, "rewards/margins": 5.7436957359313965, "rewards/rejected": -6.176379680633545, "step": 3190 }, { "epoch": 1.65, "learning_rate": 2.4966532797858095e-07, "logits/chosen": -2.674398422241211, "logits/rejected": -2.5761911869049072, "logps/chosen": -284.32232666015625, "logps/rejected": -288.8324890136719, "loss": 0.0851, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2184424102306366, "rewards/margins": 6.102179050445557, "rewards/rejected": -6.320620536804199, "step": 3200 }, { "epoch": 1.65, "eval_logits/chosen": -2.7583746910095215, "eval_logits/rejected": -2.6925430297851562, "eval_logps/chosen": -306.22760009765625, "eval_logps/rejected": -288.53936767578125, "eval_loss": 0.5175272822380066, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -1.9272873401641846, "eval_rewards/margins": 2.167772054672241, "eval_rewards/rejected": -4.095059394836426, "eval_runtime": 318.5536, "eval_samples_per_second": 6.278, "eval_steps_per_second": 0.1, "step": 3200 }, { "epoch": 1.66, "learning_rate": 2.4870912220309807e-07, "logits/chosen": -2.665853977203369, "logits/rejected": -2.6626288890838623, "logps/chosen": -275.97418212890625, "logps/rejected": -284.8667907714844, "loss": 0.0942, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.42326611280441284, "rewards/margins": 5.18906307220459, "rewards/rejected": -5.612329006195068, "step": 3210 }, { "epoch": 1.66, "learning_rate": 2.477529164276152e-07, "logits/chosen": -2.8330671787261963, "logits/rejected": -2.7457823753356934, "logps/chosen": -280.6058044433594, "logps/rejected": -316.77880859375, "loss": 0.0898, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4407665729522705, "rewards/margins": 5.992166996002197, "rewards/rejected": -6.432933807373047, "step": 3220 }, { "epoch": 1.67, "learning_rate": 2.4679671065213235e-07, "logits/chosen": -2.7792186737060547, "logits/rejected": -2.7241435050964355, "logps/chosen": -288.08074951171875, "logps/rejected": -330.20611572265625, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3401411175727844, "rewards/margins": 5.96522855758667, "rewards/rejected": -6.305370807647705, "step": 3230 }, { "epoch": 1.67, "learning_rate": 2.4584050487664947e-07, "logits/chosen": -2.7446224689483643, "logits/rejected": -2.6137149333953857, "logps/chosen": -287.86639404296875, "logps/rejected": -303.94000244140625, "loss": 0.0537, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.12650933861732483, "rewards/margins": 6.544957637786865, "rewards/rejected": -6.671466827392578, "step": 3240 }, { "epoch": 1.68, "learning_rate": 2.448842991011666e-07, "logits/chosen": -2.736572027206421, "logits/rejected": -2.6515696048736572, "logps/chosen": -295.3900451660156, "logps/rejected": -304.6390686035156, "loss": 0.0926, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6295541524887085, "rewards/margins": 5.8065385818481445, "rewards/rejected": -6.436093330383301, "step": 3250 }, { "epoch": 1.68, "learning_rate": 2.439280933256837e-07, "logits/chosen": -2.6713812351226807, "logits/rejected": -2.662527084350586, "logps/chosen": -244.6562042236328, "logps/rejected": -318.9786071777344, "loss": 0.0598, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.38372328877449036, "rewards/margins": 6.347474098205566, "rewards/rejected": -6.731197357177734, "step": 3260 }, { "epoch": 1.69, "learning_rate": 2.429718875502008e-07, "logits/chosen": -2.5205514430999756, "logits/rejected": -2.583298683166504, "logps/chosen": -263.51751708984375, "logps/rejected": -344.06060791015625, "loss": 0.0649, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2645038962364197, "rewards/margins": 6.9788408279418945, "rewards/rejected": -7.243344783782959, "step": 3270 }, { "epoch": 1.69, "learning_rate": 2.420156817747179e-07, "logits/chosen": -2.684580087661743, "logits/rejected": -2.5111072063446045, "logps/chosen": -274.7818298339844, "logps/rejected": -277.52825927734375, "loss": 0.0691, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5633732676506042, "rewards/margins": 5.820583343505859, "rewards/rejected": -6.383955955505371, "step": 3280 }, { "epoch": 1.7, "learning_rate": 2.41059475999235e-07, "logits/chosen": -2.678091049194336, "logits/rejected": -2.5452818870544434, "logps/chosen": -250.3430633544922, "logps/rejected": -290.3915100097656, "loss": 0.0763, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9485856294631958, "rewards/margins": 5.433687686920166, "rewards/rejected": -6.3822736740112305, "step": 3290 }, { "epoch": 1.7, "learning_rate": 2.4010327022375216e-07, "logits/chosen": -2.708986520767212, "logits/rejected": -2.7057483196258545, "logps/chosen": -304.6985168457031, "logps/rejected": -329.76080322265625, "loss": 0.0837, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.08352717012166977, "rewards/margins": 6.654336452484131, "rewards/rejected": -6.737864017486572, "step": 3300 }, { "epoch": 1.7, "eval_logits/chosen": -2.7439873218536377, "eval_logits/rejected": -2.6726362705230713, "eval_logps/chosen": -307.9421081542969, "eval_logps/rejected": -292.1949157714844, "eval_loss": 0.5354318618774414, "eval_rewards/accuracies": 0.763671875, "eval_rewards/chosen": -2.069589376449585, "eval_rewards/margins": 2.328883409500122, "eval_rewards/rejected": -4.398472785949707, "eval_runtime": 319.1771, "eval_samples_per_second": 6.266, "eval_steps_per_second": 0.1, "step": 3300 }, { "epoch": 1.71, "learning_rate": 2.391470644482693e-07, "logits/chosen": -2.716585159301758, "logits/rejected": -2.7203798294067383, "logps/chosen": -286.1582336425781, "logps/rejected": -319.0726623535156, "loss": 0.0821, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.00444456934928894, "rewards/margins": 5.982482433319092, "rewards/rejected": -5.9780378341674805, "step": 3310 }, { "epoch": 1.71, "learning_rate": 2.3819085867278636e-07, "logits/chosen": -2.6158809661865234, "logits/rejected": -2.572007656097412, "logps/chosen": -216.1724090576172, "logps/rejected": -261.08209228515625, "loss": 0.0828, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.099599838256836, "rewards/margins": 5.417374610900879, "rewards/rejected": -6.516973972320557, "step": 3320 }, { "epoch": 1.72, "learning_rate": 2.3723465289730348e-07, "logits/chosen": -2.7337965965270996, "logits/rejected": -2.6799380779266357, "logps/chosen": -305.67218017578125, "logps/rejected": -264.9610595703125, "loss": 0.1067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.22794389724731445, "rewards/margins": 5.117917060852051, "rewards/rejected": -5.345860958099365, "step": 3330 }, { "epoch": 1.72, "learning_rate": 2.362784471218206e-07, "logits/chosen": -2.6990344524383545, "logits/rejected": -2.634636163711548, "logps/chosen": -262.9833068847656, "logps/rejected": -321.34454345703125, "loss": 0.0635, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.14797277748584747, "rewards/margins": 6.34617805480957, "rewards/rejected": -6.494150638580322, "step": 3340 }, { "epoch": 1.73, "learning_rate": 2.353222413463377e-07, "logits/chosen": -2.6412806510925293, "logits/rejected": -2.5726284980773926, "logps/chosen": -274.2547912597656, "logps/rejected": -321.79742431640625, "loss": 0.0712, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.6301458477973938, "rewards/margins": 6.126635551452637, "rewards/rejected": -6.756781101226807, "step": 3350 }, { "epoch": 1.73, "learning_rate": 2.3436603557085483e-07, "logits/chosen": -2.6641173362731934, "logits/rejected": -2.6521782875061035, "logps/chosen": -283.4554138183594, "logps/rejected": -325.281494140625, "loss": 0.0794, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6093801259994507, "rewards/margins": 5.445070266723633, "rewards/rejected": -6.054450511932373, "step": 3360 }, { "epoch": 1.74, "learning_rate": 2.3340982979537197e-07, "logits/chosen": -2.6841273307800293, "logits/rejected": -2.6150929927825928, "logps/chosen": -347.07525634765625, "logps/rejected": -348.2049865722656, "loss": 0.0893, "rewards/accuracies": 1.0, "rewards/chosen": -0.04954729601740837, "rewards/margins": 6.338484764099121, "rewards/rejected": -6.38803243637085, "step": 3370 }, { "epoch": 1.74, "learning_rate": 2.3245362401988909e-07, "logits/chosen": -2.687577962875366, "logits/rejected": -2.660741090774536, "logps/chosen": -289.68035888671875, "logps/rejected": -310.7860412597656, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": 0.06715719401836395, "rewards/margins": 7.135079860687256, "rewards/rejected": -7.067922115325928, "step": 3380 }, { "epoch": 1.75, "learning_rate": 2.314974182444062e-07, "logits/chosen": -2.7338855266571045, "logits/rejected": -2.7030651569366455, "logps/chosen": -271.0210266113281, "logps/rejected": -315.3516540527344, "loss": 0.0713, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4303258657455444, "rewards/margins": 5.624815940856934, "rewards/rejected": -7.055141448974609, "step": 3390 }, { "epoch": 1.76, "learning_rate": 2.305412124689233e-07, "logits/chosen": -2.6211514472961426, "logits/rejected": -2.5887961387634277, "logps/chosen": -268.812255859375, "logps/rejected": -308.2967834472656, "loss": 0.056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6851238012313843, "rewards/margins": 6.7636308670043945, "rewards/rejected": -7.448754787445068, "step": 3400 }, { "epoch": 1.76, "eval_logits/chosen": -2.7202062606811523, "eval_logits/rejected": -2.649815559387207, "eval_logps/chosen": -316.5497131347656, "eval_logps/rejected": -303.2956237792969, "eval_loss": 0.5596067905426025, "eval_rewards/accuracies": 0.7734375, "eval_rewards/chosen": -2.7840216159820557, "eval_rewards/margins": 2.535808801651001, "eval_rewards/rejected": -5.319830417633057, "eval_runtime": 330.4893, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.097, "step": 3400 }, { "epoch": 1.76, "learning_rate": 2.295850066934404e-07, "logits/chosen": -2.6713547706604004, "logits/rejected": -2.587475538253784, "logps/chosen": -284.5220031738281, "logps/rejected": -325.01934814453125, "loss": 0.0868, "rewards/accuracies": 0.9375, "rewards/chosen": -1.171018362045288, "rewards/margins": 6.473021507263184, "rewards/rejected": -7.644040107727051, "step": 3410 }, { "epoch": 1.77, "learning_rate": 2.2862880091795752e-07, "logits/chosen": -2.6998648643493652, "logits/rejected": -2.655348300933838, "logps/chosen": -277.3625183105469, "logps/rejected": -340.12103271484375, "loss": 0.0929, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.9923136830329895, "rewards/margins": 6.232722282409668, "rewards/rejected": -7.225035667419434, "step": 3420 }, { "epoch": 1.77, "learning_rate": 2.2767259514247464e-07, "logits/chosen": -2.7072086334228516, "logits/rejected": -2.685591220855713, "logps/chosen": -276.17559814453125, "logps/rejected": -340.01934814453125, "loss": 0.0831, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.5714015364646912, "rewards/margins": 6.117895126342773, "rewards/rejected": -6.689296722412109, "step": 3430 }, { "epoch": 1.78, "learning_rate": 2.2671638936699178e-07, "logits/chosen": -2.587096929550171, "logits/rejected": -2.581613540649414, "logps/chosen": -270.30706787109375, "logps/rejected": -318.89385986328125, "loss": 0.0727, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3270878195762634, "rewards/margins": 6.317829608917236, "rewards/rejected": -6.6449174880981445, "step": 3440 }, { "epoch": 1.78, "learning_rate": 2.257601835915089e-07, "logits/chosen": -2.7279696464538574, "logits/rejected": -2.689582586288452, "logps/chosen": -330.78790283203125, "logps/rejected": -332.47418212890625, "loss": 0.0725, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.31098902225494385, "rewards/margins": 6.5533928871154785, "rewards/rejected": -6.864381313323975, "step": 3450 }, { "epoch": 1.79, "learning_rate": 2.24803977816026e-07, "logits/chosen": -2.6843161582946777, "logits/rejected": -2.6000914573669434, "logps/chosen": -286.02996826171875, "logps/rejected": -295.32000732421875, "loss": 0.1168, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5812801718711853, "rewards/margins": 6.048283576965332, "rewards/rejected": -6.629563808441162, "step": 3460 }, { "epoch": 1.79, "learning_rate": 2.2384777204054313e-07, "logits/chosen": -2.7898075580596924, "logits/rejected": -2.6955037117004395, "logps/chosen": -292.6401672363281, "logps/rejected": -316.26580810546875, "loss": 0.0773, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.0034230053424835205, "rewards/margins": 6.607641696929932, "rewards/rejected": -6.604218482971191, "step": 3470 }, { "epoch": 1.8, "learning_rate": 2.2289156626506022e-07, "logits/chosen": -2.651047468185425, "logits/rejected": -2.595153331756592, "logps/chosen": -296.525634765625, "logps/rejected": -311.1611633300781, "loss": 0.0729, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7819842100143433, "rewards/margins": 6.437596321105957, "rewards/rejected": -7.21958065032959, "step": 3480 }, { "epoch": 1.8, "learning_rate": 2.2193536048957733e-07, "logits/chosen": -2.8485751152038574, "logits/rejected": -2.740419626235962, "logps/chosen": -317.0204772949219, "logps/rejected": -325.99603271484375, "loss": 0.111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.22058232128620148, "rewards/margins": 5.933473110198975, "rewards/rejected": -6.154055595397949, "step": 3490 }, { "epoch": 1.81, "learning_rate": 2.2097915471409445e-07, "logits/chosen": -2.7194607257843018, "logits/rejected": -2.717597484588623, "logps/chosen": -286.6048889160156, "logps/rejected": -315.5644226074219, "loss": 0.0689, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7288711667060852, "rewards/margins": 5.953031063079834, "rewards/rejected": -6.681902885437012, "step": 3500 }, { "epoch": 1.81, "eval_logits/chosen": -2.7731614112854004, "eval_logits/rejected": -2.7108559608459473, "eval_logps/chosen": -310.8092956542969, "eval_logps/rejected": -294.283203125, "eval_loss": 0.5347998142242432, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -2.307563066482544, "eval_rewards/margins": 2.264232873916626, "eval_rewards/rejected": -4.571796417236328, "eval_runtime": 319.7337, "eval_samples_per_second": 6.255, "eval_steps_per_second": 0.1, "step": 3500 }, { "epoch": 1.81, "learning_rate": 2.200229489386116e-07, "logits/chosen": -2.7406704425811768, "logits/rejected": -2.6979715824127197, "logps/chosen": -287.6631164550781, "logps/rejected": -343.1600646972656, "loss": 0.0584, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7930260896682739, "rewards/margins": 6.991511344909668, "rewards/rejected": -7.784538269042969, "step": 3510 }, { "epoch": 1.82, "learning_rate": 2.190667431631287e-07, "logits/chosen": -2.6968741416931152, "logits/rejected": -2.6829638481140137, "logps/chosen": -274.96380615234375, "logps/rejected": -375.9587097167969, "loss": 0.1112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5181733965873718, "rewards/margins": 6.889904975891113, "rewards/rejected": -7.408078670501709, "step": 3520 }, { "epoch": 1.82, "learning_rate": 2.1811053738764582e-07, "logits/chosen": -2.6301279067993164, "logits/rejected": -2.6212897300720215, "logps/chosen": -247.24368286132812, "logps/rejected": -323.6666259765625, "loss": 0.0597, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5010433197021484, "rewards/margins": 6.03061056137085, "rewards/rejected": -6.531654357910156, "step": 3530 }, { "epoch": 1.83, "learning_rate": 2.1715433161216294e-07, "logits/chosen": -2.6192808151245117, "logits/rejected": -2.6582112312316895, "logps/chosen": -275.57501220703125, "logps/rejected": -327.3641052246094, "loss": 0.0816, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6718168258666992, "rewards/margins": 5.8994855880737305, "rewards/rejected": -6.571302890777588, "step": 3540 }, { "epoch": 1.83, "learning_rate": 2.1619812583668005e-07, "logits/chosen": -2.660083770751953, "logits/rejected": -2.6552069187164307, "logps/chosen": -262.21221923828125, "logps/rejected": -297.0346984863281, "loss": 0.0749, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0412641763687134, "rewards/margins": 6.329002857208252, "rewards/rejected": -7.370265960693359, "step": 3550 }, { "epoch": 1.84, "learning_rate": 2.1524192006119714e-07, "logits/chosen": -2.6721417903900146, "logits/rejected": -2.6527702808380127, "logps/chosen": -262.6390380859375, "logps/rejected": -305.446533203125, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -1.524945616722107, "rewards/margins": 5.509359359741211, "rewards/rejected": -7.034304618835449, "step": 3560 }, { "epoch": 1.84, "learning_rate": 2.1428571428571426e-07, "logits/chosen": -2.698298931121826, "logits/rejected": -2.5938477516174316, "logps/chosen": -255.32101440429688, "logps/rejected": -286.9696350097656, "loss": 0.0522, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.1083383560180664, "rewards/margins": 6.037410259246826, "rewards/rejected": -7.145749092102051, "step": 3570 }, { "epoch": 1.85, "learning_rate": 2.133295085102314e-07, "logits/chosen": -2.612971544265747, "logits/rejected": -2.5065410137176514, "logps/chosen": -259.94805908203125, "logps/rejected": -297.79937744140625, "loss": 0.0497, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2725722789764404, "rewards/margins": 6.419241428375244, "rewards/rejected": -7.6918134689331055, "step": 3580 }, { "epoch": 1.85, "learning_rate": 2.1237330273474851e-07, "logits/chosen": -2.7124502658843994, "logits/rejected": -2.6637096405029297, "logps/chosen": -314.7274475097656, "logps/rejected": -319.1263427734375, "loss": 0.1053, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8897290229797363, "rewards/margins": 6.549591064453125, "rewards/rejected": -7.4393205642700195, "step": 3590 }, { "epoch": 1.86, "learning_rate": 2.1141709695926563e-07, "logits/chosen": -2.6986777782440186, "logits/rejected": -2.6317005157470703, "logps/chosen": -280.7533264160156, "logps/rejected": -291.148193359375, "loss": 0.0934, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.8306609988212585, "rewards/margins": 6.620227813720703, "rewards/rejected": -7.450888633728027, "step": 3600 }, { "epoch": 1.86, "eval_logits/chosen": -2.72717022895813, "eval_logits/rejected": -2.653413772583008, "eval_logps/chosen": -315.21905517578125, "eval_logps/rejected": -299.8420715332031, "eval_loss": 0.5538860559463501, "eval_rewards/accuracies": 0.7734375, "eval_rewards/chosen": -2.6735758781433105, "eval_rewards/margins": 2.3596088886260986, "eval_rewards/rejected": -5.03318452835083, "eval_runtime": 318.9781, "eval_samples_per_second": 6.27, "eval_steps_per_second": 0.1, "step": 3600 }, { "epoch": 1.86, "learning_rate": 2.1046089118378275e-07, "logits/chosen": -2.7085819244384766, "logits/rejected": -2.647667407989502, "logps/chosen": -267.76422119140625, "logps/rejected": -332.65655517578125, "loss": 0.0676, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7043758630752563, "rewards/margins": 7.160613059997559, "rewards/rejected": -7.864989280700684, "step": 3610 }, { "epoch": 1.87, "learning_rate": 2.0950468540829986e-07, "logits/chosen": -2.680129289627075, "logits/rejected": -2.6082656383514404, "logps/chosen": -284.46026611328125, "logps/rejected": -300.432861328125, "loss": 0.118, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.713326096534729, "rewards/margins": 6.579096794128418, "rewards/rejected": -7.292423248291016, "step": 3620 }, { "epoch": 1.87, "learning_rate": 2.0854847963281698e-07, "logits/chosen": -2.702371597290039, "logits/rejected": -2.586292028427124, "logps/chosen": -312.0211486816406, "logps/rejected": -312.04010009765625, "loss": 0.0865, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7484079599380493, "rewards/margins": 6.361302375793457, "rewards/rejected": -7.1097092628479, "step": 3630 }, { "epoch": 1.88, "learning_rate": 2.0759227385733407e-07, "logits/chosen": -2.5228898525238037, "logits/rejected": -2.4642603397369385, "logps/chosen": -314.04473876953125, "logps/rejected": -316.93182373046875, "loss": 0.0702, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7400057911872864, "rewards/margins": 6.061376094818115, "rewards/rejected": -6.801381587982178, "step": 3640 }, { "epoch": 1.88, "learning_rate": 2.066360680818512e-07, "logits/chosen": -2.698092222213745, "logits/rejected": -2.5973281860351562, "logps/chosen": -338.0748596191406, "logps/rejected": -322.3836364746094, "loss": 0.0779, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.45165568590164185, "rewards/margins": 6.239035606384277, "rewards/rejected": -6.690691947937012, "step": 3650 }, { "epoch": 1.89, "learning_rate": 2.0567986230636832e-07, "logits/chosen": -2.6877999305725098, "logits/rejected": -2.6085829734802246, "logps/chosen": -262.77606201171875, "logps/rejected": -316.13897705078125, "loss": 0.1007, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.600859522819519, "rewards/margins": 6.560063362121582, "rewards/rejected": -7.160922050476074, "step": 3660 }, { "epoch": 1.89, "learning_rate": 2.0472365653088544e-07, "logits/chosen": -2.7101938724517822, "logits/rejected": -2.648778200149536, "logps/chosen": -310.6146240234375, "logps/rejected": -308.3982238769531, "loss": 0.0788, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.1889054775238037, "rewards/margins": 5.46823263168335, "rewards/rejected": -6.657138824462891, "step": 3670 }, { "epoch": 1.9, "learning_rate": 2.0376745075540256e-07, "logits/chosen": -2.7423958778381348, "logits/rejected": -2.693328619003296, "logps/chosen": -329.9389953613281, "logps/rejected": -331.4327697753906, "loss": 0.0778, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9183465838432312, "rewards/margins": 6.151231288909912, "rewards/rejected": -7.069577693939209, "step": 3680 }, { "epoch": 1.91, "learning_rate": 2.0281124497991967e-07, "logits/chosen": -2.6946024894714355, "logits/rejected": -2.682375431060791, "logps/chosen": -290.21405029296875, "logps/rejected": -335.01373291015625, "loss": 0.1029, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7418602705001831, "rewards/margins": 6.170797824859619, "rewards/rejected": -6.91265869140625, "step": 3690 }, { "epoch": 1.91, "learning_rate": 2.018550392044368e-07, "logits/chosen": -2.7262251377105713, "logits/rejected": -2.6651265621185303, "logps/chosen": -284.5633239746094, "logps/rejected": -363.5076599121094, "loss": 0.0694, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6026765704154968, "rewards/margins": 7.016315460205078, "rewards/rejected": -7.618991851806641, "step": 3700 }, { "epoch": 1.91, "eval_logits/chosen": -2.7433369159698486, "eval_logits/rejected": -2.673041820526123, "eval_logps/chosen": -315.1214599609375, "eval_logps/rejected": -298.85418701171875, "eval_loss": 0.5426376461982727, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -2.6654717922210693, "eval_rewards/margins": 2.285717487335205, "eval_rewards/rejected": -4.951189041137695, "eval_runtime": 319.8913, "eval_samples_per_second": 6.252, "eval_steps_per_second": 0.1, "step": 3700 }, { "epoch": 1.92, "learning_rate": 2.0089883342895388e-07, "logits/chosen": -2.738462448120117, "logits/rejected": -2.692068099975586, "logps/chosen": -319.04583740234375, "logps/rejected": -289.92987060546875, "loss": 0.0777, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.45338597893714905, "rewards/margins": 6.3850555419921875, "rewards/rejected": -6.838441371917725, "step": 3710 }, { "epoch": 1.92, "learning_rate": 1.9994262765347102e-07, "logits/chosen": -2.7660131454467773, "logits/rejected": -2.6503071784973145, "logps/chosen": -292.02587890625, "logps/rejected": -272.49468994140625, "loss": 0.0847, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.8121105432510376, "rewards/margins": 5.917023658752441, "rewards/rejected": -6.729134559631348, "step": 3720 }, { "epoch": 1.93, "learning_rate": 1.9898642187798813e-07, "logits/chosen": -2.622962236404419, "logits/rejected": -2.601963996887207, "logps/chosen": -295.5254211425781, "logps/rejected": -339.43707275390625, "loss": 0.0652, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3916550576686859, "rewards/margins": 7.198772430419922, "rewards/rejected": -7.590427398681641, "step": 3730 }, { "epoch": 1.93, "learning_rate": 1.9803021610250525e-07, "logits/chosen": -2.7688496112823486, "logits/rejected": -2.7322731018066406, "logps/chosen": -241.67526245117188, "logps/rejected": -298.95489501953125, "loss": 0.0793, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.7542971968650818, "rewards/margins": 5.94872522354126, "rewards/rejected": -6.703022003173828, "step": 3740 }, { "epoch": 1.94, "learning_rate": 1.9707401032702237e-07, "logits/chosen": -2.6245992183685303, "logits/rejected": -2.5694174766540527, "logps/chosen": -267.7350158691406, "logps/rejected": -306.3616943359375, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": -0.9523770213127136, "rewards/margins": 6.084922790527344, "rewards/rejected": -7.037300109863281, "step": 3750 }, { "epoch": 1.94, "learning_rate": 1.9611780455153948e-07, "logits/chosen": -2.7001795768737793, "logits/rejected": -2.7346932888031006, "logps/chosen": -246.02664184570312, "logps/rejected": -314.5271301269531, "loss": 0.0823, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6487317681312561, "rewards/margins": 6.192134857177734, "rewards/rejected": -6.8408660888671875, "step": 3760 }, { "epoch": 1.95, "learning_rate": 1.951615987760566e-07, "logits/chosen": -2.6450703144073486, "logits/rejected": -2.593804359436035, "logps/chosen": -286.943115234375, "logps/rejected": -319.27960205078125, "loss": 0.0791, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2068636417388916, "rewards/margins": 6.372591495513916, "rewards/rejected": -7.579455375671387, "step": 3770 }, { "epoch": 1.95, "learning_rate": 1.942053930005737e-07, "logits/chosen": -2.808245897293091, "logits/rejected": -2.694265127182007, "logps/chosen": -298.6407775878906, "logps/rejected": -325.7048034667969, "loss": 0.0723, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.0979423522949219, "rewards/margins": 6.701653480529785, "rewards/rejected": -7.799594879150391, "step": 3780 }, { "epoch": 1.96, "learning_rate": 1.9324918722509086e-07, "logits/chosen": -2.650181531906128, "logits/rejected": -2.6911978721618652, "logps/chosen": -274.6768493652344, "logps/rejected": -349.334716796875, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.695887804031372, "rewards/margins": 6.532961845397949, "rewards/rejected": -8.228849411010742, "step": 3790 }, { "epoch": 1.96, "learning_rate": 1.9229298144960794e-07, "logits/chosen": -2.7447924613952637, "logits/rejected": -2.7087714672088623, "logps/chosen": -291.3640441894531, "logps/rejected": -319.9002685546875, "loss": 0.1267, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.1460148096084595, "rewards/margins": 6.4988250732421875, "rewards/rejected": -7.644840240478516, "step": 3800 }, { "epoch": 1.96, "eval_logits/chosen": -2.7430062294006348, "eval_logits/rejected": -2.677804946899414, "eval_logps/chosen": -317.66644287109375, "eval_logps/rejected": -299.8018798828125, "eval_loss": 0.5620393753051758, "eval_rewards/accuracies": 0.791015625, "eval_rewards/chosen": -2.8767101764678955, "eval_rewards/margins": 2.1531410217285156, "eval_rewards/rejected": -5.02985143661499, "eval_runtime": 320.816, "eval_samples_per_second": 6.234, "eval_steps_per_second": 0.1, "step": 3800 }, { "epoch": 1.97, "learning_rate": 1.9133677567412506e-07, "logits/chosen": -2.744810104370117, "logits/rejected": -2.68992018699646, "logps/chosen": -295.21893310546875, "logps/rejected": -316.60919189453125, "loss": 0.0905, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.3385887145996094, "rewards/margins": 5.636076927185059, "rewards/rejected": -6.97466516494751, "step": 3810 }, { "epoch": 1.97, "learning_rate": 1.9038056989864218e-07, "logits/chosen": -2.7312960624694824, "logits/rejected": -2.719721555709839, "logps/chosen": -255.89419555664062, "logps/rejected": -285.29937744140625, "loss": 0.1069, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7798563241958618, "rewards/margins": 5.466775894165039, "rewards/rejected": -6.246632099151611, "step": 3820 }, { "epoch": 1.98, "learning_rate": 1.894243641231593e-07, "logits/chosen": -2.5127370357513428, "logits/rejected": -2.510570526123047, "logps/chosen": -274.7824401855469, "logps/rejected": -267.11651611328125, "loss": 0.1142, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.1732484102249146, "rewards/margins": 5.389260292053223, "rewards/rejected": -6.562509059906006, "step": 3830 }, { "epoch": 1.98, "learning_rate": 1.884681583476764e-07, "logits/chosen": -2.759958028793335, "logits/rejected": -2.7305500507354736, "logps/chosen": -293.76055908203125, "logps/rejected": -317.3556823730469, "loss": 0.0565, "rewards/accuracies": 0.9375, "rewards/chosen": -1.473763346672058, "rewards/margins": 5.311868667602539, "rewards/rejected": -6.785632133483887, "step": 3840 }, { "epoch": 1.99, "learning_rate": 1.8751195257219352e-07, "logits/chosen": -2.757524013519287, "logits/rejected": -2.716054677963257, "logps/chosen": -261.44049072265625, "logps/rejected": -303.45013427734375, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": -1.0988296270370483, "rewards/margins": 5.643826961517334, "rewards/rejected": -6.7426557540893555, "step": 3850 }, { "epoch": 1.99, "learning_rate": 1.8655574679671067e-07, "logits/chosen": -2.7302353382110596, "logits/rejected": -2.6915578842163086, "logps/chosen": -298.05908203125, "logps/rejected": -328.80633544921875, "loss": 0.1036, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.1912249326705933, "rewards/margins": 6.21510648727417, "rewards/rejected": -7.4063310623168945, "step": 3860 }, { "epoch": 2.0, "learning_rate": 1.8559954102122778e-07, "logits/chosen": -2.734767198562622, "logits/rejected": -2.657914400100708, "logps/chosen": -284.9921569824219, "logps/rejected": -315.1389465332031, "loss": 0.0897, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.7892888188362122, "rewards/margins": 6.1825714111328125, "rewards/rejected": -6.971860408782959, "step": 3870 }, { "epoch": 2.0, "learning_rate": 1.8464333524574487e-07, "logits/chosen": -2.7339882850646973, "logits/rejected": -2.703369617462158, "logps/chosen": -262.26190185546875, "logps/rejected": -295.3121032714844, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.7540243268013, "rewards/margins": 6.619154453277588, "rewards/rejected": -7.373178958892822, "step": 3880 }, { "epoch": 2.01, "learning_rate": 1.8368712947026199e-07, "logits/chosen": -2.742309093475342, "logits/rejected": -2.6803410053253174, "logps/chosen": -290.23187255859375, "logps/rejected": -318.8965759277344, "loss": 0.0121, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4057774543762207, "rewards/margins": 7.248461723327637, "rewards/rejected": -7.654238700866699, "step": 3890 }, { "epoch": 2.01, "learning_rate": 1.827309236947791e-07, "logits/chosen": -2.621222496032715, "logits/rejected": -2.5998809337615967, "logps/chosen": -253.2609405517578, "logps/rejected": -337.5628356933594, "loss": 0.024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8895366787910461, "rewards/margins": 7.6298394203186035, "rewards/rejected": -8.519375801086426, "step": 3900 }, { "epoch": 2.01, "eval_logits/chosen": -2.7240028381347656, "eval_logits/rejected": -2.6526060104370117, "eval_logps/chosen": -318.7413635253906, "eval_logps/rejected": -306.39208984375, "eval_loss": 0.5618212223052979, "eval_rewards/accuracies": 0.783203125, "eval_rewards/chosen": -2.9659268856048584, "eval_rewards/margins": 2.610912561416626, "eval_rewards/rejected": -5.576839447021484, "eval_runtime": 320.1095, "eval_samples_per_second": 6.248, "eval_steps_per_second": 0.1, "step": 3900 }, { "epoch": 2.02, "learning_rate": 1.8177471791929622e-07, "logits/chosen": -2.6321282386779785, "logits/rejected": -2.584348201751709, "logps/chosen": -271.2797546386719, "logps/rejected": -385.1412048339844, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -0.7935308218002319, "rewards/margins": 8.099836349487305, "rewards/rejected": -8.893366813659668, "step": 3910 }, { "epoch": 2.02, "learning_rate": 1.8081851214381333e-07, "logits/chosen": -2.5629727840423584, "logits/rejected": -2.550774097442627, "logps/chosen": -284.50360107421875, "logps/rejected": -368.0713806152344, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.2019399404525757, "rewards/margins": 7.724644660949707, "rewards/rejected": -8.926583290100098, "step": 3920 }, { "epoch": 2.03, "learning_rate": 1.7986230636833047e-07, "logits/chosen": -2.652442455291748, "logits/rejected": -2.5345163345336914, "logps/chosen": -234.04696655273438, "logps/rejected": -311.1173400878906, "loss": 0.018, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.1374295949935913, "rewards/margins": 6.791005611419678, "rewards/rejected": -7.928435325622559, "step": 3930 }, { "epoch": 2.03, "learning_rate": 1.789061005928476e-07, "logits/chosen": -2.7317845821380615, "logits/rejected": -2.601142168045044, "logps/chosen": -318.1007385253906, "logps/rejected": -311.67828369140625, "loss": 0.0212, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9945087432861328, "rewards/margins": 7.461602687835693, "rewards/rejected": -8.456111907958984, "step": 3940 }, { "epoch": 2.04, "learning_rate": 1.7794989481736468e-07, "logits/chosen": -2.685377597808838, "logits/rejected": -2.605644702911377, "logps/chosen": -303.90484619140625, "logps/rejected": -338.5601501464844, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9852126836776733, "rewards/margins": 7.9290947914123535, "rewards/rejected": -8.914307594299316, "step": 3950 }, { "epoch": 2.04, "learning_rate": 1.769936890418818e-07, "logits/chosen": -2.6533775329589844, "logits/rejected": -2.59948468208313, "logps/chosen": -277.9990539550781, "logps/rejected": -334.33251953125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.4269586503505707, "rewards/margins": 8.160022735595703, "rewards/rejected": -8.586980819702148, "step": 3960 }, { "epoch": 2.05, "learning_rate": 1.760374832663989e-07, "logits/chosen": -2.582099199295044, "logits/rejected": -2.496831178665161, "logps/chosen": -283.3002624511719, "logps/rejected": -348.09014892578125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.9939042329788208, "rewards/margins": 7.6933183670043945, "rewards/rejected": -8.687223434448242, "step": 3970 }, { "epoch": 2.05, "learning_rate": 1.7508127749091603e-07, "logits/chosen": -2.6613199710845947, "logits/rejected": -2.617159605026245, "logps/chosen": -290.9040832519531, "logps/rejected": -346.271240234375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.259175181388855, "rewards/margins": 7.8499884605407715, "rewards/rejected": -9.109164237976074, "step": 3980 }, { "epoch": 2.06, "learning_rate": 1.7412507171543314e-07, "logits/chosen": -2.638604164123535, "logits/rejected": -2.566866636276245, "logps/chosen": -284.4937438964844, "logps/rejected": -315.1275329589844, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.6634613275527954, "rewards/margins": 7.883145809173584, "rewards/rejected": -9.54660701751709, "step": 3990 }, { "epoch": 2.07, "learning_rate": 1.7316886593995028e-07, "logits/chosen": -2.6418697834014893, "logits/rejected": -2.601135015487671, "logps/chosen": -264.54522705078125, "logps/rejected": -333.7240295410156, "loss": 0.0171, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.2121659517288208, "rewards/margins": 8.385894775390625, "rewards/rejected": -9.598061561584473, "step": 4000 }, { "epoch": 2.07, "eval_logits/chosen": -2.678946018218994, "eval_logits/rejected": -2.6016712188720703, "eval_logps/chosen": -327.0849304199219, "eval_logps/rejected": -319.86224365234375, "eval_loss": 0.6116997003555298, "eval_rewards/accuracies": 0.779296875, "eval_rewards/chosen": -3.658442974090576, "eval_rewards/margins": 3.0364155769348145, "eval_rewards/rejected": -6.694858551025391, "eval_runtime": 318.9857, "eval_samples_per_second": 6.27, "eval_steps_per_second": 0.1, "step": 4000 }, { "epoch": 2.07, "learning_rate": 1.722126601644674e-07, "logits/chosen": -2.6576359272003174, "logits/rejected": -2.5062575340270996, "logps/chosen": -292.4459533691406, "logps/rejected": -328.7396240234375, "loss": 0.0129, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.079073429107666, "rewards/margins": 7.914822578430176, "rewards/rejected": -8.993895530700684, "step": 4010 }, { "epoch": 2.08, "learning_rate": 1.7125645438898452e-07, "logits/chosen": -2.601428270339966, "logits/rejected": -2.5724899768829346, "logps/chosen": -286.8119201660156, "logps/rejected": -346.60247802734375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.4060837030410767, "rewards/margins": 8.813210487365723, "rewards/rejected": -10.219294548034668, "step": 4020 }, { "epoch": 2.08, "learning_rate": 1.703002486135016e-07, "logits/chosen": -2.639251947402954, "logits/rejected": -2.6305768489837646, "logps/chosen": -300.5618591308594, "logps/rejected": -354.28289794921875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.6109575033187866, "rewards/margins": 9.505043029785156, "rewards/rejected": -10.116000175476074, "step": 4030 }, { "epoch": 2.09, "learning_rate": 1.6934404283801872e-07, "logits/chosen": -2.6380391120910645, "logits/rejected": -2.57027530670166, "logps/chosen": -292.8567810058594, "logps/rejected": -361.1514892578125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.8670053482055664, "rewards/margins": 9.120863914489746, "rewards/rejected": -9.987869262695312, "step": 4040 }, { "epoch": 2.09, "learning_rate": 1.6838783706253584e-07, "logits/chosen": -2.6397929191589355, "logits/rejected": -2.5708415508270264, "logps/chosen": -257.95391845703125, "logps/rejected": -323.4990234375, "loss": 0.0153, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2431116104125977, "rewards/margins": 8.200019836425781, "rewards/rejected": -9.443130493164062, "step": 4050 }, { "epoch": 2.1, "learning_rate": 1.6743163128705295e-07, "logits/chosen": -2.6018359661102295, "logits/rejected": -2.5202841758728027, "logps/chosen": -271.3185119628906, "logps/rejected": -351.54266357421875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -1.7273725271224976, "rewards/margins": 8.657565116882324, "rewards/rejected": -10.384939193725586, "step": 4060 }, { "epoch": 2.1, "learning_rate": 1.664754255115701e-07, "logits/chosen": -2.605795383453369, "logits/rejected": -2.558429002761841, "logps/chosen": -266.20648193359375, "logps/rejected": -356.53338623046875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -1.3280067443847656, "rewards/margins": 9.252349853515625, "rewards/rejected": -10.580357551574707, "step": 4070 }, { "epoch": 2.11, "learning_rate": 1.655192197360872e-07, "logits/chosen": -2.779416561126709, "logits/rejected": -2.670290470123291, "logps/chosen": -324.42041015625, "logps/rejected": -381.06732177734375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.2550573348999023, "rewards/margins": 8.682723999023438, "rewards/rejected": -9.937780380249023, "step": 4080 }, { "epoch": 2.11, "learning_rate": 1.6456301396060433e-07, "logits/chosen": -2.6742947101593018, "logits/rejected": -2.598904848098755, "logps/chosen": -317.13446044921875, "logps/rejected": -387.83551025390625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.3452589511871338, "rewards/margins": 8.348325729370117, "rewards/rejected": -9.693585395812988, "step": 4090 }, { "epoch": 2.12, "learning_rate": 1.6360680818512144e-07, "logits/chosen": -2.6107609272003174, "logits/rejected": -2.610686779022217, "logps/chosen": -273.50714111328125, "logps/rejected": -345.960205078125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.0450981855392456, "rewards/margins": 8.247116088867188, "rewards/rejected": -9.292214393615723, "step": 4100 }, { "epoch": 2.12, "eval_logits/chosen": -2.6772398948669434, "eval_logits/rejected": -2.600703716278076, "eval_logps/chosen": -329.8154602050781, "eval_logps/rejected": -324.5066223144531, "eval_loss": 0.6536163091659546, "eval_rewards/accuracies": 0.7734375, "eval_rewards/chosen": -3.885077714920044, "eval_rewards/margins": 3.1952614784240723, "eval_rewards/rejected": -7.080338954925537, "eval_runtime": 321.6183, "eval_samples_per_second": 6.219, "eval_steps_per_second": 0.099, "step": 4100 }, { "epoch": 2.12, "learning_rate": 1.6265060240963853e-07, "logits/chosen": -2.701676845550537, "logits/rejected": -2.6190435886383057, "logps/chosen": -315.9639587402344, "logps/rejected": -341.44830322265625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.054032564163208, "rewards/margins": 8.190388679504395, "rewards/rejected": -9.244421005249023, "step": 4110 }, { "epoch": 2.13, "learning_rate": 1.6169439663415565e-07, "logits/chosen": -2.717128276824951, "logits/rejected": -2.619654893875122, "logps/chosen": -299.8330993652344, "logps/rejected": -363.94610595703125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.489800214767456, "rewards/margins": 9.731782913208008, "rewards/rejected": -11.221583366394043, "step": 4120 }, { "epoch": 2.13, "learning_rate": 1.6073819085867276e-07, "logits/chosen": -2.721148729324341, "logits/rejected": -2.7373862266540527, "logps/chosen": -280.73907470703125, "logps/rejected": -415.42657470703125, "loss": 0.0143, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0552005767822266, "rewards/margins": 9.348871231079102, "rewards/rejected": -10.404069900512695, "step": 4130 }, { "epoch": 2.14, "learning_rate": 1.597819850831899e-07, "logits/chosen": -2.6489956378936768, "logits/rejected": -2.5617423057556152, "logps/chosen": -260.9148864746094, "logps/rejected": -355.51629638671875, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.2448207139968872, "rewards/margins": 9.049307823181152, "rewards/rejected": -10.294129371643066, "step": 4140 }, { "epoch": 2.14, "learning_rate": 1.5882577930770702e-07, "logits/chosen": -2.7245113849639893, "logits/rejected": -2.6208720207214355, "logps/chosen": -294.0907287597656, "logps/rejected": -355.63372802734375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.3541228771209717, "rewards/margins": 9.125802993774414, "rewards/rejected": -10.479926109313965, "step": 4150 }, { "epoch": 2.15, "learning_rate": 1.5786957353222414e-07, "logits/chosen": -2.68344783782959, "logits/rejected": -2.5753684043884277, "logps/chosen": -335.9783630371094, "logps/rejected": -356.41900634765625, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9662755727767944, "rewards/margins": 9.411206245422363, "rewards/rejected": -10.377481460571289, "step": 4160 }, { "epoch": 2.15, "learning_rate": 1.5691336775674125e-07, "logits/chosen": -2.595283269882202, "logits/rejected": -2.5607352256774902, "logps/chosen": -268.7843017578125, "logps/rejected": -346.91558837890625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -1.2743018865585327, "rewards/margins": 9.886120796203613, "rewards/rejected": -11.160421371459961, "step": 4170 }, { "epoch": 2.16, "learning_rate": 1.5595716198125837e-07, "logits/chosen": -2.5931780338287354, "logits/rejected": -2.4969544410705566, "logps/chosen": -312.57135009765625, "logps/rejected": -386.31494140625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -2.2210693359375, "rewards/margins": 9.4270601272583, "rewards/rejected": -11.6481294631958, "step": 4180 }, { "epoch": 2.16, "learning_rate": 1.5500095620577546e-07, "logits/chosen": -2.702571153640747, "logits/rejected": -2.6287167072296143, "logps/chosen": -277.70562744140625, "logps/rejected": -322.46771240234375, "loss": 0.0126, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9663254022598267, "rewards/margins": 8.634066581726074, "rewards/rejected": -10.600390434265137, "step": 4190 }, { "epoch": 2.17, "learning_rate": 1.5404475043029257e-07, "logits/chosen": -2.655897617340088, "logits/rejected": -2.621303081512451, "logps/chosen": -270.8863525390625, "logps/rejected": -337.29620361328125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.1621564626693726, "rewards/margins": 8.764557838439941, "rewards/rejected": -9.926713943481445, "step": 4200 }, { "epoch": 2.17, "eval_logits/chosen": -2.674062728881836, "eval_logits/rejected": -2.6019034385681152, "eval_logps/chosen": -326.279296875, "eval_logps/rejected": -319.0072326660156, "eval_loss": 0.6296473145484924, "eval_rewards/accuracies": 0.7734375, "eval_rewards/chosen": -3.5915751457214355, "eval_rewards/margins": 3.032320261001587, "eval_rewards/rejected": -6.62389612197876, "eval_runtime": 318.3994, "eval_samples_per_second": 6.281, "eval_steps_per_second": 0.101, "step": 4200 }, { "epoch": 2.17, "learning_rate": 1.5308854465480971e-07, "logits/chosen": -2.6837573051452637, "logits/rejected": -2.618286371231079, "logps/chosen": -281.00970458984375, "logps/rejected": -310.1212463378906, "loss": 0.0129, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7264576554298401, "rewards/margins": 7.929131507873535, "rewards/rejected": -8.655588150024414, "step": 4210 }, { "epoch": 2.18, "learning_rate": 1.5213233887932683e-07, "logits/chosen": -2.6160149574279785, "logits/rejected": -2.535550594329834, "logps/chosen": -315.74615478515625, "logps/rejected": -344.5783996582031, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0572540760040283, "rewards/margins": 8.362442970275879, "rewards/rejected": -9.419695854187012, "step": 4220 }, { "epoch": 2.18, "learning_rate": 1.5117613310384395e-07, "logits/chosen": -2.6453897953033447, "logits/rejected": -2.589632034301758, "logps/chosen": -299.0142822265625, "logps/rejected": -356.302978515625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.3567501306533813, "rewards/margins": 9.359026908874512, "rewards/rejected": -10.715776443481445, "step": 4230 }, { "epoch": 2.19, "learning_rate": 1.5021992732836106e-07, "logits/chosen": -2.639852523803711, "logits/rejected": -2.5899417400360107, "logps/chosen": -299.6291809082031, "logps/rejected": -395.42816162109375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.6487194299697876, "rewards/margins": 9.371026039123535, "rewards/rejected": -11.019745826721191, "step": 4240 }, { "epoch": 2.19, "learning_rate": 1.4926372155287818e-07, "logits/chosen": -2.6065778732299805, "logits/rejected": -2.6227426528930664, "logps/chosen": -243.9075469970703, "logps/rejected": -348.82464599609375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.697610855102539, "rewards/margins": 8.68234920501709, "rewards/rejected": -10.379960060119629, "step": 4250 }, { "epoch": 2.2, "learning_rate": 1.483075157773953e-07, "logits/chosen": -2.67543363571167, "logits/rejected": -2.552049398422241, "logps/chosen": -297.5004577636719, "logps/rejected": -330.24200439453125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.4021927118301392, "rewards/margins": 7.92205286026001, "rewards/rejected": -9.32424545288086, "step": 4260 }, { "epoch": 2.2, "learning_rate": 1.4735131000191238e-07, "logits/chosen": -2.5814054012298584, "logits/rejected": -2.5440471172332764, "logps/chosen": -256.4645080566406, "logps/rejected": -300.4546203613281, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.2562730312347412, "rewards/margins": 8.49797248840332, "rewards/rejected": -9.754246711730957, "step": 4270 }, { "epoch": 2.21, "learning_rate": 1.4639510422642952e-07, "logits/chosen": -2.6232187747955322, "logits/rejected": -2.5715179443359375, "logps/chosen": -258.98822021484375, "logps/rejected": -330.06427001953125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.6914314031600952, "rewards/margins": 8.244222640991211, "rewards/rejected": -9.935653686523438, "step": 4280 }, { "epoch": 2.21, "learning_rate": 1.4543889845094664e-07, "logits/chosen": -2.579191207885742, "logits/rejected": -2.616372585296631, "logps/chosen": -317.7884216308594, "logps/rejected": -402.5799865722656, "loss": 0.0127, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9717906713485718, "rewards/margins": 10.175740242004395, "rewards/rejected": -11.147529602050781, "step": 4290 }, { "epoch": 2.22, "learning_rate": 1.4448269267546376e-07, "logits/chosen": -2.6141788959503174, "logits/rejected": -2.5500636100769043, "logps/chosen": -340.75775146484375, "logps/rejected": -368.6303405761719, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.5387579202651978, "rewards/margins": 9.281224250793457, "rewards/rejected": -9.81998062133789, "step": 4300 }, { "epoch": 2.22, "eval_logits/chosen": -2.6569724082946777, "eval_logits/rejected": -2.587719202041626, "eval_logps/chosen": -326.94036865234375, "eval_logps/rejected": -320.8320617675781, "eval_loss": 0.6244524717330933, "eval_rewards/accuracies": 0.783203125, "eval_rewards/chosen": -3.646446704864502, "eval_rewards/margins": 3.1289076805114746, "eval_rewards/rejected": -6.775354385375977, "eval_runtime": 318.2881, "eval_samples_per_second": 6.284, "eval_steps_per_second": 0.101, "step": 4300 }, { "epoch": 2.23, "learning_rate": 1.4352648689998087e-07, "logits/chosen": -2.681724786758423, "logits/rejected": -2.550607681274414, "logps/chosen": -264.21734619140625, "logps/rejected": -325.14312744140625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.99748295545578, "rewards/margins": 8.451597213745117, "rewards/rejected": -9.449081420898438, "step": 4310 }, { "epoch": 2.23, "learning_rate": 1.42570281124498e-07, "logits/chosen": -2.649517774581909, "logits/rejected": -2.5733730792999268, "logps/chosen": -311.07476806640625, "logps/rejected": -374.2307434082031, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.8575196266174316, "rewards/margins": 8.917535781860352, "rewards/rejected": -9.775054931640625, "step": 4320 }, { "epoch": 2.24, "learning_rate": 1.416140753490151e-07, "logits/chosen": -2.5949928760528564, "logits/rejected": -2.626636266708374, "logps/chosen": -311.9324645996094, "logps/rejected": -381.35845947265625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.1303819417953491, "rewards/margins": 9.577088356018066, "rewards/rejected": -10.707470893859863, "step": 4330 }, { "epoch": 2.24, "learning_rate": 1.4065786957353222e-07, "logits/chosen": -2.6130571365356445, "logits/rejected": -2.6217548847198486, "logps/chosen": -297.66009521484375, "logps/rejected": -360.7577209472656, "loss": 0.0173, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.5998196601867676, "rewards/margins": 8.538372993469238, "rewards/rejected": -10.138192176818848, "step": 4340 }, { "epoch": 2.25, "learning_rate": 1.3970166379804933e-07, "logits/chosen": -2.6455140113830566, "logits/rejected": -2.546311140060425, "logps/chosen": -331.08526611328125, "logps/rejected": -353.76885986328125, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.4843032360076904, "rewards/margins": 8.658735275268555, "rewards/rejected": -10.143038749694824, "step": 4350 }, { "epoch": 2.25, "learning_rate": 1.3874545802256645e-07, "logits/chosen": -2.655974864959717, "logits/rejected": -2.5950520038604736, "logps/chosen": -281.14239501953125, "logps/rejected": -333.76666259765625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.3453155755996704, "rewards/margins": 8.751745223999023, "rewards/rejected": -10.09705924987793, "step": 4360 }, { "epoch": 2.26, "learning_rate": 1.3778925224708357e-07, "logits/chosen": -2.491414785385132, "logits/rejected": -2.498983860015869, "logps/chosen": -276.05633544921875, "logps/rejected": -345.7313232421875, "loss": 0.0114, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.8199462890625, "rewards/margins": 8.57990837097168, "rewards/rejected": -10.399855613708496, "step": 4370 }, { "epoch": 2.26, "learning_rate": 1.3683304647160068e-07, "logits/chosen": -2.4998817443847656, "logits/rejected": -2.451429605484009, "logps/chosen": -267.40478515625, "logps/rejected": -369.1258850097656, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.7423540353775024, "rewards/margins": 9.21500301361084, "rewards/rejected": -10.957357406616211, "step": 4380 }, { "epoch": 2.27, "learning_rate": 1.358768406961178e-07, "logits/chosen": -2.5258095264434814, "logits/rejected": -2.536705732345581, "logps/chosen": -260.1905517578125, "logps/rejected": -353.17822265625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.5383861064910889, "rewards/margins": 9.138838768005371, "rewards/rejected": -10.677224159240723, "step": 4390 }, { "epoch": 2.27, "learning_rate": 1.349206349206349e-07, "logits/chosen": -2.571965456008911, "logits/rejected": -2.5515596866607666, "logps/chosen": -258.63037109375, "logps/rejected": -329.9254455566406, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.521057605743408, "rewards/margins": 8.299089431762695, "rewards/rejected": -10.820146560668945, "step": 4400 }, { "epoch": 2.27, "eval_logits/chosen": -2.6114020347595215, "eval_logits/rejected": -2.539966583251953, "eval_logps/chosen": -336.7133483886719, "eval_logps/rejected": -333.55706787109375, "eval_loss": 0.6658715605735779, "eval_rewards/accuracies": 0.783203125, "eval_rewards/chosen": -4.457603454589844, "eval_rewards/margins": 3.3739254474639893, "eval_rewards/rejected": -7.831529140472412, "eval_runtime": 318.3925, "eval_samples_per_second": 6.282, "eval_steps_per_second": 0.101, "step": 4400 }, { "epoch": 2.28, "learning_rate": 1.3396442914515203e-07, "logits/chosen": -2.5480706691741943, "logits/rejected": -2.448054790496826, "logps/chosen": -301.80108642578125, "logps/rejected": -377.3705139160156, "loss": 0.0108, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1850955486297607, "rewards/margins": 9.278152465820312, "rewards/rejected": -11.463247299194336, "step": 4410 }, { "epoch": 2.28, "learning_rate": 1.3300822336966917e-07, "logits/chosen": -2.606113910675049, "logits/rejected": -2.4512763023376465, "logps/chosen": -324.0838623046875, "logps/rejected": -365.725341796875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.2610952854156494, "rewards/margins": 10.031002044677734, "rewards/rejected": -11.292096138000488, "step": 4420 }, { "epoch": 2.29, "learning_rate": 1.3205201759418626e-07, "logits/chosen": -2.515292167663574, "logits/rejected": -2.424518346786499, "logps/chosen": -319.5099182128906, "logps/rejected": -349.7605285644531, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.2446824312210083, "rewards/margins": 8.991830825805664, "rewards/rejected": -10.236513137817383, "step": 4430 }, { "epoch": 2.29, "learning_rate": 1.3109581181870338e-07, "logits/chosen": -2.6586146354675293, "logits/rejected": -2.597219705581665, "logps/chosen": -329.9248046875, "logps/rejected": -353.97259521484375, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7189701199531555, "rewards/margins": 9.446731567382812, "rewards/rejected": -10.165702819824219, "step": 4440 }, { "epoch": 2.3, "learning_rate": 1.301396060432205e-07, "logits/chosen": -2.598280191421509, "logits/rejected": -2.4691569805145264, "logps/chosen": -324.75830078125, "logps/rejected": -359.9110412597656, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.60588800907135, "rewards/margins": 9.08741569519043, "rewards/rejected": -10.693304061889648, "step": 4450 }, { "epoch": 2.3, "learning_rate": 1.291834002677376e-07, "logits/chosen": -2.500492572784424, "logits/rejected": -2.471306562423706, "logps/chosen": -287.25732421875, "logps/rejected": -366.6937561035156, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7285501956939697, "rewards/margins": 8.849311828613281, "rewards/rejected": -10.577861785888672, "step": 4460 }, { "epoch": 2.31, "learning_rate": 1.2822719449225472e-07, "logits/chosen": -2.5392227172851562, "logits/rejected": -2.475801467895508, "logps/chosen": -227.740478515625, "logps/rejected": -320.9836730957031, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.9318557977676392, "rewards/margins": 7.946051120758057, "rewards/rejected": -9.877906799316406, "step": 4470 }, { "epoch": 2.31, "learning_rate": 1.2727098871677184e-07, "logits/chosen": -2.6095147132873535, "logits/rejected": -2.5397262573242188, "logps/chosen": -350.12664794921875, "logps/rejected": -400.3258361816406, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8414974212646484, "rewards/margins": 9.32072925567627, "rewards/rejected": -10.162227630615234, "step": 4480 }, { "epoch": 2.32, "learning_rate": 1.2631478294128898e-07, "logits/chosen": -2.631654739379883, "logits/rejected": -2.5640525817871094, "logps/chosen": -282.1172180175781, "logps/rejected": -358.6563720703125, "loss": 0.0136, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4470527172088623, "rewards/margins": 8.591032981872559, "rewards/rejected": -10.0380859375, "step": 4490 }, { "epoch": 2.32, "learning_rate": 1.253585771658061e-07, "logits/chosen": -2.5246212482452393, "logits/rejected": -2.528172254562378, "logps/chosen": -256.9081726074219, "logps/rejected": -354.5949401855469, "loss": 0.0193, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.4222064018249512, "rewards/margins": 8.636482238769531, "rewards/rejected": -10.058688163757324, "step": 4500 }, { "epoch": 2.32, "eval_logits/chosen": -2.5622150897979736, "eval_logits/rejected": -2.488191843032837, "eval_logps/chosen": -331.6075439453125, "eval_logps/rejected": -328.6133728027344, "eval_loss": 0.6365222930908203, "eval_rewards/accuracies": 0.783203125, "eval_rewards/chosen": -4.033820629119873, "eval_rewards/margins": 3.3873822689056396, "eval_rewards/rejected": -7.421202659606934, "eval_runtime": 319.5742, "eval_samples_per_second": 6.258, "eval_steps_per_second": 0.1, "step": 4500 }, { "epoch": 2.33, "learning_rate": 1.2440237139032319e-07, "logits/chosen": -2.620438814163208, "logits/rejected": -2.562074899673462, "logps/chosen": -302.58758544921875, "logps/rejected": -339.60516357421875, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.4918501377105713, "rewards/margins": 8.977855682373047, "rewards/rejected": -10.469705581665039, "step": 4510 }, { "epoch": 2.33, "learning_rate": 1.234461656148403e-07, "logits/chosen": -2.5544915199279785, "logits/rejected": -2.532634735107422, "logps/chosen": -329.8451232910156, "logps/rejected": -380.8804016113281, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.2753658294677734, "rewards/margins": 9.633660316467285, "rewards/rejected": -10.909025192260742, "step": 4520 }, { "epoch": 2.34, "learning_rate": 1.2248995983935742e-07, "logits/chosen": -2.6059627532958984, "logits/rejected": -2.558316707611084, "logps/chosen": -308.9573669433594, "logps/rejected": -362.646484375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.3761866092681885, "rewards/margins": 9.357909202575684, "rewards/rejected": -10.734095573425293, "step": 4530 }, { "epoch": 2.34, "learning_rate": 1.2153375406387456e-07, "logits/chosen": -2.5272932052612305, "logits/rejected": -2.5124011039733887, "logps/chosen": -303.6487731933594, "logps/rejected": -392.2650451660156, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.4411789178848267, "rewards/margins": 9.6179780960083, "rewards/rejected": -11.059157371520996, "step": 4540 }, { "epoch": 2.35, "learning_rate": 1.2057754828839165e-07, "logits/chosen": -2.5741665363311768, "logits/rejected": -2.5687332153320312, "logps/chosen": -281.04644775390625, "logps/rejected": -359.9246520996094, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.2585711479187012, "rewards/margins": 9.069477081298828, "rewards/rejected": -10.328048706054688, "step": 4550 }, { "epoch": 2.35, "learning_rate": 1.1962134251290876e-07, "logits/chosen": -2.5465121269226074, "logits/rejected": -2.469954490661621, "logps/chosen": -310.76898193359375, "logps/rejected": -331.4693908691406, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -2.262485980987549, "rewards/margins": 8.27159309387207, "rewards/rejected": -10.534078598022461, "step": 4560 }, { "epoch": 2.36, "learning_rate": 1.1866513673742588e-07, "logits/chosen": -2.5708045959472656, "logits/rejected": -2.5364973545074463, "logps/chosen": -312.3697814941406, "logps/rejected": -322.3011474609375, "loss": 0.0195, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.549739956855774, "rewards/margins": 8.245625495910645, "rewards/rejected": -9.795366287231445, "step": 4570 }, { "epoch": 2.36, "learning_rate": 1.1770893096194301e-07, "logits/chosen": -2.474184513092041, "logits/rejected": -2.3878045082092285, "logps/chosen": -300.81451416015625, "logps/rejected": -360.3260803222656, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.8275524377822876, "rewards/margins": 9.576362609863281, "rewards/rejected": -11.40391731262207, "step": 4580 }, { "epoch": 2.37, "learning_rate": 1.1675272518646012e-07, "logits/chosen": -2.5650835037231445, "logits/rejected": -2.568039655685425, "logps/chosen": -256.2845153808594, "logps/rejected": -350.01287841796875, "loss": 0.0134, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6010682582855225, "rewards/margins": 8.686319351196289, "rewards/rejected": -10.287389755249023, "step": 4590 }, { "epoch": 2.37, "learning_rate": 1.1579651941097724e-07, "logits/chosen": -2.6248526573181152, "logits/rejected": -2.5677542686462402, "logps/chosen": -331.3634338378906, "logps/rejected": -363.09393310546875, "loss": 0.0141, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.800086259841919, "rewards/margins": 9.287454605102539, "rewards/rejected": -11.087540626525879, "step": 4600 }, { "epoch": 2.37, "eval_logits/chosen": -2.5648860931396484, "eval_logits/rejected": -2.489142656326294, "eval_logps/chosen": -342.25701904296875, "eval_logps/rejected": -342.17694091796875, "eval_loss": 0.6965673565864563, "eval_rewards/accuracies": 0.79296875, "eval_rewards/chosen": -4.917724609375, "eval_rewards/margins": 3.6292529106140137, "eval_rewards/rejected": -8.546977043151855, "eval_runtime": 318.3886, "eval_samples_per_second": 6.282, "eval_steps_per_second": 0.101, "step": 4600 }, { "epoch": 2.38, "learning_rate": 1.1484031363549436e-07, "logits/chosen": -2.498725652694702, "logits/rejected": -2.441718339920044, "logps/chosen": -321.1325988769531, "logps/rejected": -368.9969787597656, "loss": 0.019, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.2213826179504395, "rewards/margins": 9.481762886047363, "rewards/rejected": -11.703145980834961, "step": 4610 }, { "epoch": 2.39, "learning_rate": 1.1388410786001147e-07, "logits/chosen": -2.592276096343994, "logits/rejected": -2.589096784591675, "logps/chosen": -274.46533203125, "logps/rejected": -378.339111328125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.0332369804382324, "rewards/margins": 9.035720825195312, "rewards/rejected": -11.068957328796387, "step": 4620 }, { "epoch": 2.39, "learning_rate": 1.1292790208452859e-07, "logits/chosen": -2.5966005325317383, "logits/rejected": -2.531064987182617, "logps/chosen": -245.4945526123047, "logps/rejected": -343.61370849609375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -1.7675237655639648, "rewards/margins": 8.607636451721191, "rewards/rejected": -10.375160217285156, "step": 4630 }, { "epoch": 2.4, "learning_rate": 1.119716963090457e-07, "logits/chosen": -2.538722515106201, "logits/rejected": -2.528024196624756, "logps/chosen": -264.6753845214844, "logps/rejected": -363.26507568359375, "loss": 0.0123, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.035627603530884, "rewards/margins": 8.781867027282715, "rewards/rejected": -10.81749439239502, "step": 4640 }, { "epoch": 2.4, "learning_rate": 1.1101549053356282e-07, "logits/chosen": -2.559422016143799, "logits/rejected": -2.5168871879577637, "logps/chosen": -305.71759033203125, "logps/rejected": -368.6883239746094, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.982983946800232, "rewards/margins": 9.25355339050293, "rewards/rejected": -11.236538887023926, "step": 4650 }, { "epoch": 2.41, "learning_rate": 1.1005928475807993e-07, "logits/chosen": -2.5026497840881348, "logits/rejected": -2.4770092964172363, "logps/chosen": -254.85073852539062, "logps/rejected": -348.93524169921875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.375333309173584, "rewards/margins": 8.748095512390137, "rewards/rejected": -11.123429298400879, "step": 4660 }, { "epoch": 2.41, "learning_rate": 1.0910307898259705e-07, "logits/chosen": -2.4617130756378174, "logits/rejected": -2.469223976135254, "logps/chosen": -272.6137390136719, "logps/rejected": -394.5780029296875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.8366597890853882, "rewards/margins": 9.062443733215332, "rewards/rejected": -10.899103164672852, "step": 4670 }, { "epoch": 2.42, "learning_rate": 1.0814687320711418e-07, "logits/chosen": -2.404995918273926, "logits/rejected": -2.4313814640045166, "logps/chosen": -245.19973754882812, "logps/rejected": -394.5294494628906, "loss": 0.0098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.027427911758423, "rewards/margins": 10.42660140991211, "rewards/rejected": -12.45403003692627, "step": 4680 }, { "epoch": 2.42, "learning_rate": 1.0719066743163128e-07, "logits/chosen": -2.5163486003875732, "logits/rejected": -2.491466999053955, "logps/chosen": -340.662109375, "logps/rejected": -385.46356201171875, "loss": 0.0181, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.5398316383361816, "rewards/margins": 9.674993515014648, "rewards/rejected": -12.214825630187988, "step": 4690 }, { "epoch": 2.43, "learning_rate": 1.062344616561484e-07, "logits/chosen": -2.5108859539031982, "logits/rejected": -2.4712326526641846, "logps/chosen": -305.0962219238281, "logps/rejected": -389.53076171875, "loss": 0.0126, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.11510968208313, "rewards/margins": 10.006596565246582, "rewards/rejected": -12.121706008911133, "step": 4700 }, { "epoch": 2.43, "eval_logits/chosen": -2.5245583057403564, "eval_logits/rejected": -2.4465231895446777, "eval_logps/chosen": -342.8072509765625, "eval_logps/rejected": -342.7202453613281, "eval_loss": 0.6971668601036072, "eval_rewards/accuracies": 0.794921875, "eval_rewards/chosen": -4.963394641876221, "eval_rewards/margins": 3.6286776065826416, "eval_rewards/rejected": -8.592071533203125, "eval_runtime": 319.4042, "eval_samples_per_second": 6.262, "eval_steps_per_second": 0.1, "step": 4700 }, { "epoch": 2.43, "learning_rate": 1.0527825588066551e-07, "logits/chosen": -2.463685989379883, "logits/rejected": -2.4428679943084717, "logps/chosen": -307.13800048828125, "logps/rejected": -377.75396728515625, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8499171733856201, "rewards/margins": 11.113882064819336, "rewards/rejected": -12.963800430297852, "step": 4710 }, { "epoch": 2.44, "learning_rate": 1.0432205010518264e-07, "logits/chosen": -2.4915518760681152, "logits/rejected": -2.4777255058288574, "logps/chosen": -263.48199462890625, "logps/rejected": -392.24163818359375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.8751649856567383, "rewards/margins": 10.635249137878418, "rewards/rejected": -12.510414123535156, "step": 4720 }, { "epoch": 2.44, "learning_rate": 1.0336584432969974e-07, "logits/chosen": -2.4323394298553467, "logits/rejected": -2.3600401878356934, "logps/chosen": -256.75836181640625, "logps/rejected": -352.5439147949219, "loss": 0.0115, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6668592691421509, "rewards/margins": 9.645096778869629, "rewards/rejected": -11.311955451965332, "step": 4730 }, { "epoch": 2.45, "learning_rate": 1.0240963855421686e-07, "logits/chosen": -2.4249470233917236, "logits/rejected": -2.396257162094116, "logps/chosen": -342.30975341796875, "logps/rejected": -376.93597412109375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.862187147140503, "rewards/margins": 9.38375473022461, "rewards/rejected": -11.245942115783691, "step": 4740 }, { "epoch": 2.45, "learning_rate": 1.0145343277873399e-07, "logits/chosen": -2.468728542327881, "logits/rejected": -2.467491865158081, "logps/chosen": -333.68365478515625, "logps/rejected": -390.3247985839844, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.2577388286590576, "rewards/margins": 9.807296752929688, "rewards/rejected": -12.065035820007324, "step": 4750 }, { "epoch": 2.46, "learning_rate": 1.004972270032511e-07, "logits/chosen": -2.3522987365722656, "logits/rejected": -2.250192165374756, "logps/chosen": -267.41033935546875, "logps/rejected": -322.203857421875, "loss": 0.0111, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.5054080486297607, "rewards/margins": 9.13123893737793, "rewards/rejected": -11.636646270751953, "step": 4760 }, { "epoch": 2.46, "learning_rate": 9.95410212277682e-08, "logits/chosen": -2.497518539428711, "logits/rejected": -2.425494432449341, "logps/chosen": -299.71771240234375, "logps/rejected": -383.6201477050781, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.1493115425109863, "rewards/margins": 9.863985061645508, "rewards/rejected": -12.013298034667969, "step": 4770 }, { "epoch": 2.47, "learning_rate": 9.858481545228532e-08, "logits/chosen": -2.5256683826446533, "logits/rejected": -2.3960118293762207, "logps/chosen": -315.375732421875, "logps/rejected": -353.1593017578125, "loss": 0.0098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.316373348236084, "rewards/margins": 9.379770278930664, "rewards/rejected": -11.696145057678223, "step": 4780 }, { "epoch": 2.47, "learning_rate": 9.762860967680245e-08, "logits/chosen": -2.4524011611938477, "logits/rejected": -2.3414859771728516, "logps/chosen": -316.0469055175781, "logps/rejected": -404.5767517089844, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.201435089111328, "rewards/margins": 10.279439926147461, "rewards/rejected": -13.480875015258789, "step": 4790 }, { "epoch": 2.48, "learning_rate": 9.667240390131957e-08, "logits/chosen": -2.5935683250427246, "logits/rejected": -2.4577014446258545, "logps/chosen": -325.57415771484375, "logps/rejected": -380.89544677734375, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9809951782226562, "rewards/margins": 9.919576644897461, "rewards/rejected": -11.9005708694458, "step": 4800 }, { "epoch": 2.48, "eval_logits/chosen": -2.5738141536712646, "eval_logits/rejected": -2.497675657272339, "eval_logps/chosen": -339.61773681640625, "eval_logps/rejected": -338.5913391113281, "eval_loss": 0.6804133057594299, "eval_rewards/accuracies": 0.783203125, "eval_rewards/chosen": -4.698667526245117, "eval_rewards/margins": 3.550706624984741, "eval_rewards/rejected": -8.249374389648438, "eval_runtime": 318.4418, "eval_samples_per_second": 6.281, "eval_steps_per_second": 0.1, "step": 4800 }, { "epoch": 2.48, "learning_rate": 9.571619812583667e-08, "logits/chosen": -2.4724533557891846, "logits/rejected": -2.453674554824829, "logps/chosen": -302.52337646484375, "logps/rejected": -376.511474609375, "loss": 0.0135, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6175811290740967, "rewards/margins": 9.42072582244873, "rewards/rejected": -11.038309097290039, "step": 4810 }, { "epoch": 2.49, "learning_rate": 9.47599923503538e-08, "logits/chosen": -2.5494678020477295, "logits/rejected": -2.469895839691162, "logps/chosen": -287.8092346191406, "logps/rejected": -370.8468322753906, "loss": 0.0226, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.838513970375061, "rewards/margins": 9.084135055541992, "rewards/rejected": -10.922649383544922, "step": 4820 }, { "epoch": 2.49, "learning_rate": 9.380378657487091e-08, "logits/chosen": -2.588010311126709, "logits/rejected": -2.564948320388794, "logps/chosen": -289.35943603515625, "logps/rejected": -354.00848388671875, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -1.926978349685669, "rewards/margins": 9.108599662780762, "rewards/rejected": -11.035577774047852, "step": 4830 }, { "epoch": 2.5, "learning_rate": 9.284758079938803e-08, "logits/chosen": -2.566575765609741, "logits/rejected": -2.4745187759399414, "logps/chosen": -293.2911071777344, "logps/rejected": -345.7245178222656, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.663283348083496, "rewards/margins": 8.808053970336914, "rewards/rejected": -10.47133731842041, "step": 4840 }, { "epoch": 2.5, "learning_rate": 9.189137502390513e-08, "logits/chosen": -2.5855600833892822, "logits/rejected": -2.5306286811828613, "logps/chosen": -301.97222900390625, "logps/rejected": -422.9330139160156, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.4876444339752197, "rewards/margins": 8.726387977600098, "rewards/rejected": -11.214032173156738, "step": 4850 }, { "epoch": 2.51, "learning_rate": 9.093516924842226e-08, "logits/chosen": -2.5165441036224365, "logits/rejected": -2.525033950805664, "logps/chosen": -280.6492919921875, "logps/rejected": -380.94915771484375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.6511640548706055, "rewards/margins": 9.781908988952637, "rewards/rejected": -11.433073043823242, "step": 4860 }, { "epoch": 2.51, "learning_rate": 8.997896347293938e-08, "logits/chosen": -2.5831665992736816, "logits/rejected": -2.559399127960205, "logps/chosen": -295.9663391113281, "logps/rejected": -402.94769287109375, "loss": 0.0145, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.627557396888733, "rewards/margins": 9.204852104187012, "rewards/rejected": -10.832409858703613, "step": 4870 }, { "epoch": 2.52, "learning_rate": 8.902275769745648e-08, "logits/chosen": -2.487705945968628, "logits/rejected": -2.5085301399230957, "logps/chosen": -263.83251953125, "logps/rejected": -374.3230285644531, "loss": 0.0123, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.8174574375152588, "rewards/margins": 9.712531089782715, "rewards/rejected": -11.529987335205078, "step": 4880 }, { "epoch": 2.52, "learning_rate": 8.806655192197361e-08, "logits/chosen": -2.4663872718811035, "logits/rejected": -2.494250774383545, "logps/chosen": -253.3268585205078, "logps/rejected": -385.09674072265625, "loss": 0.01, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.0438319444656372, "rewards/margins": 10.672999382019043, "rewards/rejected": -11.716833114624023, "step": 4890 }, { "epoch": 2.53, "learning_rate": 8.711034614649072e-08, "logits/chosen": -2.5544614791870117, "logits/rejected": -2.4938340187072754, "logps/chosen": -279.287109375, "logps/rejected": -376.6075439453125, "loss": 0.0232, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.2230172157287598, "rewards/margins": 9.40105152130127, "rewards/rejected": -11.624067306518555, "step": 4900 }, { "epoch": 2.53, "eval_logits/chosen": -2.595872402191162, "eval_logits/rejected": -2.516965866088867, "eval_logps/chosen": -333.196044921875, "eval_logps/rejected": -329.98468017578125, "eval_loss": 0.6464589238166809, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -4.1656646728515625, "eval_rewards/margins": 3.369356393814087, "eval_rewards/rejected": -7.53502082824707, "eval_runtime": 320.304, "eval_samples_per_second": 6.244, "eval_steps_per_second": 0.1, "step": 4900 }, { "epoch": 2.53, "learning_rate": 8.615414037100784e-08, "logits/chosen": -2.5359928607940674, "logits/rejected": -2.4233551025390625, "logps/chosen": -351.11895751953125, "logps/rejected": -356.0968322753906, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.0170377492904663, "rewards/margins": 9.062870979309082, "rewards/rejected": -10.079909324645996, "step": 4910 }, { "epoch": 2.54, "learning_rate": 8.519793459552494e-08, "logits/chosen": -2.5489351749420166, "logits/rejected": -2.443962574005127, "logps/chosen": -273.9066162109375, "logps/rejected": -355.36383056640625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -1.6547244787216187, "rewards/margins": 9.063847541809082, "rewards/rejected": -10.718571662902832, "step": 4920 }, { "epoch": 2.55, "learning_rate": 8.424172882004207e-08, "logits/chosen": -2.5692667961120605, "logits/rejected": -2.5660996437072754, "logps/chosen": -291.45428466796875, "logps/rejected": -392.21148681640625, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -1.2204498052597046, "rewards/margins": 9.119312286376953, "rewards/rejected": -10.339762687683105, "step": 4930 }, { "epoch": 2.55, "learning_rate": 8.328552304455919e-08, "logits/chosen": -2.5892844200134277, "logits/rejected": -2.531513214111328, "logps/chosen": -261.0030822753906, "logps/rejected": -339.7623291015625, "loss": 0.0133, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.2518980503082275, "rewards/margins": 9.087709426879883, "rewards/rejected": -10.339608192443848, "step": 4940 }, { "epoch": 2.56, "learning_rate": 8.23293172690763e-08, "logits/chosen": -2.587712049484253, "logits/rejected": -2.5394985675811768, "logps/chosen": -292.624267578125, "logps/rejected": -367.47125244140625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.865768313407898, "rewards/margins": 10.39607048034668, "rewards/rejected": -11.261838912963867, "step": 4950 }, { "epoch": 2.56, "learning_rate": 8.137311149359343e-08, "logits/chosen": -2.7129569053649902, "logits/rejected": -2.5451972484588623, "logps/chosen": -343.82586669921875, "logps/rejected": -362.82281494140625, "loss": 0.0042, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.241144061088562, "rewards/margins": 9.968961715698242, "rewards/rejected": -11.21010684967041, "step": 4960 }, { "epoch": 2.57, "learning_rate": 8.041690571811053e-08, "logits/chosen": -2.5828230381011963, "logits/rejected": -2.610429286956787, "logps/chosen": -301.0491638183594, "logps/rejected": -375.3876037597656, "loss": 0.0184, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.9739782810211182, "rewards/margins": 10.063211441040039, "rewards/rejected": -12.037190437316895, "step": 4970 }, { "epoch": 2.57, "learning_rate": 7.946069994262765e-08, "logits/chosen": -2.560863971710205, "logits/rejected": -2.506436824798584, "logps/chosen": -275.01666259765625, "logps/rejected": -338.76593017578125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.6348235607147217, "rewards/margins": 8.857879638671875, "rewards/rejected": -10.492703437805176, "step": 4980 }, { "epoch": 2.58, "learning_rate": 7.850449416714476e-08, "logits/chosen": -2.614187240600586, "logits/rejected": -2.5568108558654785, "logps/chosen": -317.72760009765625, "logps/rejected": -368.532958984375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.08376145362854, "rewards/margins": 8.628350257873535, "rewards/rejected": -10.712111473083496, "step": 4990 }, { "epoch": 2.58, "learning_rate": 7.754828839166188e-08, "logits/chosen": -2.59089994430542, "logits/rejected": -2.5594866275787354, "logps/chosen": -289.4053039550781, "logps/rejected": -379.1748962402344, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.9431779384613037, "rewards/margins": 9.281224250793457, "rewards/rejected": -11.22440242767334, "step": 5000 }, { "epoch": 2.58, "eval_logits/chosen": -2.604246139526367, "eval_logits/rejected": -2.525014638900757, "eval_logps/chosen": -340.39959716796875, "eval_logps/rejected": -340.3017272949219, "eval_loss": 0.6717602014541626, "eval_rewards/accuracies": 0.791015625, "eval_rewards/chosen": -4.7635602951049805, "eval_rewards/margins": 3.6277759075164795, "eval_rewards/rejected": -8.391336441040039, "eval_runtime": 318.043, "eval_samples_per_second": 6.288, "eval_steps_per_second": 0.101, "step": 5000 }, { "epoch": 2.59, "learning_rate": 7.6592082616179e-08, "logits/chosen": -2.5820934772491455, "logits/rejected": -2.5470452308654785, "logps/chosen": -335.5353088378906, "logps/rejected": -404.00274658203125, "loss": 0.0164, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.3382019996643066, "rewards/margins": 9.839495658874512, "rewards/rejected": -12.177698135375977, "step": 5010 }, { "epoch": 2.59, "learning_rate": 7.563587684069611e-08, "logits/chosen": -2.491746425628662, "logits/rejected": -2.4980692863464355, "logps/chosen": -293.9728698730469, "logps/rejected": -380.77423095703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.9554068446159363, "rewards/margins": 10.293642044067383, "rewards/rejected": -11.249048233032227, "step": 5020 }, { "epoch": 2.6, "learning_rate": 7.467967106521324e-08, "logits/chosen": -2.5577642917633057, "logits/rejected": -2.498095989227295, "logps/chosen": -264.3507995605469, "logps/rejected": -289.55059814453125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.4294242858886719, "rewards/margins": 8.731189727783203, "rewards/rejected": -10.160614967346191, "step": 5030 }, { "epoch": 2.6, "learning_rate": 7.372346528973034e-08, "logits/chosen": -2.6247453689575195, "logits/rejected": -2.5211758613586426, "logps/chosen": -307.9507141113281, "logps/rejected": -353.5289611816406, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.5065128803253174, "rewards/margins": 8.784669876098633, "rewards/rejected": -11.291183471679688, "step": 5040 }, { "epoch": 2.61, "learning_rate": 7.276725951424746e-08, "logits/chosen": -2.4845967292785645, "logits/rejected": -2.469513416290283, "logps/chosen": -277.8036804199219, "logps/rejected": -371.1934509277344, "loss": 0.0149, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0346837043762207, "rewards/margins": 9.958390235900879, "rewards/rejected": -11.993074417114258, "step": 5050 }, { "epoch": 2.61, "learning_rate": 7.181105373876457e-08, "logits/chosen": -2.5248143672943115, "logits/rejected": -2.431788206100464, "logps/chosen": -282.77972412109375, "logps/rejected": -336.2415771484375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.7290560007095337, "rewards/margins": 9.321789741516113, "rewards/rejected": -11.050846099853516, "step": 5060 }, { "epoch": 2.62, "learning_rate": 7.08548479632817e-08, "logits/chosen": -2.5115160942077637, "logits/rejected": -2.453545093536377, "logps/chosen": -328.39215087890625, "logps/rejected": -366.82623291015625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.5835685729980469, "rewards/margins": 9.910944938659668, "rewards/rejected": -11.494514465332031, "step": 5070 }, { "epoch": 2.62, "learning_rate": 6.98986421877988e-08, "logits/chosen": -2.5842628479003906, "logits/rejected": -2.508204936981201, "logps/chosen": -309.05963134765625, "logps/rejected": -390.99676513671875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.921203851699829, "rewards/margins": 9.40475082397461, "rewards/rejected": -11.325955390930176, "step": 5080 }, { "epoch": 2.63, "learning_rate": 6.894243641231592e-08, "logits/chosen": -2.4735982418060303, "logits/rejected": -2.429694175720215, "logps/chosen": -280.0932312011719, "logps/rejected": -370.35150146484375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.730223298072815, "rewards/margins": 10.40523910522461, "rewards/rejected": -12.135461807250977, "step": 5090 }, { "epoch": 2.63, "learning_rate": 6.798623063683305e-08, "logits/chosen": -2.472644329071045, "logits/rejected": -2.3655731678009033, "logps/chosen": -289.5957946777344, "logps/rejected": -376.9017028808594, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.7777118682861328, "rewards/margins": 11.044604301452637, "rewards/rejected": -12.822317123413086, "step": 5100 }, { "epoch": 2.63, "eval_logits/chosen": -2.5831332206726074, "eval_logits/rejected": -2.501971960067749, "eval_logps/chosen": -340.5081481933594, "eval_logps/rejected": -341.53564453125, "eval_loss": 0.6863308548927307, "eval_rewards/accuracies": 0.79296875, "eval_rewards/chosen": -4.77256965637207, "eval_rewards/margins": 3.7211790084838867, "eval_rewards/rejected": -8.493749618530273, "eval_runtime": 320.9983, "eval_samples_per_second": 6.231, "eval_steps_per_second": 0.1, "step": 5100 }, { "epoch": 2.64, "learning_rate": 6.703002486135017e-08, "logits/chosen": -2.486607313156128, "logits/rejected": -2.427100896835327, "logps/chosen": -260.5155029296875, "logps/rejected": -389.38653564453125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.7357391119003296, "rewards/margins": 10.936002731323242, "rewards/rejected": -12.671743392944336, "step": 5110 }, { "epoch": 2.64, "learning_rate": 6.607381908586727e-08, "logits/chosen": -2.5482192039489746, "logits/rejected": -2.427295207977295, "logps/chosen": -301.60650634765625, "logps/rejected": -414.67156982421875, "loss": 0.0166, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6183178424835205, "rewards/margins": 10.478198051452637, "rewards/rejected": -12.096515655517578, "step": 5120 }, { "epoch": 2.65, "learning_rate": 6.511761331038438e-08, "logits/chosen": -2.550570011138916, "logits/rejected": -2.5209152698516846, "logps/chosen": -248.3583984375, "logps/rejected": -350.33837890625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.985918402671814, "rewards/margins": 9.335420608520508, "rewards/rejected": -11.321340560913086, "step": 5130 }, { "epoch": 2.65, "learning_rate": 6.416140753490151e-08, "logits/chosen": -2.5878586769104004, "logits/rejected": -2.528557300567627, "logps/chosen": -335.5371398925781, "logps/rejected": -398.05340576171875, "loss": 0.0131, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.6360633373260498, "rewards/margins": 9.230558395385742, "rewards/rejected": -10.866621017456055, "step": 5140 }, { "epoch": 2.66, "learning_rate": 6.320520175941863e-08, "logits/chosen": -2.4938244819641113, "logits/rejected": -2.401942253112793, "logps/chosen": -277.12017822265625, "logps/rejected": -349.9761657714844, "loss": 0.0073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9788345098495483, "rewards/margins": 9.517779350280762, "rewards/rejected": -11.496614456176758, "step": 5150 }, { "epoch": 2.66, "learning_rate": 6.224899598393573e-08, "logits/chosen": -2.6075191497802734, "logits/rejected": -2.545313835144043, "logps/chosen": -336.71490478515625, "logps/rejected": -410.62725830078125, "loss": 0.0106, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6410192251205444, "rewards/margins": 9.674164772033691, "rewards/rejected": -11.315183639526367, "step": 5160 }, { "epoch": 2.67, "learning_rate": 6.129279020845286e-08, "logits/chosen": -2.6234943866729736, "logits/rejected": -2.409572124481201, "logps/chosen": -284.7733154296875, "logps/rejected": -336.3525695800781, "loss": 0.0098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7556365728378296, "rewards/margins": 9.638923645019531, "rewards/rejected": -11.394559860229492, "step": 5170 }, { "epoch": 2.67, "learning_rate": 6.033658443296998e-08, "logits/chosen": -2.5837929248809814, "logits/rejected": -2.5347201824188232, "logps/chosen": -313.72296142578125, "logps/rejected": -380.16876220703125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.0695643424987793, "rewards/margins": 9.706546783447266, "rewards/rejected": -11.776111602783203, "step": 5180 }, { "epoch": 2.68, "learning_rate": 5.9380378657487085e-08, "logits/chosen": -2.5611135959625244, "logits/rejected": -2.5496647357940674, "logps/chosen": -313.5663146972656, "logps/rejected": -379.2289123535156, "loss": 0.0053, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8659000396728516, "rewards/margins": 9.83556842803955, "rewards/rejected": -11.701469421386719, "step": 5190 }, { "epoch": 2.68, "learning_rate": 5.842417288200421e-08, "logits/chosen": -2.580559492111206, "logits/rejected": -2.5791428089141846, "logps/chosen": -327.03900146484375, "logps/rejected": -390.4469909667969, "loss": 0.0127, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.6271005868911743, "rewards/margins": 10.176151275634766, "rewards/rejected": -11.803252220153809, "step": 5200 }, { "epoch": 2.68, "eval_logits/chosen": -2.584197998046875, "eval_logits/rejected": -2.5054285526275635, "eval_logps/chosen": -345.98077392578125, "eval_logps/rejected": -348.4450988769531, "eval_loss": 0.7056116461753845, "eval_rewards/accuracies": 0.791015625, "eval_rewards/chosen": -5.226799011230469, "eval_rewards/margins": 3.8404362201690674, "eval_rewards/rejected": -9.067235946655273, "eval_runtime": 317.9898, "eval_samples_per_second": 6.29, "eval_steps_per_second": 0.101, "step": 5200 }, { "epoch": 2.69, "learning_rate": 5.7467967106521317e-08, "logits/chosen": -2.51556134223938, "logits/rejected": -2.508523464202881, "logps/chosen": -237.5015106201172, "logps/rejected": -370.477783203125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.4638724327087402, "rewards/margins": 9.642496109008789, "rewards/rejected": -12.106368064880371, "step": 5210 }, { "epoch": 2.69, "learning_rate": 5.651176133103844e-08, "logits/chosen": -2.6352040767669678, "logits/rejected": -2.569431781768799, "logps/chosen": -299.9414978027344, "logps/rejected": -404.7259521484375, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6761621236801147, "rewards/margins": 10.782684326171875, "rewards/rejected": -12.458847045898438, "step": 5220 }, { "epoch": 2.7, "learning_rate": 5.555555555555555e-08, "logits/chosen": -2.51019549369812, "logits/rejected": -2.4396004676818848, "logps/chosen": -287.68170166015625, "logps/rejected": -327.1431884765625, "loss": 0.0091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.096198558807373, "rewards/margins": 9.429479598999023, "rewards/rejected": -11.525676727294922, "step": 5230 }, { "epoch": 2.71, "learning_rate": 5.459934978007267e-08, "logits/chosen": -2.5686802864074707, "logits/rejected": -2.5236449241638184, "logps/chosen": -306.9776916503906, "logps/rejected": -387.10479736328125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.9488521814346313, "rewards/margins": 10.734182357788086, "rewards/rejected": -12.683034896850586, "step": 5240 }, { "epoch": 2.71, "learning_rate": 5.3643144004589786e-08, "logits/chosen": -2.516479015350342, "logits/rejected": -2.4240593910217285, "logps/chosen": -344.1722412109375, "logps/rejected": -379.5743713378906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.175539255142212, "rewards/margins": 10.531427383422852, "rewards/rejected": -11.706968307495117, "step": 5250 }, { "epoch": 2.72, "learning_rate": 5.26869382291069e-08, "logits/chosen": -2.5235304832458496, "logits/rejected": -2.4926791191101074, "logps/chosen": -266.27349853515625, "logps/rejected": -336.34808349609375, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1276798248291016, "rewards/margins": 9.203332901000977, "rewards/rejected": -11.331011772155762, "step": 5260 }, { "epoch": 2.72, "learning_rate": 5.173073245362402e-08, "logits/chosen": -2.519742965698242, "logits/rejected": -2.4678268432617188, "logps/chosen": -314.0550842285156, "logps/rejected": -392.15216064453125, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.894164800643921, "rewards/margins": 8.993600845336914, "rewards/rejected": -11.887765884399414, "step": 5270 }, { "epoch": 2.73, "learning_rate": 5.077452667814113e-08, "logits/chosen": -2.563774347305298, "logits/rejected": -2.437164783477783, "logps/chosen": -265.26861572265625, "logps/rejected": -370.3797607421875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.3756794929504395, "rewards/margins": 10.7139310836792, "rewards/rejected": -13.089612007141113, "step": 5280 }, { "epoch": 2.73, "learning_rate": 4.981832090265825e-08, "logits/chosen": -2.5724966526031494, "logits/rejected": -2.516940116882324, "logps/chosen": -274.1853332519531, "logps/rejected": -368.7134094238281, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.894335389137268, "rewards/margins": 10.603357315063477, "rewards/rejected": -12.49769401550293, "step": 5290 }, { "epoch": 2.74, "learning_rate": 4.8862115127175364e-08, "logits/chosen": -2.6790122985839844, "logits/rejected": -2.4961013793945312, "logps/chosen": -323.95361328125, "logps/rejected": -380.634765625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.549590587615967, "rewards/margins": 9.533754348754883, "rewards/rejected": -12.083345413208008, "step": 5300 }, { "epoch": 2.74, "eval_logits/chosen": -2.624846935272217, "eval_logits/rejected": -2.5487570762634277, "eval_logps/chosen": -341.4156799316406, "eval_logps/rejected": -343.13934326171875, "eval_loss": 0.6885589361190796, "eval_rewards/accuracies": 0.794921875, "eval_rewards/chosen": -4.847894191741943, "eval_rewards/margins": 3.7789649963378906, "eval_rewards/rejected": -8.626859664916992, "eval_runtime": 318.9446, "eval_samples_per_second": 6.271, "eval_steps_per_second": 0.1, "step": 5300 }, { "epoch": 2.74, "learning_rate": 4.790590935169248e-08, "logits/chosen": -2.5632383823394775, "logits/rejected": -2.5772087574005127, "logps/chosen": -275.593994140625, "logps/rejected": -451.9659118652344, "loss": 0.0124, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.454101085662842, "rewards/margins": 9.966111183166504, "rewards/rejected": -12.420210838317871, "step": 5310 }, { "epoch": 2.75, "learning_rate": 4.69497035762096e-08, "logits/chosen": -2.557803153991699, "logits/rejected": -2.4479947090148926, "logps/chosen": -275.3583679199219, "logps/rejected": -362.67803955078125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.999619483947754, "rewards/margins": 9.817155838012695, "rewards/rejected": -12.81677532196045, "step": 5320 }, { "epoch": 2.75, "learning_rate": 4.599349780072671e-08, "logits/chosen": -2.5632691383361816, "logits/rejected": -2.4367032051086426, "logps/chosen": -311.99871826171875, "logps/rejected": -348.1423645019531, "loss": 0.011, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.8917236328125, "rewards/margins": 9.827619552612305, "rewards/rejected": -11.719342231750488, "step": 5330 }, { "epoch": 2.76, "learning_rate": 4.5037292025243834e-08, "logits/chosen": -2.559521198272705, "logits/rejected": -2.5648350715637207, "logps/chosen": -316.2248840332031, "logps/rejected": -407.82940673828125, "loss": 0.0084, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.9234721660614014, "rewards/margins": 9.798246383666992, "rewards/rejected": -11.721719741821289, "step": 5340 }, { "epoch": 2.76, "learning_rate": 4.408108624976094e-08, "logits/chosen": -2.657686233520508, "logits/rejected": -2.598619222640991, "logps/chosen": -292.7362365722656, "logps/rejected": -408.7117004394531, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.6331180334091187, "rewards/margins": 11.511256217956543, "rewards/rejected": -13.144373893737793, "step": 5350 }, { "epoch": 2.77, "learning_rate": 4.3124880474278065e-08, "logits/chosen": -2.631532907485962, "logits/rejected": -2.6100709438323975, "logps/chosen": -270.24896240234375, "logps/rejected": -350.3780212402344, "loss": 0.0217, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.8933576345443726, "rewards/margins": 9.350987434387207, "rewards/rejected": -11.244342803955078, "step": 5360 }, { "epoch": 2.77, "learning_rate": 4.2168674698795174e-08, "logits/chosen": -2.572918176651001, "logits/rejected": -2.500176191329956, "logps/chosen": -265.62957763671875, "logps/rejected": -385.9189453125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.0787353515625, "rewards/margins": 10.076845169067383, "rewards/rejected": -12.155580520629883, "step": 5370 }, { "epoch": 2.78, "learning_rate": 4.1212468923312296e-08, "logits/chosen": -2.4669809341430664, "logits/rejected": -2.4324145317077637, "logps/chosen": -298.2072448730469, "logps/rejected": -385.07415771484375, "loss": 0.014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9495398998260498, "rewards/margins": 9.755758285522461, "rewards/rejected": -11.70529842376709, "step": 5380 }, { "epoch": 2.78, "learning_rate": 4.025626314782941e-08, "logits/chosen": -2.4328222274780273, "logits/rejected": -2.4482827186584473, "logps/chosen": -290.0146789550781, "logps/rejected": -357.91998291015625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.8055553436279297, "rewards/margins": 9.906686782836914, "rewards/rejected": -11.71224308013916, "step": 5390 }, { "epoch": 2.79, "learning_rate": 3.930005737234653e-08, "logits/chosen": -2.6099352836608887, "logits/rejected": -2.5663235187530518, "logps/chosen": -269.28533935546875, "logps/rejected": -321.4599304199219, "loss": 0.0132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.9380912780761719, "rewards/margins": 9.027185440063477, "rewards/rejected": -10.965276718139648, "step": 5400 }, { "epoch": 2.79, "eval_logits/chosen": -2.6259608268737793, "eval_logits/rejected": -2.550081253051758, "eval_logps/chosen": -339.6431579589844, "eval_logps/rejected": -340.4170227050781, "eval_loss": 0.6838744282722473, "eval_rewards/accuracies": 0.79296875, "eval_rewards/chosen": -4.700774669647217, "eval_rewards/margins": 3.7001309394836426, "eval_rewards/rejected": -8.40090560913086, "eval_runtime": 321.2762, "eval_samples_per_second": 6.225, "eval_steps_per_second": 0.1, "step": 5400 }, { "epoch": 2.79, "learning_rate": 3.8343851596863644e-08, "logits/chosen": -2.603602170944214, "logits/rejected": -2.5159850120544434, "logps/chosen": -262.61260986328125, "logps/rejected": -332.5048828125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.4028029441833496, "rewards/margins": 10.283886909484863, "rewards/rejected": -11.686689376831055, "step": 5410 }, { "epoch": 2.8, "learning_rate": 3.738764582138076e-08, "logits/chosen": -2.5683610439300537, "logits/rejected": -2.4774279594421387, "logps/chosen": -337.24591064453125, "logps/rejected": -391.3978576660156, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.9349607229232788, "rewards/margins": 9.81804370880127, "rewards/rejected": -11.75300407409668, "step": 5420 }, { "epoch": 2.8, "learning_rate": 3.6431440045897875e-08, "logits/chosen": -2.541027307510376, "logits/rejected": -2.482694387435913, "logps/chosen": -287.49285888671875, "logps/rejected": -396.57135009765625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.5601873397827148, "rewards/margins": 10.287034034729004, "rewards/rejected": -11.847220420837402, "step": 5430 }, { "epoch": 2.81, "learning_rate": 3.547523427041499e-08, "logits/chosen": -2.658437490463257, "logits/rejected": -2.630117177963257, "logps/chosen": -300.69720458984375, "logps/rejected": -413.85137939453125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.4356235265731812, "rewards/margins": 10.68783187866211, "rewards/rejected": -12.123456001281738, "step": 5440 }, { "epoch": 2.81, "learning_rate": 3.4519028494932106e-08, "logits/chosen": -2.5041563510894775, "logits/rejected": -2.4459660053253174, "logps/chosen": -304.7166748046875, "logps/rejected": -392.0087890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.8776581287384033, "rewards/margins": 10.753235816955566, "rewards/rejected": -12.630894660949707, "step": 5450 }, { "epoch": 2.82, "learning_rate": 3.356282271944923e-08, "logits/chosen": -2.5902135372161865, "logits/rejected": -2.6090686321258545, "logps/chosen": -273.72021484375, "logps/rejected": -397.5484313964844, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.7490066289901733, "rewards/margins": 10.411901473999023, "rewards/rejected": -12.160909652709961, "step": 5460 }, { "epoch": 2.82, "learning_rate": 3.260661694396634e-08, "logits/chosen": -2.690364360809326, "logits/rejected": -2.5692737102508545, "logps/chosen": -343.971435546875, "logps/rejected": -362.0664367675781, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.4323015213012695, "rewards/margins": 10.527503967285156, "rewards/rejected": -11.95980453491211, "step": 5470 }, { "epoch": 2.83, "learning_rate": 3.165041116848346e-08, "logits/chosen": -2.5867881774902344, "logits/rejected": -2.5974814891815186, "logps/chosen": -286.1340026855469, "logps/rejected": -450.23468017578125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.1082751750946045, "rewards/margins": 10.532219886779785, "rewards/rejected": -12.640494346618652, "step": 5480 }, { "epoch": 2.83, "learning_rate": 3.0694205393000576e-08, "logits/chosen": -2.5749049186706543, "logits/rejected": -2.5124335289001465, "logps/chosen": -271.21044921875, "logps/rejected": -367.52923583984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0981590747833252, "rewards/margins": 10.481293678283691, "rewards/rejected": -11.579452514648438, "step": 5490 }, { "epoch": 2.84, "learning_rate": 2.9737999617517688e-08, "logits/chosen": -2.5593762397766113, "logits/rejected": -2.4909842014312744, "logps/chosen": -312.36419677734375, "logps/rejected": -357.9867858886719, "loss": 0.0103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.9348328113555908, "rewards/margins": 9.653051376342773, "rewards/rejected": -11.587884902954102, "step": 5500 }, { "epoch": 2.84, "eval_logits/chosen": -2.616666555404663, "eval_logits/rejected": -2.5404672622680664, "eval_logps/chosen": -341.28814697265625, "eval_logps/rejected": -342.4482727050781, "eval_loss": 0.687979519367218, "eval_rewards/accuracies": 0.796875, "eval_rewards/chosen": -4.837313652038574, "eval_rewards/margins": 3.7321832180023193, "eval_rewards/rejected": -8.569496154785156, "eval_runtime": 317.1461, "eval_samples_per_second": 6.306, "eval_steps_per_second": 0.101, "step": 5500 }, { "epoch": 2.84, "learning_rate": 2.8781793842034804e-08, "logits/chosen": -2.4844002723693848, "logits/rejected": -2.3355154991149902, "logps/chosen": -266.91778564453125, "logps/rejected": -350.5770263671875, "loss": 0.0147, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.337966203689575, "rewards/margins": 9.253957748413086, "rewards/rejected": -11.591924667358398, "step": 5510 }, { "epoch": 2.85, "learning_rate": 2.782558806655192e-08, "logits/chosen": -2.5024611949920654, "logits/rejected": -2.50520920753479, "logps/chosen": -289.66229248046875, "logps/rejected": -392.1813659667969, "loss": 0.014, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.412630796432495, "rewards/margins": 9.504823684692383, "rewards/rejected": -11.917454719543457, "step": 5520 }, { "epoch": 2.85, "learning_rate": 2.6869382291069035e-08, "logits/chosen": -2.6169703006744385, "logits/rejected": -2.5267577171325684, "logps/chosen": -310.38531494140625, "logps/rejected": -389.1824645996094, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.7938950061798096, "rewards/margins": 10.015830993652344, "rewards/rejected": -11.809727668762207, "step": 5530 }, { "epoch": 2.86, "learning_rate": 2.591317651558615e-08, "logits/chosen": -2.5246803760528564, "logits/rejected": -2.5527288913726807, "logps/chosen": -277.68511962890625, "logps/rejected": -368.65985107421875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.1543831825256348, "rewards/margins": 9.553988456726074, "rewards/rejected": -11.708372116088867, "step": 5540 }, { "epoch": 2.87, "learning_rate": 2.4956970740103267e-08, "logits/chosen": -2.5779435634613037, "logits/rejected": -2.516773223876953, "logps/chosen": -292.4654541015625, "logps/rejected": -410.75274658203125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.225564956665039, "rewards/margins": 10.349849700927734, "rewards/rejected": -12.575413703918457, "step": 5550 }, { "epoch": 2.87, "learning_rate": 2.4000764964620386e-08, "logits/chosen": -2.5916004180908203, "logits/rejected": -2.5483925342559814, "logps/chosen": -364.6165771484375, "logps/rejected": -410.914306640625, "loss": 0.0157, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.3061633110046387, "rewards/margins": 9.430337905883789, "rewards/rejected": -11.73650074005127, "step": 5560 }, { "epoch": 2.88, "learning_rate": 2.30445591891375e-08, "logits/chosen": -2.642362117767334, "logits/rejected": -2.5382802486419678, "logps/chosen": -340.33660888671875, "logps/rejected": -395.65447998046875, "loss": 0.0088, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.6563146114349365, "rewards/margins": 10.552990913391113, "rewards/rejected": -12.209306716918945, "step": 5570 }, { "epoch": 2.88, "learning_rate": 2.2088353413654617e-08, "logits/chosen": -2.507833242416382, "logits/rejected": -2.4024722576141357, "logps/chosen": -254.4896240234375, "logps/rejected": -333.20367431640625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -2.2360875606536865, "rewards/margins": 10.094629287719727, "rewards/rejected": -12.330717086791992, "step": 5580 }, { "epoch": 2.89, "learning_rate": 2.1132147638171733e-08, "logits/chosen": -2.5680835247039795, "logits/rejected": -2.5192208290100098, "logps/chosen": -328.0243225097656, "logps/rejected": -363.34112548828125, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.0092064142227173, "rewards/margins": 10.329462051391602, "rewards/rejected": -11.338667869567871, "step": 5590 }, { "epoch": 2.89, "learning_rate": 2.0175941862688848e-08, "logits/chosen": -2.535581588745117, "logits/rejected": -2.5435423851013184, "logps/chosen": -267.56866455078125, "logps/rejected": -360.1643371582031, "loss": 0.0105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0220346450805664, "rewards/margins": 10.354208946228027, "rewards/rejected": -12.376241683959961, "step": 5600 }, { "epoch": 2.89, "eval_logits/chosen": -2.6135942935943604, "eval_logits/rejected": -2.5382933616638184, "eval_logps/chosen": -343.89697265625, "eval_logps/rejected": -345.81622314453125, "eval_loss": 0.6967966556549072, "eval_rewards/accuracies": 0.78515625, "eval_rewards/chosen": -5.0538434982299805, "eval_rewards/margins": 3.7951948642730713, "eval_rewards/rejected": -8.849039077758789, "eval_runtime": 317.8394, "eval_samples_per_second": 6.292, "eval_steps_per_second": 0.101, "step": 5600 }, { "epoch": 2.9, "learning_rate": 1.9219736087205964e-08, "logits/chosen": -2.581259250640869, "logits/rejected": -2.4780125617980957, "logps/chosen": -269.8539733886719, "logps/rejected": -362.7257385253906, "loss": 0.015, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.081495761871338, "rewards/margins": 9.792734146118164, "rewards/rejected": -11.874229431152344, "step": 5610 }, { "epoch": 2.9, "learning_rate": 1.826353031172308e-08, "logits/chosen": -2.5501909255981445, "logits/rejected": -2.459183931350708, "logps/chosen": -325.034912109375, "logps/rejected": -462.1394958496094, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.5985212326049805, "rewards/margins": 10.827996253967285, "rewards/rejected": -12.42651653289795, "step": 5620 }, { "epoch": 2.91, "learning_rate": 1.73073245362402e-08, "logits/chosen": -2.5880441665649414, "logits/rejected": -2.496222972869873, "logps/chosen": -308.3292236328125, "logps/rejected": -356.22503662109375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.4536099433898926, "rewards/margins": 9.852060317993164, "rewards/rejected": -12.305669784545898, "step": 5630 }, { "epoch": 2.91, "learning_rate": 1.6351118760757314e-08, "logits/chosen": -2.5573298931121826, "logits/rejected": -2.511517286300659, "logps/chosen": -290.1534423828125, "logps/rejected": -355.4990234375, "loss": 0.0164, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.0956108570098877, "rewards/margins": 9.225427627563477, "rewards/rejected": -11.321039199829102, "step": 5640 }, { "epoch": 2.92, "learning_rate": 1.539491298527443e-08, "logits/chosen": -2.57816481590271, "logits/rejected": -2.5746302604675293, "logps/chosen": -256.2776794433594, "logps/rejected": -378.756591796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5643820762634277, "rewards/margins": 9.850593566894531, "rewards/rejected": -12.414976119995117, "step": 5650 }, { "epoch": 2.92, "learning_rate": 1.4438707209791546e-08, "logits/chosen": -2.5814177989959717, "logits/rejected": -2.4435269832611084, "logps/chosen": -314.55865478515625, "logps/rejected": -351.59674072265625, "loss": 0.0154, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.09812593460083, "rewards/margins": 9.633223533630371, "rewards/rejected": -11.73134994506836, "step": 5660 }, { "epoch": 2.93, "learning_rate": 1.3482501434308661e-08, "logits/chosen": -2.564723253250122, "logits/rejected": -2.500065565109253, "logps/chosen": -304.1372985839844, "logps/rejected": -337.75555419921875, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -2.1323633193969727, "rewards/margins": 9.086587905883789, "rewards/rejected": -11.218950271606445, "step": 5670 }, { "epoch": 2.93, "learning_rate": 1.2526295658825777e-08, "logits/chosen": -2.6547515392303467, "logits/rejected": -2.6340692043304443, "logps/chosen": -312.80859375, "logps/rejected": -405.7166748046875, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.106706142425537, "rewards/margins": 10.638137817382812, "rewards/rejected": -12.744844436645508, "step": 5680 }, { "epoch": 2.94, "learning_rate": 1.1570089883342895e-08, "logits/chosen": -2.5111231803894043, "logits/rejected": -2.4250659942626953, "logps/chosen": -321.18170166015625, "logps/rejected": -433.1678771972656, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.3372254371643066, "rewards/margins": 10.568437576293945, "rewards/rejected": -12.905660629272461, "step": 5690 }, { "epoch": 2.94, "learning_rate": 1.061388410786001e-08, "logits/chosen": -2.5413479804992676, "logits/rejected": -2.533717632293701, "logps/chosen": -291.4538269042969, "logps/rejected": -345.87432861328125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.968036413192749, "rewards/margins": 9.563445091247559, "rewards/rejected": -11.53148078918457, "step": 5700 }, { "epoch": 2.94, "eval_logits/chosen": -2.6125216484069824, "eval_logits/rejected": -2.5372533798217773, "eval_logps/chosen": -344.4386901855469, "eval_logps/rejected": -346.6778869628906, "eval_loss": 0.6993198394775391, "eval_rewards/accuracies": 0.787109375, "eval_rewards/chosen": -5.098803520202637, "eval_rewards/margins": 3.821753978729248, "eval_rewards/rejected": -8.920557022094727, "eval_runtime": 319.2414, "eval_samples_per_second": 6.265, "eval_steps_per_second": 0.1, "step": 5700 }, { "epoch": 2.95, "learning_rate": 9.657678332377126e-09, "logits/chosen": -2.5063250064849854, "logits/rejected": -2.446988582611084, "logps/chosen": -292.32879638671875, "logps/rejected": -346.18853759765625, "loss": 0.0088, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.7591307163238525, "rewards/margins": 9.89818286895752, "rewards/rejected": -11.657312393188477, "step": 5710 }, { "epoch": 2.95, "learning_rate": 8.701472556894243e-09, "logits/chosen": -2.560904026031494, "logits/rejected": -2.402731418609619, "logps/chosen": -277.12921142578125, "logps/rejected": -398.18475341796875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.768036365509033, "rewards/margins": 10.488271713256836, "rewards/rejected": -13.256306648254395, "step": 5720 }, { "epoch": 2.96, "learning_rate": 7.745266781411359e-09, "logits/chosen": -2.578120708465576, "logits/rejected": -2.5930380821228027, "logps/chosen": -325.9931640625, "logps/rejected": -406.7197265625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -2.0759735107421875, "rewards/margins": 9.622904777526855, "rewards/rejected": -11.698877334594727, "step": 5730 }, { "epoch": 2.96, "learning_rate": 6.7890610059284754e-09, "logits/chosen": -2.5674290657043457, "logits/rejected": -2.45582914352417, "logps/chosen": -253.65576171875, "logps/rejected": -311.51971435546875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.083470344543457, "rewards/margins": 8.793237686157227, "rewards/rejected": -10.876708984375, "step": 5740 }, { "epoch": 2.97, "learning_rate": 5.832855230445592e-09, "logits/chosen": -2.4950573444366455, "logits/rejected": -2.449819564819336, "logps/chosen": -297.53924560546875, "logps/rejected": -366.6745910644531, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9202989339828491, "rewards/margins": 10.459419250488281, "rewards/rejected": -12.379718780517578, "step": 5750 }, { "epoch": 2.97, "learning_rate": 4.8766494549627085e-09, "logits/chosen": -2.580498218536377, "logits/rejected": -2.4627091884613037, "logps/chosen": -291.70330810546875, "logps/rejected": -376.4898376464844, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.145448088645935, "rewards/margins": 10.998189926147461, "rewards/rejected": -12.14363956451416, "step": 5760 }, { "epoch": 2.98, "learning_rate": 3.920443679479824e-09, "logits/chosen": -2.664374589920044, "logits/rejected": -2.5885655879974365, "logps/chosen": -333.6195373535156, "logps/rejected": -371.6888122558594, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -2.0988895893096924, "rewards/margins": 10.173905372619629, "rewards/rejected": -12.272794723510742, "step": 5770 }, { "epoch": 2.98, "learning_rate": 2.96423790399694e-09, "logits/chosen": -2.6213550567626953, "logits/rejected": -2.570923089981079, "logps/chosen": -306.8786926269531, "logps/rejected": -386.19036865234375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.2202601432800293, "rewards/margins": 9.088173866271973, "rewards/rejected": -11.308435440063477, "step": 5780 }, { "epoch": 2.99, "learning_rate": 2.008032128514056e-09, "logits/chosen": -2.589977264404297, "logits/rejected": -2.5663177967071533, "logps/chosen": -305.0616455078125, "logps/rejected": -388.3238220214844, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.246802806854248, "rewards/margins": 9.70131778717041, "rewards/rejected": -11.948119163513184, "step": 5790 }, { "epoch": 2.99, "learning_rate": 1.0518263530311723e-09, "logits/chosen": -2.6121764183044434, "logits/rejected": -2.5459160804748535, "logps/chosen": -254.16561889648438, "logps/rejected": -375.0100402832031, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.293713092803955, "rewards/margins": 9.469767570495605, "rewards/rejected": -11.763479232788086, "step": 5800 }, { "epoch": 2.99, "eval_logits/chosen": -2.6124672889709473, "eval_logits/rejected": -2.5373010635375977, "eval_logps/chosen": -343.6734313964844, "eval_logps/rejected": -345.7339172363281, "eval_loss": 0.6975029110908508, "eval_rewards/accuracies": 0.794921875, "eval_rewards/chosen": -5.0352888107299805, "eval_rewards/margins": 3.8069207668304443, "eval_rewards/rejected": -8.842209815979004, "eval_runtime": 318.539, "eval_samples_per_second": 6.279, "eval_steps_per_second": 0.1, "step": 5800 }, { "epoch": 3.0, "learning_rate": 9.562057754828839e-11, "logits/chosen": -2.5696330070495605, "logits/rejected": -2.5527381896972656, "logps/chosen": -258.56689453125, "logps/rejected": -373.21209716796875, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.265561580657959, "rewards/margins": 8.861224174499512, "rewards/rejected": -11.126786231994629, "step": 5810 }, { "epoch": 3.0, "step": 5811, "total_flos": 0.0, "train_loss": 0.20528750838680607, "train_runtime": 83798.7665, "train_samples_per_second": 2.218, "train_steps_per_second": 0.069 } ], "logging_steps": 10, "max_steps": 5811, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }