{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 1000, "global_step": 23238, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.1514629948364888e-10, "logits/chosen": -3.2950148582458496, "logits/rejected": -3.335197925567627, "logps/chosen": -347.2623596191406, "logps/rejected": -406.54669189453125, "loss": 0.787, "rewards/accuracies": 0.5, "rewards/chosen": -0.9639298915863037, "rewards/margins": 0.012924805283546448, "rewards/rejected": -0.9768546223640442, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.1514629948364885e-09, "logits/chosen": -2.924586296081543, "logits/rejected": -2.854886770248413, "logps/chosen": -544.9013671875, "logps/rejected": -457.9469299316406, "loss": 1.1164, "rewards/accuracies": 0.4722222089767456, "rewards/chosen": -1.6463477611541748, "rewards/margins": -0.24253013730049133, "rewards/rejected": -1.4038176536560059, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.302925989672977e-09, "logits/chosen": -2.90858793258667, "logits/rejected": -2.8465538024902344, "logps/chosen": -596.8198852539062, "logps/rejected": -498.96478271484375, "loss": 1.0907, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.706392526626587, "rewards/margins": -0.3493468463420868, "rewards/rejected": -1.3570456504821777, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.454388984509466e-09, "logits/chosen": -2.7887256145477295, "logits/rejected": -2.7121434211730957, "logps/chosen": -568.2722778320312, "logps/rejected": -399.5184631347656, "loss": 1.0928, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.506615161895752, "rewards/margins": -0.3349986672401428, "rewards/rejected": -1.1716164350509644, "step": 30 }, { "epoch": 0.01, "learning_rate": 8.605851979345954e-09, "logits/chosen": -2.8944332599639893, "logits/rejected": -2.7077884674072266, "logps/chosen": -530.0511474609375, "logps/rejected": -378.41973876953125, "loss": 1.177, "rewards/accuracies": 0.3125, "rewards/chosen": -1.5457532405853271, "rewards/margins": -0.5084830522537231, "rewards/rejected": -1.0372703075408936, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.0757314974182444e-08, "logits/chosen": -2.8369626998901367, "logits/rejected": -2.765065908432007, "logps/chosen": -554.7618408203125, "logps/rejected": -461.94805908203125, "loss": 1.2406, "rewards/accuracies": 0.4375, "rewards/chosen": -1.818738579750061, "rewards/margins": -0.5370584726333618, "rewards/rejected": -1.2816802263259888, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.2908777969018932e-08, "logits/chosen": -2.830653667449951, "logits/rejected": -2.714463472366333, "logps/chosen": -595.2697143554688, "logps/rejected": -394.4006652832031, "loss": 1.3656, "rewards/accuracies": 0.375, "rewards/chosen": -1.7163364887237549, "rewards/margins": -0.7124733924865723, "rewards/rejected": -1.003862977027893, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.506024096385542e-08, "logits/chosen": -2.881875991821289, "logits/rejected": -2.7266342639923096, "logps/chosen": -551.6770629882812, "logps/rejected": -392.6590881347656, "loss": 1.1475, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.5037343502044678, "rewards/margins": -0.40448737144470215, "rewards/rejected": -1.0992470979690552, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.7211703958691908e-08, "logits/chosen": -2.920945405960083, "logits/rejected": -2.8154220581054688, "logps/chosen": -558.3458251953125, "logps/rejected": -424.73834228515625, "loss": 0.9299, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.3890039920806885, "rewards/margins": -0.09523806720972061, "rewards/rejected": -1.2937657833099365, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.9363166953528397e-08, "logits/chosen": -2.8319311141967773, "logits/rejected": -2.7050387859344482, "logps/chosen": -532.0145263671875, "logps/rejected": -370.8155212402344, "loss": 1.0839, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.2564115524291992, "rewards/margins": -0.1931476891040802, "rewards/rejected": -1.0632636547088623, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.1514629948364887e-08, "logits/chosen": -2.9122109413146973, "logits/rejected": -2.8262534141540527, "logps/chosen": -509.1393127441406, "logps/rejected": -428.3275451660156, "loss": 1.05, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.4573386907577515, "rewards/margins": -0.27454182505607605, "rewards/rejected": -1.1827967166900635, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.3666092943201377e-08, "logits/chosen": -2.9896833896636963, "logits/rejected": -2.9096086025238037, "logps/chosen": -499.1744079589844, "logps/rejected": -421.06884765625, "loss": 1.0822, "rewards/accuracies": 0.4375, "rewards/chosen": -1.4199817180633545, "rewards/margins": -0.2942788600921631, "rewards/rejected": -1.125702977180481, "step": 110 }, { "epoch": 0.02, "learning_rate": 2.5817555938037863e-08, "logits/chosen": -2.777085065841675, "logits/rejected": -2.700615406036377, "logps/chosen": -486.452880859375, "logps/rejected": -381.12518310546875, "loss": 1.0246, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.3233524560928345, "rewards/margins": -0.24365441501140594, "rewards/rejected": -1.0796979665756226, "step": 120 }, { "epoch": 0.02, "learning_rate": 2.7969018932874356e-08, "logits/chosen": -2.9161570072174072, "logits/rejected": -2.702455997467041, "logps/chosen": -545.6387329101562, "logps/rejected": -349.59088134765625, "loss": 1.3111, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.681069016456604, "rewards/margins": -0.6724443435668945, "rewards/rejected": -1.0086246728897095, "step": 130 }, { "epoch": 0.02, "learning_rate": 3.012048192771084e-08, "logits/chosen": -2.845571994781494, "logits/rejected": -2.9078187942504883, "logps/chosen": -443.4098205566406, "logps/rejected": -460.6910095214844, "loss": 0.9821, "rewards/accuracies": 0.4375, "rewards/chosen": -1.3453633785247803, "rewards/margins": -0.13185834884643555, "rewards/rejected": -1.2135050296783447, "step": 140 }, { "epoch": 0.02, "learning_rate": 3.227194492254733e-08, "logits/chosen": -2.828176736831665, "logits/rejected": -2.6844418048858643, "logps/chosen": -602.3214111328125, "logps/rejected": -415.995361328125, "loss": 1.2102, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.6732925176620483, "rewards/margins": -0.4834602475166321, "rewards/rejected": -1.1898324489593506, "step": 150 }, { "epoch": 0.02, "learning_rate": 3.4423407917383815e-08, "logits/chosen": -2.8217580318450928, "logits/rejected": -2.827690601348877, "logps/chosen": -620.5433349609375, "logps/rejected": -529.1263427734375, "loss": 1.1004, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.8277528285980225, "rewards/margins": -0.1952226161956787, "rewards/rejected": -1.6325302124023438, "step": 160 }, { "epoch": 0.02, "learning_rate": 3.6574870912220305e-08, "logits/chosen": -2.965924024581909, "logits/rejected": -2.9315109252929688, "logps/chosen": -510.6612854003906, "logps/rejected": -433.0647888183594, "loss": 1.0859, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.5090383291244507, "rewards/margins": -0.35547584295272827, "rewards/rejected": -1.153562307357788, "step": 170 }, { "epoch": 0.02, "learning_rate": 3.8726333907056795e-08, "logits/chosen": -2.918009042739868, "logits/rejected": -2.8664751052856445, "logps/chosen": -589.7950439453125, "logps/rejected": -468.0103454589844, "loss": 0.924, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.4110956192016602, "rewards/margins": -0.11761031299829483, "rewards/rejected": -1.293485403060913, "step": 180 }, { "epoch": 0.02, "learning_rate": 4.087779690189329e-08, "logits/chosen": -2.845302104949951, "logits/rejected": -2.7780838012695312, "logps/chosen": -519.7857666015625, "logps/rejected": -426.0814514160156, "loss": 1.4072, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -1.7318992614746094, "rewards/margins": -0.7683115601539612, "rewards/rejected": -0.963587760925293, "step": 190 }, { "epoch": 0.03, "learning_rate": 4.3029259896729774e-08, "logits/chosen": -3.00028920173645, "logits/rejected": -2.8585524559020996, "logps/chosen": -544.9568481445312, "logps/rejected": -455.34967041015625, "loss": 0.8903, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.3432106971740723, "rewards/margins": -0.05514455959200859, "rewards/rejected": -1.2880661487579346, "step": 200 }, { "epoch": 0.03, "learning_rate": 4.5180722891566264e-08, "logits/chosen": -2.958261251449585, "logits/rejected": -2.805788040161133, "logps/chosen": -627.1492919921875, "logps/rejected": -419.6505432128906, "loss": 1.1882, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.8272796869277954, "rewards/margins": -0.4571278691291809, "rewards/rejected": -1.3701517581939697, "step": 210 }, { "epoch": 0.03, "learning_rate": 4.7332185886402753e-08, "logits/chosen": -2.969813823699951, "logits/rejected": -2.8628058433532715, "logps/chosen": -526.109375, "logps/rejected": -429.310791015625, "loss": 0.9797, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.3616421222686768, "rewards/margins": -0.1262921839952469, "rewards/rejected": -1.2353496551513672, "step": 220 }, { "epoch": 0.03, "learning_rate": 4.9483648881239237e-08, "logits/chosen": -2.9566166400909424, "logits/rejected": -2.8587098121643066, "logps/chosen": -582.52685546875, "logps/rejected": -402.5362243652344, "loss": 1.2166, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.5297281742095947, "rewards/margins": -0.5458205938339233, "rewards/rejected": -0.9839075803756714, "step": 230 }, { "epoch": 0.03, "learning_rate": 5.1635111876075726e-08, "logits/chosen": -2.9289391040802, "logits/rejected": -2.888399124145508, "logps/chosen": -461.36474609375, "logps/rejected": -391.436767578125, "loss": 1.1356, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.4402072429656982, "rewards/margins": -0.37837186455726624, "rewards/rejected": -1.0618354082107544, "step": 240 }, { "epoch": 0.03, "learning_rate": 5.3786574870912216e-08, "logits/chosen": -2.888613224029541, "logits/rejected": -2.815978765487671, "logps/chosen": -496.8260803222656, "logps/rejected": -432.523193359375, "loss": 1.0723, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.4461778402328491, "rewards/margins": -0.29346245527267456, "rewards/rejected": -1.1527154445648193, "step": 250 }, { "epoch": 0.03, "learning_rate": 5.593803786574871e-08, "logits/chosen": -2.9382882118225098, "logits/rejected": -2.7886083126068115, "logps/chosen": -596.6341552734375, "logps/rejected": -471.10955810546875, "loss": 1.2679, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.7197214365005493, "rewards/margins": -0.4524146020412445, "rewards/rejected": -1.2673068046569824, "step": 260 }, { "epoch": 0.03, "learning_rate": 5.8089500860585195e-08, "logits/chosen": -3.000114917755127, "logits/rejected": -2.889648675918579, "logps/chosen": -527.1404418945312, "logps/rejected": -420.24334716796875, "loss": 1.0399, "rewards/accuracies": 0.3125, "rewards/chosen": -1.5051302909851074, "rewards/margins": -0.3537796437740326, "rewards/rejected": -1.151350736618042, "step": 270 }, { "epoch": 0.04, "learning_rate": 6.024096385542168e-08, "logits/chosen": -2.709540843963623, "logits/rejected": -2.7550735473632812, "logps/chosen": -423.3228454589844, "logps/rejected": -413.387451171875, "loss": 0.9545, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.2355258464813232, "rewards/margins": -0.07075345516204834, "rewards/rejected": -1.1647725105285645, "step": 280 }, { "epoch": 0.04, "learning_rate": 6.239242685025817e-08, "logits/chosen": -2.8718864917755127, "logits/rejected": -2.7891790866851807, "logps/chosen": -540.0614013671875, "logps/rejected": -385.06890869140625, "loss": 1.0979, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.4737598896026611, "rewards/margins": -0.38921305537223816, "rewards/rejected": -1.0845468044281006, "step": 290 }, { "epoch": 0.04, "learning_rate": 6.454388984509466e-08, "logits/chosen": -2.875504732131958, "logits/rejected": -2.7327208518981934, "logps/chosen": -521.9022827148438, "logps/rejected": -341.1861572265625, "loss": 1.4403, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -1.6321594715118408, "rewards/margins": -0.8978872299194336, "rewards/rejected": -0.7342721223831177, "step": 300 }, { "epoch": 0.04, "learning_rate": 6.669535283993115e-08, "logits/chosen": -2.871347665786743, "logits/rejected": -2.8042232990264893, "logps/chosen": -468.1570739746094, "logps/rejected": -401.05938720703125, "loss": 1.0122, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.2797627449035645, "rewards/margins": -0.2121923714876175, "rewards/rejected": -1.067570447921753, "step": 310 }, { "epoch": 0.04, "learning_rate": 6.884681583476763e-08, "logits/chosen": -2.951195478439331, "logits/rejected": -2.8491573333740234, "logps/chosen": -501.7499084472656, "logps/rejected": -408.4956359863281, "loss": 0.986, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.3669726848602295, "rewards/margins": -0.24334685504436493, "rewards/rejected": -1.1236258745193481, "step": 320 }, { "epoch": 0.04, "learning_rate": 7.099827882960413e-08, "logits/chosen": -2.927947521209717, "logits/rejected": -2.8009490966796875, "logps/chosen": -536.63134765625, "logps/rejected": -352.08013916015625, "loss": 1.3041, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.6398804187774658, "rewards/margins": -0.7231046557426453, "rewards/rejected": -0.9167758226394653, "step": 330 }, { "epoch": 0.04, "learning_rate": 7.314974182444061e-08, "logits/chosen": -2.875483274459839, "logits/rejected": -2.7957916259765625, "logps/chosen": -542.56787109375, "logps/rejected": -431.2359924316406, "loss": 1.2567, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.771308183670044, "rewards/margins": -0.5543769001960754, "rewards/rejected": -1.2169312238693237, "step": 340 }, { "epoch": 0.05, "learning_rate": 7.530120481927711e-08, "logits/chosen": -2.8560211658477783, "logits/rejected": -2.836230516433716, "logps/chosen": -531.9417724609375, "logps/rejected": -420.0228576660156, "loss": 1.1598, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.5408753156661987, "rewards/margins": -0.4078097343444824, "rewards/rejected": -1.1330657005310059, "step": 350 }, { "epoch": 0.05, "learning_rate": 7.745266781411359e-08, "logits/chosen": -2.9122531414031982, "logits/rejected": -2.8809313774108887, "logps/chosen": -479.103759765625, "logps/rejected": -374.1607360839844, "loss": 1.0907, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.3091607093811035, "rewards/margins": -0.2847980856895447, "rewards/rejected": -1.024362564086914, "step": 360 }, { "epoch": 0.05, "learning_rate": 7.960413080895008e-08, "logits/chosen": -2.987844467163086, "logits/rejected": -2.915762424468994, "logps/chosen": -529.7155151367188, "logps/rejected": -392.6169128417969, "loss": 1.0604, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.4710811376571655, "rewards/margins": -0.33130010962486267, "rewards/rejected": -1.1397812366485596, "step": 370 }, { "epoch": 0.05, "learning_rate": 8.175559380378658e-08, "logits/chosen": -2.898550033569336, "logits/rejected": -2.8149094581604004, "logps/chosen": -528.57177734375, "logps/rejected": -398.68798828125, "loss": 1.1479, "rewards/accuracies": 0.375, "rewards/chosen": -1.468045949935913, "rewards/margins": -0.46139734983444214, "rewards/rejected": -1.0066487789154053, "step": 380 }, { "epoch": 0.05, "learning_rate": 8.390705679862306e-08, "logits/chosen": -2.974303722381592, "logits/rejected": -2.8059496879577637, "logps/chosen": -576.8655395507812, "logps/rejected": -391.68621826171875, "loss": 1.0287, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.3815443515777588, "rewards/margins": -0.3063068687915802, "rewards/rejected": -1.075237512588501, "step": 390 }, { "epoch": 0.05, "learning_rate": 8.605851979345955e-08, "logits/chosen": -2.91733980178833, "logits/rejected": -2.8732311725616455, "logps/chosen": -596.7152099609375, "logps/rejected": -498.4881286621094, "loss": 1.0727, "rewards/accuracies": 0.4375, "rewards/chosen": -1.7486822605133057, "rewards/margins": -0.19763290882110596, "rewards/rejected": -1.5510494709014893, "step": 400 }, { "epoch": 0.05, "learning_rate": 8.820998278829604e-08, "logits/chosen": -2.8658499717712402, "logits/rejected": -2.8552374839782715, "logps/chosen": -493.156982421875, "logps/rejected": -468.36309814453125, "loss": 0.9238, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.2440019845962524, "rewards/margins": -0.0739990621805191, "rewards/rejected": -1.1700029373168945, "step": 410 }, { "epoch": 0.05, "learning_rate": 9.036144578313253e-08, "logits/chosen": -2.915015459060669, "logits/rejected": -2.7743382453918457, "logps/chosen": -577.144775390625, "logps/rejected": -514.756591796875, "loss": 1.2174, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.6439377069473267, "rewards/margins": -0.48379650712013245, "rewards/rejected": -1.1601413488388062, "step": 420 }, { "epoch": 0.06, "learning_rate": 9.2512908777969e-08, "logits/chosen": -2.923586368560791, "logits/rejected": -2.9226834774017334, "logps/chosen": -453.4723205566406, "logps/rejected": -363.3133239746094, "loss": 1.0814, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.2304415702819824, "rewards/margins": -0.3403525650501251, "rewards/rejected": -0.8900890350341797, "step": 430 }, { "epoch": 0.06, "learning_rate": 9.466437177280551e-08, "logits/chosen": -2.928081750869751, "logits/rejected": -2.751584529876709, "logps/chosen": -599.8878173828125, "logps/rejected": -406.40679931640625, "loss": 1.4413, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -2.0039727687835693, "rewards/margins": -0.8381384015083313, "rewards/rejected": -1.1658344268798828, "step": 440 }, { "epoch": 0.06, "learning_rate": 9.6815834767642e-08, "logits/chosen": -2.950822353363037, "logits/rejected": -2.810190439224243, "logps/chosen": -560.4677734375, "logps/rejected": -338.6112976074219, "loss": 1.2477, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -1.5505653619766235, "rewards/margins": -0.6597352623939514, "rewards/rejected": -0.8908301591873169, "step": 450 }, { "epoch": 0.06, "learning_rate": 9.896729776247847e-08, "logits/chosen": -2.87968373298645, "logits/rejected": -2.743292808532715, "logps/chosen": -567.3447875976562, "logps/rejected": -433.7767639160156, "loss": 1.2441, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.7191362380981445, "rewards/margins": -0.6595061421394348, "rewards/rejected": -1.0596299171447754, "step": 460 }, { "epoch": 0.06, "learning_rate": 1.0111876075731498e-07, "logits/chosen": -2.987578868865967, "logits/rejected": -2.848745584487915, "logps/chosen": -521.1275634765625, "logps/rejected": -367.42474365234375, "loss": 1.1784, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.501481294631958, "rewards/margins": -0.46360883116722107, "rewards/rejected": -1.0378721952438354, "step": 470 }, { "epoch": 0.06, "learning_rate": 1.0327022375215145e-07, "logits/chosen": -2.7967822551727295, "logits/rejected": -2.752747058868408, "logps/chosen": -500.1796875, "logps/rejected": -464.89263916015625, "loss": 1.0258, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.5135209560394287, "rewards/margins": -0.24555352330207825, "rewards/rejected": -1.2679673433303833, "step": 480 }, { "epoch": 0.06, "learning_rate": 1.0542168674698796e-07, "logits/chosen": -2.898345470428467, "logits/rejected": -2.7509310245513916, "logps/chosen": -623.8351440429688, "logps/rejected": -407.9005432128906, "loss": 1.0147, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.511136770248413, "rewards/margins": -0.2714304029941559, "rewards/rejected": -1.23970627784729, "step": 490 }, { "epoch": 0.06, "learning_rate": 1.0757314974182443e-07, "logits/chosen": -2.8036983013153076, "logits/rejected": -2.8310534954071045, "logps/chosen": -457.04071044921875, "logps/rejected": -435.40032958984375, "loss": 1.1232, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.425236701965332, "rewards/margins": -0.23419681191444397, "rewards/rejected": -1.191039800643921, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.0972461273666092e-07, "logits/chosen": -2.924166440963745, "logits/rejected": -2.772520065307617, "logps/chosen": -545.0553588867188, "logps/rejected": -381.671630859375, "loss": 1.1406, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.521466851234436, "rewards/margins": -0.4211350381374359, "rewards/rejected": -1.1003320217132568, "step": 510 }, { "epoch": 0.07, "learning_rate": 1.1187607573149742e-07, "logits/chosen": -2.9792284965515137, "logits/rejected": -2.837831974029541, "logps/chosen": -569.2177124023438, "logps/rejected": -392.05364990234375, "loss": 1.2733, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.800487756729126, "rewards/margins": -0.6486862897872925, "rewards/rejected": -1.151801347732544, "step": 520 }, { "epoch": 0.07, "learning_rate": 1.140275387263339e-07, "logits/chosen": -2.8640835285186768, "logits/rejected": -2.7710225582122803, "logps/chosen": -556.4378662109375, "logps/rejected": -412.46630859375, "loss": 1.1597, "rewards/accuracies": 0.375, "rewards/chosen": -1.6286447048187256, "rewards/margins": -0.5456947684288025, "rewards/rejected": -1.0829498767852783, "step": 530 }, { "epoch": 0.07, "learning_rate": 1.1617900172117039e-07, "logits/chosen": -2.96451997756958, "logits/rejected": -2.7815613746643066, "logps/chosen": -573.4811401367188, "logps/rejected": -378.06304931640625, "loss": 1.2232, "rewards/accuracies": 0.375, "rewards/chosen": -1.628395676612854, "rewards/margins": -0.552253782749176, "rewards/rejected": -1.0761418342590332, "step": 540 }, { "epoch": 0.07, "learning_rate": 1.1833046471600688e-07, "logits/chosen": -2.9742980003356934, "logits/rejected": -2.7474184036254883, "logps/chosen": -614.0609741210938, "logps/rejected": -399.3195495605469, "loss": 1.2513, "rewards/accuracies": 0.23749999701976776, "rewards/chosen": -1.8134523630142212, "rewards/margins": -0.6658233404159546, "rewards/rejected": -1.1476290225982666, "step": 550 }, { "epoch": 0.07, "learning_rate": 1.2048192771084337e-07, "logits/chosen": -2.831990957260132, "logits/rejected": -2.8579297065734863, "logps/chosen": -479.0257873535156, "logps/rejected": -458.7317810058594, "loss": 0.9099, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.1580950021743774, "rewards/margins": 0.050583623349666595, "rewards/rejected": -1.2086787223815918, "step": 560 }, { "epoch": 0.07, "learning_rate": 1.2263339070567985e-07, "logits/chosen": -2.8621904850006104, "logits/rejected": -2.7598016262054443, "logps/chosen": -564.8843383789062, "logps/rejected": -386.1514892578125, "loss": 1.0571, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.3847564458847046, "rewards/margins": -0.23089079558849335, "rewards/rejected": -1.1538655757904053, "step": 570 }, { "epoch": 0.07, "learning_rate": 1.2478485370051635e-07, "logits/chosen": -3.02630352973938, "logits/rejected": -2.8428852558135986, "logps/chosen": -616.1720581054688, "logps/rejected": -418.72894287109375, "loss": 1.208, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.6343454122543335, "rewards/margins": -0.5769091844558716, "rewards/rejected": -1.057436227798462, "step": 580 }, { "epoch": 0.08, "learning_rate": 1.2693631669535285e-07, "logits/chosen": -2.8908603191375732, "logits/rejected": -2.7416155338287354, "logps/chosen": -619.9117431640625, "logps/rejected": -451.9015197753906, "loss": 1.2677, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -1.7586055994033813, "rewards/margins": -0.591353178024292, "rewards/rejected": -1.1672526597976685, "step": 590 }, { "epoch": 0.08, "learning_rate": 1.2908777969018933e-07, "logits/chosen": -2.966827869415283, "logits/rejected": -2.8974204063415527, "logps/chosen": -478.32769775390625, "logps/rejected": -355.04888916015625, "loss": 1.1493, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -1.3731646537780762, "rewards/margins": -0.5336369276046753, "rewards/rejected": -0.8395276069641113, "step": 600 }, { "epoch": 0.08, "learning_rate": 1.312392426850258e-07, "logits/chosen": -2.8757786750793457, "logits/rejected": -2.762982130050659, "logps/chosen": -545.1154174804688, "logps/rejected": -420.70379638671875, "loss": 1.056, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.6534254550933838, "rewards/margins": -0.40917378664016724, "rewards/rejected": -1.2442518472671509, "step": 610 }, { "epoch": 0.08, "learning_rate": 1.333907056798623e-07, "logits/chosen": -2.8273301124572754, "logits/rejected": -2.7787070274353027, "logps/chosen": -505.73638916015625, "logps/rejected": -431.45550537109375, "loss": 1.0571, "rewards/accuracies": 0.4375, "rewards/chosen": -1.486027479171753, "rewards/margins": -0.2768253684043884, "rewards/rejected": -1.2092022895812988, "step": 620 }, { "epoch": 0.08, "learning_rate": 1.3554216867469878e-07, "logits/chosen": -2.855276584625244, "logits/rejected": -2.747957468032837, "logps/chosen": -613.3048706054688, "logps/rejected": -473.283447265625, "loss": 1.1026, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.6418201923370361, "rewards/margins": -0.3672993779182434, "rewards/rejected": -1.274520754814148, "step": 630 }, { "epoch": 0.08, "learning_rate": 1.3769363166953526e-07, "logits/chosen": -2.959531307220459, "logits/rejected": -2.823859453201294, "logps/chosen": -558.1025390625, "logps/rejected": -373.2362365722656, "loss": 1.0386, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.4322693347930908, "rewards/margins": -0.35867735743522644, "rewards/rejected": -1.073591947555542, "step": 640 }, { "epoch": 0.08, "learning_rate": 1.3984509466437176e-07, "logits/chosen": -2.8049299716949463, "logits/rejected": -2.652029037475586, "logps/chosen": -601.1675415039062, "logps/rejected": -462.1188049316406, "loss": 1.2113, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.7533762454986572, "rewards/margins": -0.4071729779243469, "rewards/rejected": -1.346203088760376, "step": 650 }, { "epoch": 0.09, "learning_rate": 1.4199655765920827e-07, "logits/chosen": -2.903475284576416, "logits/rejected": -2.6957991123199463, "logps/chosen": -550.4015502929688, "logps/rejected": -394.2493896484375, "loss": 1.0864, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.5049946308135986, "rewards/margins": -0.40729039907455444, "rewards/rejected": -1.0977041721343994, "step": 660 }, { "epoch": 0.09, "learning_rate": 1.4414802065404474e-07, "logits/chosen": -2.936917781829834, "logits/rejected": -2.7801499366760254, "logps/chosen": -520.6312866210938, "logps/rejected": -385.4730529785156, "loss": 1.0287, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.4622550010681152, "rewards/margins": -0.2108292579650879, "rewards/rejected": -1.2514257431030273, "step": 670 }, { "epoch": 0.09, "learning_rate": 1.4629948364888122e-07, "logits/chosen": -2.894369602203369, "logits/rejected": -2.718942642211914, "logps/chosen": -581.3135986328125, "logps/rejected": -415.207275390625, "loss": 1.2502, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.7388607263565063, "rewards/margins": -0.5420593023300171, "rewards/rejected": -1.1968014240264893, "step": 680 }, { "epoch": 0.09, "learning_rate": 1.4845094664371772e-07, "logits/chosen": -2.8638455867767334, "logits/rejected": -2.771210193634033, "logps/chosen": -501.74981689453125, "logps/rejected": -380.2633972167969, "loss": 1.042, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.470611810684204, "rewards/margins": -0.3094070255756378, "rewards/rejected": -1.1612048149108887, "step": 690 }, { "epoch": 0.09, "learning_rate": 1.5060240963855423e-07, "logits/chosen": -2.992621898651123, "logits/rejected": -2.758277416229248, "logps/chosen": -638.457275390625, "logps/rejected": -450.360595703125, "loss": 1.3188, "rewards/accuracies": 0.2750000059604645, "rewards/chosen": -1.963989496231079, "rewards/margins": -0.7857285737991333, "rewards/rejected": -1.1782606840133667, "step": 700 }, { "epoch": 0.09, "learning_rate": 1.527538726333907e-07, "logits/chosen": -2.7889435291290283, "logits/rejected": -2.809724807739258, "logps/chosen": -572.9876708984375, "logps/rejected": -523.0567626953125, "loss": 1.0992, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.6143077611923218, "rewards/margins": -0.3347818851470947, "rewards/rejected": -1.2795259952545166, "step": 710 }, { "epoch": 0.09, "learning_rate": 1.5490533562822718e-07, "logits/chosen": -2.8934144973754883, "logits/rejected": -2.824483633041382, "logps/chosen": -443.08575439453125, "logps/rejected": -381.6692199707031, "loss": 0.9419, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.3082326650619507, "rewards/margins": -0.1980036199092865, "rewards/rejected": -1.1102291345596313, "step": 720 }, { "epoch": 0.09, "learning_rate": 1.5705679862306368e-07, "logits/chosen": -2.952026128768921, "logits/rejected": -2.753056049346924, "logps/chosen": -564.59619140625, "logps/rejected": -397.1822509765625, "loss": 0.962, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -1.5011630058288574, "rewards/margins": -0.2888903021812439, "rewards/rejected": -1.2122727632522583, "step": 730 }, { "epoch": 0.1, "learning_rate": 1.5920826161790016e-07, "logits/chosen": -2.870356798171997, "logits/rejected": -2.760673999786377, "logps/chosen": -667.3470458984375, "logps/rejected": -491.30096435546875, "loss": 1.1641, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.718750238418579, "rewards/margins": -0.38304054737091064, "rewards/rejected": -1.335709810256958, "step": 740 }, { "epoch": 0.1, "learning_rate": 1.6135972461273663e-07, "logits/chosen": -2.862515926361084, "logits/rejected": -2.7680115699768066, "logps/chosen": -570.6820678710938, "logps/rejected": -447.6932067871094, "loss": 1.1678, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.8231760263442993, "rewards/margins": -0.5018742680549622, "rewards/rejected": -1.3213016986846924, "step": 750 }, { "epoch": 0.1, "learning_rate": 1.6351118760757316e-07, "logits/chosen": -2.808417797088623, "logits/rejected": -2.737382650375366, "logps/chosen": -461.3914489746094, "logps/rejected": -397.62921142578125, "loss": 0.9786, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -1.4511468410491943, "rewards/margins": -0.25694090127944946, "rewards/rejected": -1.1942059993743896, "step": 760 }, { "epoch": 0.1, "learning_rate": 1.6566265060240964e-07, "logits/chosen": -2.9370501041412354, "logits/rejected": -2.771433115005493, "logps/chosen": -476.743896484375, "logps/rejected": -305.5546875, "loss": 1.1805, "rewards/accuracies": 0.2750000059604645, "rewards/chosen": -1.4096827507019043, "rewards/margins": -0.5241997838020325, "rewards/rejected": -0.8854829668998718, "step": 770 }, { "epoch": 0.1, "learning_rate": 1.6781411359724612e-07, "logits/chosen": -2.965244770050049, "logits/rejected": -2.876194715499878, "logps/chosen": -519.2010498046875, "logps/rejected": -353.551513671875, "loss": 1.2371, "rewards/accuracies": 0.2874999940395355, "rewards/chosen": -1.584154486656189, "rewards/margins": -0.6151142716407776, "rewards/rejected": -0.9690402746200562, "step": 780 }, { "epoch": 0.1, "learning_rate": 1.699655765920826e-07, "logits/chosen": -2.96506667137146, "logits/rejected": -2.7676732540130615, "logps/chosen": -558.1395263671875, "logps/rejected": -397.5313720703125, "loss": 1.0133, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.523147702217102, "rewards/margins": -0.2312515676021576, "rewards/rejected": -1.2918962240219116, "step": 790 }, { "epoch": 0.1, "learning_rate": 1.721170395869191e-07, "logits/chosen": -2.8465828895568848, "logits/rejected": -2.8931725025177, "logps/chosen": -549.4329223632812, "logps/rejected": -474.57220458984375, "loss": 0.9576, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.4321925640106201, "rewards/margins": -0.2065524309873581, "rewards/rejected": -1.2256401777267456, "step": 800 }, { "epoch": 0.1, "learning_rate": 1.7426850258175557e-07, "logits/chosen": -2.792717218399048, "logits/rejected": -2.6944947242736816, "logps/chosen": -508.41900634765625, "logps/rejected": -373.2610168457031, "loss": 1.0773, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.3918097019195557, "rewards/margins": -0.2720213532447815, "rewards/rejected": -1.1197882890701294, "step": 810 }, { "epoch": 0.11, "learning_rate": 1.7641996557659208e-07, "logits/chosen": -2.809436559677124, "logits/rejected": -2.7730727195739746, "logps/chosen": -593.3984375, "logps/rejected": -460.0164489746094, "loss": 1.0253, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.6003118753433228, "rewards/margins": -0.15719449520111084, "rewards/rejected": -1.4431171417236328, "step": 820 }, { "epoch": 0.11, "learning_rate": 1.7857142857142858e-07, "logits/chosen": -2.7714428901672363, "logits/rejected": -2.6096713542938232, "logps/chosen": -567.9268798828125, "logps/rejected": -309.6457214355469, "loss": 1.2684, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -1.6627538204193115, "rewards/margins": -0.6221283674240112, "rewards/rejected": -1.0406254529953003, "step": 830 }, { "epoch": 0.11, "learning_rate": 1.8072289156626505e-07, "logits/chosen": -2.992332935333252, "logits/rejected": -2.854959487915039, "logps/chosen": -536.15283203125, "logps/rejected": -412.3268127441406, "loss": 1.1348, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.486196756362915, "rewards/margins": -0.39995765686035156, "rewards/rejected": -1.0862390995025635, "step": 840 }, { "epoch": 0.11, "learning_rate": 1.8287435456110153e-07, "logits/chosen": -2.9263148307800293, "logits/rejected": -2.763336658477783, "logps/chosen": -598.2183837890625, "logps/rejected": -387.10906982421875, "loss": 1.1092, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.6284101009368896, "rewards/margins": -0.3487069606781006, "rewards/rejected": -1.2797032594680786, "step": 850 }, { "epoch": 0.11, "learning_rate": 1.85025817555938e-07, "logits/chosen": -2.9703197479248047, "logits/rejected": -2.774250030517578, "logps/chosen": -583.4091796875, "logps/rejected": -371.3197326660156, "loss": 1.1221, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -1.526422142982483, "rewards/margins": -0.4696226119995117, "rewards/rejected": -1.0567996501922607, "step": 860 }, { "epoch": 0.11, "learning_rate": 1.8717728055077454e-07, "logits/chosen": -2.946754217147827, "logits/rejected": -2.7919583320617676, "logps/chosen": -548.9928588867188, "logps/rejected": -387.41455078125, "loss": 0.9936, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.3771367073059082, "rewards/margins": -0.28174012899398804, "rewards/rejected": -1.095396637916565, "step": 870 }, { "epoch": 0.11, "learning_rate": 1.8932874354561101e-07, "logits/chosen": -2.927468776702881, "logits/rejected": -2.7719478607177734, "logps/chosen": -654.3358764648438, "logps/rejected": -443.1180114746094, "loss": 1.1766, "rewards/accuracies": 0.375, "rewards/chosen": -1.8755700588226318, "rewards/margins": -0.4832931160926819, "rewards/rejected": -1.3922770023345947, "step": 880 }, { "epoch": 0.11, "learning_rate": 1.914802065404475e-07, "logits/chosen": -2.9349350929260254, "logits/rejected": -2.784813404083252, "logps/chosen": -613.9952392578125, "logps/rejected": -441.60577392578125, "loss": 1.1416, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.5874807834625244, "rewards/margins": -0.4033326506614685, "rewards/rejected": -1.1841480731964111, "step": 890 }, { "epoch": 0.12, "learning_rate": 1.93631669535284e-07, "logits/chosen": -2.9416117668151855, "logits/rejected": -2.7952167987823486, "logps/chosen": -552.5643920898438, "logps/rejected": -395.2642517089844, "loss": 1.1665, "rewards/accuracies": 0.375, "rewards/chosen": -1.592219352722168, "rewards/margins": -0.48249393701553345, "rewards/rejected": -1.1097254753112793, "step": 900 }, { "epoch": 0.12, "learning_rate": 1.9578313253012047e-07, "logits/chosen": -2.9010910987854004, "logits/rejected": -2.8078465461730957, "logps/chosen": -565.1591796875, "logps/rejected": -399.7140808105469, "loss": 0.955, "rewards/accuracies": 0.5, "rewards/chosen": -1.454108476638794, "rewards/margins": -0.1559695303440094, "rewards/rejected": -1.2981388568878174, "step": 910 }, { "epoch": 0.12, "learning_rate": 1.9793459552495695e-07, "logits/chosen": -2.8650078773498535, "logits/rejected": -2.8078949451446533, "logps/chosen": -581.2164306640625, "logps/rejected": -443.3714294433594, "loss": 1.1964, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -1.7957483530044556, "rewards/margins": -0.5385252237319946, "rewards/rejected": -1.257223129272461, "step": 920 }, { "epoch": 0.12, "learning_rate": 2.0008605851979345e-07, "logits/chosen": -2.8055660724639893, "logits/rejected": -2.7351882457733154, "logps/chosen": -521.1897583007812, "logps/rejected": -423.233642578125, "loss": 0.8425, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.4578535556793213, "rewards/margins": -0.01384312380105257, "rewards/rejected": -1.4440103769302368, "step": 930 }, { "epoch": 0.12, "learning_rate": 2.0223752151462995e-07, "logits/chosen": -2.878535747528076, "logits/rejected": -2.9410085678100586, "logps/chosen": -500.46954345703125, "logps/rejected": -525.8159790039062, "loss": 0.8939, "rewards/accuracies": 0.5625, "rewards/chosen": -1.3395569324493408, "rewards/margins": -0.052168406546115875, "rewards/rejected": -1.2873884439468384, "step": 940 }, { "epoch": 0.12, "learning_rate": 2.0438898450946643e-07, "logits/chosen": -2.9400551319122314, "logits/rejected": -2.7984728813171387, "logps/chosen": -509.8056640625, "logps/rejected": -401.0815734863281, "loss": 1.2411, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.656899094581604, "rewards/margins": -0.5038260221481323, "rewards/rejected": -1.1530729532241821, "step": 950 }, { "epoch": 0.12, "learning_rate": 2.065404475043029e-07, "logits/chosen": -3.0151658058166504, "logits/rejected": -2.800750255584717, "logps/chosen": -562.4000244140625, "logps/rejected": -383.0415344238281, "loss": 1.2029, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -1.6115776300430298, "rewards/margins": -0.5468304753303528, "rewards/rejected": -1.0647470951080322, "step": 960 }, { "epoch": 0.13, "learning_rate": 2.086919104991394e-07, "logits/chosen": -2.9518675804138184, "logits/rejected": -2.779400587081909, "logps/chosen": -485.1910095214844, "logps/rejected": -341.27947998046875, "loss": 1.0442, "rewards/accuracies": 0.3125, "rewards/chosen": -1.3870869874954224, "rewards/margins": -0.3973722457885742, "rewards/rejected": -0.989714503288269, "step": 970 }, { "epoch": 0.13, "learning_rate": 2.108433734939759e-07, "logits/chosen": -2.9250259399414062, "logits/rejected": -2.9033093452453613, "logps/chosen": -493.86322021484375, "logps/rejected": -417.23297119140625, "loss": 0.8666, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.1876720190048218, "rewards/margins": -0.022589247673749924, "rewards/rejected": -1.1650828123092651, "step": 980 }, { "epoch": 0.13, "learning_rate": 2.129948364888124e-07, "logits/chosen": -2.8862996101379395, "logits/rejected": -2.8070743083953857, "logps/chosen": -566.3331909179688, "logps/rejected": -393.439697265625, "loss": 1.123, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.6148746013641357, "rewards/margins": -0.43463772535324097, "rewards/rejected": -1.1802369356155396, "step": 990 }, { "epoch": 0.13, "learning_rate": 2.1514629948364886e-07, "logits/chosen": -2.870009183883667, "logits/rejected": -2.7119038105010986, "logps/chosen": -530.6444091796875, "logps/rejected": -384.41168212890625, "loss": 1.0169, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -1.453222632408142, "rewards/margins": -0.21160733699798584, "rewards/rejected": -1.2416154146194458, "step": 1000 }, { "epoch": 0.13, "eval_logits/chosen": -3.193068742752075, "eval_logits/rejected": -3.139143943786621, "eval_logps/chosen": -532.3322143554688, "eval_logps/rejected": -402.6611328125, "eval_loss": 0.6484649777412415, "eval_rewards/accuracies": 0.6144999861717224, "eval_rewards/chosen": -0.051246289163827896, "eval_rewards/margins": 0.12200205773115158, "eval_rewards/rejected": -0.17324835062026978, "eval_runtime": 278.7697, "eval_samples_per_second": 7.174, "eval_steps_per_second": 3.587, "step": 1000 }, { "epoch": 0.13, "learning_rate": 2.1729776247848537e-07, "logits/chosen": -2.816324234008789, "logits/rejected": -2.673035144805908, "logps/chosen": -578.6807861328125, "logps/rejected": -372.2198791503906, "loss": 1.0343, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.5731618404388428, "rewards/margins": -0.36017125844955444, "rewards/rejected": -1.2129905223846436, "step": 1010 }, { "epoch": 0.13, "learning_rate": 2.1944922547332184e-07, "logits/chosen": -2.782355785369873, "logits/rejected": -2.7203006744384766, "logps/chosen": -630.630126953125, "logps/rejected": -501.6015625, "loss": 1.102, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8362741470336914, "rewards/margins": -0.3561773896217346, "rewards/rejected": -1.4800965785980225, "step": 1020 }, { "epoch": 0.13, "learning_rate": 2.2160068846815832e-07, "logits/chosen": -2.8575661182403564, "logits/rejected": -2.7055656909942627, "logps/chosen": -589.990234375, "logps/rejected": -375.5095520019531, "loss": 1.0635, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.634260892868042, "rewards/margins": -0.41979464888572693, "rewards/rejected": -1.2144663333892822, "step": 1030 }, { "epoch": 0.13, "learning_rate": 2.2375215146299485e-07, "logits/chosen": -2.8353328704833984, "logits/rejected": -2.674062728881836, "logps/chosen": -553.508544921875, "logps/rejected": -407.17047119140625, "loss": 1.1393, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.6526243686676025, "rewards/margins": -0.4194316267967224, "rewards/rejected": -1.2331926822662354, "step": 1040 }, { "epoch": 0.14, "learning_rate": 2.2590361445783133e-07, "logits/chosen": -2.7971889972686768, "logits/rejected": -2.750368595123291, "logps/chosen": -561.7080078125, "logps/rejected": -382.54669189453125, "loss": 1.0857, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.5634406805038452, "rewards/margins": -0.3089677691459656, "rewards/rejected": -1.2544729709625244, "step": 1050 }, { "epoch": 0.14, "learning_rate": 2.280550774526678e-07, "logits/chosen": -2.868030309677124, "logits/rejected": -2.80322003364563, "logps/chosen": -526.7525634765625, "logps/rejected": -443.47845458984375, "loss": 0.9137, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.4918253421783447, "rewards/margins": -0.07286953926086426, "rewards/rejected": -1.4189560413360596, "step": 1060 }, { "epoch": 0.14, "learning_rate": 2.3020654044750428e-07, "logits/chosen": -2.9315104484558105, "logits/rejected": -2.8404319286346436, "logps/chosen": -614.5169067382812, "logps/rejected": -429.1937561035156, "loss": 1.1065, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.6181504726409912, "rewards/margins": -0.3785467743873596, "rewards/rejected": -1.2396037578582764, "step": 1070 }, { "epoch": 0.14, "learning_rate": 2.3235800344234078e-07, "logits/chosen": -2.8635237216949463, "logits/rejected": -2.7819266319274902, "logps/chosen": -559.33251953125, "logps/rejected": -447.62322998046875, "loss": 1.0314, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.6913516521453857, "rewards/margins": -0.31128990650177, "rewards/rejected": -1.3800619840621948, "step": 1080 }, { "epoch": 0.14, "learning_rate": 2.3450946643717728e-07, "logits/chosen": -2.917978286743164, "logits/rejected": -2.817905902862549, "logps/chosen": -573.0879516601562, "logps/rejected": -424.15826416015625, "loss": 1.081, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.548439621925354, "rewards/margins": -0.3142968714237213, "rewards/rejected": -1.234142780303955, "step": 1090 }, { "epoch": 0.14, "learning_rate": 2.3666092943201376e-07, "logits/chosen": -2.990692615509033, "logits/rejected": -2.796546459197998, "logps/chosen": -556.9318237304688, "logps/rejected": -372.1362609863281, "loss": 1.1195, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.7204296588897705, "rewards/margins": -0.47019535303115845, "rewards/rejected": -1.2502342462539673, "step": 1100 }, { "epoch": 0.14, "learning_rate": 2.3881239242685024e-07, "logits/chosen": -2.969381093978882, "logits/rejected": -2.813180923461914, "logps/chosen": -480.13006591796875, "logps/rejected": -329.70343017578125, "loss": 1.0251, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.4055486917495728, "rewards/margins": -0.28603026270866394, "rewards/rejected": -1.1195183992385864, "step": 1110 }, { "epoch": 0.14, "learning_rate": 2.4096385542168674e-07, "logits/chosen": -2.924518585205078, "logits/rejected": -2.795144557952881, "logps/chosen": -562.843505859375, "logps/rejected": -414.6380310058594, "loss": 0.9637, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.590654969215393, "rewards/margins": -0.2040623426437378, "rewards/rejected": -1.3865926265716553, "step": 1120 }, { "epoch": 0.15, "learning_rate": 2.4311531841652324e-07, "logits/chosen": -2.908942222595215, "logits/rejected": -2.772369861602783, "logps/chosen": -535.6682739257812, "logps/rejected": -374.5188293457031, "loss": 0.947, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.460593581199646, "rewards/margins": -0.24084782600402832, "rewards/rejected": -1.2197458744049072, "step": 1130 }, { "epoch": 0.15, "learning_rate": 2.452667814113597e-07, "logits/chosen": -2.8213319778442383, "logits/rejected": -2.774831771850586, "logps/chosen": -499.9840393066406, "logps/rejected": -412.185546875, "loss": 0.8533, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.3375402688980103, "rewards/margins": 0.01154423039406538, "rewards/rejected": -1.3490846157073975, "step": 1140 }, { "epoch": 0.15, "learning_rate": 2.474182444061962e-07, "logits/chosen": -2.860309600830078, "logits/rejected": -2.710993528366089, "logps/chosen": -530.9395751953125, "logps/rejected": -371.68658447265625, "loss": 1.0649, "rewards/accuracies": 0.375, "rewards/chosen": -1.6447010040283203, "rewards/margins": -0.43811988830566406, "rewards/rejected": -1.2065812349319458, "step": 1150 }, { "epoch": 0.15, "learning_rate": 2.495697074010327e-07, "logits/chosen": -2.9245567321777344, "logits/rejected": -2.8472609519958496, "logps/chosen": -486.03875732421875, "logps/rejected": -395.8489990234375, "loss": 0.9244, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.5255982875823975, "rewards/margins": -0.1432660073041916, "rewards/rejected": -1.3823322057724, "step": 1160 }, { "epoch": 0.15, "learning_rate": 2.5172117039586915e-07, "logits/chosen": -2.802548885345459, "logits/rejected": -2.720215320587158, "logps/chosen": -499.540283203125, "logps/rejected": -375.01190185546875, "loss": 0.8315, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.270132303237915, "rewards/margins": -0.014617949724197388, "rewards/rejected": -1.25551438331604, "step": 1170 }, { "epoch": 0.15, "learning_rate": 2.538726333907057e-07, "logits/chosen": -2.8183557987213135, "logits/rejected": -2.6663131713867188, "logps/chosen": -537.7388916015625, "logps/rejected": -356.41900634765625, "loss": 1.0758, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.5482803583145142, "rewards/margins": -0.3100317120552063, "rewards/rejected": -1.238248586654663, "step": 1180 }, { "epoch": 0.15, "learning_rate": 2.5602409638554215e-07, "logits/chosen": -2.8359200954437256, "logits/rejected": -2.7253754138946533, "logps/chosen": -530.1901245117188, "logps/rejected": -411.25946044921875, "loss": 0.9359, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.6378841400146484, "rewards/margins": -0.17405256628990173, "rewards/rejected": -1.4638315439224243, "step": 1190 }, { "epoch": 0.15, "learning_rate": 2.5817555938037866e-07, "logits/chosen": -2.7867913246154785, "logits/rejected": -2.773775577545166, "logps/chosen": -471.4696350097656, "logps/rejected": -457.1849670410156, "loss": 0.8443, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.3898470401763916, "rewards/margins": -0.014698025770485401, "rewards/rejected": -1.375149130821228, "step": 1200 }, { "epoch": 0.16, "learning_rate": 2.6032702237521516e-07, "logits/chosen": -2.833289623260498, "logits/rejected": -2.6802875995635986, "logps/chosen": -609.0410766601562, "logps/rejected": -455.87744140625, "loss": 0.9583, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.6370375156402588, "rewards/margins": -0.1687018722295761, "rewards/rejected": -1.4683353900909424, "step": 1210 }, { "epoch": 0.16, "learning_rate": 2.624784853700516e-07, "logits/chosen": -2.926485776901245, "logits/rejected": -2.799776077270508, "logps/chosen": -544.6002197265625, "logps/rejected": -423.7403259277344, "loss": 0.9995, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -1.6583139896392822, "rewards/margins": -0.2395690232515335, "rewards/rejected": -1.4187448024749756, "step": 1220 }, { "epoch": 0.16, "learning_rate": 2.646299483648881e-07, "logits/chosen": -2.726750135421753, "logits/rejected": -2.62660551071167, "logps/chosen": -486.42388916015625, "logps/rejected": -356.01287841796875, "loss": 0.8193, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3647644519805908, "rewards/margins": 0.030314454808831215, "rewards/rejected": -1.3950790166854858, "step": 1230 }, { "epoch": 0.16, "learning_rate": 2.667814113597246e-07, "logits/chosen": -2.86924147605896, "logits/rejected": -2.7250566482543945, "logps/chosen": -613.7565307617188, "logps/rejected": -483.36834716796875, "loss": 0.9921, "rewards/accuracies": 0.4375, "rewards/chosen": -1.7570841312408447, "rewards/margins": -0.23914757370948792, "rewards/rejected": -1.5179364681243896, "step": 1240 }, { "epoch": 0.16, "learning_rate": 2.6893287435456107e-07, "logits/chosen": -2.858074903488159, "logits/rejected": -2.751110792160034, "logps/chosen": -546.7550048828125, "logps/rejected": -345.6010437011719, "loss": 1.0498, "rewards/accuracies": 0.3125, "rewards/chosen": -1.6797993183135986, "rewards/margins": -0.34665945172309875, "rewards/rejected": -1.3331396579742432, "step": 1250 }, { "epoch": 0.16, "learning_rate": 2.7108433734939757e-07, "logits/chosen": -2.8601012229919434, "logits/rejected": -2.746246576309204, "logps/chosen": -473.544677734375, "logps/rejected": -324.0445251464844, "loss": 0.9682, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.3674628734588623, "rewards/margins": -0.2602311968803406, "rewards/rejected": -1.1072317361831665, "step": 1260 }, { "epoch": 0.16, "learning_rate": 2.7323580034423407e-07, "logits/chosen": -2.8006560802459717, "logits/rejected": -2.7373969554901123, "logps/chosen": -587.0115356445312, "logps/rejected": -467.84149169921875, "loss": 0.9153, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.7156518697738647, "rewards/margins": -0.06155027076601982, "rewards/rejected": -1.6541016101837158, "step": 1270 }, { "epoch": 0.17, "learning_rate": 2.753872633390705e-07, "logits/chosen": -2.8365156650543213, "logits/rejected": -2.752209186553955, "logps/chosen": -479.68438720703125, "logps/rejected": -396.2503356933594, "loss": 0.9945, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.6339136362075806, "rewards/margins": -0.16748693585395813, "rewards/rejected": -1.4664266109466553, "step": 1280 }, { "epoch": 0.17, "learning_rate": 2.775387263339071e-07, "logits/chosen": -2.8361973762512207, "logits/rejected": -2.6339025497436523, "logps/chosen": -576.6954345703125, "logps/rejected": -353.8041076660156, "loss": 1.2388, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.857670783996582, "rewards/margins": -0.5842087268829346, "rewards/rejected": -1.2734620571136475, "step": 1290 }, { "epoch": 0.17, "learning_rate": 2.7969018932874353e-07, "logits/chosen": -2.698568820953369, "logits/rejected": -2.579517126083374, "logps/chosen": -609.9064331054688, "logps/rejected": -412.7127990722656, "loss": 1.0042, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.8061020374298096, "rewards/margins": -0.23068761825561523, "rewards/rejected": -1.5754145383834839, "step": 1300 }, { "epoch": 0.17, "learning_rate": 2.8184165232358003e-07, "logits/chosen": -2.7696101665496826, "logits/rejected": -2.76250958442688, "logps/chosen": -570.9863891601562, "logps/rejected": -492.69921875, "loss": 0.9278, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.7438056468963623, "rewards/margins": -0.12467324733734131, "rewards/rejected": -1.6191326379776, "step": 1310 }, { "epoch": 0.17, "learning_rate": 2.8399311531841653e-07, "logits/chosen": -2.8643393516540527, "logits/rejected": -2.773003339767456, "logps/chosen": -595.0081787109375, "logps/rejected": -505.0624084472656, "loss": 0.9184, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.7745182514190674, "rewards/margins": -0.08360988646745682, "rewards/rejected": -1.690908670425415, "step": 1320 }, { "epoch": 0.17, "learning_rate": 2.86144578313253e-07, "logits/chosen": -2.9062633514404297, "logits/rejected": -2.7844958305358887, "logps/chosen": -525.649658203125, "logps/rejected": -399.81182861328125, "loss": 0.9748, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.6652791500091553, "rewards/margins": -0.2477884292602539, "rewards/rejected": -1.4174907207489014, "step": 1330 }, { "epoch": 0.17, "learning_rate": 2.882960413080895e-07, "logits/chosen": -2.8574249744415283, "logits/rejected": -2.7487900257110596, "logps/chosen": -582.1544799804688, "logps/rejected": -418.78631591796875, "loss": 1.01, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.7914917469024658, "rewards/margins": -0.26422441005706787, "rewards/rejected": -1.527267336845398, "step": 1340 }, { "epoch": 0.17, "learning_rate": 2.90447504302926e-07, "logits/chosen": -2.866994619369507, "logits/rejected": -2.6418356895446777, "logps/chosen": -657.2451782226562, "logps/rejected": -394.9109191894531, "loss": 1.2131, "rewards/accuracies": 0.3125, "rewards/chosen": -2.0634543895721436, "rewards/margins": -0.5387575626373291, "rewards/rejected": -1.5246970653533936, "step": 1350 }, { "epoch": 0.18, "learning_rate": 2.9259896729776244e-07, "logits/chosen": -2.8091349601745605, "logits/rejected": -2.702402114868164, "logps/chosen": -516.1431884765625, "logps/rejected": -394.109619140625, "loss": 0.7877, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.451259732246399, "rewards/margins": 0.16100528836250305, "rewards/rejected": -1.6122649908065796, "step": 1360 }, { "epoch": 0.18, "learning_rate": 2.9475043029259894e-07, "logits/chosen": -2.8997249603271484, "logits/rejected": -2.8429055213928223, "logps/chosen": -619.3841552734375, "logps/rejected": -483.7142639160156, "loss": 0.9176, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.8392574787139893, "rewards/margins": -0.03846115618944168, "rewards/rejected": -1.8007965087890625, "step": 1370 }, { "epoch": 0.18, "learning_rate": 2.9690189328743545e-07, "logits/chosen": -2.743244171142578, "logits/rejected": -2.6387548446655273, "logps/chosen": -591.9429931640625, "logps/rejected": -464.12823486328125, "loss": 0.9394, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8706462383270264, "rewards/margins": -0.10570874065160751, "rewards/rejected": -1.7649376392364502, "step": 1380 }, { "epoch": 0.18, "learning_rate": 2.990533562822719e-07, "logits/chosen": -2.8150315284729004, "logits/rejected": -2.785367488861084, "logps/chosen": -514.8580322265625, "logps/rejected": -399.76019287109375, "loss": 0.9027, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.682930588722229, "rewards/margins": -0.12662209570407867, "rewards/rejected": -1.556308627128601, "step": 1390 }, { "epoch": 0.18, "learning_rate": 3.0120481927710845e-07, "logits/chosen": -2.8010220527648926, "logits/rejected": -2.7525031566619873, "logps/chosen": -451.4773864746094, "logps/rejected": -366.0665588378906, "loss": 0.9207, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.6411329507827759, "rewards/margins": -0.15927720069885254, "rewards/rejected": -1.4818556308746338, "step": 1400 }, { "epoch": 0.18, "learning_rate": 3.0335628227194495e-07, "logits/chosen": -2.8725876808166504, "logits/rejected": -2.760266065597534, "logps/chosen": -624.1705932617188, "logps/rejected": -452.8232421875, "loss": 1.1346, "rewards/accuracies": 0.375, "rewards/chosen": -1.9674237966537476, "rewards/margins": -0.463092178106308, "rewards/rejected": -1.5043315887451172, "step": 1410 }, { "epoch": 0.18, "learning_rate": 3.055077452667814e-07, "logits/chosen": -2.8072211742401123, "logits/rejected": -2.7079505920410156, "logps/chosen": -568.61083984375, "logps/rejected": -421.95831298828125, "loss": 0.8825, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.627571702003479, "rewards/margins": 0.10494814068078995, "rewards/rejected": -1.732519507408142, "step": 1420 }, { "epoch": 0.18, "learning_rate": 3.076592082616179e-07, "logits/chosen": -2.8934836387634277, "logits/rejected": -2.669431447982788, "logps/chosen": -634.5987548828125, "logps/rejected": -440.66229248046875, "loss": 1.0701, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.9822676181793213, "rewards/margins": -0.4078945517539978, "rewards/rejected": -1.5743728876113892, "step": 1430 }, { "epoch": 0.19, "learning_rate": 3.0981067125645436e-07, "logits/chosen": -2.8552498817443848, "logits/rejected": -2.799323320388794, "logps/chosen": -617.8319091796875, "logps/rejected": -508.6891174316406, "loss": 0.913, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8293393850326538, "rewards/margins": -0.0343785397708416, "rewards/rejected": -1.7949607372283936, "step": 1440 }, { "epoch": 0.19, "learning_rate": 3.1196213425129086e-07, "logits/chosen": -2.8801746368408203, "logits/rejected": -2.822490692138672, "logps/chosen": -543.0889892578125, "logps/rejected": -413.84619140625, "loss": 0.9435, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.7530921697616577, "rewards/margins": -0.10654345899820328, "rewards/rejected": -1.6465486288070679, "step": 1450 }, { "epoch": 0.19, "learning_rate": 3.1411359724612736e-07, "logits/chosen": -2.894483804702759, "logits/rejected": -2.7673373222351074, "logps/chosen": -489.6849670410156, "logps/rejected": -341.5064392089844, "loss": 0.9708, "rewards/accuracies": 0.4375, "rewards/chosen": -1.6869573593139648, "rewards/margins": -0.2533877491950989, "rewards/rejected": -1.4335696697235107, "step": 1460 }, { "epoch": 0.19, "learning_rate": 3.162650602409638e-07, "logits/chosen": -2.9046242237091064, "logits/rejected": -2.8491930961608887, "logps/chosen": -602.3531494140625, "logps/rejected": -456.58905029296875, "loss": 0.9754, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.8623802661895752, "rewards/margins": -0.07065002620220184, "rewards/rejected": -1.791730523109436, "step": 1470 }, { "epoch": 0.19, "learning_rate": 3.184165232358003e-07, "logits/chosen": -2.8815693855285645, "logits/rejected": -2.7625327110290527, "logps/chosen": -631.3030395507812, "logps/rejected": -492.72271728515625, "loss": 1.0084, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -2.064192295074463, "rewards/margins": -0.2469337284564972, "rewards/rejected": -1.8172584772109985, "step": 1480 }, { "epoch": 0.19, "learning_rate": 3.205679862306368e-07, "logits/chosen": -2.724708080291748, "logits/rejected": -2.636478900909424, "logps/chosen": -521.5567626953125, "logps/rejected": -399.39141845703125, "loss": 0.9164, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.6901798248291016, "rewards/margins": -0.14940288662910461, "rewards/rejected": -1.5407767295837402, "step": 1490 }, { "epoch": 0.19, "learning_rate": 3.2271944922547327e-07, "logits/chosen": -2.9676778316497803, "logits/rejected": -2.746744155883789, "logps/chosen": -638.0638427734375, "logps/rejected": -435.8655700683594, "loss": 0.9433, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.8821916580200195, "rewards/margins": -0.12366565316915512, "rewards/rejected": -1.7585258483886719, "step": 1500 }, { "epoch": 0.19, "learning_rate": 3.248709122203098e-07, "logits/chosen": -2.8734612464904785, "logits/rejected": -2.8474652767181396, "logps/chosen": -596.6041259765625, "logps/rejected": -500.2059631347656, "loss": 0.8397, "rewards/accuracies": 0.5, "rewards/chosen": -1.9216177463531494, "rewards/margins": 0.1691780835390091, "rewards/rejected": -2.0907959938049316, "step": 1510 }, { "epoch": 0.2, "learning_rate": 3.2702237521514633e-07, "logits/chosen": -2.91652250289917, "logits/rejected": -2.7264583110809326, "logps/chosen": -584.2235107421875, "logps/rejected": -348.86224365234375, "loss": 1.1271, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -1.9276378154754639, "rewards/margins": -0.473505437374115, "rewards/rejected": -1.454132318496704, "step": 1520 }, { "epoch": 0.2, "learning_rate": 3.291738382099828e-07, "logits/chosen": -2.9376606941223145, "logits/rejected": -2.797818422317505, "logps/chosen": -559.4569702148438, "logps/rejected": -415.83685302734375, "loss": 1.1283, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -2.057471990585327, "rewards/margins": -0.4784265160560608, "rewards/rejected": -1.5790456533432007, "step": 1530 }, { "epoch": 0.2, "learning_rate": 3.313253012048193e-07, "logits/chosen": -2.8855514526367188, "logits/rejected": -2.81734037399292, "logps/chosen": -525.7247924804688, "logps/rejected": -432.6988220214844, "loss": 0.8742, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.8276363611221313, "rewards/margins": -0.1263895332813263, "rewards/rejected": -1.7012468576431274, "step": 1540 }, { "epoch": 0.2, "learning_rate": 3.334767641996558e-07, "logits/chosen": -2.827314853668213, "logits/rejected": -2.716991901397705, "logps/chosen": -521.96875, "logps/rejected": -378.16900634765625, "loss": 0.8515, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.7704623937606812, "rewards/margins": 0.018947627395391464, "rewards/rejected": -1.7894099950790405, "step": 1550 }, { "epoch": 0.2, "learning_rate": 3.3562822719449223e-07, "logits/chosen": -2.792506694793701, "logits/rejected": -2.6465678215026855, "logps/chosen": -521.1290283203125, "logps/rejected": -375.52093505859375, "loss": 0.9606, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.7951921224594116, "rewards/margins": -0.1618775725364685, "rewards/rejected": -1.6333144903182983, "step": 1560 }, { "epoch": 0.2, "learning_rate": 3.3777969018932874e-07, "logits/chosen": -2.8518309593200684, "logits/rejected": -2.7925174236297607, "logps/chosen": -466.81982421875, "logps/rejected": -386.9195861816406, "loss": 0.9328, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.8160755634307861, "rewards/margins": -0.16884271800518036, "rewards/rejected": -1.6472326517105103, "step": 1570 }, { "epoch": 0.2, "learning_rate": 3.399311531841652e-07, "logits/chosen": -2.807286024093628, "logits/rejected": -2.7016749382019043, "logps/chosen": -500.60772705078125, "logps/rejected": -389.976318359375, "loss": 0.9374, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.787689208984375, "rewards/margins": -0.10646989196538925, "rewards/rejected": -1.6812193393707275, "step": 1580 }, { "epoch": 0.21, "learning_rate": 3.420826161790017e-07, "logits/chosen": -2.7797188758850098, "logits/rejected": -2.7222743034362793, "logps/chosen": -518.5906982421875, "logps/rejected": -403.8038330078125, "loss": 0.832, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.7311385869979858, "rewards/margins": 0.1203121691942215, "rewards/rejected": -1.8514509201049805, "step": 1590 }, { "epoch": 0.21, "learning_rate": 3.442340791738382e-07, "logits/chosen": -2.9623873233795166, "logits/rejected": -2.812945604324341, "logps/chosen": -581.7445068359375, "logps/rejected": -451.45770263671875, "loss": 0.8323, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.9025428295135498, "rewards/margins": 0.09064842760562897, "rewards/rejected": -1.993191123008728, "step": 1600 }, { "epoch": 0.21, "learning_rate": 3.4638554216867464e-07, "logits/chosen": -2.9188990592956543, "logits/rejected": -2.755385637283325, "logps/chosen": -542.2848510742188, "logps/rejected": -409.06817626953125, "loss": 0.9303, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.9689276218414307, "rewards/margins": -0.1666349172592163, "rewards/rejected": -1.8022925853729248, "step": 1610 }, { "epoch": 0.21, "learning_rate": 3.4853700516351115e-07, "logits/chosen": -2.9925742149353027, "logits/rejected": -2.927928924560547, "logps/chosen": -569.5219116210938, "logps/rejected": -497.70501708984375, "loss": 0.9886, "rewards/accuracies": 0.4375, "rewards/chosen": -1.9554630517959595, "rewards/margins": -0.23583821952342987, "rewards/rejected": -1.7196247577667236, "step": 1620 }, { "epoch": 0.21, "learning_rate": 3.506884681583477e-07, "logits/chosen": -2.9122562408447266, "logits/rejected": -2.7032299041748047, "logps/chosen": -573.4486694335938, "logps/rejected": -434.9693298339844, "loss": 0.8786, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8113887310028076, "rewards/margins": -0.009095591492950916, "rewards/rejected": -1.802293062210083, "step": 1630 }, { "epoch": 0.21, "learning_rate": 3.5283993115318415e-07, "logits/chosen": -2.8657727241516113, "logits/rejected": -2.7460217475891113, "logps/chosen": -561.1114501953125, "logps/rejected": -473.70977783203125, "loss": 0.9135, "rewards/accuracies": 0.5, "rewards/chosen": -2.1179757118225098, "rewards/margins": -0.09567234665155411, "rewards/rejected": -2.022303581237793, "step": 1640 }, { "epoch": 0.21, "learning_rate": 3.5499139414802065e-07, "logits/chosen": -2.757032871246338, "logits/rejected": -2.8502440452575684, "logps/chosen": -490.92724609375, "logps/rejected": -485.83868408203125, "loss": 0.7432, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.6553966999053955, "rewards/margins": 0.2657851576805115, "rewards/rejected": -1.9211819171905518, "step": 1650 }, { "epoch": 0.21, "learning_rate": 3.5714285714285716e-07, "logits/chosen": -2.770601511001587, "logits/rejected": -2.6537094116210938, "logps/chosen": -653.2120361328125, "logps/rejected": -473.8134765625, "loss": 1.0249, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -2.240391254425049, "rewards/margins": -0.19637632369995117, "rewards/rejected": -2.0440146923065186, "step": 1660 }, { "epoch": 0.22, "learning_rate": 3.592943201376936e-07, "logits/chosen": -2.9291281700134277, "logits/rejected": -2.827333450317383, "logps/chosen": -592.3005981445312, "logps/rejected": -407.09912109375, "loss": 1.0135, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -2.045004367828369, "rewards/margins": -0.2607182562351227, "rewards/rejected": -1.7842861413955688, "step": 1670 }, { "epoch": 0.22, "learning_rate": 3.614457831325301e-07, "logits/chosen": -2.836266040802002, "logits/rejected": -2.7307322025299072, "logps/chosen": -548.6397094726562, "logps/rejected": -435.41009521484375, "loss": 0.905, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -1.9539844989776611, "rewards/margins": -0.05313248559832573, "rewards/rejected": -1.900851845741272, "step": 1680 }, { "epoch": 0.22, "learning_rate": 3.6359724612736656e-07, "logits/chosen": -2.7777936458587646, "logits/rejected": -2.732952833175659, "logps/chosen": -517.3648681640625, "logps/rejected": -445.37347412109375, "loss": 0.8978, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.98415207862854, "rewards/margins": -0.09530344605445862, "rewards/rejected": -1.8888486623764038, "step": 1690 }, { "epoch": 0.22, "learning_rate": 3.6574870912220306e-07, "logits/chosen": -2.89503812789917, "logits/rejected": -2.805832624435425, "logps/chosen": -624.8922729492188, "logps/rejected": -407.29376220703125, "loss": 1.1006, "rewards/accuracies": 0.375, "rewards/chosen": -2.0424647331237793, "rewards/margins": -0.46213895082473755, "rewards/rejected": -1.5803253650665283, "step": 1700 }, { "epoch": 0.22, "learning_rate": 3.6790017211703957e-07, "logits/chosen": -2.8190605640411377, "logits/rejected": -2.7707107067108154, "logps/chosen": -546.0337524414062, "logps/rejected": -454.19873046875, "loss": 0.7806, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.6919224262237549, "rewards/margins": 0.24006180465221405, "rewards/rejected": -1.9319841861724854, "step": 1710 }, { "epoch": 0.22, "learning_rate": 3.70051635111876e-07, "logits/chosen": -2.8438491821289062, "logits/rejected": -2.6750175952911377, "logps/chosen": -510.487548828125, "logps/rejected": -366.38702392578125, "loss": 0.8956, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.7860532999038696, "rewards/margins": 0.05922885611653328, "rewards/rejected": -1.8452821969985962, "step": 1720 }, { "epoch": 0.22, "learning_rate": 3.722030981067125e-07, "logits/chosen": -2.7036309242248535, "logits/rejected": -2.672974109649658, "logps/chosen": -507.2272033691406, "logps/rejected": -425.42584228515625, "loss": 0.8088, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.6897557973861694, "rewards/margins": 0.09269414842128754, "rewards/rejected": -1.7824499607086182, "step": 1730 }, { "epoch": 0.22, "learning_rate": 3.743545611015491e-07, "logits/chosen": -2.8550496101379395, "logits/rejected": -2.7029240131378174, "logps/chosen": -519.2011108398438, "logps/rejected": -383.98065185546875, "loss": 0.9102, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.7993066310882568, "rewards/margins": -0.15621671080589294, "rewards/rejected": -1.6430898904800415, "step": 1740 }, { "epoch": 0.23, "learning_rate": 3.765060240963855e-07, "logits/chosen": -2.971200466156006, "logits/rejected": -2.7857043743133545, "logps/chosen": -542.3231811523438, "logps/rejected": -386.90899658203125, "loss": 0.846, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8112096786499023, "rewards/margins": -0.07752609252929688, "rewards/rejected": -1.7336835861206055, "step": 1750 }, { "epoch": 0.23, "learning_rate": 3.7865748709122203e-07, "logits/chosen": -2.8518710136413574, "logits/rejected": -2.7485268115997314, "logps/chosen": -602.1658325195312, "logps/rejected": -440.986328125, "loss": 0.8837, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.7850868701934814, "rewards/margins": -0.021163295954465866, "rewards/rejected": -1.7639236450195312, "step": 1760 }, { "epoch": 0.23, "learning_rate": 3.8080895008605853e-07, "logits/chosen": -2.876072406768799, "logits/rejected": -2.7509572505950928, "logps/chosen": -597.409912109375, "logps/rejected": -494.34918212890625, "loss": 0.8453, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.803466796875, "rewards/margins": 0.0800083726644516, "rewards/rejected": -1.8834750652313232, "step": 1770 }, { "epoch": 0.23, "learning_rate": 3.82960413080895e-07, "logits/chosen": -2.9353184700012207, "logits/rejected": -2.7781429290771484, "logps/chosen": -636.5653076171875, "logps/rejected": -448.9288024902344, "loss": 1.0073, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.077251434326172, "rewards/margins": -0.26921552419662476, "rewards/rejected": -1.8080356121063232, "step": 1780 }, { "epoch": 0.23, "learning_rate": 3.851118760757315e-07, "logits/chosen": -2.841559886932373, "logits/rejected": -2.6938087940216064, "logps/chosen": -536.5172729492188, "logps/rejected": -416.68572998046875, "loss": 0.9396, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.8948417901992798, "rewards/margins": -0.0775873064994812, "rewards/rejected": -1.8172543048858643, "step": 1790 }, { "epoch": 0.23, "learning_rate": 3.87263339070568e-07, "logits/chosen": -2.8506624698638916, "logits/rejected": -2.742614507675171, "logps/chosen": -539.6422119140625, "logps/rejected": -459.68243408203125, "loss": 0.8224, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8882976770401, "rewards/margins": 0.028648024424910545, "rewards/rejected": -1.9169456958770752, "step": 1800 }, { "epoch": 0.23, "learning_rate": 3.8941480206540444e-07, "logits/chosen": -2.717916965484619, "logits/rejected": -2.5655388832092285, "logps/chosen": -568.6392211914062, "logps/rejected": -444.94549560546875, "loss": 0.8631, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.002718448638916, "rewards/margins": 0.06587555259466171, "rewards/rejected": -2.0685935020446777, "step": 1810 }, { "epoch": 0.23, "learning_rate": 3.9156626506024094e-07, "logits/chosen": -2.7946579456329346, "logits/rejected": -2.7098052501678467, "logps/chosen": -565.1211547851562, "logps/rejected": -423.8689880371094, "loss": 0.8538, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.930731177330017, "rewards/margins": 0.0008298560860566795, "rewards/rejected": -1.9315611124038696, "step": 1820 }, { "epoch": 0.24, "learning_rate": 3.937177280550774e-07, "logits/chosen": -2.8048808574676514, "logits/rejected": -2.701763153076172, "logps/chosen": -557.1632080078125, "logps/rejected": -399.2131042480469, "loss": 0.7885, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.8196334838867188, "rewards/margins": 0.20004236698150635, "rewards/rejected": -2.0196757316589355, "step": 1830 }, { "epoch": 0.24, "learning_rate": 3.958691910499139e-07, "logits/chosen": -2.7799479961395264, "logits/rejected": -2.7087819576263428, "logps/chosen": -534.8798217773438, "logps/rejected": -422.2250061035156, "loss": 0.8502, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.8414695262908936, "rewards/margins": 0.08200196921825409, "rewards/rejected": -1.9234718084335327, "step": 1840 }, { "epoch": 0.24, "learning_rate": 3.9802065404475045e-07, "logits/chosen": -2.976451873779297, "logits/rejected": -2.790057897567749, "logps/chosen": -597.7567138671875, "logps/rejected": -414.6481018066406, "loss": 0.7673, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.7279897928237915, "rewards/margins": 0.11377272754907608, "rewards/rejected": -1.8417625427246094, "step": 1850 }, { "epoch": 0.24, "learning_rate": 4.001721170395869e-07, "logits/chosen": -2.8339648246765137, "logits/rejected": -2.8079586029052734, "logps/chosen": -525.3915405273438, "logps/rejected": -471.70654296875, "loss": 0.6796, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6978451013565063, "rewards/margins": 0.3546532690525055, "rewards/rejected": -2.0524983406066895, "step": 1860 }, { "epoch": 0.24, "learning_rate": 4.023235800344234e-07, "logits/chosen": -2.947197437286377, "logits/rejected": -2.7663753032684326, "logps/chosen": -674.8883666992188, "logps/rejected": -462.217529296875, "loss": 0.7678, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.0073087215423584, "rewards/margins": 0.18922948837280273, "rewards/rejected": -2.1965384483337402, "step": 1870 }, { "epoch": 0.24, "learning_rate": 4.044750430292599e-07, "logits/chosen": -2.9186415672302246, "logits/rejected": -2.7127037048339844, "logps/chosen": -584.491455078125, "logps/rejected": -417.11175537109375, "loss": 0.9706, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -2.0534462928771973, "rewards/margins": -0.1830880343914032, "rewards/rejected": -1.8703587055206299, "step": 1880 }, { "epoch": 0.24, "learning_rate": 4.0662650602409635e-07, "logits/chosen": -2.724447250366211, "logits/rejected": -2.7510530948638916, "logps/chosen": -461.91131591796875, "logps/rejected": -467.2215881347656, "loss": 0.7367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.589511513710022, "rewards/margins": 0.2833203077316284, "rewards/rejected": -1.8728317022323608, "step": 1890 }, { "epoch": 0.25, "learning_rate": 4.0877796901893286e-07, "logits/chosen": -2.6461379528045654, "logits/rejected": -2.630333423614502, "logps/chosen": -528.1666870117188, "logps/rejected": -441.7476501464844, "loss": 0.925, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.1651337146759033, "rewards/margins": -0.04461859539151192, "rewards/rejected": -2.1205148696899414, "step": 1900 }, { "epoch": 0.25, "learning_rate": 4.1092943201376936e-07, "logits/chosen": -2.748358964920044, "logits/rejected": -2.5968236923217773, "logps/chosen": -570.5778198242188, "logps/rejected": -413.67962646484375, "loss": 0.9379, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -2.0689868927001953, "rewards/margins": -0.16650693118572235, "rewards/rejected": -1.902479887008667, "step": 1910 }, { "epoch": 0.25, "learning_rate": 4.130808950086058e-07, "logits/chosen": -2.765955686569214, "logits/rejected": -2.6952526569366455, "logps/chosen": -533.554443359375, "logps/rejected": -433.8186950683594, "loss": 0.7349, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.7326622009277344, "rewards/margins": 0.18850912153720856, "rewards/rejected": -1.9211714267730713, "step": 1920 }, { "epoch": 0.25, "learning_rate": 4.152323580034423e-07, "logits/chosen": -2.8519043922424316, "logits/rejected": -2.7960617542266846, "logps/chosen": -603.3978271484375, "logps/rejected": -482.07196044921875, "loss": 0.7952, "rewards/accuracies": 0.625, "rewards/chosen": -1.8095966577529907, "rewards/margins": 0.3029947876930237, "rewards/rejected": -2.112591505050659, "step": 1930 }, { "epoch": 0.25, "learning_rate": 4.173838209982788e-07, "logits/chosen": -2.8676059246063232, "logits/rejected": -2.785088062286377, "logps/chosen": -600.6505126953125, "logps/rejected": -465.03741455078125, "loss": 0.7382, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.013301372528076, "rewards/margins": 0.2241552323102951, "rewards/rejected": -2.2374565601348877, "step": 1940 }, { "epoch": 0.25, "learning_rate": 4.1953528399311527e-07, "logits/chosen": -2.8872151374816895, "logits/rejected": -2.81948184967041, "logps/chosen": -478.6048889160156, "logps/rejected": -398.9527282714844, "loss": 0.8719, "rewards/accuracies": 0.4375, "rewards/chosen": -1.9965078830718994, "rewards/margins": -0.0012805372243747115, "rewards/rejected": -1.9952274560928345, "step": 1950 }, { "epoch": 0.25, "learning_rate": 4.216867469879518e-07, "logits/chosen": -2.7885420322418213, "logits/rejected": -2.7978386878967285, "logps/chosen": -534.6414184570312, "logps/rejected": -510.53326416015625, "loss": 0.7657, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9214937686920166, "rewards/margins": 0.2959798276424408, "rewards/rejected": -2.2174737453460693, "step": 1960 }, { "epoch": 0.25, "learning_rate": 4.2383820998278827e-07, "logits/chosen": -2.9018187522888184, "logits/rejected": -2.72564959526062, "logps/chosen": -652.3592529296875, "logps/rejected": -494.27099609375, "loss": 0.9238, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.252997636795044, "rewards/margins": -0.10669213533401489, "rewards/rejected": -2.1463053226470947, "step": 1970 }, { "epoch": 0.26, "learning_rate": 4.259896729776248e-07, "logits/chosen": -2.8923745155334473, "logits/rejected": -2.6882662773132324, "logps/chosen": -568.8607177734375, "logps/rejected": -350.9393615722656, "loss": 0.8917, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.9816792011260986, "rewards/margins": -0.018235957249999046, "rewards/rejected": -1.9634431600570679, "step": 1980 }, { "epoch": 0.26, "learning_rate": 4.281411359724613e-07, "logits/chosen": -2.8289968967437744, "logits/rejected": -2.693525552749634, "logps/chosen": -497.8589782714844, "logps/rejected": -358.84161376953125, "loss": 0.7109, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.7408955097198486, "rewards/margins": 0.2985888719558716, "rewards/rejected": -2.0394845008850098, "step": 1990 }, { "epoch": 0.26, "learning_rate": 4.3029259896729773e-07, "logits/chosen": -2.7143149375915527, "logits/rejected": -2.686941623687744, "logps/chosen": -552.3225708007812, "logps/rejected": -433.88165283203125, "loss": 0.9048, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.869206428527832, "rewards/margins": -0.01314872968941927, "rewards/rejected": -1.8560574054718018, "step": 2000 }, { "epoch": 0.26, "eval_logits/chosen": -3.1417076587677, "eval_logits/rejected": -3.0870752334594727, "eval_logps/chosen": -537.7091674804688, "eval_logps/rejected": -411.7990417480469, "eval_loss": 0.626380980014801, "eval_rewards/accuracies": 0.6324999928474426, "eval_rewards/chosen": -0.5889479517936707, "eval_rewards/margins": 0.4980906844139099, "eval_rewards/rejected": -1.0870387554168701, "eval_runtime": 279.2601, "eval_samples_per_second": 7.162, "eval_steps_per_second": 3.581, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.3244406196213423e-07, "logits/chosen": -2.7796380519866943, "logits/rejected": -2.751617908477783, "logps/chosen": -472.86907958984375, "logps/rejected": -417.7991638183594, "loss": 0.8242, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.8358080387115479, "rewards/margins": 0.1358126699924469, "rewards/rejected": -1.9716205596923828, "step": 2010 }, { "epoch": 0.26, "learning_rate": 4.3459552495697073e-07, "logits/chosen": -2.7541327476501465, "logits/rejected": -2.658782482147217, "logps/chosen": -539.7421875, "logps/rejected": -430.4676818847656, "loss": 0.6431, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.8655827045440674, "rewards/margins": 0.4940468370914459, "rewards/rejected": -2.3596298694610596, "step": 2020 }, { "epoch": 0.26, "learning_rate": 4.367469879518072e-07, "logits/chosen": -2.912479877471924, "logits/rejected": -2.825775384902954, "logps/chosen": -484.981689453125, "logps/rejected": -397.9569091796875, "loss": 0.8009, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.9291341304779053, "rewards/margins": 0.08383835852146149, "rewards/rejected": -2.012972354888916, "step": 2030 }, { "epoch": 0.26, "learning_rate": 4.388984509466437e-07, "logits/chosen": -2.7078006267547607, "logits/rejected": -2.6392722129821777, "logps/chosen": -537.9918823242188, "logps/rejected": -422.5455017089844, "loss": 0.8754, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.9987589120864868, "rewards/margins": 0.20297488570213318, "rewards/rejected": -2.2017340660095215, "step": 2040 }, { "epoch": 0.26, "learning_rate": 4.410499139414802e-07, "logits/chosen": -2.823462724685669, "logits/rejected": -2.73626971244812, "logps/chosen": -541.2608032226562, "logps/rejected": -396.48809814453125, "loss": 0.8174, "rewards/accuracies": 0.625, "rewards/chosen": -2.1588289737701416, "rewards/margins": 0.11597499996423721, "rewards/rejected": -2.27480411529541, "step": 2050 }, { "epoch": 0.27, "learning_rate": 4.4320137693631664e-07, "logits/chosen": -2.995511054992676, "logits/rejected": -2.8413054943084717, "logps/chosen": -536.1195678710938, "logps/rejected": -325.72589111328125, "loss": 0.8221, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.0978875160217285, "rewards/margins": 0.05977402999997139, "rewards/rejected": -2.1576614379882812, "step": 2060 }, { "epoch": 0.27, "learning_rate": 4.453528399311532e-07, "logits/chosen": -2.906083345413208, "logits/rejected": -2.8683700561523438, "logps/chosen": -545.9041748046875, "logps/rejected": -396.8572692871094, "loss": 0.7732, "rewards/accuracies": 0.5625, "rewards/chosen": -1.9222491979599, "rewards/margins": 0.3301015794277191, "rewards/rejected": -2.2523508071899414, "step": 2070 }, { "epoch": 0.27, "learning_rate": 4.475043029259897e-07, "logits/chosen": -2.913356304168701, "logits/rejected": -2.7371296882629395, "logps/chosen": -500.8119201660156, "logps/rejected": -332.6963195800781, "loss": 0.831, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.8240810632705688, "rewards/margins": 0.10617981106042862, "rewards/rejected": -1.9302608966827393, "step": 2080 }, { "epoch": 0.27, "learning_rate": 4.4965576592082615e-07, "logits/chosen": -2.7469863891601562, "logits/rejected": -2.690962791442871, "logps/chosen": -506.85162353515625, "logps/rejected": -378.12969970703125, "loss": 0.9069, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.9376070499420166, "rewards/margins": -0.03831708803772926, "rewards/rejected": -1.8992897272109985, "step": 2090 }, { "epoch": 0.27, "learning_rate": 4.5180722891566265e-07, "logits/chosen": -2.9235877990722656, "logits/rejected": -2.714040756225586, "logps/chosen": -648.655517578125, "logps/rejected": -481.77886962890625, "loss": 0.8356, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.062394618988037, "rewards/margins": 0.16428807377815247, "rewards/rejected": -2.226682662963867, "step": 2100 }, { "epoch": 0.27, "learning_rate": 4.539586919104991e-07, "logits/chosen": -2.8489155769348145, "logits/rejected": -2.820423126220703, "logps/chosen": -572.5152587890625, "logps/rejected": -558.827880859375, "loss": 0.7798, "rewards/accuracies": 0.5625, "rewards/chosen": -2.139749765396118, "rewards/margins": 0.27008432149887085, "rewards/rejected": -2.409834146499634, "step": 2110 }, { "epoch": 0.27, "learning_rate": 4.561101549053356e-07, "logits/chosen": -2.7946486473083496, "logits/rejected": -2.6862101554870605, "logps/chosen": -568.9442138671875, "logps/rejected": -395.029296875, "loss": 0.8397, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.1196932792663574, "rewards/margins": -0.00775412330403924, "rewards/rejected": -2.111938953399658, "step": 2120 }, { "epoch": 0.27, "learning_rate": 4.582616179001721e-07, "logits/chosen": -2.946789503097534, "logits/rejected": -2.842837333679199, "logps/chosen": -604.6136474609375, "logps/rejected": -422.8882751464844, "loss": 0.847, "rewards/accuracies": 0.5625, "rewards/chosen": -1.8699918985366821, "rewards/margins": 0.12150251865386963, "rewards/rejected": -1.9914944171905518, "step": 2130 }, { "epoch": 0.28, "learning_rate": 4.6041308089500856e-07, "logits/chosen": -2.769011974334717, "logits/rejected": -2.6381101608276367, "logps/chosen": -586.3226318359375, "logps/rejected": -418.6080017089844, "loss": 0.9309, "rewards/accuracies": 0.5, "rewards/chosen": -2.231593370437622, "rewards/margins": -0.05373038724064827, "rewards/rejected": -2.177863121032715, "step": 2140 }, { "epoch": 0.28, "learning_rate": 4.6256454388984506e-07, "logits/chosen": -2.8355765342712402, "logits/rejected": -2.6896424293518066, "logps/chosen": -630.6998291015625, "logps/rejected": -449.71356201171875, "loss": 0.8953, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.375441074371338, "rewards/margins": 0.048886921256780624, "rewards/rejected": -2.424327850341797, "step": 2150 }, { "epoch": 0.28, "learning_rate": 4.6471600688468156e-07, "logits/chosen": -2.876163959503174, "logits/rejected": -2.7347521781921387, "logps/chosen": -580.9049072265625, "logps/rejected": -357.2464904785156, "loss": 0.7791, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.0741965770721436, "rewards/margins": 0.1343580186367035, "rewards/rejected": -2.208554744720459, "step": 2160 }, { "epoch": 0.28, "learning_rate": 4.66867469879518e-07, "logits/chosen": -2.7174484729766846, "logits/rejected": -2.7042646408081055, "logps/chosen": -481.4728088378906, "logps/rejected": -449.29815673828125, "loss": 0.8195, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.034794569015503, "rewards/margins": 0.13036833703517914, "rewards/rejected": -2.165163516998291, "step": 2170 }, { "epoch": 0.28, "learning_rate": 4.6901893287435457e-07, "logits/chosen": -2.968000888824463, "logits/rejected": -2.8140835762023926, "logps/chosen": -623.6590576171875, "logps/rejected": -423.74169921875, "loss": 0.722, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0474791526794434, "rewards/margins": 0.3475549817085266, "rewards/rejected": -2.3950343132019043, "step": 2180 }, { "epoch": 0.28, "learning_rate": 4.7117039586919107e-07, "logits/chosen": -2.8312714099884033, "logits/rejected": -2.693993091583252, "logps/chosen": -605.5287475585938, "logps/rejected": -452.76055908203125, "loss": 0.89, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.9848915338516235, "rewards/margins": 0.020434845238924026, "rewards/rejected": -2.00532603263855, "step": 2190 }, { "epoch": 0.28, "learning_rate": 4.733218588640275e-07, "logits/chosen": -2.9014763832092285, "logits/rejected": -2.778722047805786, "logps/chosen": -616.271728515625, "logps/rejected": -427.01641845703125, "loss": 0.851, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.299665689468384, "rewards/margins": -0.027428757399320602, "rewards/rejected": -2.2722370624542236, "step": 2200 }, { "epoch": 0.29, "learning_rate": 4.75473321858864e-07, "logits/chosen": -3.0176608562469482, "logits/rejected": -2.8489811420440674, "logps/chosen": -522.884765625, "logps/rejected": -357.45391845703125, "loss": 0.7603, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.9400784969329834, "rewards/margins": 0.26796695590019226, "rewards/rejected": -2.208045244216919, "step": 2210 }, { "epoch": 0.29, "learning_rate": 4.776247848537005e-07, "logits/chosen": -2.921874523162842, "logits/rejected": -2.7731966972351074, "logps/chosen": -535.2808227539062, "logps/rejected": -369.0583801269531, "loss": 0.7948, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.937657356262207, "rewards/margins": 0.1300104558467865, "rewards/rejected": -2.0676677227020264, "step": 2220 }, { "epoch": 0.29, "learning_rate": 4.79776247848537e-07, "logits/chosen": -2.8315391540527344, "logits/rejected": -2.7679221630096436, "logps/chosen": -550.04150390625, "logps/rejected": -440.28466796875, "loss": 0.7939, "rewards/accuracies": 0.5625, "rewards/chosen": -2.041292667388916, "rewards/margins": 0.19342505931854248, "rewards/rejected": -2.234717845916748, "step": 2230 }, { "epoch": 0.29, "learning_rate": 4.819277108433735e-07, "logits/chosen": -2.7069544792175293, "logits/rejected": -2.5584564208984375, "logps/chosen": -506.95465087890625, "logps/rejected": -388.6950988769531, "loss": 0.9325, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.1797444820404053, "rewards/margins": -0.08831959962844849, "rewards/rejected": -2.0914249420166016, "step": 2240 }, { "epoch": 0.29, "learning_rate": 4.840791738382099e-07, "logits/chosen": -2.795880079269409, "logits/rejected": -2.786827802658081, "logps/chosen": -506.03363037109375, "logps/rejected": -393.88665771484375, "loss": 0.7697, "rewards/accuracies": 0.625, "rewards/chosen": -1.9651305675506592, "rewards/margins": 0.28149765729904175, "rewards/rejected": -2.2466280460357666, "step": 2250 }, { "epoch": 0.29, "learning_rate": 4.862306368330465e-07, "logits/chosen": -2.830744981765747, "logits/rejected": -2.6732897758483887, "logps/chosen": -526.8204345703125, "logps/rejected": -366.59014892578125, "loss": 0.743, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.8457939624786377, "rewards/margins": 0.28888386487960815, "rewards/rejected": -2.1346776485443115, "step": 2260 }, { "epoch": 0.29, "learning_rate": 4.883820998278829e-07, "logits/chosen": -2.9840941429138184, "logits/rejected": -2.832062244415283, "logps/chosen": -501.9346618652344, "logps/rejected": -448.49261474609375, "loss": 0.8323, "rewards/accuracies": 0.5, "rewards/chosen": -2.1008195877075195, "rewards/margins": -0.006246727891266346, "rewards/rejected": -2.0945727825164795, "step": 2270 }, { "epoch": 0.29, "learning_rate": 4.905335628227194e-07, "logits/chosen": -2.807037353515625, "logits/rejected": -2.6359143257141113, "logps/chosen": -537.9305419921875, "logps/rejected": -322.43206787109375, "loss": 0.8172, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.0297632217407227, "rewards/margins": 0.11355499178171158, "rewards/rejected": -2.1433181762695312, "step": 2280 }, { "epoch": 0.3, "learning_rate": 4.926850258175558e-07, "logits/chosen": -2.906654119491577, "logits/rejected": -2.8637027740478516, "logps/chosen": -538.2664794921875, "logps/rejected": -450.1026916503906, "loss": 0.6905, "rewards/accuracies": 0.5625, "rewards/chosen": -1.969580054283142, "rewards/margins": 0.3184029161930084, "rewards/rejected": -2.287982940673828, "step": 2290 }, { "epoch": 0.3, "learning_rate": 4.948364888123924e-07, "logits/chosen": -2.8138623237609863, "logits/rejected": -2.6844050884246826, "logps/chosen": -479.81317138671875, "logps/rejected": -377.5061950683594, "loss": 0.9355, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -2.2049105167388916, "rewards/margins": -0.19390681385993958, "rewards/rejected": -2.011003255844116, "step": 2300 }, { "epoch": 0.3, "learning_rate": 4.96987951807229e-07, "logits/chosen": -2.7215194702148438, "logits/rejected": -2.6725094318389893, "logps/chosen": -532.0401611328125, "logps/rejected": -432.17279052734375, "loss": 0.8779, "rewards/accuracies": 0.4375, "rewards/chosen": -1.9565441608428955, "rewards/margins": -0.03278857469558716, "rewards/rejected": -1.9237556457519531, "step": 2310 }, { "epoch": 0.3, "learning_rate": 4.991394148020654e-07, "logits/chosen": -2.914489269256592, "logits/rejected": -2.758741617202759, "logps/chosen": -574.322021484375, "logps/rejected": -386.8875427246094, "loss": 0.8527, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9831886291503906, "rewards/margins": 0.14324414730072021, "rewards/rejected": -2.1264326572418213, "step": 2320 }, { "epoch": 0.3, "learning_rate": 4.998565554174237e-07, "logits/chosen": -2.835888147354126, "logits/rejected": -2.785256862640381, "logps/chosen": -515.9947509765625, "logps/rejected": -447.996337890625, "loss": 0.7895, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.040264129638672, "rewards/margins": 0.17393705248832703, "rewards/rejected": -2.2142012119293213, "step": 2330 }, { "epoch": 0.3, "learning_rate": 4.9961748111313e-07, "logits/chosen": -2.811607837677002, "logits/rejected": -2.684720993041992, "logps/chosen": -567.3128662109375, "logps/rejected": -419.92236328125, "loss": 0.8586, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0275330543518066, "rewards/margins": 0.1493760198354721, "rewards/rejected": -2.1769089698791504, "step": 2340 }, { "epoch": 0.3, "learning_rate": 4.993784068088362e-07, "logits/chosen": -2.7469704151153564, "logits/rejected": -2.6607160568237305, "logps/chosen": -522.1895141601562, "logps/rejected": -419.0931701660156, "loss": 0.9377, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.1491105556488037, "rewards/margins": 0.07706727087497711, "rewards/rejected": -2.22617769241333, "step": 2350 }, { "epoch": 0.3, "learning_rate": 4.991393325045423e-07, "logits/chosen": -2.881044387817383, "logits/rejected": -2.800541400909424, "logps/chosen": -574.7361450195312, "logps/rejected": -512.8135375976562, "loss": 0.774, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.9044551849365234, "rewards/margins": 0.2562500238418579, "rewards/rejected": -2.160705089569092, "step": 2360 }, { "epoch": 0.31, "learning_rate": 4.989002582002486e-07, "logits/chosen": -2.823262929916382, "logits/rejected": -2.774517774581909, "logps/chosen": -565.5042114257812, "logps/rejected": -468.5177307128906, "loss": 0.7719, "rewards/accuracies": 0.5625, "rewards/chosen": -1.9088211059570312, "rewards/margins": 0.2686259150505066, "rewards/rejected": -2.1774470806121826, "step": 2370 }, { "epoch": 0.31, "learning_rate": 4.986611838959548e-07, "logits/chosen": -2.8734469413757324, "logits/rejected": -2.773289203643799, "logps/chosen": -577.1066284179688, "logps/rejected": -431.068115234375, "loss": 0.7964, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1157658100128174, "rewards/margins": 0.26725512742996216, "rewards/rejected": -2.3830208778381348, "step": 2380 }, { "epoch": 0.31, "learning_rate": 4.984221095916611e-07, "logits/chosen": -2.7530970573425293, "logits/rejected": -2.7133212089538574, "logps/chosen": -540.5751953125, "logps/rejected": -405.7702941894531, "loss": 0.7651, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.9273430109024048, "rewards/margins": 0.3407163619995117, "rewards/rejected": -2.268059492111206, "step": 2390 }, { "epoch": 0.31, "learning_rate": 4.981830352873673e-07, "logits/chosen": -2.849417209625244, "logits/rejected": -2.739900827407837, "logps/chosen": -472.8578186035156, "logps/rejected": -332.7039489746094, "loss": 0.7285, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.7969036102294922, "rewards/margins": 0.2622024416923523, "rewards/rejected": -2.0591061115264893, "step": 2400 }, { "epoch": 0.31, "learning_rate": 4.979439609830735e-07, "logits/chosen": -2.7596452236175537, "logits/rejected": -2.6665871143341064, "logps/chosen": -572.219482421875, "logps/rejected": -433.916748046875, "loss": 0.8842, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -2.0447371006011963, "rewards/margins": -0.020780205726623535, "rewards/rejected": -2.023956775665283, "step": 2410 }, { "epoch": 0.31, "learning_rate": 4.977048866787798e-07, "logits/chosen": -2.7938766479492188, "logits/rejected": -2.5946459770202637, "logps/chosen": -592.4756469726562, "logps/rejected": -411.8023986816406, "loss": 0.6996, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8439090251922607, "rewards/margins": 0.45222172141075134, "rewards/rejected": -2.296130657196045, "step": 2420 }, { "epoch": 0.31, "learning_rate": 4.974658123744859e-07, "logits/chosen": -2.8324100971221924, "logits/rejected": -2.7072129249572754, "logps/chosen": -536.714111328125, "logps/rejected": -381.92327880859375, "loss": 0.7462, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.074983596801758, "rewards/margins": 0.3289003372192383, "rewards/rejected": -2.403884172439575, "step": 2430 }, { "epoch": 0.32, "learning_rate": 4.972267380701922e-07, "logits/chosen": -2.740319013595581, "logits/rejected": -2.624183177947998, "logps/chosen": -672.5626220703125, "logps/rejected": -494.5342712402344, "loss": 0.6662, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.8548822402954102, "rewards/margins": 0.4773755669593811, "rewards/rejected": -2.3322577476501465, "step": 2440 }, { "epoch": 0.32, "learning_rate": 4.969876637658984e-07, "logits/chosen": -2.773541212081909, "logits/rejected": -2.74444317817688, "logps/chosen": -508.7833557128906, "logps/rejected": -398.09381103515625, "loss": 0.8052, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.8220367431640625, "rewards/margins": 0.15804436802864075, "rewards/rejected": -1.9800812005996704, "step": 2450 }, { "epoch": 0.32, "learning_rate": 4.967485894616046e-07, "logits/chosen": -2.8311057090759277, "logits/rejected": -2.7586803436279297, "logps/chosen": -503.29248046875, "logps/rejected": -389.7448425292969, "loss": 0.855, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.973077416419983, "rewards/margins": 0.0373322069644928, "rewards/rejected": -2.0104095935821533, "step": 2460 }, { "epoch": 0.32, "learning_rate": 4.965095151573109e-07, "logits/chosen": -2.819887638092041, "logits/rejected": -2.680096387863159, "logps/chosen": -578.0244140625, "logps/rejected": -444.6253967285156, "loss": 0.8748, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.103471040725708, "rewards/margins": 0.035336486995220184, "rewards/rejected": -2.138807773590088, "step": 2470 }, { "epoch": 0.32, "learning_rate": 4.962704408530171e-07, "logits/chosen": -2.817553758621216, "logits/rejected": -2.6796724796295166, "logps/chosen": -528.9798583984375, "logps/rejected": -425.93194580078125, "loss": 0.752, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.8595647811889648, "rewards/margins": 0.2909550070762634, "rewards/rejected": -2.150519847869873, "step": 2480 }, { "epoch": 0.32, "learning_rate": 4.960313665487233e-07, "logits/chosen": -2.6797142028808594, "logits/rejected": -2.591926097869873, "logps/chosen": -622.3917846679688, "logps/rejected": -466.8741149902344, "loss": 0.8013, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.253539562225342, "rewards/margins": 0.23709645867347717, "rewards/rejected": -2.490635871887207, "step": 2490 }, { "epoch": 0.32, "learning_rate": 4.957922922444295e-07, "logits/chosen": -2.780484676361084, "logits/rejected": -2.670138359069824, "logps/chosen": -543.6640625, "logps/rejected": -393.2830810546875, "loss": 0.8587, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.8787298202514648, "rewards/margins": 0.09098343551158905, "rewards/rejected": -1.9697129726409912, "step": 2500 }, { "epoch": 0.32, "learning_rate": 4.955532179401357e-07, "logits/chosen": -2.824950695037842, "logits/rejected": -2.6856064796447754, "logps/chosen": -512.1431274414062, "logps/rejected": -341.43463134765625, "loss": 0.8003, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -1.922302007675171, "rewards/margins": 0.16654515266418457, "rewards/rejected": -2.0888473987579346, "step": 2510 }, { "epoch": 0.33, "learning_rate": 4.95314143635842e-07, "logits/chosen": -2.7873103618621826, "logits/rejected": -2.5577282905578613, "logps/chosen": -556.5481567382812, "logps/rejected": -383.45941162109375, "loss": 0.8023, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.8575496673583984, "rewards/margins": 0.24868705868721008, "rewards/rejected": -2.106236696243286, "step": 2520 }, { "epoch": 0.33, "learning_rate": 4.950750693315482e-07, "logits/chosen": -2.850815534591675, "logits/rejected": -2.660727024078369, "logps/chosen": -603.760986328125, "logps/rejected": -400.2694396972656, "loss": 0.8299, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.9876623153686523, "rewards/margins": 0.08533529937267303, "rewards/rejected": -2.072997570037842, "step": 2530 }, { "epoch": 0.33, "learning_rate": 4.948359950272544e-07, "logits/chosen": -2.796008825302124, "logits/rejected": -2.7583513259887695, "logps/chosen": -618.0372314453125, "logps/rejected": -478.07537841796875, "loss": 0.869, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.145376205444336, "rewards/margins": 0.09527996927499771, "rewards/rejected": -2.2406563758850098, "step": 2540 }, { "epoch": 0.33, "learning_rate": 4.945969207229607e-07, "logits/chosen": -2.7771430015563965, "logits/rejected": -2.6592440605163574, "logps/chosen": -516.8414916992188, "logps/rejected": -405.2905578613281, "loss": 0.8478, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.071530342102051, "rewards/margins": -0.00506844837218523, "rewards/rejected": -2.0664618015289307, "step": 2550 }, { "epoch": 0.33, "learning_rate": 4.94357846418667e-07, "logits/chosen": -2.807464122772217, "logits/rejected": -2.702007293701172, "logps/chosen": -570.2498779296875, "logps/rejected": -401.76971435546875, "loss": 0.8366, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.290713310241699, "rewards/margins": 0.10876867920160294, "rewards/rejected": -2.399482011795044, "step": 2560 }, { "epoch": 0.33, "learning_rate": 4.941187721143731e-07, "logits/chosen": -2.926056385040283, "logits/rejected": -2.7315711975097656, "logps/chosen": -607.478271484375, "logps/rejected": -399.8065490722656, "loss": 0.7682, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.0597915649414062, "rewards/margins": 0.10546314716339111, "rewards/rejected": -2.165254592895508, "step": 2570 }, { "epoch": 0.33, "learning_rate": 4.938796978100794e-07, "logits/chosen": -2.8048503398895264, "logits/rejected": -2.7613272666931152, "logps/chosen": -524.5460205078125, "logps/rejected": -432.6692810058594, "loss": 0.7529, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1234846115112305, "rewards/margins": 0.2614061236381531, "rewards/rejected": -2.3848910331726074, "step": 2580 }, { "epoch": 0.33, "learning_rate": 4.936406235057856e-07, "logits/chosen": -2.994157075881958, "logits/rejected": -2.7959580421447754, "logps/chosen": -572.154296875, "logps/rejected": -446.2792053222656, "loss": 0.8068, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.168882369995117, "rewards/margins": 0.1295901983976364, "rewards/rejected": -2.2984726428985596, "step": 2590 }, { "epoch": 0.34, "learning_rate": 4.934015492014918e-07, "logits/chosen": -2.8011257648468018, "logits/rejected": -2.725109577178955, "logps/chosen": -550.4921264648438, "logps/rejected": -468.13592529296875, "loss": 0.8389, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.0903992652893066, "rewards/margins": 0.1960996687412262, "rewards/rejected": -2.2864990234375, "step": 2600 }, { "epoch": 0.34, "learning_rate": 4.931624748971981e-07, "logits/chosen": -2.7981820106506348, "logits/rejected": -2.6693222522735596, "logps/chosen": -571.6451416015625, "logps/rejected": -443.2021484375, "loss": 0.7545, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.981628179550171, "rewards/margins": 0.4215835630893707, "rewards/rejected": -2.403212070465088, "step": 2610 }, { "epoch": 0.34, "learning_rate": 4.929234005929043e-07, "logits/chosen": -2.7760796546936035, "logits/rejected": -2.682614326477051, "logps/chosen": -540.0670166015625, "logps/rejected": -401.46112060546875, "loss": 0.7828, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.769898772239685, "rewards/margins": 0.2808716893196106, "rewards/rejected": -2.0507702827453613, "step": 2620 }, { "epoch": 0.34, "learning_rate": 4.926843262886104e-07, "logits/chosen": -2.6951746940612793, "logits/rejected": -2.632133722305298, "logps/chosen": -560.2874755859375, "logps/rejected": -435.25592041015625, "loss": 0.7451, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9571740627288818, "rewards/margins": 0.2637379765510559, "rewards/rejected": -2.220912218093872, "step": 2630 }, { "epoch": 0.34, "learning_rate": 4.924452519843167e-07, "logits/chosen": -2.8257579803466797, "logits/rejected": -2.653413772583008, "logps/chosen": -527.3146362304688, "logps/rejected": -432.3429260253906, "loss": 0.7259, "rewards/accuracies": 0.625, "rewards/chosen": -1.806842565536499, "rewards/margins": 0.5315450429916382, "rewards/rejected": -2.3383874893188477, "step": 2640 }, { "epoch": 0.34, "learning_rate": 4.922061776800229e-07, "logits/chosen": -2.681851863861084, "logits/rejected": -2.5614876747131348, "logps/chosen": -547.1260986328125, "logps/rejected": -412.05322265625, "loss": 0.8448, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.9122753143310547, "rewards/margins": 0.10089880228042603, "rewards/rejected": -2.013174295425415, "step": 2650 }, { "epoch": 0.34, "learning_rate": 4.919671033757292e-07, "logits/chosen": -2.872462272644043, "logits/rejected": -2.777625322341919, "logps/chosen": -559.3712158203125, "logps/rejected": -435.20684814453125, "loss": 0.8042, "rewards/accuracies": 0.5625, "rewards/chosen": -2.030155658721924, "rewards/margins": 0.21518965065479279, "rewards/rejected": -2.2453453540802, "step": 2660 }, { "epoch": 0.34, "learning_rate": 4.917280290714354e-07, "logits/chosen": -2.8870348930358887, "logits/rejected": -2.7925612926483154, "logps/chosen": -415.56103515625, "logps/rejected": -323.95892333984375, "loss": 0.8668, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9474235773086548, "rewards/margins": 0.1252090036869049, "rewards/rejected": -2.0726325511932373, "step": 2670 }, { "epoch": 0.35, "learning_rate": 4.914889547671416e-07, "logits/chosen": -2.6858718395233154, "logits/rejected": -2.5800650119781494, "logps/chosen": -478.20782470703125, "logps/rejected": -355.93487548828125, "loss": 0.8259, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.05359148979187, "rewards/margins": 0.13443593680858612, "rewards/rejected": -2.1880276203155518, "step": 2680 }, { "epoch": 0.35, "learning_rate": 4.912498804628479e-07, "logits/chosen": -2.7775845527648926, "logits/rejected": -2.6514220237731934, "logps/chosen": -487.857421875, "logps/rejected": -391.13385009765625, "loss": 0.8129, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.067124366760254, "rewards/margins": 0.18736276030540466, "rewards/rejected": -2.2544872760772705, "step": 2690 }, { "epoch": 0.35, "learning_rate": 4.91010806158554e-07, "logits/chosen": -2.835216999053955, "logits/rejected": -2.737123489379883, "logps/chosen": -506.6705627441406, "logps/rejected": -435.43255615234375, "loss": 0.8052, "rewards/accuracies": 0.5, "rewards/chosen": -2.164228677749634, "rewards/margins": 0.09972640872001648, "rewards/rejected": -2.2639548778533936, "step": 2700 }, { "epoch": 0.35, "learning_rate": 4.907717318542603e-07, "logits/chosen": -2.8129425048828125, "logits/rejected": -2.70133900642395, "logps/chosen": -651.8212890625, "logps/rejected": -463.5082092285156, "loss": 0.7998, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.020569324493408, "rewards/margins": 0.3348166048526764, "rewards/rejected": -2.355386257171631, "step": 2710 }, { "epoch": 0.35, "learning_rate": 4.905326575499665e-07, "logits/chosen": -2.712247371673584, "logits/rejected": -2.595644474029541, "logps/chosen": -542.924072265625, "logps/rejected": -436.4156188964844, "loss": 0.8134, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.1858654022216797, "rewards/margins": 0.16855645179748535, "rewards/rejected": -2.354421377182007, "step": 2720 }, { "epoch": 0.35, "learning_rate": 4.902935832456727e-07, "logits/chosen": -2.665574073791504, "logits/rejected": -2.7030956745147705, "logps/chosen": -472.8019104003906, "logps/rejected": -428.1283264160156, "loss": 0.6506, "rewards/accuracies": 0.625, "rewards/chosen": -1.8676960468292236, "rewards/margins": 0.49865245819091797, "rewards/rejected": -2.3663482666015625, "step": 2730 }, { "epoch": 0.35, "learning_rate": 4.90054508941379e-07, "logits/chosen": -2.824490785598755, "logits/rejected": -2.732994556427002, "logps/chosen": -619.6190795898438, "logps/rejected": -465.5160217285156, "loss": 0.7844, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.18182110786438, "rewards/margins": 0.19695289433002472, "rewards/rejected": -2.3787736892700195, "step": 2740 }, { "epoch": 0.36, "learning_rate": 4.898154346370852e-07, "logits/chosen": -2.85568904876709, "logits/rejected": -2.7158732414245605, "logps/chosen": -506.20623779296875, "logps/rejected": -396.60723876953125, "loss": 0.703, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9618847370147705, "rewards/margins": 0.3581981062889099, "rewards/rejected": -2.320082902908325, "step": 2750 }, { "epoch": 0.36, "learning_rate": 4.895763603327915e-07, "logits/chosen": -2.7891225814819336, "logits/rejected": -2.6228432655334473, "logps/chosen": -545.3680419921875, "logps/rejected": -383.2461853027344, "loss": 0.748, "rewards/accuracies": 0.5625, "rewards/chosen": -2.05367112159729, "rewards/margins": 0.2967754006385803, "rewards/rejected": -2.3504459857940674, "step": 2760 }, { "epoch": 0.36, "learning_rate": 4.893372860284976e-07, "logits/chosen": -2.6997873783111572, "logits/rejected": -2.6738839149475098, "logps/chosen": -651.97021484375, "logps/rejected": -495.9085998535156, "loss": 0.8504, "rewards/accuracies": 0.5625, "rewards/chosen": -2.219648599624634, "rewards/margins": 0.2922101616859436, "rewards/rejected": -2.5118587017059326, "step": 2770 }, { "epoch": 0.36, "learning_rate": 4.890982117242038e-07, "logits/chosen": -2.7507214546203613, "logits/rejected": -2.7924389839172363, "logps/chosen": -472.013916015625, "logps/rejected": -422.439208984375, "loss": 0.8108, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.032975673675537, "rewards/margins": 0.2863037586212158, "rewards/rejected": -2.319279432296753, "step": 2780 }, { "epoch": 0.36, "learning_rate": 4.888591374199101e-07, "logits/chosen": -2.868905544281006, "logits/rejected": -2.7063729763031006, "logps/chosen": -537.2835693359375, "logps/rejected": -375.0529479980469, "loss": 0.719, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9265568256378174, "rewards/margins": 0.3856390118598938, "rewards/rejected": -2.3121957778930664, "step": 2790 }, { "epoch": 0.36, "learning_rate": 4.886200631156163e-07, "logits/chosen": -2.858168125152588, "logits/rejected": -2.772005319595337, "logps/chosen": -548.1929321289062, "logps/rejected": -422.44580078125, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.7946834564208984, "rewards/margins": 0.42759236693382263, "rewards/rejected": -2.222275972366333, "step": 2800 }, { "epoch": 0.36, "learning_rate": 4.883809888113226e-07, "logits/chosen": -2.8050026893615723, "logits/rejected": -2.5968687534332275, "logps/chosen": -582.8165283203125, "logps/rejected": -378.930419921875, "loss": 0.8427, "rewards/accuracies": 0.5625, "rewards/chosen": -2.095353603363037, "rewards/margins": 0.17291346192359924, "rewards/rejected": -2.2682666778564453, "step": 2810 }, { "epoch": 0.36, "learning_rate": 4.881419145070288e-07, "logits/chosen": -2.7534139156341553, "logits/rejected": -2.5887417793273926, "logps/chosen": -611.856201171875, "logps/rejected": -442.8833923339844, "loss": 0.7884, "rewards/accuracies": 0.5625, "rewards/chosen": -1.9510936737060547, "rewards/margins": 0.28013744950294495, "rewards/rejected": -2.2312309741973877, "step": 2820 }, { "epoch": 0.37, "learning_rate": 4.87902840202735e-07, "logits/chosen": -2.775545835494995, "logits/rejected": -2.6656289100646973, "logps/chosen": -514.7666015625, "logps/rejected": -427.2875061035156, "loss": 0.7608, "rewards/accuracies": 0.5625, "rewards/chosen": -2.0783638954162598, "rewards/margins": 0.226555734872818, "rewards/rejected": -2.304919481277466, "step": 2830 }, { "epoch": 0.37, "learning_rate": 4.876637658984412e-07, "logits/chosen": -2.799164295196533, "logits/rejected": -2.7305500507354736, "logps/chosen": -536.5535888671875, "logps/rejected": -459.87451171875, "loss": 0.835, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.2144229412078857, "rewards/margins": 0.19424942135810852, "rewards/rejected": -2.4086718559265137, "step": 2840 }, { "epoch": 0.37, "learning_rate": 4.874246915941474e-07, "logits/chosen": -2.8002047538757324, "logits/rejected": -2.622699022293091, "logps/chosen": -600.7700805664062, "logps/rejected": -452.8916015625, "loss": 0.98, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.3135323524475098, "rewards/margins": -0.040419984608888626, "rewards/rejected": -2.2731125354766846, "step": 2850 }, { "epoch": 0.37, "learning_rate": 4.871856172898537e-07, "logits/chosen": -2.732327699661255, "logits/rejected": -2.619849443435669, "logps/chosen": -503.76812744140625, "logps/rejected": -356.35699462890625, "loss": 0.6622, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9031383991241455, "rewards/margins": 0.4827834963798523, "rewards/rejected": -2.3859219551086426, "step": 2860 }, { "epoch": 0.37, "learning_rate": 4.869465429855599e-07, "logits/chosen": -2.726788282394409, "logits/rejected": -2.6187350749969482, "logps/chosen": -522.1117553710938, "logps/rejected": -410.56182861328125, "loss": 0.9616, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.2282214164733887, "rewards/margins": -0.0752413421869278, "rewards/rejected": -2.152980327606201, "step": 2870 }, { "epoch": 0.37, "learning_rate": 4.867074686812661e-07, "logits/chosen": -2.825840950012207, "logits/rejected": -2.751084804534912, "logps/chosen": -600.2213134765625, "logps/rejected": -502.71588134765625, "loss": 0.7077, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.19783353805542, "rewards/margins": 0.45002421736717224, "rewards/rejected": -2.647857666015625, "step": 2880 }, { "epoch": 0.37, "learning_rate": 4.864683943769724e-07, "logits/chosen": -2.737039089202881, "logits/rejected": -2.603180408477783, "logps/chosen": -546.8961181640625, "logps/rejected": -417.132568359375, "loss": 0.7208, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.8564770221710205, "rewards/margins": 0.4153919219970703, "rewards/rejected": -2.271868944168091, "step": 2890 }, { "epoch": 0.37, "learning_rate": 4.862293200726786e-07, "logits/chosen": -2.679462432861328, "logits/rejected": -2.6757259368896484, "logps/chosen": -448.8861389160156, "logps/rejected": -421.935302734375, "loss": 0.7651, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.9151523113250732, "rewards/margins": 0.44336336851119995, "rewards/rejected": -2.358515739440918, "step": 2900 }, { "epoch": 0.38, "learning_rate": 4.859902457683847e-07, "logits/chosen": -2.791712999343872, "logits/rejected": -2.566567897796631, "logps/chosen": -606.23193359375, "logps/rejected": -426.50189208984375, "loss": 0.8618, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.306436061859131, "rewards/margins": 0.10214591026306152, "rewards/rejected": -2.4085817337036133, "step": 2910 }, { "epoch": 0.38, "learning_rate": 4.85751171464091e-07, "logits/chosen": -2.7852790355682373, "logits/rejected": -2.5986099243164062, "logps/chosen": -673.240478515625, "logps/rejected": -498.196044921875, "loss": 0.6215, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0669894218444824, "rewards/margins": 0.6604068875312805, "rewards/rejected": -2.727396011352539, "step": 2920 }, { "epoch": 0.38, "learning_rate": 4.855120971597972e-07, "logits/chosen": -2.697110176086426, "logits/rejected": -2.5939905643463135, "logps/chosen": -592.4246215820312, "logps/rejected": -447.48876953125, "loss": 0.7615, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.280095100402832, "rewards/margins": 0.36650410294532776, "rewards/rejected": -2.646599531173706, "step": 2930 }, { "epoch": 0.38, "learning_rate": 4.852730228555035e-07, "logits/chosen": -2.7566514015197754, "logits/rejected": -2.695261001586914, "logps/chosen": -509.24359130859375, "logps/rejected": -475.9761657714844, "loss": 0.8635, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1045339107513428, "rewards/margins": 0.10693587362766266, "rewards/rejected": -2.2114696502685547, "step": 2940 }, { "epoch": 0.38, "learning_rate": 4.850339485512097e-07, "logits/chosen": -2.812796115875244, "logits/rejected": -2.6763455867767334, "logps/chosen": -502.3902282714844, "logps/rejected": -380.5372314453125, "loss": 0.8081, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.032017946243286, "rewards/margins": 0.14416567981243134, "rewards/rejected": -2.1761839389801025, "step": 2950 }, { "epoch": 0.38, "learning_rate": 4.847948742469159e-07, "logits/chosen": -2.8402814865112305, "logits/rejected": -2.7296533584594727, "logps/chosen": -642.1256103515625, "logps/rejected": -484.5752868652344, "loss": 0.6904, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.138479232788086, "rewards/margins": 0.5107513666152954, "rewards/rejected": -2.6492302417755127, "step": 2960 }, { "epoch": 0.38, "learning_rate": 4.845557999426222e-07, "logits/chosen": -2.7914347648620605, "logits/rejected": -2.7076497077941895, "logps/chosen": -532.999755859375, "logps/rejected": -500.0226135253906, "loss": 0.7944, "rewards/accuracies": 0.5625, "rewards/chosen": -2.2355923652648926, "rewards/margins": 0.24997882544994354, "rewards/rejected": -2.4855713844299316, "step": 2970 }, { "epoch": 0.38, "learning_rate": 4.843167256383283e-07, "logits/chosen": -2.8852286338806152, "logits/rejected": -2.7256205081939697, "logps/chosen": -512.3923950195312, "logps/rejected": -403.9988708496094, "loss": 0.725, "rewards/accuracies": 0.5625, "rewards/chosen": -2.11303973197937, "rewards/margins": 0.38554632663726807, "rewards/rejected": -2.4985861778259277, "step": 2980 }, { "epoch": 0.39, "learning_rate": 4.840776513340346e-07, "logits/chosen": -2.7532520294189453, "logits/rejected": -2.654836416244507, "logps/chosen": -585.3475341796875, "logps/rejected": -464.982666015625, "loss": 0.7117, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1471757888793945, "rewards/margins": 0.4290723204612732, "rewards/rejected": -2.5762479305267334, "step": 2990 }, { "epoch": 0.39, "learning_rate": 4.838385770297408e-07, "logits/chosen": -2.67000412940979, "logits/rejected": -2.5130884647369385, "logps/chosen": -480.02069091796875, "logps/rejected": -364.9166564941406, "loss": 0.8198, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -2.0692412853240967, "rewards/margins": 0.24552872776985168, "rewards/rejected": -2.314769983291626, "step": 3000 }, { "epoch": 0.39, "eval_logits/chosen": -3.059387683868408, "eval_logits/rejected": -2.989030361175537, "eval_logps/chosen": -539.9495239257812, "eval_logps/rejected": -416.48199462890625, "eval_loss": 0.6522261500358582, "eval_rewards/accuracies": 0.6365000009536743, "eval_rewards/chosen": -0.812973141670227, "eval_rewards/margins": 0.7423619031906128, "eval_rewards/rejected": -1.5553350448608398, "eval_runtime": 278.4884, "eval_samples_per_second": 7.182, "eval_steps_per_second": 3.591, "step": 3000 }, { "epoch": 0.39, "learning_rate": 4.83599502725447e-07, "logits/chosen": -2.829756259918213, "logits/rejected": -2.737196683883667, "logps/chosen": -579.0891723632812, "logps/rejected": -479.61932373046875, "loss": 0.7379, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1118922233581543, "rewards/margins": 0.42534583806991577, "rewards/rejected": -2.537238359451294, "step": 3010 }, { "epoch": 0.39, "learning_rate": 4.833604284211533e-07, "logits/chosen": -2.8195526599884033, "logits/rejected": -2.7391529083251953, "logps/chosen": -552.901123046875, "logps/rejected": -440.85015869140625, "loss": 0.8183, "rewards/accuracies": 0.5625, "rewards/chosen": -2.140753984451294, "rewards/margins": 0.2461775243282318, "rewards/rejected": -2.3869316577911377, "step": 3020 }, { "epoch": 0.39, "learning_rate": 4.831213541168595e-07, "logits/chosen": -2.5550625324249268, "logits/rejected": -2.50566029548645, "logps/chosen": -555.8888549804688, "logps/rejected": -408.9020690917969, "loss": 0.7595, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.28201961517334, "rewards/margins": 0.33049583435058594, "rewards/rejected": -2.612515687942505, "step": 3030 }, { "epoch": 0.39, "learning_rate": 4.828822798125657e-07, "logits/chosen": -2.831808567047119, "logits/rejected": -2.7125041484832764, "logps/chosen": -546.6590576171875, "logps/rejected": -430.83331298828125, "loss": 0.8419, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.2797110080718994, "rewards/margins": 0.2732388973236084, "rewards/rejected": -2.552949905395508, "step": 3040 }, { "epoch": 0.39, "learning_rate": 4.826432055082719e-07, "logits/chosen": -2.8821866512298584, "logits/rejected": -2.7920615673065186, "logps/chosen": -527.3948364257812, "logps/rejected": -440.94903564453125, "loss": 0.7283, "rewards/accuracies": 0.5625, "rewards/chosen": -2.0505270957946777, "rewards/margins": 0.3367258906364441, "rewards/rejected": -2.3872528076171875, "step": 3050 }, { "epoch": 0.4, "learning_rate": 4.824041312039781e-07, "logits/chosen": -2.857111692428589, "logits/rejected": -2.6552622318267822, "logps/chosen": -551.9385986328125, "logps/rejected": -373.56964111328125, "loss": 1.0863, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.325578212738037, "rewards/margins": -0.19287124276161194, "rewards/rejected": -2.132707118988037, "step": 3060 }, { "epoch": 0.4, "learning_rate": 4.821650568996844e-07, "logits/chosen": -2.8273565769195557, "logits/rejected": -2.7035810947418213, "logps/chosen": -463.0125427246094, "logps/rejected": -360.55731201171875, "loss": 0.6271, "rewards/accuracies": 0.625, "rewards/chosen": -1.8159844875335693, "rewards/margins": 0.5569252371788025, "rewards/rejected": -2.3729097843170166, "step": 3070 }, { "epoch": 0.4, "learning_rate": 4.819259825953906e-07, "logits/chosen": -2.920973062515259, "logits/rejected": -2.7385663986206055, "logps/chosen": -584.9503784179688, "logps/rejected": -423.487060546875, "loss": 0.6179, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9195674657821655, "rewards/margins": 0.7058788537979126, "rewards/rejected": -2.6254465579986572, "step": 3080 }, { "epoch": 0.4, "learning_rate": 4.816869082910969e-07, "logits/chosen": -2.7110838890075684, "logits/rejected": -2.6282958984375, "logps/chosen": -538.6134643554688, "logps/rejected": -424.6338806152344, "loss": 0.8432, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.13964581489563, "rewards/margins": 0.20019295811653137, "rewards/rejected": -2.3398385047912598, "step": 3090 }, { "epoch": 0.4, "learning_rate": 4.814478339868031e-07, "logits/chosen": -2.749368667602539, "logits/rejected": -2.626753091812134, "logps/chosen": -612.5210571289062, "logps/rejected": -453.98565673828125, "loss": 0.8714, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.4095232486724854, "rewards/margins": 0.16736245155334473, "rewards/rejected": -2.57688570022583, "step": 3100 }, { "epoch": 0.4, "learning_rate": 4.812087596825092e-07, "logits/chosen": -2.7729251384735107, "logits/rejected": -2.670654296875, "logps/chosen": -537.7632446289062, "logps/rejected": -449.70111083984375, "loss": 0.7742, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.238839626312256, "rewards/margins": 0.31172847747802734, "rewards/rejected": -2.550568103790283, "step": 3110 }, { "epoch": 0.4, "learning_rate": 4.809696853782155e-07, "logits/chosen": -2.8437442779541016, "logits/rejected": -2.619767427444458, "logps/chosen": -525.1566772460938, "logps/rejected": -390.8587646484375, "loss": 0.7033, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.923958420753479, "rewards/margins": 0.3466717600822449, "rewards/rejected": -2.270630359649658, "step": 3120 }, { "epoch": 0.4, "learning_rate": 4.807306110739217e-07, "logits/chosen": -2.789945363998413, "logits/rejected": -2.622040271759033, "logps/chosen": -587.6214599609375, "logps/rejected": -403.03619384765625, "loss": 0.9296, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1782054901123047, "rewards/margins": 0.07255192846059799, "rewards/rejected": -2.2507576942443848, "step": 3130 }, { "epoch": 0.41, "learning_rate": 4.80491536769628e-07, "logits/chosen": -2.714181661605835, "logits/rejected": -2.583012580871582, "logps/chosen": -563.2373046875, "logps/rejected": -422.0274353027344, "loss": 0.8057, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0355100631713867, "rewards/margins": 0.3088306784629822, "rewards/rejected": -2.3443405628204346, "step": 3140 }, { "epoch": 0.41, "learning_rate": 4.802524624653342e-07, "logits/chosen": -2.7542080879211426, "logits/rejected": -2.6829440593719482, "logps/chosen": -565.644287109375, "logps/rejected": -437.26141357421875, "loss": 0.7604, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9714053869247437, "rewards/margins": 0.3292379677295685, "rewards/rejected": -2.3006432056427, "step": 3150 }, { "epoch": 0.41, "learning_rate": 4.800133881610405e-07, "logits/chosen": -2.867932081222534, "logits/rejected": -2.7595183849334717, "logps/chosen": -491.3173828125, "logps/rejected": -389.95343017578125, "loss": 0.8488, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.0221166610717773, "rewards/margins": 0.1686307191848755, "rewards/rejected": -2.1907474994659424, "step": 3160 }, { "epoch": 0.41, "learning_rate": 4.797743138567467e-07, "logits/chosen": -2.7735683917999268, "logits/rejected": -2.718082904815674, "logps/chosen": -477.60565185546875, "logps/rejected": -396.0860595703125, "loss": 0.6703, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.9657294750213623, "rewards/margins": 0.42019543051719666, "rewards/rejected": -2.385925054550171, "step": 3170 }, { "epoch": 0.41, "learning_rate": 4.795352395524529e-07, "logits/chosen": -2.802736282348633, "logits/rejected": -2.652377128601074, "logps/chosen": -595.60546875, "logps/rejected": -409.0793151855469, "loss": 0.6437, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.141831874847412, "rewards/margins": 0.5375227928161621, "rewards/rejected": -2.679354429244995, "step": 3180 }, { "epoch": 0.41, "learning_rate": 4.792961652481591e-07, "logits/chosen": -2.7185847759246826, "logits/rejected": -2.5601160526275635, "logps/chosen": -643.6676635742188, "logps/rejected": -504.5415954589844, "loss": 0.8328, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.161146402359009, "rewards/margins": 0.22949862480163574, "rewards/rejected": -2.3906445503234863, "step": 3190 }, { "epoch": 0.41, "learning_rate": 4.790570909438653e-07, "logits/chosen": -2.743508815765381, "logits/rejected": -2.595231056213379, "logps/chosen": -607.4264526367188, "logps/rejected": -414.31182861328125, "loss": 0.9001, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -2.1838314533233643, "rewards/margins": 0.04494140297174454, "rewards/rejected": -2.2287728786468506, "step": 3200 }, { "epoch": 0.41, "learning_rate": 4.788180166395716e-07, "logits/chosen": -2.75516676902771, "logits/rejected": -2.6759743690490723, "logps/chosen": -504.50457763671875, "logps/rejected": -423.5565490722656, "loss": 0.8317, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1603360176086426, "rewards/margins": 0.3424733281135559, "rewards/rejected": -2.5028092861175537, "step": 3210 }, { "epoch": 0.42, "learning_rate": 4.785789423352778e-07, "logits/chosen": -2.8154194355010986, "logits/rejected": -2.767573833465576, "logps/chosen": -503.8462829589844, "logps/rejected": -443.1922912597656, "loss": 0.7633, "rewards/accuracies": 0.625, "rewards/chosen": -2.062992572784424, "rewards/margins": 0.3740801215171814, "rewards/rejected": -2.437072515487671, "step": 3220 }, { "epoch": 0.42, "learning_rate": 4.783398680309841e-07, "logits/chosen": -2.8409180641174316, "logits/rejected": -2.7195210456848145, "logps/chosen": -578.6688232421875, "logps/rejected": -402.527587890625, "loss": 0.7092, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3317856788635254, "rewards/margins": 0.4103906750679016, "rewards/rejected": -2.7421765327453613, "step": 3230 }, { "epoch": 0.42, "learning_rate": 4.781007937266903e-07, "logits/chosen": -2.667501926422119, "logits/rejected": -2.5922751426696777, "logps/chosen": -524.1946411132812, "logps/rejected": -418.29998779296875, "loss": 0.9106, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.2861945629119873, "rewards/margins": 0.04903187230229378, "rewards/rejected": -2.33522629737854, "step": 3240 }, { "epoch": 0.42, "learning_rate": 4.778617194223964e-07, "logits/chosen": -2.703965425491333, "logits/rejected": -2.6872448921203613, "logps/chosen": -617.6965942382812, "logps/rejected": -514.9215087890625, "loss": 0.6864, "rewards/accuracies": 0.6875, "rewards/chosen": -1.905982255935669, "rewards/margins": 0.5739933252334595, "rewards/rejected": -2.4799752235412598, "step": 3250 }, { "epoch": 0.42, "learning_rate": 4.776226451181027e-07, "logits/chosen": -2.767775058746338, "logits/rejected": -2.619624376296997, "logps/chosen": -596.1765747070312, "logps/rejected": -443.5970153808594, "loss": 0.8113, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.191617250442505, "rewards/margins": 0.19275793433189392, "rewards/rejected": -2.3843750953674316, "step": 3260 }, { "epoch": 0.42, "learning_rate": 4.773835708138089e-07, "logits/chosen": -2.8446855545043945, "logits/rejected": -2.7906861305236816, "logps/chosen": -557.2652587890625, "logps/rejected": -431.84344482421875, "loss": 0.7979, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.227850914001465, "rewards/margins": 0.1870911866426468, "rewards/rejected": -2.4149422645568848, "step": 3270 }, { "epoch": 0.42, "learning_rate": 4.771444965095152e-07, "logits/chosen": -2.766535997390747, "logits/rejected": -2.7483139038085938, "logps/chosen": -508.4488830566406, "logps/rejected": -431.70947265625, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.308040142059326, "rewards/margins": 0.40275755524635315, "rewards/rejected": -2.7107977867126465, "step": 3280 }, { "epoch": 0.42, "learning_rate": 4.769054222052214e-07, "logits/chosen": -2.8370213508605957, "logits/rejected": -2.677006959915161, "logps/chosen": -607.3972778320312, "logps/rejected": -455.49505615234375, "loss": 0.7913, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2294437885284424, "rewards/margins": 0.4134894013404846, "rewards/rejected": -2.6429331302642822, "step": 3290 }, { "epoch": 0.43, "learning_rate": 4.7666634790092757e-07, "logits/chosen": -2.880847454071045, "logits/rejected": -2.6843008995056152, "logps/chosen": -649.33251953125, "logps/rejected": -419.30743408203125, "loss": 0.8275, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.4189555644989014, "rewards/margins": 0.2000318318605423, "rewards/rejected": -2.618987560272217, "step": 3300 }, { "epoch": 0.43, "learning_rate": 4.764272735966338e-07, "logits/chosen": -2.7527997493743896, "logits/rejected": -2.6799416542053223, "logps/chosen": -639.4215087890625, "logps/rejected": -529.8319702148438, "loss": 0.7599, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.2496955394744873, "rewards/margins": 0.42364567518234253, "rewards/rejected": -2.6733412742614746, "step": 3310 }, { "epoch": 0.43, "learning_rate": 4.7618819929234004e-07, "logits/chosen": -2.713793992996216, "logits/rejected": -2.6209774017333984, "logps/chosen": -554.2831420898438, "logps/rejected": -415.7435607910156, "loss": 0.7342, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1200554370880127, "rewards/margins": 0.45271024107933044, "rewards/rejected": -2.572765588760376, "step": 3320 }, { "epoch": 0.43, "learning_rate": 4.759491249880462e-07, "logits/chosen": -2.772984743118286, "logits/rejected": -2.5798306465148926, "logps/chosen": -542.0056762695312, "logps/rejected": -368.70538330078125, "loss": 0.7273, "rewards/accuracies": 0.625, "rewards/chosen": -2.0621256828308105, "rewards/margins": 0.3588714599609375, "rewards/rejected": -2.420997142791748, "step": 3330 }, { "epoch": 0.43, "learning_rate": 4.7571005068375246e-07, "logits/chosen": -2.7961249351501465, "logits/rejected": -2.6456944942474365, "logps/chosen": -615.7622680664062, "logps/rejected": -454.16143798828125, "loss": 0.7782, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3534188270568848, "rewards/margins": 0.5551124215126038, "rewards/rejected": -2.908531427383423, "step": 3340 }, { "epoch": 0.43, "learning_rate": 4.754709763794587e-07, "logits/chosen": -2.8730826377868652, "logits/rejected": -2.7972488403320312, "logps/chosen": -511.72967529296875, "logps/rejected": -434.6209411621094, "loss": 0.7967, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2565650939941406, "rewards/margins": 0.21254083514213562, "rewards/rejected": -2.4691061973571777, "step": 3350 }, { "epoch": 0.43, "learning_rate": 4.7523190207516493e-07, "logits/chosen": -2.8212058544158936, "logits/rejected": -2.728835105895996, "logps/chosen": -504.23974609375, "logps/rejected": -408.368408203125, "loss": 0.7465, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.220158338546753, "rewards/margins": 0.3300139307975769, "rewards/rejected": -2.5501723289489746, "step": 3360 }, { "epoch": 0.44, "learning_rate": 4.7499282777087116e-07, "logits/chosen": -2.8258891105651855, "logits/rejected": -2.704133987426758, "logps/chosen": -529.7109985351562, "logps/rejected": -379.32647705078125, "loss": 0.6583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.060457706451416, "rewards/margins": 0.6395069360733032, "rewards/rejected": -2.699964761734009, "step": 3370 }, { "epoch": 0.44, "learning_rate": 4.7475375346657735e-07, "logits/chosen": -2.716897964477539, "logits/rejected": -2.611382246017456, "logps/chosen": -529.1348876953125, "logps/rejected": -403.0037536621094, "loss": 0.7723, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.000471353530884, "rewards/margins": 0.3191344141960144, "rewards/rejected": -2.319605827331543, "step": 3380 }, { "epoch": 0.44, "learning_rate": 4.745146791622836e-07, "logits/chosen": -2.6551501750946045, "logits/rejected": -2.6351780891418457, "logps/chosen": -501.81976318359375, "logps/rejected": -426.1039123535156, "loss": 0.6721, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9174938201904297, "rewards/margins": 0.5629794597625732, "rewards/rejected": -2.480473279953003, "step": 3390 }, { "epoch": 0.44, "learning_rate": 4.742756048579898e-07, "logits/chosen": -2.8401482105255127, "logits/rejected": -2.5484986305236816, "logps/chosen": -575.43359375, "logps/rejected": -349.25018310546875, "loss": 0.6351, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0643410682678223, "rewards/margins": 0.5883464217185974, "rewards/rejected": -2.6526875495910645, "step": 3400 }, { "epoch": 0.44, "learning_rate": 4.7403653055369605e-07, "logits/chosen": -2.6589298248291016, "logits/rejected": -2.5375618934631348, "logps/chosen": -525.0596313476562, "logps/rejected": -362.850830078125, "loss": 0.808, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1767489910125732, "rewards/margins": 0.343155175447464, "rewards/rejected": -2.5199038982391357, "step": 3410 }, { "epoch": 0.44, "learning_rate": 4.737974562494023e-07, "logits/chosen": -2.7257027626037598, "logits/rejected": -2.6717324256896973, "logps/chosen": -499.09283447265625, "logps/rejected": -449.640869140625, "loss": 0.6553, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2877256870269775, "rewards/margins": 0.6043558120727539, "rewards/rejected": -2.8920814990997314, "step": 3420 }, { "epoch": 0.44, "learning_rate": 4.735583819451085e-07, "logits/chosen": -2.7200000286102295, "logits/rejected": -2.6528306007385254, "logps/chosen": -550.0437622070312, "logps/rejected": -455.5809020996094, "loss": 0.6014, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0504612922668457, "rewards/margins": 0.669892430305481, "rewards/rejected": -2.720353603363037, "step": 3430 }, { "epoch": 0.44, "learning_rate": 4.733193076408147e-07, "logits/chosen": -2.7639553546905518, "logits/rejected": -2.6276161670684814, "logps/chosen": -500.4852600097656, "logps/rejected": -378.915771484375, "loss": 0.8146, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.3640353679656982, "rewards/margins": 0.2523082494735718, "rewards/rejected": -2.6163439750671387, "step": 3440 }, { "epoch": 0.45, "learning_rate": 4.73080233336521e-07, "logits/chosen": -2.843798875808716, "logits/rejected": -2.6951980590820312, "logps/chosen": -534.0528564453125, "logps/rejected": -391.10028076171875, "loss": 0.6525, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1260335445404053, "rewards/margins": 0.5272551774978638, "rewards/rejected": -2.6532886028289795, "step": 3450 }, { "epoch": 0.45, "learning_rate": 4.7284115903222723e-07, "logits/chosen": -2.6662633419036865, "logits/rejected": -2.526205539703369, "logps/chosen": -556.1610107421875, "logps/rejected": -432.79193115234375, "loss": 0.7039, "rewards/accuracies": 0.5625, "rewards/chosen": -2.4458017349243164, "rewards/margins": 0.453535258769989, "rewards/rejected": -2.8993372917175293, "step": 3460 }, { "epoch": 0.45, "learning_rate": 4.7260208472793347e-07, "logits/chosen": -2.8714003562927246, "logits/rejected": -2.68316388130188, "logps/chosen": -612.2136840820312, "logps/rejected": -451.249755859375, "loss": 0.7842, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.40043568611145, "rewards/margins": 0.3106958270072937, "rewards/rejected": -2.7111315727233887, "step": 3470 }, { "epoch": 0.45, "learning_rate": 4.7236301042363965e-07, "logits/chosen": -2.818276882171631, "logits/rejected": -2.6933836936950684, "logps/chosen": -605.9519653320312, "logps/rejected": -446.3802795410156, "loss": 0.6955, "rewards/accuracies": 0.625, "rewards/chosen": -1.8703542947769165, "rewards/margins": 0.3543032109737396, "rewards/rejected": -2.2246575355529785, "step": 3480 }, { "epoch": 0.45, "learning_rate": 4.721239361193459e-07, "logits/chosen": -2.881615400314331, "logits/rejected": -2.723027229309082, "logps/chosen": -645.2496948242188, "logps/rejected": -499.7584533691406, "loss": 0.8423, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.2417397499084473, "rewards/margins": 0.35685935616493225, "rewards/rejected": -2.598599433898926, "step": 3490 }, { "epoch": 0.45, "learning_rate": 4.718848618150521e-07, "logits/chosen": -2.780104160308838, "logits/rejected": -2.7517831325531006, "logps/chosen": -538.9532470703125, "logps/rejected": -461.6817321777344, "loss": 0.6739, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9690272808074951, "rewards/margins": 0.6265552639961243, "rewards/rejected": -2.5955824851989746, "step": 3500 }, { "epoch": 0.45, "learning_rate": 4.7164578751075836e-07, "logits/chosen": -2.7970213890075684, "logits/rejected": -2.6452202796936035, "logps/chosen": -532.872802734375, "logps/rejected": -388.1499328613281, "loss": 0.7362, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.8852684497833252, "rewards/margins": 0.4225034713745117, "rewards/rejected": -2.3077714443206787, "step": 3510 }, { "epoch": 0.45, "learning_rate": 4.7140671320646454e-07, "logits/chosen": -2.77333664894104, "logits/rejected": -2.691072463989258, "logps/chosen": -511.95257568359375, "logps/rejected": -399.1761779785156, "loss": 0.6495, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9533090591430664, "rewards/margins": 0.5256878137588501, "rewards/rejected": -2.478996753692627, "step": 3520 }, { "epoch": 0.46, "learning_rate": 4.711676389021708e-07, "logits/chosen": -2.768772602081299, "logits/rejected": -2.6563949584960938, "logps/chosen": -516.580078125, "logps/rejected": -375.48590087890625, "loss": 0.7219, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.168954372406006, "rewards/margins": 0.3396991789340973, "rewards/rejected": -2.508653402328491, "step": 3530 }, { "epoch": 0.46, "learning_rate": 4.70928564597877e-07, "logits/chosen": -2.7179131507873535, "logits/rejected": -2.6170847415924072, "logps/chosen": -514.5012817382812, "logps/rejected": -404.4955749511719, "loss": 0.7787, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.0088396072387695, "rewards/margins": 0.2836318910121918, "rewards/rejected": -2.292471408843994, "step": 3540 }, { "epoch": 0.46, "learning_rate": 4.7068949029358325e-07, "logits/chosen": -2.833805561065674, "logits/rejected": -2.7586352825164795, "logps/chosen": -608.8426513671875, "logps/rejected": -542.80712890625, "loss": 0.8114, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4171502590179443, "rewards/margins": 0.35464930534362793, "rewards/rejected": -2.7717995643615723, "step": 3550 }, { "epoch": 0.46, "learning_rate": 4.704504159892895e-07, "logits/chosen": -2.723379135131836, "logits/rejected": -2.690556526184082, "logps/chosen": -518.2579345703125, "logps/rejected": -484.92584228515625, "loss": 0.7462, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1485767364501953, "rewards/margins": 0.37568727135658264, "rewards/rejected": -2.524263858795166, "step": 3560 }, { "epoch": 0.46, "learning_rate": 4.7021134168499567e-07, "logits/chosen": -2.801586151123047, "logits/rejected": -2.6535611152648926, "logps/chosen": -608.86083984375, "logps/rejected": -426.54278564453125, "loss": 0.7551, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1716418266296387, "rewards/margins": 0.40539613366127014, "rewards/rejected": -2.577038049697876, "step": 3570 }, { "epoch": 0.46, "learning_rate": 4.699722673807019e-07, "logits/chosen": -2.8050029277801514, "logits/rejected": -2.702195405960083, "logps/chosen": -546.6934814453125, "logps/rejected": -432.4085388183594, "loss": 0.8993, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.3243439197540283, "rewards/margins": 0.15948566794395447, "rewards/rejected": -2.4838294982910156, "step": 3580 }, { "epoch": 0.46, "learning_rate": 4.6973319307640814e-07, "logits/chosen": -2.700775623321533, "logits/rejected": -2.694840669631958, "logps/chosen": -466.4554748535156, "logps/rejected": -390.87615966796875, "loss": 0.7922, "rewards/accuracies": 0.5, "rewards/chosen": -2.0380849838256836, "rewards/margins": 0.2236051857471466, "rewards/rejected": -2.261690139770508, "step": 3590 }, { "epoch": 0.46, "learning_rate": 4.694941187721144e-07, "logits/chosen": -2.7744715213775635, "logits/rejected": -2.660827159881592, "logps/chosen": -607.0294189453125, "logps/rejected": -454.7483825683594, "loss": 0.7032, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.159827709197998, "rewards/margins": 0.5385087728500366, "rewards/rejected": -2.698336362838745, "step": 3600 }, { "epoch": 0.47, "learning_rate": 4.692550444678206e-07, "logits/chosen": -2.7625057697296143, "logits/rejected": -2.5873944759368896, "logps/chosen": -624.8814697265625, "logps/rejected": -460.4112243652344, "loss": 0.7344, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.326052188873291, "rewards/margins": 0.33939146995544434, "rewards/rejected": -2.6654438972473145, "step": 3610 }, { "epoch": 0.47, "learning_rate": 4.690159701635268e-07, "logits/chosen": -2.827319383621216, "logits/rejected": -2.755857467651367, "logps/chosen": -524.0814819335938, "logps/rejected": -397.8774719238281, "loss": 0.7789, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2072091102600098, "rewards/margins": 0.3533988893032074, "rewards/rejected": -2.56060791015625, "step": 3620 }, { "epoch": 0.47, "learning_rate": 4.6877689585923303e-07, "logits/chosen": -2.7475945949554443, "logits/rejected": -2.623791456222534, "logps/chosen": -486.52239990234375, "logps/rejected": -409.9076232910156, "loss": 0.6703, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0888938903808594, "rewards/margins": 0.5309438109397888, "rewards/rejected": -2.619837760925293, "step": 3630 }, { "epoch": 0.47, "learning_rate": 4.6853782155493927e-07, "logits/chosen": -2.736027479171753, "logits/rejected": -2.6973345279693604, "logps/chosen": -568.0948486328125, "logps/rejected": -424.34381103515625, "loss": 0.7893, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.097799301147461, "rewards/margins": 0.22389331459999084, "rewards/rejected": -2.321692705154419, "step": 3640 }, { "epoch": 0.47, "learning_rate": 4.682987472506455e-07, "logits/chosen": -2.7642452716827393, "logits/rejected": -2.7027883529663086, "logps/chosen": -555.19384765625, "logps/rejected": -465.5340881347656, "loss": 0.6936, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9895200729370117, "rewards/margins": 0.3982035517692566, "rewards/rejected": -2.387723684310913, "step": 3650 }, { "epoch": 0.47, "learning_rate": 4.680596729463517e-07, "logits/chosen": -2.6523067951202393, "logits/rejected": -2.534090518951416, "logps/chosen": -528.7243041992188, "logps/rejected": -437.9938049316406, "loss": 0.7422, "rewards/accuracies": 0.625, "rewards/chosen": -2.0607244968414307, "rewards/margins": 0.3822864592075348, "rewards/rejected": -2.4430108070373535, "step": 3660 }, { "epoch": 0.47, "learning_rate": 4.678205986420579e-07, "logits/chosen": -2.7857019901275635, "logits/rejected": -2.706968069076538, "logps/chosen": -479.658447265625, "logps/rejected": -387.6781311035156, "loss": 0.7478, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.2608258724212646, "rewards/margins": 0.2470257729291916, "rewards/rejected": -2.5078518390655518, "step": 3670 }, { "epoch": 0.48, "learning_rate": 4.6758152433776416e-07, "logits/chosen": -2.8488802909851074, "logits/rejected": -2.7172207832336426, "logps/chosen": -520.583984375, "logps/rejected": -416.8192443847656, "loss": 0.723, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.129680633544922, "rewards/margins": 0.43240708112716675, "rewards/rejected": -2.5620875358581543, "step": 3680 }, { "epoch": 0.48, "learning_rate": 4.673424500334704e-07, "logits/chosen": -2.8075921535491943, "logits/rejected": -2.6730575561523438, "logps/chosen": -531.0025024414062, "logps/rejected": -423.229248046875, "loss": 0.765, "rewards/accuracies": 0.5625, "rewards/chosen": -1.9580967426300049, "rewards/margins": 0.35486021637916565, "rewards/rejected": -2.312957286834717, "step": 3690 }, { "epoch": 0.48, "learning_rate": 4.6710337572917663e-07, "logits/chosen": -2.721198320388794, "logits/rejected": -2.641798496246338, "logps/chosen": -531.6104736328125, "logps/rejected": -434.64068603515625, "loss": 0.7185, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0828354358673096, "rewards/margins": 0.5572195053100586, "rewards/rejected": -2.640054941177368, "step": 3700 }, { "epoch": 0.48, "learning_rate": 4.668643014248828e-07, "logits/chosen": -2.797947406768799, "logits/rejected": -2.69712495803833, "logps/chosen": -512.5301513671875, "logps/rejected": -402.13330078125, "loss": 0.6711, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9861986637115479, "rewards/margins": 0.47461265325546265, "rewards/rejected": -2.460811138153076, "step": 3710 }, { "epoch": 0.48, "learning_rate": 4.6662522712058905e-07, "logits/chosen": -2.6386725902557373, "logits/rejected": -2.4989511966705322, "logps/chosen": -571.0833129882812, "logps/rejected": -460.89166259765625, "loss": 0.6287, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9033597707748413, "rewards/margins": 0.6476998925209045, "rewards/rejected": -2.5510594844818115, "step": 3720 }, { "epoch": 0.48, "learning_rate": 4.663861528162953e-07, "logits/chosen": -2.934797525405884, "logits/rejected": -2.8321218490600586, "logps/chosen": -589.3635864257812, "logps/rejected": -455.05810546875, "loss": 0.7568, "rewards/accuracies": 0.5625, "rewards/chosen": -1.9847795963287354, "rewards/margins": 0.3083726465702057, "rewards/rejected": -2.293152332305908, "step": 3730 }, { "epoch": 0.48, "learning_rate": 4.661470785120015e-07, "logits/chosen": -2.8341031074523926, "logits/rejected": -2.73675274848938, "logps/chosen": -539.5736083984375, "logps/rejected": -462.33026123046875, "loss": 0.7452, "rewards/accuracies": 0.625, "rewards/chosen": -2.0527567863464355, "rewards/margins": 0.38251161575317383, "rewards/rejected": -2.4352684020996094, "step": 3740 }, { "epoch": 0.48, "learning_rate": 4.6590800420770776e-07, "logits/chosen": -2.724475383758545, "logits/rejected": -2.6765122413635254, "logps/chosen": -589.7545166015625, "logps/rejected": -450.74578857421875, "loss": 0.8272, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.244536876678467, "rewards/margins": 0.23951300978660583, "rewards/rejected": -2.4840502738952637, "step": 3750 }, { "epoch": 0.49, "learning_rate": 4.6566892990341394e-07, "logits/chosen": -2.787227153778076, "logits/rejected": -2.5439441204071045, "logps/chosen": -657.1588134765625, "logps/rejected": -436.5938415527344, "loss": 0.7577, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.389896869659424, "rewards/margins": 0.37995007634162903, "rewards/rejected": -2.7698466777801514, "step": 3760 }, { "epoch": 0.49, "learning_rate": 4.654298555991202e-07, "logits/chosen": -2.7972121238708496, "logits/rejected": -2.735654354095459, "logps/chosen": -418.87408447265625, "logps/rejected": -399.1194152832031, "loss": 0.6414, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.034653902053833, "rewards/margins": 0.5314531922340393, "rewards/rejected": -2.5661072731018066, "step": 3770 }, { "epoch": 0.49, "learning_rate": 4.651907812948264e-07, "logits/chosen": -2.7964584827423096, "logits/rejected": -2.706233263015747, "logps/chosen": -615.9241333007812, "logps/rejected": -548.5323486328125, "loss": 0.7727, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.240238904953003, "rewards/margins": 0.3536197543144226, "rewards/rejected": -2.5938589572906494, "step": 3780 }, { "epoch": 0.49, "learning_rate": 4.6495170699053265e-07, "logits/chosen": -2.735401153564453, "logits/rejected": -2.525318145751953, "logps/chosen": -641.40771484375, "logps/rejected": -431.3291931152344, "loss": 0.7572, "rewards/accuracies": 0.625, "rewards/chosen": -2.2677671909332275, "rewards/margins": 0.4505771994590759, "rewards/rejected": -2.7183444499969482, "step": 3790 }, { "epoch": 0.49, "learning_rate": 4.6471263268623883e-07, "logits/chosen": -2.7838873863220215, "logits/rejected": -2.6234636306762695, "logps/chosen": -591.7296142578125, "logps/rejected": -402.5968933105469, "loss": 0.5743, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9030898809432983, "rewards/margins": 0.9623501896858215, "rewards/rejected": -2.8654401302337646, "step": 3800 }, { "epoch": 0.49, "learning_rate": 4.6447355838194507e-07, "logits/chosen": -2.7665855884552, "logits/rejected": -2.6005775928497314, "logps/chosen": -585.5717163085938, "logps/rejected": -401.24072265625, "loss": 0.7181, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3397786617279053, "rewards/margins": 0.4212261140346527, "rewards/rejected": -2.76100492477417, "step": 3810 }, { "epoch": 0.49, "learning_rate": 4.642344840776513e-07, "logits/chosen": -2.740190267562866, "logits/rejected": -2.6941633224487305, "logps/chosen": -494.997802734375, "logps/rejected": -427.08233642578125, "loss": 0.6557, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0699167251586914, "rewards/margins": 0.6359483599662781, "rewards/rejected": -2.705864906311035, "step": 3820 }, { "epoch": 0.49, "learning_rate": 4.6399540977335754e-07, "logits/chosen": -2.79597544670105, "logits/rejected": -2.72910737991333, "logps/chosen": -605.4143676757812, "logps/rejected": -501.7652282714844, "loss": 0.7269, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.2414908409118652, "rewards/margins": 0.4683249592781067, "rewards/rejected": -2.709815740585327, "step": 3830 }, { "epoch": 0.5, "learning_rate": 4.637563354690638e-07, "logits/chosen": -2.877440929412842, "logits/rejected": -2.6943612098693848, "logps/chosen": -526.584716796875, "logps/rejected": -382.42327880859375, "loss": 0.7904, "rewards/accuracies": 0.5625, "rewards/chosen": -2.1135990619659424, "rewards/margins": 0.305677592754364, "rewards/rejected": -2.419276475906372, "step": 3840 }, { "epoch": 0.5, "learning_rate": 4.6351726116476996e-07, "logits/chosen": -2.8650870323181152, "logits/rejected": -2.79915452003479, "logps/chosen": -569.0214233398438, "logps/rejected": -410.507568359375, "loss": 0.7463, "rewards/accuracies": 0.625, "rewards/chosen": -2.1374478340148926, "rewards/margins": 0.3000470995903015, "rewards/rejected": -2.437494993209839, "step": 3850 }, { "epoch": 0.5, "learning_rate": 4.632781868604762e-07, "logits/chosen": -2.840510845184326, "logits/rejected": -2.667966365814209, "logps/chosen": -574.5364990234375, "logps/rejected": -392.75439453125, "loss": 0.7931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.230799674987793, "rewards/margins": 0.2758861482143402, "rewards/rejected": -2.506685733795166, "step": 3860 }, { "epoch": 0.5, "learning_rate": 4.6303911255618243e-07, "logits/chosen": -2.6653220653533936, "logits/rejected": -2.6049745082855225, "logps/chosen": -529.8118896484375, "logps/rejected": -475.25634765625, "loss": 0.7147, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.0415472984313965, "rewards/margins": 0.4103250503540039, "rewards/rejected": -2.4518723487854004, "step": 3870 }, { "epoch": 0.5, "learning_rate": 4.6280003825188867e-07, "logits/chosen": -2.8810198307037354, "logits/rejected": -2.739541530609131, "logps/chosen": -553.7215576171875, "logps/rejected": -426.44775390625, "loss": 0.7009, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2403979301452637, "rewards/margins": 0.5569398403167725, "rewards/rejected": -2.797337770462036, "step": 3880 }, { "epoch": 0.5, "learning_rate": 4.625609639475949e-07, "logits/chosen": -2.781830310821533, "logits/rejected": -2.693213701248169, "logps/chosen": -518.9996337890625, "logps/rejected": -387.19757080078125, "loss": 0.7246, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1926441192626953, "rewards/margins": 0.3854844272136688, "rewards/rejected": -2.5781285762786865, "step": 3890 }, { "epoch": 0.5, "learning_rate": 4.623218896433011e-07, "logits/chosen": -2.750553607940674, "logits/rejected": -2.71532940864563, "logps/chosen": -467.32904052734375, "logps/rejected": -424.41534423828125, "loss": 0.7866, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1735000610351562, "rewards/margins": 0.3451001048088074, "rewards/rejected": -2.51859974861145, "step": 3900 }, { "epoch": 0.5, "learning_rate": 4.620828153390073e-07, "logits/chosen": -2.812767505645752, "logits/rejected": -2.6674513816833496, "logps/chosen": -578.3057250976562, "logps/rejected": -406.92694091796875, "loss": 0.7841, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.010645627975464, "rewards/margins": 0.29800310730934143, "rewards/rejected": -2.3086485862731934, "step": 3910 }, { "epoch": 0.51, "learning_rate": 4.6184374103471356e-07, "logits/chosen": -2.790602922439575, "logits/rejected": -2.667210102081299, "logps/chosen": -633.9498901367188, "logps/rejected": -488.24365234375, "loss": 0.6719, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2110602855682373, "rewards/margins": 0.5655996799468994, "rewards/rejected": -2.7766599655151367, "step": 3920 }, { "epoch": 0.51, "learning_rate": 4.616046667304198e-07, "logits/chosen": -2.7382616996765137, "logits/rejected": -2.683189868927002, "logps/chosen": -601.0888671875, "logps/rejected": -473.51507568359375, "loss": 0.7859, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1828372478485107, "rewards/margins": 0.2739858031272888, "rewards/rejected": -2.4568228721618652, "step": 3930 }, { "epoch": 0.51, "learning_rate": 4.61365592426126e-07, "logits/chosen": -2.7663779258728027, "logits/rejected": -2.658815860748291, "logps/chosen": -509.1817932128906, "logps/rejected": -406.7018127441406, "loss": 0.6671, "rewards/accuracies": 0.625, "rewards/chosen": -2.0505623817443848, "rewards/margins": 0.4434064030647278, "rewards/rejected": -2.4939687252044678, "step": 3940 }, { "epoch": 0.51, "learning_rate": 4.611265181218322e-07, "logits/chosen": -2.73384952545166, "logits/rejected": -2.5812559127807617, "logps/chosen": -715.2908325195312, "logps/rejected": -446.34393310546875, "loss": 0.7945, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.2505674362182617, "rewards/margins": 0.4614141881465912, "rewards/rejected": -2.711981773376465, "step": 3950 }, { "epoch": 0.51, "learning_rate": 4.6088744381753845e-07, "logits/chosen": -2.7827186584472656, "logits/rejected": -2.60693621635437, "logps/chosen": -578.6638793945312, "logps/rejected": -438.0262756347656, "loss": 0.7446, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1200525760650635, "rewards/margins": 0.4435195326805115, "rewards/rejected": -2.563572406768799, "step": 3960 }, { "epoch": 0.51, "learning_rate": 4.606483695132447e-07, "logits/chosen": -2.834327220916748, "logits/rejected": -2.7092368602752686, "logps/chosen": -544.5505981445312, "logps/rejected": -424.525146484375, "loss": 0.7317, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.253066301345825, "rewards/margins": 0.4511067271232605, "rewards/rejected": -2.7041728496551514, "step": 3970 }, { "epoch": 0.51, "learning_rate": 4.604092952089509e-07, "logits/chosen": -2.769908905029297, "logits/rejected": -2.740870952606201, "logps/chosen": -546.2132568359375, "logps/rejected": -500.3587951660156, "loss": 0.8279, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -2.1399118900299072, "rewards/margins": 0.40681037306785583, "rewards/rejected": -2.546722412109375, "step": 3980 }, { "epoch": 0.52, "learning_rate": 4.601702209046571e-07, "logits/chosen": -2.7790579795837402, "logits/rejected": -2.620356321334839, "logps/chosen": -546.6650390625, "logps/rejected": -350.5991516113281, "loss": 0.7265, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3224613666534424, "rewards/margins": 0.30822962522506714, "rewards/rejected": -2.630690813064575, "step": 3990 }, { "epoch": 0.52, "learning_rate": 4.5993114660036334e-07, "logits/chosen": -2.7447009086608887, "logits/rejected": -2.658165216445923, "logps/chosen": -531.2030639648438, "logps/rejected": -402.70526123046875, "loss": 0.7973, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1049389839172363, "rewards/margins": 0.290548175573349, "rewards/rejected": -2.39548659324646, "step": 4000 }, { "epoch": 0.52, "eval_logits/chosen": -3.1002094745635986, "eval_logits/rejected": -3.036515235900879, "eval_logps/chosen": -539.5912475585938, "eval_logps/rejected": -417.20880126953125, "eval_loss": 0.6435304880142212, "eval_rewards/accuracies": 0.6449999809265137, "eval_rewards/chosen": -0.7771567106246948, "eval_rewards/margins": 0.8508621454238892, "eval_rewards/rejected": -1.6280189752578735, "eval_runtime": 278.0848, "eval_samples_per_second": 7.192, "eval_steps_per_second": 3.596, "step": 4000 }, { "epoch": 0.52, "learning_rate": 4.596920722960696e-07, "logits/chosen": -2.8125662803649902, "logits/rejected": -2.640876293182373, "logps/chosen": -593.8612060546875, "logps/rejected": -451.7347106933594, "loss": 0.7285, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.0637295246124268, "rewards/margins": 0.39743977785110474, "rewards/rejected": -2.4611692428588867, "step": 4010 }, { "epoch": 0.52, "learning_rate": 4.594529979917758e-07, "logits/chosen": -2.90344500541687, "logits/rejected": -2.7522222995758057, "logps/chosen": -586.1970825195312, "logps/rejected": -413.2530212402344, "loss": 0.6349, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2147181034088135, "rewards/margins": 0.6920836567878723, "rewards/rejected": -2.906801700592041, "step": 4020 }, { "epoch": 0.52, "learning_rate": 4.592139236874821e-07, "logits/chosen": -2.785090684890747, "logits/rejected": -2.5619449615478516, "logps/chosen": -624.2599487304688, "logps/rejected": -365.8192138671875, "loss": 0.7423, "rewards/accuracies": 0.625, "rewards/chosen": -1.9529781341552734, "rewards/margins": 0.5113283395767212, "rewards/rejected": -2.4643068313598633, "step": 4030 }, { "epoch": 0.52, "learning_rate": 4.589748493831883e-07, "logits/chosen": -2.8462374210357666, "logits/rejected": -2.688223361968994, "logps/chosen": -631.9451293945312, "logps/rejected": -432.81280517578125, "loss": 0.727, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2695345878601074, "rewards/margins": 0.4425322413444519, "rewards/rejected": -2.712067127227783, "step": 4040 }, { "epoch": 0.52, "learning_rate": 4.587357750788945e-07, "logits/chosen": -2.876166343688965, "logits/rejected": -2.7758677005767822, "logps/chosen": -536.0060424804688, "logps/rejected": -441.5453186035156, "loss": 0.8722, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.2363762855529785, "rewards/margins": 0.10737935453653336, "rewards/rejected": -2.3437557220458984, "step": 4050 }, { "epoch": 0.52, "learning_rate": 4.5849670077460075e-07, "logits/chosen": -2.781526565551758, "logits/rejected": -2.7114830017089844, "logps/chosen": -474.556396484375, "logps/rejected": -416.2935485839844, "loss": 0.7573, "rewards/accuracies": 0.625, "rewards/chosen": -2.0235238075256348, "rewards/margins": 0.35924863815307617, "rewards/rejected": -2.3827719688415527, "step": 4060 }, { "epoch": 0.53, "learning_rate": 4.58257626470307e-07, "logits/chosen": -2.819582462310791, "logits/rejected": -2.6759419441223145, "logps/chosen": -540.6016845703125, "logps/rejected": -403.54949951171875, "loss": 0.6978, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0990471839904785, "rewards/margins": 0.5293450355529785, "rewards/rejected": -2.628392457962036, "step": 4070 }, { "epoch": 0.53, "learning_rate": 4.5801855216601317e-07, "logits/chosen": -2.778634548187256, "logits/rejected": -2.6681199073791504, "logps/chosen": -538.7603759765625, "logps/rejected": -377.1358947753906, "loss": 0.8256, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.053126811981201, "rewards/margins": 0.25069186091423035, "rewards/rejected": -2.303818702697754, "step": 4080 }, { "epoch": 0.53, "learning_rate": 4.577794778617194e-07, "logits/chosen": -2.662256956100464, "logits/rejected": -2.544454336166382, "logps/chosen": -533.1196899414062, "logps/rejected": -385.8739013671875, "loss": 0.5872, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9912357330322266, "rewards/margins": 0.6943919658660889, "rewards/rejected": -2.6856274604797363, "step": 4090 }, { "epoch": 0.53, "learning_rate": 4.5754040355742564e-07, "logits/chosen": -2.8131933212280273, "logits/rejected": -2.609994411468506, "logps/chosen": -538.6875, "logps/rejected": -386.5342102050781, "loss": 0.8085, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2335281372070312, "rewards/margins": 0.340941846370697, "rewards/rejected": -2.574470043182373, "step": 4100 }, { "epoch": 0.53, "learning_rate": 4.573013292531319e-07, "logits/chosen": -2.7255492210388184, "logits/rejected": -2.71716570854187, "logps/chosen": -500.60528564453125, "logps/rejected": -456.87353515625, "loss": 0.718, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.156127452850342, "rewards/margins": 0.44509902596473694, "rewards/rejected": -2.601226806640625, "step": 4110 }, { "epoch": 0.53, "learning_rate": 4.570622549488381e-07, "logits/chosen": -2.8044135570526123, "logits/rejected": -2.701533794403076, "logps/chosen": -705.6146240234375, "logps/rejected": -507.5172424316406, "loss": 0.7001, "rewards/accuracies": 0.625, "rewards/chosen": -2.158120632171631, "rewards/margins": 0.642687201499939, "rewards/rejected": -2.8008077144622803, "step": 4120 }, { "epoch": 0.53, "learning_rate": 4.568231806445443e-07, "logits/chosen": -2.6521613597869873, "logits/rejected": -2.6085400581359863, "logps/chosen": -521.718994140625, "logps/rejected": -465.09912109375, "loss": 0.7857, "rewards/accuracies": 0.625, "rewards/chosen": -2.2655134201049805, "rewards/margins": 0.229165717959404, "rewards/rejected": -2.4946792125701904, "step": 4130 }, { "epoch": 0.53, "learning_rate": 4.5658410634025054e-07, "logits/chosen": -2.699741840362549, "logits/rejected": -2.6345646381378174, "logps/chosen": -613.5441284179688, "logps/rejected": -456.7112731933594, "loss": 0.6827, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3276610374450684, "rewards/margins": 0.5642479062080383, "rewards/rejected": -2.891909122467041, "step": 4140 }, { "epoch": 0.54, "learning_rate": 4.5634503203595677e-07, "logits/chosen": -2.8039278984069824, "logits/rejected": -2.6830029487609863, "logps/chosen": -593.0384521484375, "logps/rejected": -478.57080078125, "loss": 0.7321, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2785637378692627, "rewards/margins": 0.4198424816131592, "rewards/rejected": -2.6984057426452637, "step": 4150 }, { "epoch": 0.54, "learning_rate": 4.56105957731663e-07, "logits/chosen": -2.8052964210510254, "logits/rejected": -2.662141799926758, "logps/chosen": -555.6959228515625, "logps/rejected": -428.7920837402344, "loss": 0.642, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1814401149749756, "rewards/margins": 0.6196992993354797, "rewards/rejected": -2.8011393547058105, "step": 4160 }, { "epoch": 0.54, "learning_rate": 4.5586688342736924e-07, "logits/chosen": -2.7898311614990234, "logits/rejected": -2.6970787048339844, "logps/chosen": -584.8560180664062, "logps/rejected": -383.6614685058594, "loss": 0.8379, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2494959831237793, "rewards/margins": 0.16729286313056946, "rewards/rejected": -2.4167885780334473, "step": 4170 }, { "epoch": 0.54, "learning_rate": 4.556278091230754e-07, "logits/chosen": -2.754314422607422, "logits/rejected": -2.7163174152374268, "logps/chosen": -631.3893432617188, "logps/rejected": -572.0696411132812, "loss": 0.7072, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.249389171600342, "rewards/margins": 0.7657713890075684, "rewards/rejected": -3.015160322189331, "step": 4180 }, { "epoch": 0.54, "learning_rate": 4.5538873481878166e-07, "logits/chosen": -2.855123519897461, "logits/rejected": -2.6631503105163574, "logps/chosen": -631.6343994140625, "logps/rejected": -452.1959533691406, "loss": 0.9496, "rewards/accuracies": 0.5, "rewards/chosen": -2.3876712322235107, "rewards/margins": 0.1047012060880661, "rewards/rejected": -2.4923722743988037, "step": 4190 }, { "epoch": 0.54, "learning_rate": 4.551496605144879e-07, "logits/chosen": -2.8421578407287598, "logits/rejected": -2.705609083175659, "logps/chosen": -590.9459838867188, "logps/rejected": -378.00885009765625, "loss": 0.671, "rewards/accuracies": 0.625, "rewards/chosen": -1.9409916400909424, "rewards/margins": 0.5374685525894165, "rewards/rejected": -2.4784598350524902, "step": 4200 }, { "epoch": 0.54, "learning_rate": 4.5491058621019413e-07, "logits/chosen": -2.774517774581909, "logits/rejected": -2.603320598602295, "logps/chosen": -566.2200927734375, "logps/rejected": -425.44891357421875, "loss": 0.6435, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3822028636932373, "rewards/margins": 0.5722275376319885, "rewards/rejected": -2.95443058013916, "step": 4210 }, { "epoch": 0.54, "learning_rate": 4.546715119059003e-07, "logits/chosen": -2.8421363830566406, "logits/rejected": -2.755216360092163, "logps/chosen": -561.53466796875, "logps/rejected": -455.453125, "loss": 0.752, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1202902793884277, "rewards/margins": 0.4752734303474426, "rewards/rejected": -2.5955634117126465, "step": 4220 }, { "epoch": 0.55, "learning_rate": 4.5443243760160655e-07, "logits/chosen": -2.711463689804077, "logits/rejected": -2.6245265007019043, "logps/chosen": -472.5931701660156, "logps/rejected": -387.6343688964844, "loss": 0.678, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.040147304534912, "rewards/margins": 0.5469608902931213, "rewards/rejected": -2.5871081352233887, "step": 4230 }, { "epoch": 0.55, "learning_rate": 4.541933632973128e-07, "logits/chosen": -2.747981548309326, "logits/rejected": -2.708200454711914, "logps/chosen": -636.7114868164062, "logps/rejected": -546.5797729492188, "loss": 0.7012, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.12658953666687, "rewards/margins": 0.4241103231906891, "rewards/rejected": -2.5506997108459473, "step": 4240 }, { "epoch": 0.55, "learning_rate": 4.53954288993019e-07, "logits/chosen": -2.807587146759033, "logits/rejected": -2.7729337215423584, "logps/chosen": -525.0863037109375, "logps/rejected": -459.4414978027344, "loss": 0.6781, "rewards/accuracies": 0.625, "rewards/chosen": -2.246459484100342, "rewards/margins": 0.4785032272338867, "rewards/rejected": -2.7249627113342285, "step": 4250 }, { "epoch": 0.55, "learning_rate": 4.5371521468872526e-07, "logits/chosen": -2.8299202919006348, "logits/rejected": -2.7794957160949707, "logps/chosen": -527.2339477539062, "logps/rejected": -427.02630615234375, "loss": 0.6471, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9871060848236084, "rewards/margins": 0.673957347869873, "rewards/rejected": -2.6610636711120605, "step": 4260 }, { "epoch": 0.55, "learning_rate": 4.5347614038443144e-07, "logits/chosen": -2.821061611175537, "logits/rejected": -2.7510809898376465, "logps/chosen": -598.550537109375, "logps/rejected": -463.5281677246094, "loss": 0.6691, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9727671146392822, "rewards/margins": 0.5661246180534363, "rewards/rejected": -2.5388917922973633, "step": 4270 }, { "epoch": 0.55, "learning_rate": 4.532370660801377e-07, "logits/chosen": -2.7863802909851074, "logits/rejected": -2.711343288421631, "logps/chosen": -544.9305419921875, "logps/rejected": -402.30865478515625, "loss": 0.8077, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2246739864349365, "rewards/margins": 0.23920898139476776, "rewards/rejected": -2.4638831615448, "step": 4280 }, { "epoch": 0.55, "learning_rate": 4.529979917758439e-07, "logits/chosen": -2.9584672451019287, "logits/rejected": -2.8525757789611816, "logps/chosen": -501.33984375, "logps/rejected": -379.5090026855469, "loss": 0.7672, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2735953330993652, "rewards/margins": 0.3075782358646393, "rewards/rejected": -2.5811736583709717, "step": 4290 }, { "epoch": 0.56, "learning_rate": 4.5275891747155015e-07, "logits/chosen": -2.877957582473755, "logits/rejected": -2.6938748359680176, "logps/chosen": -596.1134643554688, "logps/rejected": -425.72174072265625, "loss": 0.6871, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.127333879470825, "rewards/margins": 0.44593414664268494, "rewards/rejected": -2.573267936706543, "step": 4300 }, { "epoch": 0.56, "learning_rate": 4.525198431672564e-07, "logits/chosen": -2.77375864982605, "logits/rejected": -2.709991931915283, "logps/chosen": -540.65966796875, "logps/rejected": -469.0199279785156, "loss": 0.7861, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3235082626342773, "rewards/margins": 0.2977842688560486, "rewards/rejected": -2.6212925910949707, "step": 4310 }, { "epoch": 0.56, "learning_rate": 4.5228076886296257e-07, "logits/chosen": -2.6939597129821777, "logits/rejected": -2.650891065597534, "logps/chosen": -500.967529296875, "logps/rejected": -387.16552734375, "loss": 0.7054, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2192912101745605, "rewards/margins": 0.5032671689987183, "rewards/rejected": -2.7225582599639893, "step": 4320 }, { "epoch": 0.56, "learning_rate": 4.520416945586688e-07, "logits/chosen": -2.7032642364501953, "logits/rejected": -2.6730153560638428, "logps/chosen": -559.41650390625, "logps/rejected": -479.4176330566406, "loss": 0.7921, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2438347339630127, "rewards/margins": 0.2683674395084381, "rewards/rejected": -2.512202262878418, "step": 4330 }, { "epoch": 0.56, "learning_rate": 4.5180262025437504e-07, "logits/chosen": -2.897618293762207, "logits/rejected": -2.7709648609161377, "logps/chosen": -518.5592041015625, "logps/rejected": -423.04656982421875, "loss": 0.832, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.0857386589050293, "rewards/margins": 0.35450881719589233, "rewards/rejected": -2.4402475357055664, "step": 4340 }, { "epoch": 0.56, "learning_rate": 4.515635459500813e-07, "logits/chosen": -2.9449024200439453, "logits/rejected": -2.7999820709228516, "logps/chosen": -556.2164916992188, "logps/rejected": -391.1475830078125, "loss": 0.693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0905792713165283, "rewards/margins": 0.46551641821861267, "rewards/rejected": -2.5560951232910156, "step": 4350 }, { "epoch": 0.56, "learning_rate": 4.5132447164578746e-07, "logits/chosen": -2.7558722496032715, "logits/rejected": -2.672457695007324, "logps/chosen": -549.5408935546875, "logps/rejected": -461.1700134277344, "loss": 0.683, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1230125427246094, "rewards/margins": 0.46520814299583435, "rewards/rejected": -2.5882205963134766, "step": 4360 }, { "epoch": 0.56, "learning_rate": 4.510853973414937e-07, "logits/chosen": -2.818596839904785, "logits/rejected": -2.715902328491211, "logps/chosen": -543.9423828125, "logps/rejected": -444.14599609375, "loss": 0.6906, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9757503271102905, "rewards/margins": 0.4708569645881653, "rewards/rejected": -2.4466068744659424, "step": 4370 }, { "epoch": 0.57, "learning_rate": 4.5084632303719993e-07, "logits/chosen": -2.8783047199249268, "logits/rejected": -2.773334264755249, "logps/chosen": -595.30126953125, "logps/rejected": -455.69708251953125, "loss": 0.7146, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.142611026763916, "rewards/margins": 0.5036290884017944, "rewards/rejected": -2.646239757537842, "step": 4380 }, { "epoch": 0.57, "learning_rate": 4.5060724873290617e-07, "logits/chosen": -2.8503146171569824, "logits/rejected": -2.7365825176239014, "logps/chosen": -567.0790405273438, "logps/rejected": -469.53399658203125, "loss": 0.7872, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3069262504577637, "rewards/margins": 0.3039989769458771, "rewards/rejected": -2.6109251976013184, "step": 4390 }, { "epoch": 0.57, "learning_rate": 4.503681744286124e-07, "logits/chosen": -2.831465482711792, "logits/rejected": -2.804511785507202, "logps/chosen": -460.866943359375, "logps/rejected": -415.01318359375, "loss": 0.7782, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1296794414520264, "rewards/margins": 0.29982227087020874, "rewards/rejected": -2.4295012950897217, "step": 4400 }, { "epoch": 0.57, "learning_rate": 4.501291001243186e-07, "logits/chosen": -2.8402857780456543, "logits/rejected": -2.787198543548584, "logps/chosen": -545.3604736328125, "logps/rejected": -468.1798400878906, "loss": 0.6206, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.084433078765869, "rewards/margins": 0.6976747512817383, "rewards/rejected": -2.7821080684661865, "step": 4410 }, { "epoch": 0.57, "learning_rate": 4.498900258200248e-07, "logits/chosen": -2.716226816177368, "logits/rejected": -2.6193041801452637, "logps/chosen": -553.9498901367188, "logps/rejected": -430.7489318847656, "loss": 0.5687, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0207602977752686, "rewards/margins": 0.7793180346488953, "rewards/rejected": -2.8000783920288086, "step": 4420 }, { "epoch": 0.57, "learning_rate": 4.4965095151573106e-07, "logits/chosen": -2.6860251426696777, "logits/rejected": -2.6157307624816895, "logps/chosen": -506.4866638183594, "logps/rejected": -403.6722717285156, "loss": 0.5992, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8730037212371826, "rewards/margins": 0.643346905708313, "rewards/rejected": -2.5163509845733643, "step": 4430 }, { "epoch": 0.57, "learning_rate": 4.494118772114373e-07, "logits/chosen": -2.8535070419311523, "logits/rejected": -2.751032590866089, "logps/chosen": -529.4246826171875, "logps/rejected": -424.38031005859375, "loss": 0.6998, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.027967929840088, "rewards/margins": 0.5580888390541077, "rewards/rejected": -2.586056709289551, "step": 4440 }, { "epoch": 0.57, "learning_rate": 4.4917280290714353e-07, "logits/chosen": -2.792224168777466, "logits/rejected": -2.743879795074463, "logps/chosen": -510.5660095214844, "logps/rejected": -415.2392578125, "loss": 0.7305, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.0899951457977295, "rewards/margins": 0.44340747594833374, "rewards/rejected": -2.533402919769287, "step": 4450 }, { "epoch": 0.58, "learning_rate": 4.489337286028497e-07, "logits/chosen": -2.8641674518585205, "logits/rejected": -2.6158289909362793, "logps/chosen": -632.8236083984375, "logps/rejected": -436.09765625, "loss": 0.8034, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.3794689178466797, "rewards/margins": 0.23332063853740692, "rewards/rejected": -2.6127896308898926, "step": 4460 }, { "epoch": 0.58, "learning_rate": 4.4869465429855595e-07, "logits/chosen": -2.820878744125366, "logits/rejected": -2.616281270980835, "logps/chosen": -637.419677734375, "logps/rejected": -466.54290771484375, "loss": 0.5984, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.131690502166748, "rewards/margins": 0.6820178031921387, "rewards/rejected": -2.8137080669403076, "step": 4470 }, { "epoch": 0.58, "learning_rate": 4.484555799942622e-07, "logits/chosen": -2.788083553314209, "logits/rejected": -2.692823886871338, "logps/chosen": -498.86456298828125, "logps/rejected": -380.7904968261719, "loss": 0.6162, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9952516555786133, "rewards/margins": 0.6433397531509399, "rewards/rejected": -2.6385912895202637, "step": 4480 }, { "epoch": 0.58, "learning_rate": 4.482165056899684e-07, "logits/chosen": -2.8754889965057373, "logits/rejected": -2.706624746322632, "logps/chosen": -521.5037841796875, "logps/rejected": -377.43389892578125, "loss": 0.7205, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.9518992900848389, "rewards/margins": 0.43443888425827026, "rewards/rejected": -2.386338233947754, "step": 4490 }, { "epoch": 0.58, "learning_rate": 4.479774313856746e-07, "logits/chosen": -2.8608832359313965, "logits/rejected": -2.6749253273010254, "logps/chosen": -561.648193359375, "logps/rejected": -387.1114501953125, "loss": 0.6181, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.34736967086792, "rewards/margins": 0.5305957794189453, "rewards/rejected": -2.8779654502868652, "step": 4500 }, { "epoch": 0.58, "learning_rate": 4.4773835708138084e-07, "logits/chosen": -2.9089579582214355, "logits/rejected": -2.702291965484619, "logps/chosen": -531.2054443359375, "logps/rejected": -362.81927490234375, "loss": 0.7391, "rewards/accuracies": 0.5625, "rewards/chosen": -2.203659772872925, "rewards/margins": 0.3130992352962494, "rewards/rejected": -2.516758680343628, "step": 4510 }, { "epoch": 0.58, "learning_rate": 4.474992827770871e-07, "logits/chosen": -2.741257905960083, "logits/rejected": -2.6756930351257324, "logps/chosen": -546.0369873046875, "logps/rejected": -418.5692443847656, "loss": 0.7959, "rewards/accuracies": 0.625, "rewards/chosen": -2.2908997535705566, "rewards/margins": 0.3900548219680786, "rewards/rejected": -2.6809544563293457, "step": 4520 }, { "epoch": 0.58, "learning_rate": 4.472602084727933e-07, "logits/chosen": -2.7898521423339844, "logits/rejected": -2.6897034645080566, "logps/chosen": -541.2518920898438, "logps/rejected": -441.90057373046875, "loss": 0.6988, "rewards/accuracies": 0.625, "rewards/chosen": -2.1424758434295654, "rewards/margins": 0.4993956685066223, "rewards/rejected": -2.641871690750122, "step": 4530 }, { "epoch": 0.59, "learning_rate": 4.4702113416849955e-07, "logits/chosen": -2.8588273525238037, "logits/rejected": -2.7083663940429688, "logps/chosen": -597.3187255859375, "logps/rejected": -461.2806091308594, "loss": 0.6554, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0151572227478027, "rewards/margins": 0.722854495048523, "rewards/rejected": -2.7380120754241943, "step": 4540 }, { "epoch": 0.59, "learning_rate": 4.4678205986420573e-07, "logits/chosen": -2.9889578819274902, "logits/rejected": -2.823106527328491, "logps/chosen": -524.2215576171875, "logps/rejected": -376.14202880859375, "loss": 0.7122, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.958219289779663, "rewards/margins": 0.4365631639957428, "rewards/rejected": -2.394782543182373, "step": 4550 }, { "epoch": 0.59, "learning_rate": 4.4654298555991197e-07, "logits/chosen": -2.883577585220337, "logits/rejected": -2.731762170791626, "logps/chosen": -644.5260009765625, "logps/rejected": -546.5738525390625, "loss": 0.7727, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3815293312072754, "rewards/margins": 0.4275704026222229, "rewards/rejected": -2.8090996742248535, "step": 4560 }, { "epoch": 0.59, "learning_rate": 4.463039112556182e-07, "logits/chosen": -2.792346477508545, "logits/rejected": -2.6165499687194824, "logps/chosen": -545.875732421875, "logps/rejected": -375.4015808105469, "loss": 0.7882, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.171797513961792, "rewards/margins": 0.22637733817100525, "rewards/rejected": -2.39817476272583, "step": 4570 }, { "epoch": 0.59, "learning_rate": 4.4606483695132444e-07, "logits/chosen": -2.970022439956665, "logits/rejected": -2.8193676471710205, "logps/chosen": -631.7252197265625, "logps/rejected": -470.835205078125, "loss": 0.7824, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1214702129364014, "rewards/margins": 0.37015703320503235, "rewards/rejected": -2.4916272163391113, "step": 4580 }, { "epoch": 0.59, "learning_rate": 4.458257626470307e-07, "logits/chosen": -2.775463104248047, "logits/rejected": -2.660127639770508, "logps/chosen": -597.5404663085938, "logps/rejected": -485.1578063964844, "loss": 0.8372, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.09407377243042, "rewards/margins": 0.40365856885910034, "rewards/rejected": -2.497732162475586, "step": 4590 }, { "epoch": 0.59, "learning_rate": 4.4558668834273686e-07, "logits/chosen": -2.832953929901123, "logits/rejected": -2.7369446754455566, "logps/chosen": -568.8553466796875, "logps/rejected": -469.8540954589844, "loss": 0.7654, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.2154290676116943, "rewards/margins": 0.3422226309776306, "rewards/rejected": -2.557651996612549, "step": 4600 }, { "epoch": 0.6, "learning_rate": 4.453476140384431e-07, "logits/chosen": -2.7610960006713867, "logits/rejected": -2.786165952682495, "logps/chosen": -486.0570373535156, "logps/rejected": -477.2317810058594, "loss": 0.624, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.8835262060165405, "rewards/margins": 0.6429562568664551, "rewards/rejected": -2.526482343673706, "step": 4610 }, { "epoch": 0.6, "learning_rate": 4.451085397341494e-07, "logits/chosen": -2.8555312156677246, "logits/rejected": -2.710803508758545, "logps/chosen": -533.54150390625, "logps/rejected": -405.3128967285156, "loss": 0.706, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1419107913970947, "rewards/margins": 0.42456546425819397, "rewards/rejected": -2.5664761066436768, "step": 4620 }, { "epoch": 0.6, "learning_rate": 4.448694654298556e-07, "logits/chosen": -2.833958148956299, "logits/rejected": -2.624474048614502, "logps/chosen": -637.4833374023438, "logps/rejected": -470.49951171875, "loss": 0.8058, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1080501079559326, "rewards/margins": 0.35426679253578186, "rewards/rejected": -2.4623169898986816, "step": 4630 }, { "epoch": 0.6, "learning_rate": 4.446303911255618e-07, "logits/chosen": -2.7578072547912598, "logits/rejected": -2.5854268074035645, "logps/chosen": -571.554443359375, "logps/rejected": -387.06378173828125, "loss": 0.7843, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.130439519882202, "rewards/margins": 0.18169264495372772, "rewards/rejected": -2.3121323585510254, "step": 4640 }, { "epoch": 0.6, "learning_rate": 4.4439131682126804e-07, "logits/chosen": -2.749065399169922, "logits/rejected": -2.6710383892059326, "logps/chosen": -579.3748779296875, "logps/rejected": -414.8837890625, "loss": 0.7592, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.0001330375671387, "rewards/margins": 0.4167472720146179, "rewards/rejected": -2.4168803691864014, "step": 4650 }, { "epoch": 0.6, "learning_rate": 4.441522425169743e-07, "logits/chosen": -2.739123821258545, "logits/rejected": -2.6058404445648193, "logps/chosen": -563.9171142578125, "logps/rejected": -401.7550048828125, "loss": 0.6644, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9390497207641602, "rewards/margins": 0.6571556329727173, "rewards/rejected": -2.596205472946167, "step": 4660 }, { "epoch": 0.6, "learning_rate": 4.439131682126805e-07, "logits/chosen": -2.911296844482422, "logits/rejected": -2.8667426109313965, "logps/chosen": -457.4932556152344, "logps/rejected": -406.36639404296875, "loss": 0.7467, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.8859506845474243, "rewards/margins": 0.3864440321922302, "rewards/rejected": -2.2723946571350098, "step": 4670 }, { "epoch": 0.6, "learning_rate": 4.4367409390838675e-07, "logits/chosen": -2.909228563308716, "logits/rejected": -2.7275028228759766, "logps/chosen": -644.3896484375, "logps/rejected": -510.56781005859375, "loss": 0.6663, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.249990463256836, "rewards/margins": 0.5669915080070496, "rewards/rejected": -2.816981792449951, "step": 4680 }, { "epoch": 0.61, "learning_rate": 4.4343501960409293e-07, "logits/chosen": -2.865060329437256, "logits/rejected": -2.7540111541748047, "logps/chosen": -634.2412719726562, "logps/rejected": -448.6144104003906, "loss": 0.6514, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1521382331848145, "rewards/margins": 0.5402829051017761, "rewards/rejected": -2.6924211978912354, "step": 4690 }, { "epoch": 0.61, "learning_rate": 4.4319594529979916e-07, "logits/chosen": -2.8262548446655273, "logits/rejected": -2.730936288833618, "logps/chosen": -534.4276123046875, "logps/rejected": -448.7801818847656, "loss": 0.7717, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1510095596313477, "rewards/margins": 0.21614763140678406, "rewards/rejected": -2.367157459259033, "step": 4700 }, { "epoch": 0.61, "learning_rate": 4.429568709955054e-07, "logits/chosen": -2.76222562789917, "logits/rejected": -2.740508794784546, "logps/chosen": -519.9036865234375, "logps/rejected": -461.5658264160156, "loss": 0.5996, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0950632095336914, "rewards/margins": 0.590812087059021, "rewards/rejected": -2.685875177383423, "step": 4710 }, { "epoch": 0.61, "learning_rate": 4.4271779669121164e-07, "logits/chosen": -2.8726425170898438, "logits/rejected": -2.7274906635284424, "logps/chosen": -544.5289306640625, "logps/rejected": -391.71441650390625, "loss": 0.6042, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9945271015167236, "rewards/margins": 0.680935263633728, "rewards/rejected": -2.675462245941162, "step": 4720 }, { "epoch": 0.61, "learning_rate": 4.4247872238691787e-07, "logits/chosen": -2.677762031555176, "logits/rejected": -2.6398050785064697, "logps/chosen": -579.1080932617188, "logps/rejected": -425.0433044433594, "loss": 0.7099, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.1799254417419434, "rewards/margins": 0.5635089874267578, "rewards/rejected": -2.743434429168701, "step": 4730 }, { "epoch": 0.61, "learning_rate": 4.4223964808262406e-07, "logits/chosen": -2.840076208114624, "logits/rejected": -2.7688772678375244, "logps/chosen": -551.2069091796875, "logps/rejected": -473.9517517089844, "loss": 0.7484, "rewards/accuracies": 0.625, "rewards/chosen": -2.168424129486084, "rewards/margins": 0.4250139594078064, "rewards/rejected": -2.593437910079956, "step": 4740 }, { "epoch": 0.61, "learning_rate": 4.420005737783303e-07, "logits/chosen": -2.844048023223877, "logits/rejected": -2.7612366676330566, "logps/chosen": -517.2349853515625, "logps/rejected": -437.67303466796875, "loss": 0.7589, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.033501625061035, "rewards/margins": 0.30285337567329407, "rewards/rejected": -2.3363547325134277, "step": 4750 }, { "epoch": 0.61, "learning_rate": 4.4176149947403653e-07, "logits/chosen": -2.85823392868042, "logits/rejected": -2.7357256412506104, "logps/chosen": -595.9937744140625, "logps/rejected": -499.3099670410156, "loss": 0.6269, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1602678298950195, "rewards/margins": 0.6798582077026367, "rewards/rejected": -2.8401260375976562, "step": 4760 }, { "epoch": 0.62, "learning_rate": 4.4152242516974276e-07, "logits/chosen": -2.9272220134735107, "logits/rejected": -2.7622692584991455, "logps/chosen": -580.6509399414062, "logps/rejected": -373.2926330566406, "loss": 0.5403, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.8090896606445312, "rewards/margins": 1.005517601966858, "rewards/rejected": -2.8146071434020996, "step": 4770 }, { "epoch": 0.62, "learning_rate": 4.41283350865449e-07, "logits/chosen": -2.8414433002471924, "logits/rejected": -2.737119674682617, "logps/chosen": -607.875732421875, "logps/rejected": -488.080078125, "loss": 0.7394, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.210211992263794, "rewards/margins": 0.37479478120803833, "rewards/rejected": -2.5850064754486084, "step": 4780 }, { "epoch": 0.62, "learning_rate": 4.410442765611552e-07, "logits/chosen": -2.8818581104278564, "logits/rejected": -2.786668539047241, "logps/chosen": -527.1445922851562, "logps/rejected": -432.0728454589844, "loss": 0.6387, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.033905267715454, "rewards/margins": 0.5592555999755859, "rewards/rejected": -2.59316086769104, "step": 4790 }, { "epoch": 0.62, "learning_rate": 4.408052022568614e-07, "logits/chosen": -2.798516035079956, "logits/rejected": -2.712101459503174, "logps/chosen": -526.9841918945312, "logps/rejected": -451.8885803222656, "loss": 0.7297, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2406082153320312, "rewards/margins": 0.3580622673034668, "rewards/rejected": -2.598670482635498, "step": 4800 }, { "epoch": 0.62, "learning_rate": 4.4056612795256765e-07, "logits/chosen": -2.7352852821350098, "logits/rejected": -2.6803152561187744, "logps/chosen": -546.5830078125, "logps/rejected": -439.8548889160156, "loss": 0.8639, "rewards/accuracies": 0.5625, "rewards/chosen": -2.4363632202148438, "rewards/margins": 0.17240507900714874, "rewards/rejected": -2.6087684631347656, "step": 4810 }, { "epoch": 0.62, "learning_rate": 4.403270536482739e-07, "logits/chosen": -2.8812713623046875, "logits/rejected": -2.6904964447021484, "logps/chosen": -624.5416870117188, "logps/rejected": -436.51177978515625, "loss": 0.6696, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9701000452041626, "rewards/margins": 0.5710854530334473, "rewards/rejected": -2.5411856174468994, "step": 4820 }, { "epoch": 0.62, "learning_rate": 4.4008797934398007e-07, "logits/chosen": -2.8561224937438965, "logits/rejected": -2.766796588897705, "logps/chosen": -498.11962890625, "logps/rejected": -418.213623046875, "loss": 0.6636, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.023848295211792, "rewards/margins": 0.5814916491508484, "rewards/rejected": -2.605340003967285, "step": 4830 }, { "epoch": 0.62, "learning_rate": 4.398489050396863e-07, "logits/chosen": -2.8401377201080322, "logits/rejected": -2.722318649291992, "logps/chosen": -612.833984375, "logps/rejected": -491.17071533203125, "loss": 0.7492, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1826560497283936, "rewards/margins": 0.4553479254245758, "rewards/rejected": -2.6380038261413574, "step": 4840 }, { "epoch": 0.63, "learning_rate": 4.3960983073539254e-07, "logits/chosen": -2.9329540729522705, "logits/rejected": -2.786827564239502, "logps/chosen": -586.1820068359375, "logps/rejected": -454.5927734375, "loss": 0.681, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2242958545684814, "rewards/margins": 0.5314866900444031, "rewards/rejected": -2.7557826042175293, "step": 4850 }, { "epoch": 0.63, "learning_rate": 4.393707564310988e-07, "logits/chosen": -2.9064087867736816, "logits/rejected": -2.7915892601013184, "logps/chosen": -613.9527587890625, "logps/rejected": -501.9730529785156, "loss": 0.69, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.7703272104263306, "rewards/margins": 0.5988038778305054, "rewards/rejected": -2.369131326675415, "step": 4860 }, { "epoch": 0.63, "learning_rate": 4.39131682126805e-07, "logits/chosen": -2.8355984687805176, "logits/rejected": -2.729654550552368, "logps/chosen": -550.0733642578125, "logps/rejected": -401.7315368652344, "loss": 0.7564, "rewards/accuracies": 0.625, "rewards/chosen": -2.137010097503662, "rewards/margins": 0.3479614853858948, "rewards/rejected": -2.484971523284912, "step": 4870 }, { "epoch": 0.63, "learning_rate": 4.388926078225112e-07, "logits/chosen": -2.800163984298706, "logits/rejected": -2.7574872970581055, "logps/chosen": -542.0157470703125, "logps/rejected": -430.427978515625, "loss": 0.5926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9717527627944946, "rewards/margins": 0.6924339532852173, "rewards/rejected": -2.664186477661133, "step": 4880 }, { "epoch": 0.63, "learning_rate": 4.3865353351821744e-07, "logits/chosen": -2.792942762374878, "logits/rejected": -2.631333112716675, "logps/chosen": -512.0310668945312, "logps/rejected": -361.72821044921875, "loss": 0.7227, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9998868703842163, "rewards/margins": 0.3798137307167053, "rewards/rejected": -2.3797006607055664, "step": 4890 }, { "epoch": 0.63, "learning_rate": 4.3841445921392367e-07, "logits/chosen": -2.977135419845581, "logits/rejected": -2.8551840782165527, "logps/chosen": -533.6250610351562, "logps/rejected": -393.8391418457031, "loss": 0.6497, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.870043158531189, "rewards/margins": 0.5923143625259399, "rewards/rejected": -2.462357521057129, "step": 4900 }, { "epoch": 0.63, "learning_rate": 4.381753849096299e-07, "logits/chosen": -2.8798680305480957, "logits/rejected": -2.6744637489318848, "logps/chosen": -514.3638916015625, "logps/rejected": -331.22174072265625, "loss": 0.7251, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.8313674926757812, "rewards/margins": 0.3884674906730652, "rewards/rejected": -2.219835042953491, "step": 4910 }, { "epoch": 0.64, "learning_rate": 4.3793631060533614e-07, "logits/chosen": -2.7915377616882324, "logits/rejected": -2.6549439430236816, "logps/chosen": -570.5046997070312, "logps/rejected": -409.93896484375, "loss": 0.7855, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1787846088409424, "rewards/margins": 0.4321858286857605, "rewards/rejected": -2.6109707355499268, "step": 4920 }, { "epoch": 0.64, "learning_rate": 4.376972363010423e-07, "logits/chosen": -2.764307737350464, "logits/rejected": -2.7454376220703125, "logps/chosen": -573.9234008789062, "logps/rejected": -434.0721740722656, "loss": 0.7166, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0666494369506836, "rewards/margins": 0.5339161157608032, "rewards/rejected": -2.6005656719207764, "step": 4930 }, { "epoch": 0.64, "learning_rate": 4.3745816199674856e-07, "logits/chosen": -2.8268930912017822, "logits/rejected": -2.720374584197998, "logps/chosen": -606.2349853515625, "logps/rejected": -435.1111755371094, "loss": 0.6839, "rewards/accuracies": 0.625, "rewards/chosen": -2.172375202178955, "rewards/margins": 0.5461037755012512, "rewards/rejected": -2.7184786796569824, "step": 4940 }, { "epoch": 0.64, "learning_rate": 4.372190876924548e-07, "logits/chosen": -2.807922124862671, "logits/rejected": -2.816833972930908, "logps/chosen": -449.5428771972656, "logps/rejected": -399.9245910644531, "loss": 0.6525, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.297957181930542, "rewards/margins": 0.49832668900489807, "rewards/rejected": -2.7962841987609863, "step": 4950 }, { "epoch": 0.64, "learning_rate": 4.3698001338816103e-07, "logits/chosen": -2.8888754844665527, "logits/rejected": -2.86909818649292, "logps/chosen": -506.2906188964844, "logps/rejected": -440.7579040527344, "loss": 0.7785, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.1566247940063477, "rewards/margins": 0.24124936759471893, "rewards/rejected": -2.397874355316162, "step": 4960 }, { "epoch": 0.64, "learning_rate": 4.367409390838672e-07, "logits/chosen": -2.8318827152252197, "logits/rejected": -2.6774582862854004, "logps/chosen": -612.0380859375, "logps/rejected": -449.0216369628906, "loss": 0.7451, "rewards/accuracies": 0.5625, "rewards/chosen": -2.365168809890747, "rewards/margins": 0.29462188482284546, "rewards/rejected": -2.659790515899658, "step": 4970 }, { "epoch": 0.64, "learning_rate": 4.3650186477957345e-07, "logits/chosen": -2.8360016345977783, "logits/rejected": -2.75669264793396, "logps/chosen": -526.3250122070312, "logps/rejected": -480.65838623046875, "loss": 0.7334, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2339696884155273, "rewards/margins": 0.46296244859695435, "rewards/rejected": -2.696932315826416, "step": 4980 }, { "epoch": 0.64, "learning_rate": 4.362627904752797e-07, "logits/chosen": -2.73317813873291, "logits/rejected": -2.66444730758667, "logps/chosen": -558.5051879882812, "logps/rejected": -478.2125549316406, "loss": 0.8091, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3871209621429443, "rewards/margins": 0.44208845496177673, "rewards/rejected": -2.829209327697754, "step": 4990 }, { "epoch": 0.65, "learning_rate": 4.360237161709859e-07, "logits/chosen": -2.840512990951538, "logits/rejected": -2.7092487812042236, "logps/chosen": -525.6910400390625, "logps/rejected": -363.9224548339844, "loss": 0.7659, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9248355627059937, "rewards/margins": 0.39227813482284546, "rewards/rejected": -2.3171136379241943, "step": 5000 }, { "epoch": 0.65, "eval_logits/chosen": -3.124598741531372, "eval_logits/rejected": -3.0725595951080322, "eval_logps/chosen": -540.3062744140625, "eval_logps/rejected": -418.49627685546875, "eval_loss": 0.6419405341148376, "eval_rewards/accuracies": 0.6480000019073486, "eval_rewards/chosen": -0.8486536741256714, "eval_rewards/margins": 0.9081073999404907, "eval_rewards/rejected": -1.7567613124847412, "eval_runtime": 278.6494, "eval_samples_per_second": 7.177, "eval_steps_per_second": 3.589, "step": 5000 }, { "epoch": 0.65, "learning_rate": 4.3578464186669216e-07, "logits/chosen": -2.8836240768432617, "logits/rejected": -2.6890037059783936, "logps/chosen": -545.8804321289062, "logps/rejected": -379.8439025878906, "loss": 0.6781, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1456332206726074, "rewards/margins": 0.46257495880126953, "rewards/rejected": -2.608208417892456, "step": 5010 }, { "epoch": 0.65, "learning_rate": 4.3554556756239834e-07, "logits/chosen": -2.8033173084259033, "logits/rejected": -2.7291722297668457, "logps/chosen": -494.9178771972656, "logps/rejected": -383.7427673339844, "loss": 0.6208, "rewards/accuracies": 0.625, "rewards/chosen": -1.9794448614120483, "rewards/margins": 0.551287055015564, "rewards/rejected": -2.5307319164276123, "step": 5020 }, { "epoch": 0.65, "learning_rate": 4.353064932581046e-07, "logits/chosen": -2.807457447052002, "logits/rejected": -2.580191135406494, "logps/chosen": -605.3313598632812, "logps/rejected": -390.4307556152344, "loss": 0.7825, "rewards/accuracies": 0.5625, "rewards/chosen": -2.3059792518615723, "rewards/margins": 0.3658861517906189, "rewards/rejected": -2.671865224838257, "step": 5030 }, { "epoch": 0.65, "learning_rate": 4.350674189538108e-07, "logits/chosen": -2.9980764389038086, "logits/rejected": -2.8182578086853027, "logps/chosen": -593.7200927734375, "logps/rejected": -465.3916015625, "loss": 0.6316, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9716126918792725, "rewards/margins": 0.6290321946144104, "rewards/rejected": -2.600644588470459, "step": 5040 }, { "epoch": 0.65, "learning_rate": 4.3482834464951705e-07, "logits/chosen": -2.7556185722351074, "logits/rejected": -2.718479633331299, "logps/chosen": -527.4669189453125, "logps/rejected": -441.65789794921875, "loss": 0.6103, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.110208749771118, "rewards/margins": 0.6503718495368958, "rewards/rejected": -2.760580539703369, "step": 5050 }, { "epoch": 0.65, "learning_rate": 4.345892703452233e-07, "logits/chosen": -2.720655918121338, "logits/rejected": -2.753511905670166, "logps/chosen": -517.1807250976562, "logps/rejected": -453.371826171875, "loss": 0.6248, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.0686492919921875, "rewards/margins": 0.7462572455406189, "rewards/rejected": -2.814906358718872, "step": 5060 }, { "epoch": 0.65, "learning_rate": 4.3435019604092947e-07, "logits/chosen": -2.8433079719543457, "logits/rejected": -2.678109645843506, "logps/chosen": -592.722412109375, "logps/rejected": -400.6142272949219, "loss": 0.5724, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0269534587860107, "rewards/margins": 0.8294227719306946, "rewards/rejected": -2.8563761711120605, "step": 5070 }, { "epoch": 0.66, "learning_rate": 4.341111217366357e-07, "logits/chosen": -2.836181163787842, "logits/rejected": -2.6612906455993652, "logps/chosen": -593.8582763671875, "logps/rejected": -416.00018310546875, "loss": 0.6191, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1451122760772705, "rewards/margins": 0.7170602083206177, "rewards/rejected": -2.8621726036071777, "step": 5080 }, { "epoch": 0.66, "learning_rate": 4.3387204743234194e-07, "logits/chosen": -2.7135281562805176, "logits/rejected": -2.670506715774536, "logps/chosen": -571.4208374023438, "logps/rejected": -522.196533203125, "loss": 0.6537, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.356837749481201, "rewards/margins": 0.5831014513969421, "rewards/rejected": -2.939939260482788, "step": 5090 }, { "epoch": 0.66, "learning_rate": 4.336329731280482e-07, "logits/chosen": -2.900766372680664, "logits/rejected": -2.806513786315918, "logps/chosen": -578.3690185546875, "logps/rejected": -486.2378845214844, "loss": 0.6435, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1619873046875, "rewards/margins": 0.635273814201355, "rewards/rejected": -2.7972614765167236, "step": 5100 }, { "epoch": 0.66, "learning_rate": 4.3339389882375436e-07, "logits/chosen": -2.7541842460632324, "logits/rejected": -2.6432342529296875, "logps/chosen": -628.6846923828125, "logps/rejected": -459.07952880859375, "loss": 0.7268, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2736809253692627, "rewards/margins": 0.340880811214447, "rewards/rejected": -2.6145617961883545, "step": 5110 }, { "epoch": 0.66, "learning_rate": 4.331548245194606e-07, "logits/chosen": -2.8125863075256348, "logits/rejected": -2.747727870941162, "logps/chosen": -544.4940185546875, "logps/rejected": -432.564453125, "loss": 0.6586, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2585840225219727, "rewards/margins": 0.5257794260978699, "rewards/rejected": -2.7843635082244873, "step": 5120 }, { "epoch": 0.66, "learning_rate": 4.3291575021516683e-07, "logits/chosen": -2.736574649810791, "logits/rejected": -2.6732165813446045, "logps/chosen": -637.7550048828125, "logps/rejected": -567.2156372070312, "loss": 0.7155, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4062163829803467, "rewards/margins": 0.5794802904129028, "rewards/rejected": -2.98569655418396, "step": 5130 }, { "epoch": 0.66, "learning_rate": 4.3267667591087307e-07, "logits/chosen": -2.826977491378784, "logits/rejected": -2.747908115386963, "logps/chosen": -484.7662658691406, "logps/rejected": -381.6650085449219, "loss": 0.7542, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2081732749938965, "rewards/margins": 0.3772352635860443, "rewards/rejected": -2.5854084491729736, "step": 5140 }, { "epoch": 0.66, "learning_rate": 4.324376016065793e-07, "logits/chosen": -2.8227620124816895, "logits/rejected": -2.7336370944976807, "logps/chosen": -543.7847900390625, "logps/rejected": -390.75506591796875, "loss": 0.7123, "rewards/accuracies": 0.5625, "rewards/chosen": -2.003014087677002, "rewards/margins": 0.4485486149787903, "rewards/rejected": -2.4515626430511475, "step": 5150 }, { "epoch": 0.67, "learning_rate": 4.321985273022855e-07, "logits/chosen": -2.939633846282959, "logits/rejected": -2.718578577041626, "logps/chosen": -631.9217529296875, "logps/rejected": -439.39556884765625, "loss": 0.6398, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0735881328582764, "rewards/margins": 0.6410504579544067, "rewards/rejected": -2.7146384716033936, "step": 5160 }, { "epoch": 0.67, "learning_rate": 4.319594529979917e-07, "logits/chosen": -2.844397783279419, "logits/rejected": -2.701301097869873, "logps/chosen": -615.3782958984375, "logps/rejected": -468.5672912597656, "loss": 0.726, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2449684143066406, "rewards/margins": 0.5826135873794556, "rewards/rejected": -2.8275821208953857, "step": 5170 }, { "epoch": 0.67, "learning_rate": 4.3172037869369796e-07, "logits/chosen": -2.8911216259002686, "logits/rejected": -2.8135173320770264, "logps/chosen": -474.92144775390625, "logps/rejected": -378.7367248535156, "loss": 0.6188, "rewards/accuracies": 0.625, "rewards/chosen": -2.105985164642334, "rewards/margins": 0.568085789680481, "rewards/rejected": -2.6740710735321045, "step": 5180 }, { "epoch": 0.67, "learning_rate": 4.314813043894042e-07, "logits/chosen": -2.864987850189209, "logits/rejected": -2.798766613006592, "logps/chosen": -599.8483276367188, "logps/rejected": -461.43292236328125, "loss": 0.6632, "rewards/accuracies": 0.625, "rewards/chosen": -2.073857545852661, "rewards/margins": 0.48726096749305725, "rewards/rejected": -2.5611186027526855, "step": 5190 }, { "epoch": 0.67, "learning_rate": 4.312422300851105e-07, "logits/chosen": -2.7065954208374023, "logits/rejected": -2.637667417526245, "logps/chosen": -571.5704956054688, "logps/rejected": -445.68603515625, "loss": 0.5881, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0685088634490967, "rewards/margins": 0.8940665125846863, "rewards/rejected": -2.9625754356384277, "step": 5200 }, { "epoch": 0.67, "learning_rate": 4.310031557808166e-07, "logits/chosen": -2.7520761489868164, "logits/rejected": -2.632167339324951, "logps/chosen": -533.7929077148438, "logps/rejected": -416.7777404785156, "loss": 0.7415, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1206581592559814, "rewards/margins": 0.39914393424987793, "rewards/rejected": -2.5198020935058594, "step": 5210 }, { "epoch": 0.67, "learning_rate": 4.307640814765229e-07, "logits/chosen": -2.7905640602111816, "logits/rejected": -2.726794719696045, "logps/chosen": -581.58251953125, "logps/rejected": -462.66241455078125, "loss": 0.6963, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1695964336395264, "rewards/margins": 0.5792520046234131, "rewards/rejected": -2.7488484382629395, "step": 5220 }, { "epoch": 0.68, "learning_rate": 4.3052500717222914e-07, "logits/chosen": -2.8541038036346436, "logits/rejected": -2.698007583618164, "logps/chosen": -645.21337890625, "logps/rejected": -480.35089111328125, "loss": 0.8509, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1923015117645264, "rewards/margins": 0.3513144850730896, "rewards/rejected": -2.5436160564422607, "step": 5230 }, { "epoch": 0.68, "learning_rate": 4.302859328679354e-07, "logits/chosen": -2.8317723274230957, "logits/rejected": -2.7780728340148926, "logps/chosen": -556.4080200195312, "logps/rejected": -471.46435546875, "loss": 0.7528, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.207441806793213, "rewards/margins": 0.46243181824684143, "rewards/rejected": -2.6698737144470215, "step": 5240 }, { "epoch": 0.68, "learning_rate": 4.3004685856364156e-07, "logits/chosen": -2.8964524269104004, "logits/rejected": -2.7179856300354004, "logps/chosen": -584.5445556640625, "logps/rejected": -399.30853271484375, "loss": 0.7526, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4473605155944824, "rewards/margins": 0.5273586511611938, "rewards/rejected": -2.974719285964966, "step": 5250 }, { "epoch": 0.68, "learning_rate": 4.298077842593478e-07, "logits/chosen": -2.783820629119873, "logits/rejected": -2.7193784713745117, "logps/chosen": -579.43359375, "logps/rejected": -504.6046447753906, "loss": 0.6737, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0501904487609863, "rewards/margins": 0.5482980608940125, "rewards/rejected": -2.5984885692596436, "step": 5260 }, { "epoch": 0.68, "learning_rate": 4.2956870995505403e-07, "logits/chosen": -2.736269235610962, "logits/rejected": -2.654271364212036, "logps/chosen": -624.43017578125, "logps/rejected": -482.39813232421875, "loss": 0.6876, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.24006986618042, "rewards/margins": 0.636870265007019, "rewards/rejected": -2.8769402503967285, "step": 5270 }, { "epoch": 0.68, "learning_rate": 4.2932963565076027e-07, "logits/chosen": -2.775070905685425, "logits/rejected": -2.761807680130005, "logps/chosen": -554.0556030273438, "logps/rejected": -488.791748046875, "loss": 0.6764, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.061432361602783, "rewards/margins": 0.5242161750793457, "rewards/rejected": -2.58564829826355, "step": 5280 }, { "epoch": 0.68, "learning_rate": 4.290905613464665e-07, "logits/chosen": -2.8129162788391113, "logits/rejected": -2.749441623687744, "logps/chosen": -550.7742919921875, "logps/rejected": -402.42755126953125, "loss": 0.7183, "rewards/accuracies": 0.625, "rewards/chosen": -2.23543643951416, "rewards/margins": 0.3971523344516754, "rewards/rejected": -2.6325888633728027, "step": 5290 }, { "epoch": 0.68, "learning_rate": 4.288514870421727e-07, "logits/chosen": -2.80064058303833, "logits/rejected": -2.7428653240203857, "logps/chosen": -532.04150390625, "logps/rejected": -417.7718811035156, "loss": 0.6841, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.210252523422241, "rewards/margins": 0.4853716790676117, "rewards/rejected": -2.695624589920044, "step": 5300 }, { "epoch": 0.69, "learning_rate": 4.286124127378789e-07, "logits/chosen": -2.7993016242980957, "logits/rejected": -2.694007396697998, "logps/chosen": -630.4888916015625, "logps/rejected": -443.2860412597656, "loss": 0.7765, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.311167001724243, "rewards/margins": 0.23532244563102722, "rewards/rejected": -2.5464892387390137, "step": 5310 }, { "epoch": 0.69, "learning_rate": 4.2837333843358516e-07, "logits/chosen": -2.8660888671875, "logits/rejected": -2.7742886543273926, "logps/chosen": -513.7236328125, "logps/rejected": -393.33941650390625, "loss": 0.6706, "rewards/accuracies": 0.5625, "rewards/chosen": -2.196897506713867, "rewards/margins": 0.5331348776817322, "rewards/rejected": -2.730032444000244, "step": 5320 }, { "epoch": 0.69, "learning_rate": 4.281342641292914e-07, "logits/chosen": -2.8012309074401855, "logits/rejected": -2.800593137741089, "logps/chosen": -613.9073486328125, "logps/rejected": -542.9769287109375, "loss": 0.6268, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.093379020690918, "rewards/margins": 0.6899517178535461, "rewards/rejected": -2.7833309173583984, "step": 5330 }, { "epoch": 0.69, "learning_rate": 4.2789518982499763e-07, "logits/chosen": -2.9176483154296875, "logits/rejected": -2.7529187202453613, "logps/chosen": -606.00439453125, "logps/rejected": -452.6542053222656, "loss": 0.7622, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.3552448749542236, "rewards/margins": 0.3648986518383026, "rewards/rejected": -2.7201435565948486, "step": 5340 }, { "epoch": 0.69, "learning_rate": 4.276561155207038e-07, "logits/chosen": -2.8146607875823975, "logits/rejected": -2.748622417449951, "logps/chosen": -520.0908813476562, "logps/rejected": -409.8846130371094, "loss": 0.6199, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.944018006324768, "rewards/margins": 0.6377679109573364, "rewards/rejected": -2.5817856788635254, "step": 5350 }, { "epoch": 0.69, "learning_rate": 4.2741704121641005e-07, "logits/chosen": -2.8274645805358887, "logits/rejected": -2.762117862701416, "logps/chosen": -506.38641357421875, "logps/rejected": -379.1221923828125, "loss": 0.6339, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.06351637840271, "rewards/margins": 0.7554988265037537, "rewards/rejected": -2.8190150260925293, "step": 5360 }, { "epoch": 0.69, "learning_rate": 4.271779669121163e-07, "logits/chosen": -2.9061121940612793, "logits/rejected": -2.793224811553955, "logps/chosen": -604.07568359375, "logps/rejected": -434.81500244140625, "loss": 0.8689, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2236971855163574, "rewards/margins": 0.26260557770729065, "rewards/rejected": -2.4863028526306152, "step": 5370 }, { "epoch": 0.69, "learning_rate": 4.269388926078225e-07, "logits/chosen": -2.89694881439209, "logits/rejected": -2.7369160652160645, "logps/chosen": -520.6971435546875, "logps/rejected": -379.74652099609375, "loss": 0.5712, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0119051933288574, "rewards/margins": 0.8386763334274292, "rewards/rejected": -2.850581645965576, "step": 5380 }, { "epoch": 0.7, "learning_rate": 4.266998183035287e-07, "logits/chosen": -2.8788695335388184, "logits/rejected": -2.8160171508789062, "logps/chosen": -559.3867797851562, "logps/rejected": -441.69061279296875, "loss": 0.788, "rewards/accuracies": 0.5625, "rewards/chosen": -2.4264440536499023, "rewards/margins": 0.41147786378860474, "rewards/rejected": -2.8379218578338623, "step": 5390 }, { "epoch": 0.7, "learning_rate": 4.2646074399923494e-07, "logits/chosen": -2.8750739097595215, "logits/rejected": -2.790757656097412, "logps/chosen": -626.2261962890625, "logps/rejected": -496.004150390625, "loss": 0.7629, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2891860008239746, "rewards/margins": 0.3979819118976593, "rewards/rejected": -2.6871681213378906, "step": 5400 }, { "epoch": 0.7, "learning_rate": 4.262216696949412e-07, "logits/chosen": -2.8225326538085938, "logits/rejected": -2.780242681503296, "logps/chosen": -496.10906982421875, "logps/rejected": -403.44012451171875, "loss": 0.7645, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2703099250793457, "rewards/margins": 0.3671378493309021, "rewards/rejected": -2.6374478340148926, "step": 5410 }, { "epoch": 0.7, "learning_rate": 4.259825953906474e-07, "logits/chosen": -2.893369197845459, "logits/rejected": -2.7697761058807373, "logps/chosen": -625.0241088867188, "logps/rejected": -481.23486328125, "loss": 0.7718, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.16568660736084, "rewards/margins": 0.3675471246242523, "rewards/rejected": -2.533234119415283, "step": 5420 }, { "epoch": 0.7, "learning_rate": 4.2574352108635365e-07, "logits/chosen": -2.9961748123168945, "logits/rejected": -2.9367353916168213, "logps/chosen": -477.5528259277344, "logps/rejected": -403.97064208984375, "loss": 0.7286, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1249899864196777, "rewards/margins": 0.33043015003204346, "rewards/rejected": -2.4554202556610107, "step": 5430 }, { "epoch": 0.7, "learning_rate": 4.2550444678205983e-07, "logits/chosen": -2.8526535034179688, "logits/rejected": -2.736722707748413, "logps/chosen": -552.1631469726562, "logps/rejected": -414.25909423828125, "loss": 0.6773, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3024935722351074, "rewards/margins": 0.5561520457267761, "rewards/rejected": -2.8586459159851074, "step": 5440 }, { "epoch": 0.7, "learning_rate": 4.2526537247776606e-07, "logits/chosen": -2.817809581756592, "logits/rejected": -2.7465660572052, "logps/chosen": -472.45184326171875, "logps/rejected": -397.3582458496094, "loss": 0.6965, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3004353046417236, "rewards/margins": 0.43538618087768555, "rewards/rejected": -2.7358217239379883, "step": 5450 }, { "epoch": 0.7, "learning_rate": 4.250262981734723e-07, "logits/chosen": -2.8607425689697266, "logits/rejected": -2.7970974445343018, "logps/chosen": -543.6548461914062, "logps/rejected": -430.39306640625, "loss": 0.6416, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1137964725494385, "rewards/margins": 0.5042855739593506, "rewards/rejected": -2.618082046508789, "step": 5460 }, { "epoch": 0.71, "learning_rate": 4.2478722386917854e-07, "logits/chosen": -2.7612531185150146, "logits/rejected": -2.6812126636505127, "logps/chosen": -541.9531860351562, "logps/rejected": -484.4940490722656, "loss": 0.7316, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1307597160339355, "rewards/margins": 0.6770618557929993, "rewards/rejected": -2.807821750640869, "step": 5470 }, { "epoch": 0.71, "learning_rate": 4.2454814956488477e-07, "logits/chosen": -2.9084577560424805, "logits/rejected": -2.7056331634521484, "logps/chosen": -701.5545654296875, "logps/rejected": -497.17413330078125, "loss": 0.649, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0104854106903076, "rewards/margins": 0.6309957504272461, "rewards/rejected": -2.641481399536133, "step": 5480 }, { "epoch": 0.71, "learning_rate": 4.2430907526059096e-07, "logits/chosen": -2.8590855598449707, "logits/rejected": -2.8261752128601074, "logps/chosen": -626.1135864257812, "logps/rejected": -466.96014404296875, "loss": 0.6222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0967612266540527, "rewards/margins": 0.5848902463912964, "rewards/rejected": -2.6816515922546387, "step": 5490 }, { "epoch": 0.71, "learning_rate": 4.240700009562972e-07, "logits/chosen": -2.902533531188965, "logits/rejected": -2.8267734050750732, "logps/chosen": -570.2273559570312, "logps/rejected": -450.73272705078125, "loss": 0.7144, "rewards/accuracies": 0.5625, "rewards/chosen": -2.229483127593994, "rewards/margins": 0.5514118075370789, "rewards/rejected": -2.780895233154297, "step": 5500 }, { "epoch": 0.71, "learning_rate": 4.2383092665200343e-07, "logits/chosen": -2.805917263031006, "logits/rejected": -2.792271375656128, "logps/chosen": -462.51739501953125, "logps/rejected": -423.3101501464844, "loss": 0.5183, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.020951747894287, "rewards/margins": 0.8896690607070923, "rewards/rejected": -2.910621166229248, "step": 5510 }, { "epoch": 0.71, "learning_rate": 4.2359185234770966e-07, "logits/chosen": -2.858193874359131, "logits/rejected": -2.7181382179260254, "logps/chosen": -503.36944580078125, "logps/rejected": -327.0660705566406, "loss": 0.9257, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.35800838470459, "rewards/margins": -0.04332510381937027, "rewards/rejected": -2.314682960510254, "step": 5520 }, { "epoch": 0.71, "learning_rate": 4.2335277804341585e-07, "logits/chosen": -2.905712127685547, "logits/rejected": -2.8015990257263184, "logps/chosen": -628.4326171875, "logps/rejected": -439.07635498046875, "loss": 0.5994, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1417360305786133, "rewards/margins": 0.7060524821281433, "rewards/rejected": -2.8477885723114014, "step": 5530 }, { "epoch": 0.72, "learning_rate": 4.231137037391221e-07, "logits/chosen": -2.917940139770508, "logits/rejected": -2.750098466873169, "logps/chosen": -637.0859985351562, "logps/rejected": -436.9703063964844, "loss": 0.6321, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.230646848678589, "rewards/margins": 0.6964605450630188, "rewards/rejected": -2.927107334136963, "step": 5540 }, { "epoch": 0.72, "learning_rate": 4.228746294348283e-07, "logits/chosen": -2.881438732147217, "logits/rejected": -2.788992166519165, "logps/chosen": -545.9332275390625, "logps/rejected": -386.29638671875, "loss": 0.5449, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.099022388458252, "rewards/margins": 0.8938155174255371, "rewards/rejected": -2.99283766746521, "step": 5550 }, { "epoch": 0.72, "learning_rate": 4.2263555513053455e-07, "logits/chosen": -2.8561699390411377, "logits/rejected": -2.824343204498291, "logps/chosen": -506.8937072753906, "logps/rejected": -480.43341064453125, "loss": 0.7799, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.3929243087768555, "rewards/margins": 0.39257389307022095, "rewards/rejected": -2.7854979038238525, "step": 5560 }, { "epoch": 0.72, "learning_rate": 4.223964808262408e-07, "logits/chosen": -2.921765089035034, "logits/rejected": -2.8395705223083496, "logps/chosen": -572.70166015625, "logps/rejected": -441.8863220214844, "loss": 0.6539, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.103877544403076, "rewards/margins": 0.5021719932556152, "rewards/rejected": -2.6060497760772705, "step": 5570 }, { "epoch": 0.72, "learning_rate": 4.2215740652194697e-07, "logits/chosen": -2.9158380031585693, "logits/rejected": -2.7650187015533447, "logps/chosen": -567.4581298828125, "logps/rejected": -461.1324768066406, "loss": 0.8986, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.404977321624756, "rewards/margins": 0.16289648413658142, "rewards/rejected": -2.56787371635437, "step": 5580 }, { "epoch": 0.72, "learning_rate": 4.219183322176532e-07, "logits/chosen": -2.9615561962127686, "logits/rejected": -2.9338741302490234, "logps/chosen": -546.927001953125, "logps/rejected": -436.5244140625, "loss": 0.6267, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0287675857543945, "rewards/margins": 0.5976980924606323, "rewards/rejected": -2.6264657974243164, "step": 5590 }, { "epoch": 0.72, "learning_rate": 4.2167925791335944e-07, "logits/chosen": -2.9967973232269287, "logits/rejected": -2.807753086090088, "logps/chosen": -547.2955322265625, "logps/rejected": -366.18475341796875, "loss": 0.6422, "rewards/accuracies": 0.6875, "rewards/chosen": -2.09132981300354, "rewards/margins": 0.5340958833694458, "rewards/rejected": -2.6254258155822754, "step": 5600 }, { "epoch": 0.72, "learning_rate": 4.214401836090657e-07, "logits/chosen": -2.8863227367401123, "logits/rejected": -2.8249568939208984, "logps/chosen": -569.7266845703125, "logps/rejected": -464.7767639160156, "loss": 0.7137, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1821656227111816, "rewards/margins": 0.5087321996688843, "rewards/rejected": -2.6908979415893555, "step": 5610 }, { "epoch": 0.73, "learning_rate": 4.212011093047719e-07, "logits/chosen": -2.897348642349243, "logits/rejected": -2.8235554695129395, "logps/chosen": -668.5621948242188, "logps/rejected": -517.1206665039062, "loss": 0.8254, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4300990104675293, "rewards/margins": 0.33858436346054077, "rewards/rejected": -2.7686831951141357, "step": 5620 }, { "epoch": 0.73, "learning_rate": 4.209620350004781e-07, "logits/chosen": -2.9144225120544434, "logits/rejected": -2.880009174346924, "logps/chosen": -676.5970458984375, "logps/rejected": -510.3700256347656, "loss": 0.7583, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.277414321899414, "rewards/margins": 0.3616483211517334, "rewards/rejected": -2.6390626430511475, "step": 5630 }, { "epoch": 0.73, "learning_rate": 4.2072296069618434e-07, "logits/chosen": -3.0004539489746094, "logits/rejected": -2.9820566177368164, "logps/chosen": -516.5015258789062, "logps/rejected": -460.7757873535156, "loss": 0.7715, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.2256088256835938, "rewards/margins": 0.35726282000541687, "rewards/rejected": -2.582871675491333, "step": 5640 }, { "epoch": 0.73, "learning_rate": 4.2048388639189057e-07, "logits/chosen": -2.8498175144195557, "logits/rejected": -2.709986448287964, "logps/chosen": -535.1976318359375, "logps/rejected": -377.8082580566406, "loss": 0.8141, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.262723445892334, "rewards/margins": 0.3589038848876953, "rewards/rejected": -2.6216275691986084, "step": 5650 }, { "epoch": 0.73, "learning_rate": 4.202448120875968e-07, "logits/chosen": -2.8772265911102295, "logits/rejected": -2.8230974674224854, "logps/chosen": -526.2716064453125, "logps/rejected": -406.972412109375, "loss": 0.5965, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0907468795776367, "rewards/margins": 0.6038751602172852, "rewards/rejected": -2.694622039794922, "step": 5660 }, { "epoch": 0.73, "learning_rate": 4.20005737783303e-07, "logits/chosen": -2.9147744178771973, "logits/rejected": -2.8029208183288574, "logps/chosen": -643.0474853515625, "logps/rejected": -472.7498474121094, "loss": 0.7771, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.42350697517395, "rewards/margins": 0.409981906414032, "rewards/rejected": -2.833488941192627, "step": 5670 }, { "epoch": 0.73, "learning_rate": 4.197666634790092e-07, "logits/chosen": -2.819725275039673, "logits/rejected": -2.726280927658081, "logps/chosen": -541.6487426757812, "logps/rejected": -421.08660888671875, "loss": 0.7995, "rewards/accuracies": 0.5625, "rewards/chosen": -2.3585238456726074, "rewards/margins": 0.2560691237449646, "rewards/rejected": -2.614593267440796, "step": 5680 }, { "epoch": 0.73, "learning_rate": 4.1952758917471546e-07, "logits/chosen": -2.8787097930908203, "logits/rejected": -2.8863279819488525, "logps/chosen": -536.486083984375, "logps/rejected": -443.28851318359375, "loss": 0.6118, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.868790626525879, "rewards/margins": 0.5731475949287415, "rewards/rejected": -2.4419384002685547, "step": 5690 }, { "epoch": 0.74, "learning_rate": 4.192885148704217e-07, "logits/chosen": -2.838681936264038, "logits/rejected": -2.8049466609954834, "logps/chosen": -486.84515380859375, "logps/rejected": -398.03692626953125, "loss": 0.7123, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.190330982208252, "rewards/margins": 0.3368644416332245, "rewards/rejected": -2.5271949768066406, "step": 5700 }, { "epoch": 0.74, "learning_rate": 4.1904944056612793e-07, "logits/chosen": -2.8736209869384766, "logits/rejected": -2.6726698875427246, "logps/chosen": -534.291015625, "logps/rejected": -344.8094787597656, "loss": 0.8342, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.328742504119873, "rewards/margins": 0.1214713454246521, "rewards/rejected": -2.45021390914917, "step": 5710 }, { "epoch": 0.74, "learning_rate": 4.188103662618341e-07, "logits/chosen": -2.925985336303711, "logits/rejected": -2.7900967597961426, "logps/chosen": -520.3064575195312, "logps/rejected": -386.08209228515625, "loss": 0.8307, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.259958028793335, "rewards/margins": 0.2472977191209793, "rewards/rejected": -2.507256031036377, "step": 5720 }, { "epoch": 0.74, "learning_rate": 4.1857129195754035e-07, "logits/chosen": -2.894577980041504, "logits/rejected": -2.871814012527466, "logps/chosen": -502.472900390625, "logps/rejected": -473.12188720703125, "loss": 0.7406, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.116209030151367, "rewards/margins": 0.37626177072525024, "rewards/rejected": -2.4924709796905518, "step": 5730 }, { "epoch": 0.74, "learning_rate": 4.183322176532466e-07, "logits/chosen": -2.8952243328094482, "logits/rejected": -2.737194538116455, "logps/chosen": -559.3157958984375, "logps/rejected": -414.7272033691406, "loss": 0.6722, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2360732555389404, "rewards/margins": 0.543919026851654, "rewards/rejected": -2.7799923419952393, "step": 5740 }, { "epoch": 0.74, "learning_rate": 4.180931433489528e-07, "logits/chosen": -2.785966396331787, "logits/rejected": -2.782021999359131, "logps/chosen": -490.7845153808594, "logps/rejected": -421.062744140625, "loss": 0.6828, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.244778871536255, "rewards/margins": 0.44268307089805603, "rewards/rejected": -2.6874616146087646, "step": 5750 }, { "epoch": 0.74, "learning_rate": 4.1785406904465906e-07, "logits/chosen": -2.902118682861328, "logits/rejected": -2.8368568420410156, "logps/chosen": -539.6217041015625, "logps/rejected": -410.736083984375, "loss": 0.6922, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.127525568008423, "rewards/margins": 0.5259748697280884, "rewards/rejected": -2.653500556945801, "step": 5760 }, { "epoch": 0.74, "learning_rate": 4.1761499474036524e-07, "logits/chosen": -2.8675146102905273, "logits/rejected": -2.8096089363098145, "logps/chosen": -484.5250549316406, "logps/rejected": -393.924072265625, "loss": 0.7086, "rewards/accuracies": 0.5625, "rewards/chosen": -2.171422004699707, "rewards/margins": 0.3847891688346863, "rewards/rejected": -2.556210994720459, "step": 5770 }, { "epoch": 0.75, "learning_rate": 4.173759204360715e-07, "logits/chosen": -2.7202084064483643, "logits/rejected": -2.7075626850128174, "logps/chosen": -453.42071533203125, "logps/rejected": -426.8235778808594, "loss": 0.852, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.2200067043304443, "rewards/margins": 0.22100433707237244, "rewards/rejected": -2.4410111904144287, "step": 5780 }, { "epoch": 0.75, "learning_rate": 4.1713684613177777e-07, "logits/chosen": -2.7129406929016113, "logits/rejected": -2.591736316680908, "logps/chosen": -585.8099365234375, "logps/rejected": -482.14532470703125, "loss": 0.6364, "rewards/accuracies": 0.625, "rewards/chosen": -2.3406004905700684, "rewards/margins": 0.8722810745239258, "rewards/rejected": -3.2128818035125732, "step": 5790 }, { "epoch": 0.75, "learning_rate": 4.16897771827484e-07, "logits/chosen": -2.9464006423950195, "logits/rejected": -2.8319053649902344, "logps/chosen": -573.4876098632812, "logps/rejected": -416.276123046875, "loss": 0.7649, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3332650661468506, "rewards/margins": 0.5315276980400085, "rewards/rejected": -2.864792823791504, "step": 5800 }, { "epoch": 0.75, "learning_rate": 4.166586975231902e-07, "logits/chosen": -2.8985190391540527, "logits/rejected": -2.7633843421936035, "logps/chosen": -459.3121643066406, "logps/rejected": -358.45111083984375, "loss": 0.7008, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.0435056686401367, "rewards/margins": 0.504298210144043, "rewards/rejected": -2.5478038787841797, "step": 5810 }, { "epoch": 0.75, "learning_rate": 4.164196232188964e-07, "logits/chosen": -2.8704941272735596, "logits/rejected": -2.7720539569854736, "logps/chosen": -544.6500244140625, "logps/rejected": -398.4942321777344, "loss": 0.8978, "rewards/accuracies": 0.5, "rewards/chosen": -2.3098883628845215, "rewards/margins": 0.06314261257648468, "rewards/rejected": -2.3730311393737793, "step": 5820 }, { "epoch": 0.75, "learning_rate": 4.1618054891460266e-07, "logits/chosen": -2.8760814666748047, "logits/rejected": -2.823664426803589, "logps/chosen": -565.1480712890625, "logps/rejected": -458.7384338378906, "loss": 0.6229, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9862339496612549, "rewards/margins": 0.6635918617248535, "rewards/rejected": -2.6498255729675293, "step": 5830 }, { "epoch": 0.75, "learning_rate": 4.159414746103089e-07, "logits/chosen": -2.800656795501709, "logits/rejected": -2.714782238006592, "logps/chosen": -475.1065979003906, "logps/rejected": -403.0895080566406, "loss": 0.7579, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.13899302482605, "rewards/margins": 0.28132492303848267, "rewards/rejected": -2.420318126678467, "step": 5840 }, { "epoch": 0.76, "learning_rate": 4.1570240030601513e-07, "logits/chosen": -2.856694459915161, "logits/rejected": -2.756437301635742, "logps/chosen": -494.9986267089844, "logps/rejected": -340.62652587890625, "loss": 0.7851, "rewards/accuracies": 0.625, "rewards/chosen": -2.1848552227020264, "rewards/margins": 0.4201585352420807, "rewards/rejected": -2.6050140857696533, "step": 5850 }, { "epoch": 0.76, "learning_rate": 4.154633260017213e-07, "logits/chosen": -2.7561769485473633, "logits/rejected": -2.6290855407714844, "logps/chosen": -513.5797119140625, "logps/rejected": -374.69976806640625, "loss": 0.7227, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3528246879577637, "rewards/margins": 0.4127357006072998, "rewards/rejected": -2.7655603885650635, "step": 5860 }, { "epoch": 0.76, "learning_rate": 4.1522425169742755e-07, "logits/chosen": -2.9711101055145264, "logits/rejected": -2.769608974456787, "logps/chosen": -523.5067749023438, "logps/rejected": -338.1678771972656, "loss": 0.6815, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.008937120437622, "rewards/margins": 0.6947196125984192, "rewards/rejected": -2.7036566734313965, "step": 5870 }, { "epoch": 0.76, "learning_rate": 4.149851773931338e-07, "logits/chosen": -2.7451140880584717, "logits/rejected": -2.7423744201660156, "logps/chosen": -485.931640625, "logps/rejected": -428.3771057128906, "loss": 0.6764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0381252765655518, "rewards/margins": 0.5217207670211792, "rewards/rejected": -2.5598464012145996, "step": 5880 }, { "epoch": 0.76, "learning_rate": 4.1474610308884e-07, "logits/chosen": -2.8218040466308594, "logits/rejected": -2.7435035705566406, "logps/chosen": -535.173828125, "logps/rejected": -418.2737731933594, "loss": 0.731, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.3447461128234863, "rewards/margins": 0.43300023674964905, "rewards/rejected": -2.7777464389801025, "step": 5890 }, { "epoch": 0.76, "learning_rate": 4.1450702878454626e-07, "logits/chosen": -2.8860912322998047, "logits/rejected": -2.790113925933838, "logps/chosen": -569.0647583007812, "logps/rejected": -464.046142578125, "loss": 0.6842, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.194026470184326, "rewards/margins": 0.4985136091709137, "rewards/rejected": -2.692540168762207, "step": 5900 }, { "epoch": 0.76, "learning_rate": 4.1426795448025244e-07, "logits/chosen": -2.9004390239715576, "logits/rejected": -2.8136472702026367, "logps/chosen": -531.4432373046875, "logps/rejected": -378.4201354980469, "loss": 0.6182, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1673943996429443, "rewards/margins": 0.6199471950531006, "rewards/rejected": -2.787341594696045, "step": 5910 }, { "epoch": 0.76, "learning_rate": 4.140288801759587e-07, "logits/chosen": -2.8051552772521973, "logits/rejected": -2.7749104499816895, "logps/chosen": -520.5614013671875, "logps/rejected": -412.083740234375, "loss": 0.5346, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9864966869354248, "rewards/margins": 0.8316604495048523, "rewards/rejected": -2.818157434463501, "step": 5920 }, { "epoch": 0.77, "learning_rate": 4.137898058716649e-07, "logits/chosen": -2.7277884483337402, "logits/rejected": -2.6457467079162598, "logps/chosen": -555.8453979492188, "logps/rejected": -456.24200439453125, "loss": 0.6301, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.144341468811035, "rewards/margins": 0.5930684804916382, "rewards/rejected": -2.737409830093384, "step": 5930 }, { "epoch": 0.77, "learning_rate": 4.1355073156737115e-07, "logits/chosen": -2.8207767009735107, "logits/rejected": -2.7761917114257812, "logps/chosen": -518.4105224609375, "logps/rejected": -407.8157653808594, "loss": 0.6396, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9560117721557617, "rewards/margins": 0.470583438873291, "rewards/rejected": -2.4265952110290527, "step": 5940 }, { "epoch": 0.77, "learning_rate": 4.133116572630774e-07, "logits/chosen": -2.81789493560791, "logits/rejected": -2.704853057861328, "logps/chosen": -492.22216796875, "logps/rejected": -406.84686279296875, "loss": 0.7356, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3131136894226074, "rewards/margins": 0.41793498396873474, "rewards/rejected": -2.731048822402954, "step": 5950 }, { "epoch": 0.77, "learning_rate": 4.1307258295878357e-07, "logits/chosen": -2.746175765991211, "logits/rejected": -2.639108657836914, "logps/chosen": -500.3570861816406, "logps/rejected": -386.28765869140625, "loss": 0.6921, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3833863735198975, "rewards/margins": 0.4802820086479187, "rewards/rejected": -2.863668441772461, "step": 5960 }, { "epoch": 0.77, "learning_rate": 4.128335086544898e-07, "logits/chosen": -2.91461443901062, "logits/rejected": -2.778257131576538, "logps/chosen": -578.1534423828125, "logps/rejected": -434.5841369628906, "loss": 0.6889, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.302880048751831, "rewards/margins": 0.6208136081695557, "rewards/rejected": -2.923693895339966, "step": 5970 }, { "epoch": 0.77, "learning_rate": 4.1259443435019604e-07, "logits/chosen": -2.7774810791015625, "logits/rejected": -2.6850199699401855, "logps/chosen": -617.7150268554688, "logps/rejected": -452.59100341796875, "loss": 0.7039, "rewards/accuracies": 0.625, "rewards/chosen": -2.3099422454833984, "rewards/margins": 0.47658076882362366, "rewards/rejected": -2.7865233421325684, "step": 5980 }, { "epoch": 0.77, "learning_rate": 4.123553600459023e-07, "logits/chosen": -2.9106497764587402, "logits/rejected": -2.701521396636963, "logps/chosen": -566.0615844726562, "logps/rejected": -391.56451416015625, "loss": 0.6534, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.227064847946167, "rewards/margins": 0.4805432856082916, "rewards/rejected": -2.707608222961426, "step": 5990 }, { "epoch": 0.77, "learning_rate": 4.1211628574160846e-07, "logits/chosen": -2.8468098640441895, "logits/rejected": -2.743288040161133, "logps/chosen": -617.8755493164062, "logps/rejected": -488.2684020996094, "loss": 0.6425, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.318199396133423, "rewards/margins": 0.6394053101539612, "rewards/rejected": -2.9576046466827393, "step": 6000 }, { "epoch": 0.77, "eval_logits/chosen": -3.1711645126342773, "eval_logits/rejected": -3.1293764114379883, "eval_logps/chosen": -541.1941528320312, "eval_logps/rejected": -419.9546813964844, "eval_loss": 0.6378899216651917, "eval_rewards/accuracies": 0.6554999947547913, "eval_rewards/chosen": -0.9374440908432007, "eval_rewards/margins": 0.9651620984077454, "eval_rewards/rejected": -1.9026061296463013, "eval_runtime": 278.9553, "eval_samples_per_second": 7.17, "eval_steps_per_second": 3.585, "step": 6000 }, { "epoch": 0.78, "learning_rate": 4.118772114373147e-07, "logits/chosen": -2.8796803951263428, "logits/rejected": -2.776118516921997, "logps/chosen": -543.7447509765625, "logps/rejected": -393.9072570800781, "loss": 0.5673, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1881489753723145, "rewards/margins": 0.6410235166549683, "rewards/rejected": -2.829172134399414, "step": 6010 }, { "epoch": 0.78, "learning_rate": 4.1163813713302093e-07, "logits/chosen": -2.806391716003418, "logits/rejected": -2.6973378658294678, "logps/chosen": -525.938232421875, "logps/rejected": -458.53662109375, "loss": 0.6803, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1855225563049316, "rewards/margins": 0.6352413892745972, "rewards/rejected": -2.8207640647888184, "step": 6020 }, { "epoch": 0.78, "learning_rate": 4.1139906282872717e-07, "logits/chosen": -2.7633821964263916, "logits/rejected": -2.668747663497925, "logps/chosen": -604.5541381835938, "logps/rejected": -471.9877014160156, "loss": 0.7579, "rewards/accuracies": 0.625, "rewards/chosen": -2.429410457611084, "rewards/margins": 0.4824395775794983, "rewards/rejected": -2.9118502140045166, "step": 6030 }, { "epoch": 0.78, "learning_rate": 4.111599885244334e-07, "logits/chosen": -2.787304639816284, "logits/rejected": -2.7587811946868896, "logps/chosen": -584.9354248046875, "logps/rejected": -470.1625061035156, "loss": 0.8287, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.425413131713867, "rewards/margins": 0.3198467791080475, "rewards/rejected": -2.7452597618103027, "step": 6040 }, { "epoch": 0.78, "learning_rate": 4.109209142201396e-07, "logits/chosen": -2.8536834716796875, "logits/rejected": -2.7333672046661377, "logps/chosen": -599.0545654296875, "logps/rejected": -432.77130126953125, "loss": 0.718, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2993533611297607, "rewards/margins": 0.6329619288444519, "rewards/rejected": -2.9323153495788574, "step": 6050 }, { "epoch": 0.78, "learning_rate": 4.106818399158458e-07, "logits/chosen": -2.857110023498535, "logits/rejected": -2.744875431060791, "logps/chosen": -586.2503051757812, "logps/rejected": -436.3265686035156, "loss": 0.6653, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.265467643737793, "rewards/margins": 0.6511850953102112, "rewards/rejected": -2.9166526794433594, "step": 6060 }, { "epoch": 0.78, "learning_rate": 4.1044276561155206e-07, "logits/chosen": -2.798381805419922, "logits/rejected": -2.771883726119995, "logps/chosen": -555.5060424804688, "logps/rejected": -439.99609375, "loss": 0.6712, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1594345569610596, "rewards/margins": 0.5736945867538452, "rewards/rejected": -2.733128786087036, "step": 6070 }, { "epoch": 0.78, "learning_rate": 4.102036913072583e-07, "logits/chosen": -2.8350067138671875, "logits/rejected": -2.681400775909424, "logps/chosen": -599.6095581054688, "logps/rejected": -448.0142517089844, "loss": 0.7203, "rewards/accuracies": 0.625, "rewards/chosen": -2.24934458732605, "rewards/margins": 0.587101936340332, "rewards/rejected": -2.836446762084961, "step": 6080 }, { "epoch": 0.79, "learning_rate": 4.0996461700296453e-07, "logits/chosen": -2.842301845550537, "logits/rejected": -2.6934523582458496, "logps/chosen": -529.0728149414062, "logps/rejected": -415.7950744628906, "loss": 0.6938, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.199540853500366, "rewards/margins": 0.47461310029029846, "rewards/rejected": -2.674154281616211, "step": 6090 }, { "epoch": 0.79, "learning_rate": 4.097255426986707e-07, "logits/chosen": -2.83056902885437, "logits/rejected": -2.7594246864318848, "logps/chosen": -613.6036376953125, "logps/rejected": -447.6935119628906, "loss": 0.7448, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.600447177886963, "rewards/margins": 0.5293048620223999, "rewards/rejected": -3.129751682281494, "step": 6100 }, { "epoch": 0.79, "learning_rate": 4.0948646839437695e-07, "logits/chosen": -2.7418627738952637, "logits/rejected": -2.684875011444092, "logps/chosen": -549.8353271484375, "logps/rejected": -429.94940185546875, "loss": 0.6469, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.396963596343994, "rewards/margins": 0.5161769390106201, "rewards/rejected": -2.9131405353546143, "step": 6110 }, { "epoch": 0.79, "learning_rate": 4.092473940900832e-07, "logits/chosen": -2.742758274078369, "logits/rejected": -2.6335365772247314, "logps/chosen": -577.0084838867188, "logps/rejected": -423.7821350097656, "loss": 0.8064, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2626121044158936, "rewards/margins": 0.26584798097610474, "rewards/rejected": -2.5284602642059326, "step": 6120 }, { "epoch": 0.79, "learning_rate": 4.090083197857894e-07, "logits/chosen": -2.7397332191467285, "logits/rejected": -2.583674907684326, "logps/chosen": -516.1879272460938, "logps/rejected": -436.80255126953125, "loss": 0.6756, "rewards/accuracies": 0.625, "rewards/chosen": -2.4465224742889404, "rewards/margins": 0.5269345045089722, "rewards/rejected": -2.973457098007202, "step": 6130 }, { "epoch": 0.79, "learning_rate": 4.087692454814956e-07, "logits/chosen": -2.8146755695343018, "logits/rejected": -2.708454132080078, "logps/chosen": -593.5131225585938, "logps/rejected": -452.6853942871094, "loss": 0.6932, "rewards/accuracies": 0.625, "rewards/chosen": -2.337578535079956, "rewards/margins": 0.6783876419067383, "rewards/rejected": -3.0159664154052734, "step": 6140 }, { "epoch": 0.79, "learning_rate": 4.0853017117720184e-07, "logits/chosen": -2.7989845275878906, "logits/rejected": -2.693890333175659, "logps/chosen": -538.8048095703125, "logps/rejected": -417.235595703125, "loss": 0.6263, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.053928852081299, "rewards/margins": 0.6783667802810669, "rewards/rejected": -2.732295513153076, "step": 6150 }, { "epoch": 0.8, "learning_rate": 4.082910968729081e-07, "logits/chosen": -2.8069894313812256, "logits/rejected": -2.7507805824279785, "logps/chosen": -551.6346435546875, "logps/rejected": -377.757568359375, "loss": 0.6079, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.145761013031006, "rewards/margins": 0.73796546459198, "rewards/rejected": -2.8837265968322754, "step": 6160 }, { "epoch": 0.8, "learning_rate": 4.080520225686143e-07, "logits/chosen": -2.738461971282959, "logits/rejected": -2.6801648139953613, "logps/chosen": -435.38427734375, "logps/rejected": -398.0328674316406, "loss": 0.7857, "rewards/accuracies": 0.625, "rewards/chosen": -1.9926084280014038, "rewards/margins": 0.28002145886421204, "rewards/rejected": -2.272629737854004, "step": 6170 }, { "epoch": 0.8, "learning_rate": 4.0781294826432055e-07, "logits/chosen": -2.875967264175415, "logits/rejected": -2.747831106185913, "logps/chosen": -555.6887817382812, "logps/rejected": -413.11639404296875, "loss": 0.7957, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.5109734535217285, "rewards/margins": 0.37060925364494324, "rewards/rejected": -2.881582736968994, "step": 6180 }, { "epoch": 0.8, "learning_rate": 4.0757387396002673e-07, "logits/chosen": -2.7820637226104736, "logits/rejected": -2.702619791030884, "logps/chosen": -485.8759765625, "logps/rejected": -370.24627685546875, "loss": 0.6954, "rewards/accuracies": 0.625, "rewards/chosen": -2.2080256938934326, "rewards/margins": 0.39282140135765076, "rewards/rejected": -2.6008472442626953, "step": 6190 }, { "epoch": 0.8, "learning_rate": 4.0733479965573296e-07, "logits/chosen": -2.843902111053467, "logits/rejected": -2.6984622478485107, "logps/chosen": -542.9966430664062, "logps/rejected": -428.001953125, "loss": 0.8115, "rewards/accuracies": 0.625, "rewards/chosen": -2.438141345977783, "rewards/margins": 0.3439865708351135, "rewards/rejected": -2.782128095626831, "step": 6200 }, { "epoch": 0.8, "learning_rate": 4.070957253514392e-07, "logits/chosen": -2.8118371963500977, "logits/rejected": -2.7302448749542236, "logps/chosen": -650.7228393554688, "logps/rejected": -505.3805236816406, "loss": 0.8152, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.456045627593994, "rewards/margins": 0.3167288601398468, "rewards/rejected": -2.7727746963500977, "step": 6210 }, { "epoch": 0.8, "learning_rate": 4.0685665104714544e-07, "logits/chosen": -2.8476903438568115, "logits/rejected": -2.64825701713562, "logps/chosen": -573.1537475585938, "logps/rejected": -361.7265625, "loss": 0.7825, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.277782678604126, "rewards/margins": 0.3889717161655426, "rewards/rejected": -2.6667540073394775, "step": 6220 }, { "epoch": 0.8, "learning_rate": 4.0661757674285167e-07, "logits/chosen": -2.725294828414917, "logits/rejected": -2.654000759124756, "logps/chosen": -560.3692626953125, "logps/rejected": -450.20947265625, "loss": 0.6927, "rewards/accuracies": 0.5625, "rewards/chosen": -2.526703357696533, "rewards/margins": 0.46545666456222534, "rewards/rejected": -2.9921603202819824, "step": 6230 }, { "epoch": 0.81, "learning_rate": 4.0637850243855786e-07, "logits/chosen": -2.8177123069763184, "logits/rejected": -2.580338478088379, "logps/chosen": -677.1884765625, "logps/rejected": -436.1844787597656, "loss": 0.7778, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4520773887634277, "rewards/margins": 0.38212060928344727, "rewards/rejected": -2.834197998046875, "step": 6240 }, { "epoch": 0.81, "learning_rate": 4.061394281342641e-07, "logits/chosen": -2.851430892944336, "logits/rejected": -2.7044928073883057, "logps/chosen": -628.8058471679688, "logps/rejected": -473.25030517578125, "loss": 0.7051, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.5851945877075195, "rewards/margins": 0.5240641236305237, "rewards/rejected": -3.1092586517333984, "step": 6250 }, { "epoch": 0.81, "learning_rate": 4.0590035382997033e-07, "logits/chosen": -2.830200672149658, "logits/rejected": -2.631619453430176, "logps/chosen": -557.9229736328125, "logps/rejected": -385.1855163574219, "loss": 0.6159, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0718369483947754, "rewards/margins": 0.6721190810203552, "rewards/rejected": -2.7439560890197754, "step": 6260 }, { "epoch": 0.81, "learning_rate": 4.0566127952567656e-07, "logits/chosen": -2.6555354595184326, "logits/rejected": -2.5430915355682373, "logps/chosen": -591.9454956054688, "logps/rejected": -453.9837341308594, "loss": 0.7537, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.317887544631958, "rewards/margins": 0.6113752126693726, "rewards/rejected": -2.929262638092041, "step": 6270 }, { "epoch": 0.81, "learning_rate": 4.0542220522138275e-07, "logits/chosen": -2.8113341331481934, "logits/rejected": -2.719850540161133, "logps/chosen": -518.0054931640625, "logps/rejected": -385.63726806640625, "loss": 0.7511, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2346556186676025, "rewards/margins": 0.516946017742157, "rewards/rejected": -2.7516016960144043, "step": 6280 }, { "epoch": 0.81, "learning_rate": 4.05183130917089e-07, "logits/chosen": -2.8236749172210693, "logits/rejected": -2.6842455863952637, "logps/chosen": -612.46240234375, "logps/rejected": -469.9281311035156, "loss": 0.6703, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.298520088195801, "rewards/margins": 0.6430020928382874, "rewards/rejected": -2.9415223598480225, "step": 6290 }, { "epoch": 0.81, "learning_rate": 4.049440566127952e-07, "logits/chosen": -2.7837777137756348, "logits/rejected": -2.5725483894348145, "logps/chosen": -673.6102905273438, "logps/rejected": -492.28204345703125, "loss": 0.8127, "rewards/accuracies": 0.5625, "rewards/chosen": -2.51377010345459, "rewards/margins": 0.2975844144821167, "rewards/rejected": -2.811354875564575, "step": 6300 }, { "epoch": 0.81, "learning_rate": 4.0470498230850145e-07, "logits/chosen": -2.70831036567688, "logits/rejected": -2.7461154460906982, "logps/chosen": -531.0703735351562, "logps/rejected": -516.0758056640625, "loss": 0.83, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.3947794437408447, "rewards/margins": 0.2484634667634964, "rewards/rejected": -2.643242597579956, "step": 6310 }, { "epoch": 0.82, "learning_rate": 4.044659080042077e-07, "logits/chosen": -2.7295479774475098, "logits/rejected": -2.673699378967285, "logps/chosen": -534.3987426757812, "logps/rejected": -475.09246826171875, "loss": 0.7251, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.140441417694092, "rewards/margins": 0.410286009311676, "rewards/rejected": -2.550727367401123, "step": 6320 }, { "epoch": 0.82, "learning_rate": 4.0422683369991387e-07, "logits/chosen": -2.7961745262145996, "logits/rejected": -2.774951219558716, "logps/chosen": -586.8214111328125, "logps/rejected": -512.9927978515625, "loss": 0.7603, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.375560760498047, "rewards/margins": 0.5554514527320862, "rewards/rejected": -2.9310121536254883, "step": 6330 }, { "epoch": 0.82, "learning_rate": 4.039877593956201e-07, "logits/chosen": -2.7411274909973145, "logits/rejected": -2.6819591522216797, "logps/chosen": -503.12982177734375, "logps/rejected": -419.49041748046875, "loss": 0.6237, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0625393390655518, "rewards/margins": 0.7245683073997498, "rewards/rejected": -2.7871077060699463, "step": 6340 }, { "epoch": 0.82, "learning_rate": 4.0374868509132634e-07, "logits/chosen": -2.739589214324951, "logits/rejected": -2.5692203044891357, "logps/chosen": -598.4371337890625, "logps/rejected": -411.97857666015625, "loss": 0.5681, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1072444915771484, "rewards/margins": 0.7049761414527893, "rewards/rejected": -2.812220573425293, "step": 6350 }, { "epoch": 0.82, "learning_rate": 4.035096107870326e-07, "logits/chosen": -2.894331693649292, "logits/rejected": -2.7406980991363525, "logps/chosen": -555.1244506835938, "logps/rejected": -405.248779296875, "loss": 0.8244, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.5431857109069824, "rewards/margins": 0.3279717266559601, "rewards/rejected": -2.871157169342041, "step": 6360 }, { "epoch": 0.82, "learning_rate": 4.0327053648273887e-07, "logits/chosen": -2.7301859855651855, "logits/rejected": -2.7346067428588867, "logps/chosen": -527.5341186523438, "logps/rejected": -503.07183837890625, "loss": 0.7452, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5226187705993652, "rewards/margins": 0.31079983711242676, "rewards/rejected": -2.833418607711792, "step": 6370 }, { "epoch": 0.82, "learning_rate": 4.03031462178445e-07, "logits/chosen": -2.8307607173919678, "logits/rejected": -2.753140926361084, "logps/chosen": -582.0201416015625, "logps/rejected": -455.0359802246094, "loss": 0.7636, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.360605239868164, "rewards/margins": 0.30381542444229126, "rewards/rejected": -2.6644208431243896, "step": 6380 }, { "epoch": 0.82, "learning_rate": 4.027923878741513e-07, "logits/chosen": -2.7598764896392822, "logits/rejected": -2.657489776611328, "logps/chosen": -599.0473022460938, "logps/rejected": -485.72589111328125, "loss": 0.8455, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.491989850997925, "rewards/margins": 0.2579317092895508, "rewards/rejected": -2.7499215602874756, "step": 6390 }, { "epoch": 0.83, "learning_rate": 4.025533135698575e-07, "logits/chosen": -2.6592421531677246, "logits/rejected": -2.527588129043579, "logps/chosen": -640.85498046875, "logps/rejected": -459.17010498046875, "loss": 0.6925, "rewards/accuracies": 0.625, "rewards/chosen": -2.287057399749756, "rewards/margins": 0.5862755179405212, "rewards/rejected": -2.8733327388763428, "step": 6400 }, { "epoch": 0.83, "learning_rate": 4.0231423926556376e-07, "logits/chosen": -2.90702748298645, "logits/rejected": -2.748400926589966, "logps/chosen": -662.9431762695312, "logps/rejected": -454.6493225097656, "loss": 0.6927, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1800777912139893, "rewards/margins": 0.6832410097122192, "rewards/rejected": -2.863318681716919, "step": 6410 }, { "epoch": 0.83, "learning_rate": 4.0207516496126994e-07, "logits/chosen": -2.7992334365844727, "logits/rejected": -2.6744840145111084, "logps/chosen": -516.8660888671875, "logps/rejected": -399.64935302734375, "loss": 0.6551, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9300148487091064, "rewards/margins": 0.7869305610656738, "rewards/rejected": -2.716945171356201, "step": 6420 }, { "epoch": 0.83, "learning_rate": 4.018360906569762e-07, "logits/chosen": -2.809412717819214, "logits/rejected": -2.6851086616516113, "logps/chosen": -545.0630493164062, "logps/rejected": -485.7027893066406, "loss": 0.707, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.142731189727783, "rewards/margins": 0.5680193901062012, "rewards/rejected": -2.7107505798339844, "step": 6430 }, { "epoch": 0.83, "learning_rate": 4.015970163526824e-07, "logits/chosen": -2.69657301902771, "logits/rejected": -2.6477413177490234, "logps/chosen": -517.7656860351562, "logps/rejected": -426.5035705566406, "loss": 0.5951, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0788772106170654, "rewards/margins": 0.7185454964637756, "rewards/rejected": -2.7974228858947754, "step": 6440 }, { "epoch": 0.83, "learning_rate": 4.0135794204838865e-07, "logits/chosen": -2.780721426010132, "logits/rejected": -2.6752991676330566, "logps/chosen": -551.0904541015625, "logps/rejected": -457.2185974121094, "loss": 0.6352, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.301492214202881, "rewards/margins": 0.5815023183822632, "rewards/rejected": -2.8829948902130127, "step": 6450 }, { "epoch": 0.83, "learning_rate": 4.011188677440949e-07, "logits/chosen": -2.8883774280548096, "logits/rejected": -2.6903369426727295, "logps/chosen": -509.8154296875, "logps/rejected": -355.7737731933594, "loss": 0.7485, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.277609348297119, "rewards/margins": 0.3031327426433563, "rewards/rejected": -2.580742120742798, "step": 6460 }, { "epoch": 0.84, "learning_rate": 4.0087979343980107e-07, "logits/chosen": -2.7322707176208496, "logits/rejected": -2.6053848266601562, "logps/chosen": -559.7935791015625, "logps/rejected": -446.9764709472656, "loss": 0.6206, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1102054119110107, "rewards/margins": 0.7424209117889404, "rewards/rejected": -2.852626085281372, "step": 6470 }, { "epoch": 0.84, "learning_rate": 4.006407191355073e-07, "logits/chosen": -2.8607680797576904, "logits/rejected": -2.7170164585113525, "logps/chosen": -596.7918701171875, "logps/rejected": -442.300048828125, "loss": 0.7859, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.227337598800659, "rewards/margins": 0.38428056240081787, "rewards/rejected": -2.6116182804107666, "step": 6480 }, { "epoch": 0.84, "learning_rate": 4.0040164483121354e-07, "logits/chosen": -2.810925006866455, "logits/rejected": -2.6805922985076904, "logps/chosen": -594.3724365234375, "logps/rejected": -421.9840393066406, "loss": 0.6121, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.000711441040039, "rewards/margins": 0.7475128173828125, "rewards/rejected": -2.7482240200042725, "step": 6490 }, { "epoch": 0.84, "learning_rate": 4.001625705269198e-07, "logits/chosen": -2.9218266010284424, "logits/rejected": -2.7584729194641113, "logps/chosen": -530.1065063476562, "logps/rejected": -410.4961853027344, "loss": 0.6088, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0932586193084717, "rewards/margins": 0.7650324702262878, "rewards/rejected": -2.858290910720825, "step": 6500 }, { "epoch": 0.84, "learning_rate": 3.99923496222626e-07, "logits/chosen": -2.803752899169922, "logits/rejected": -2.732654571533203, "logps/chosen": -578.3316040039062, "logps/rejected": -478.4007263183594, "loss": 0.7365, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3640007972717285, "rewards/margins": 0.3765670657157898, "rewards/rejected": -2.740567922592163, "step": 6510 }, { "epoch": 0.84, "learning_rate": 3.996844219183322e-07, "logits/chosen": -2.825796127319336, "logits/rejected": -2.7327234745025635, "logps/chosen": -518.1613159179688, "logps/rejected": -384.73553466796875, "loss": 0.6876, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.282252550125122, "rewards/margins": 0.5240010023117065, "rewards/rejected": -2.806253671646118, "step": 6520 }, { "epoch": 0.84, "learning_rate": 3.9944534761403843e-07, "logits/chosen": -2.764681339263916, "logits/rejected": -2.7305004596710205, "logps/chosen": -515.6417236328125, "logps/rejected": -440.52667236328125, "loss": 0.6752, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.270634174346924, "rewards/margins": 0.47562068700790405, "rewards/rejected": -2.7462546825408936, "step": 6530 }, { "epoch": 0.84, "learning_rate": 3.9920627330974467e-07, "logits/chosen": -2.791395664215088, "logits/rejected": -2.7890677452087402, "logps/chosen": -545.8656616210938, "logps/rejected": -490.7024841308594, "loss": 0.7436, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.151245594024658, "rewards/margins": 0.5035072565078735, "rewards/rejected": -2.6547529697418213, "step": 6540 }, { "epoch": 0.85, "learning_rate": 3.989671990054509e-07, "logits/chosen": -2.79258394241333, "logits/rejected": -2.6712088584899902, "logps/chosen": -587.5865478515625, "logps/rejected": -406.0826110839844, "loss": 0.7532, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3813295364379883, "rewards/margins": 0.3748011589050293, "rewards/rejected": -2.7561306953430176, "step": 6550 }, { "epoch": 0.85, "learning_rate": 3.987281247011571e-07, "logits/chosen": -2.7737839221954346, "logits/rejected": -2.600034236907959, "logps/chosen": -604.2899169921875, "logps/rejected": -467.707275390625, "loss": 0.7488, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.272090435028076, "rewards/margins": 0.4906039834022522, "rewards/rejected": -2.7626945972442627, "step": 6560 }, { "epoch": 0.85, "learning_rate": 3.984890503968633e-07, "logits/chosen": -2.7519845962524414, "logits/rejected": -2.636615037918091, "logps/chosen": -562.1656494140625, "logps/rejected": -464.66192626953125, "loss": 0.8459, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.423689842224121, "rewards/margins": 0.2471647709608078, "rewards/rejected": -2.6708545684814453, "step": 6570 }, { "epoch": 0.85, "learning_rate": 3.9824997609256956e-07, "logits/chosen": -2.7610840797424316, "logits/rejected": -2.714998960494995, "logps/chosen": -437.55413818359375, "logps/rejected": -344.6949768066406, "loss": 0.7322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2162537574768066, "rewards/margins": 0.4469437599182129, "rewards/rejected": -2.6631975173950195, "step": 6580 }, { "epoch": 0.85, "learning_rate": 3.980109017882758e-07, "logits/chosen": -2.7890429496765137, "logits/rejected": -2.5993220806121826, "logps/chosen": -547.2493896484375, "logps/rejected": -437.6585388183594, "loss": 0.6031, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0535244941711426, "rewards/margins": 0.7762664556503296, "rewards/rejected": -2.8297908306121826, "step": 6590 }, { "epoch": 0.85, "learning_rate": 3.9777182748398203e-07, "logits/chosen": -2.8227858543395996, "logits/rejected": -2.7249598503112793, "logps/chosen": -567.2060546875, "logps/rejected": -400.8982849121094, "loss": 0.5566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1065783500671387, "rewards/margins": 0.8348416090011597, "rewards/rejected": -2.941419839859009, "step": 6600 }, { "epoch": 0.85, "learning_rate": 3.975327531796882e-07, "logits/chosen": -2.8012287616729736, "logits/rejected": -2.7325432300567627, "logps/chosen": -591.6969604492188, "logps/rejected": -498.97845458984375, "loss": 0.6604, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9462368488311768, "rewards/margins": 0.523747980594635, "rewards/rejected": -2.469984531402588, "step": 6610 }, { "epoch": 0.85, "learning_rate": 3.9729367887539445e-07, "logits/chosen": -2.775874137878418, "logits/rejected": -2.726327419281006, "logps/chosen": -549.9345092773438, "logps/rejected": -484.560791015625, "loss": 0.7126, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.302428722381592, "rewards/margins": 0.30685538053512573, "rewards/rejected": -2.609283924102783, "step": 6620 }, { "epoch": 0.86, "learning_rate": 3.970546045711007e-07, "logits/chosen": -2.772580623626709, "logits/rejected": -2.6297192573547363, "logps/chosen": -534.6124267578125, "logps/rejected": -420.0269470214844, "loss": 0.7868, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1759262084960938, "rewards/margins": 0.3282342553138733, "rewards/rejected": -2.5041604042053223, "step": 6630 }, { "epoch": 0.86, "learning_rate": 3.968155302668069e-07, "logits/chosen": -2.876953601837158, "logits/rejected": -2.6772937774658203, "logps/chosen": -609.6029052734375, "logps/rejected": -419.65435791015625, "loss": 0.7105, "rewards/accuracies": 0.625, "rewards/chosen": -2.1762757301330566, "rewards/margins": 0.5130156874656677, "rewards/rejected": -2.689291000366211, "step": 6640 }, { "epoch": 0.86, "learning_rate": 3.9657645596251316e-07, "logits/chosen": -2.7525010108947754, "logits/rejected": -2.78941011428833, "logps/chosen": -437.76251220703125, "logps/rejected": -442.5130920410156, "loss": 0.7299, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.2785255908966064, "rewards/margins": 0.3919200599193573, "rewards/rejected": -2.670445203781128, "step": 6650 }, { "epoch": 0.86, "learning_rate": 3.9633738165821934e-07, "logits/chosen": -2.7804839611053467, "logits/rejected": -2.6712546348571777, "logps/chosen": -463.3856506347656, "logps/rejected": -405.2366027832031, "loss": 0.7281, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0866668224334717, "rewards/margins": 0.4696715474128723, "rewards/rejected": -2.5563385486602783, "step": 6660 }, { "epoch": 0.86, "learning_rate": 3.960983073539256e-07, "logits/chosen": -2.8514771461486816, "logits/rejected": -2.780820608139038, "logps/chosen": -571.65478515625, "logps/rejected": -496.93170166015625, "loss": 0.8105, "rewards/accuracies": 0.625, "rewards/chosen": -2.200601100921631, "rewards/margins": 0.3901222348213196, "rewards/rejected": -2.5907235145568848, "step": 6670 }, { "epoch": 0.86, "learning_rate": 3.958592330496318e-07, "logits/chosen": -2.7178072929382324, "logits/rejected": -2.688544511795044, "logps/chosen": -518.0775146484375, "logps/rejected": -444.50030517578125, "loss": 0.6138, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7407432794570923, "rewards/margins": 0.7183889150619507, "rewards/rejected": -2.459132432937622, "step": 6680 }, { "epoch": 0.86, "learning_rate": 3.9562015874533805e-07, "logits/chosen": -2.661963701248169, "logits/rejected": -2.5648369789123535, "logps/chosen": -574.4996337890625, "logps/rejected": -385.1125183105469, "loss": 0.8322, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.188434600830078, "rewards/margins": 0.14985477924346924, "rewards/rejected": -2.338289260864258, "step": 6690 }, { "epoch": 0.86, "learning_rate": 3.9538108444104423e-07, "logits/chosen": -2.72786808013916, "logits/rejected": -2.6862473487854004, "logps/chosen": -554.5831298828125, "logps/rejected": -473.1307678222656, "loss": 0.747, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3390960693359375, "rewards/margins": 0.32796144485473633, "rewards/rejected": -2.667057514190674, "step": 6700 }, { "epoch": 0.87, "learning_rate": 3.9514201013675047e-07, "logits/chosen": -2.7935545444488525, "logits/rejected": -2.7245073318481445, "logps/chosen": -506.984130859375, "logps/rejected": -430.9375, "loss": 0.6864, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.144259214401245, "rewards/margins": 0.4482100009918213, "rewards/rejected": -2.5924692153930664, "step": 6710 }, { "epoch": 0.87, "learning_rate": 3.949029358324567e-07, "logits/chosen": -2.840808391571045, "logits/rejected": -2.6927683353424072, "logps/chosen": -608.7989501953125, "logps/rejected": -401.26654052734375, "loss": 0.8007, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2495322227478027, "rewards/margins": 0.24491338431835175, "rewards/rejected": -2.494446277618408, "step": 6720 }, { "epoch": 0.87, "learning_rate": 3.9466386152816294e-07, "logits/chosen": -2.7973930835723877, "logits/rejected": -2.654360294342041, "logps/chosen": -531.6064453125, "logps/rejected": -420.5977478027344, "loss": 0.6571, "rewards/accuracies": 0.625, "rewards/chosen": -2.388455390930176, "rewards/margins": 0.6877198815345764, "rewards/rejected": -3.0761749744415283, "step": 6730 }, { "epoch": 0.87, "learning_rate": 3.944247872238692e-07, "logits/chosen": -2.86811900138855, "logits/rejected": -2.67934513092041, "logps/chosen": -575.8900146484375, "logps/rejected": -440.0181579589844, "loss": 0.7422, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.035102128982544, "rewards/margins": 0.4916251599788666, "rewards/rejected": -2.5267271995544434, "step": 6740 }, { "epoch": 0.87, "learning_rate": 3.9418571291957536e-07, "logits/chosen": -2.8413333892822266, "logits/rejected": -2.65212082862854, "logps/chosen": -594.1182250976562, "logps/rejected": -440.01123046875, "loss": 0.8243, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.495760440826416, "rewards/margins": 0.1939251720905304, "rewards/rejected": -2.689685821533203, "step": 6750 }, { "epoch": 0.87, "learning_rate": 3.939466386152816e-07, "logits/chosen": -2.710552215576172, "logits/rejected": -2.551602363586426, "logps/chosen": -513.3645629882812, "logps/rejected": -400.03558349609375, "loss": 0.6818, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3862404823303223, "rewards/margins": 0.38805872201919556, "rewards/rejected": -2.774299144744873, "step": 6760 }, { "epoch": 0.87, "learning_rate": 3.9370756431098783e-07, "logits/chosen": -2.7288198471069336, "logits/rejected": -2.618593692779541, "logps/chosen": -551.9089965820312, "logps/rejected": -475.4961853027344, "loss": 0.7266, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.2261712551116943, "rewards/margins": 0.487542062997818, "rewards/rejected": -2.7137131690979004, "step": 6770 }, { "epoch": 0.88, "learning_rate": 3.9346849000669407e-07, "logits/chosen": -2.8017616271972656, "logits/rejected": -2.604384183883667, "logps/chosen": -567.6015014648438, "logps/rejected": -441.6485900878906, "loss": 0.6484, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8123416900634766, "rewards/margins": 0.6920179724693298, "rewards/rejected": -2.504359722137451, "step": 6780 }, { "epoch": 0.88, "learning_rate": 3.932294157024003e-07, "logits/chosen": -2.788079023361206, "logits/rejected": -2.670574903488159, "logps/chosen": -612.6207275390625, "logps/rejected": -427.5228576660156, "loss": 0.6334, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1388862133026123, "rewards/margins": 0.63136225938797, "rewards/rejected": -2.7702484130859375, "step": 6790 }, { "epoch": 0.88, "learning_rate": 3.929903413981065e-07, "logits/chosen": -2.842102527618408, "logits/rejected": -2.6995646953582764, "logps/chosen": -598.1158447265625, "logps/rejected": -458.2763671875, "loss": 0.7303, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2537808418273926, "rewards/margins": 0.5076573491096497, "rewards/rejected": -2.7614378929138184, "step": 6800 }, { "epoch": 0.88, "learning_rate": 3.927512670938127e-07, "logits/chosen": -2.697509288787842, "logits/rejected": -2.722066879272461, "logps/chosen": -469.55743408203125, "logps/rejected": -430.8231506347656, "loss": 0.775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0267834663391113, "rewards/margins": 0.4278809428215027, "rewards/rejected": -2.4546642303466797, "step": 6810 }, { "epoch": 0.88, "learning_rate": 3.9251219278951896e-07, "logits/chosen": -2.7290244102478027, "logits/rejected": -2.7244772911071777, "logps/chosen": -554.5084228515625, "logps/rejected": -488.6023864746094, "loss": 0.6782, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.075795888900757, "rewards/margins": 0.6182568669319153, "rewards/rejected": -2.6940526962280273, "step": 6820 }, { "epoch": 0.88, "learning_rate": 3.922731184852252e-07, "logits/chosen": -2.7538602352142334, "logits/rejected": -2.6838903427124023, "logps/chosen": -529.5670776367188, "logps/rejected": -443.5609436035156, "loss": 0.7028, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3379995822906494, "rewards/margins": 0.3905390501022339, "rewards/rejected": -2.7285385131835938, "step": 6830 }, { "epoch": 0.88, "learning_rate": 3.920340441809314e-07, "logits/chosen": -2.7520079612731934, "logits/rejected": -2.732006311416626, "logps/chosen": -557.9749145507812, "logps/rejected": -503.19549560546875, "loss": 0.6225, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1547398567199707, "rewards/margins": 0.574142336845398, "rewards/rejected": -2.728882312774658, "step": 6840 }, { "epoch": 0.88, "learning_rate": 3.917949698766376e-07, "logits/chosen": -2.714407444000244, "logits/rejected": -2.6563870906829834, "logps/chosen": -508.23486328125, "logps/rejected": -420.5936584472656, "loss": 0.6772, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.273679256439209, "rewards/margins": 0.616862416267395, "rewards/rejected": -2.8905417919158936, "step": 6850 }, { "epoch": 0.89, "learning_rate": 3.9155589557234385e-07, "logits/chosen": -2.7536849975585938, "logits/rejected": -2.6780619621276855, "logps/chosen": -532.9346923828125, "logps/rejected": -442.9329528808594, "loss": 0.7301, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.026684284210205, "rewards/margins": 0.3888188898563385, "rewards/rejected": -2.4155030250549316, "step": 6860 }, { "epoch": 0.89, "learning_rate": 3.913168212680501e-07, "logits/chosen": -2.839233875274658, "logits/rejected": -2.67596435546875, "logps/chosen": -587.2990112304688, "logps/rejected": -413.66259765625, "loss": 0.5367, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9250290393829346, "rewards/margins": 0.8040135502815247, "rewards/rejected": -2.7290425300598145, "step": 6870 }, { "epoch": 0.89, "learning_rate": 3.910777469637563e-07, "logits/chosen": -2.8165371417999268, "logits/rejected": -2.6992411613464355, "logps/chosen": -548.3555297851562, "logps/rejected": -473.1068420410156, "loss": 0.598, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2287182807922363, "rewards/margins": 0.7428308725357056, "rewards/rejected": -2.9715490341186523, "step": 6880 }, { "epoch": 0.89, "learning_rate": 3.908386726594625e-07, "logits/chosen": -2.7504067420959473, "logits/rejected": -2.6189916133880615, "logps/chosen": -576.6221313476562, "logps/rejected": -382.35064697265625, "loss": 0.7603, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.216306447982788, "rewards/margins": 0.30434054136276245, "rewards/rejected": -2.520646810531616, "step": 6890 }, { "epoch": 0.89, "learning_rate": 3.9059959835516874e-07, "logits/chosen": -2.844731569290161, "logits/rejected": -2.7623651027679443, "logps/chosen": -624.78564453125, "logps/rejected": -490.574462890625, "loss": 0.653, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1366238594055176, "rewards/margins": 0.6970704793930054, "rewards/rejected": -2.8336939811706543, "step": 6900 }, { "epoch": 0.89, "learning_rate": 3.90360524050875e-07, "logits/chosen": -2.7519454956054688, "logits/rejected": -2.6967320442199707, "logps/chosen": -546.4917602539062, "logps/rejected": -474.6334533691406, "loss": 0.6107, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2233877182006836, "rewards/margins": 0.6332194209098816, "rewards/rejected": -2.856606960296631, "step": 6910 }, { "epoch": 0.89, "learning_rate": 3.901214497465812e-07, "logits/chosen": -2.587851047515869, "logits/rejected": -2.571303367614746, "logps/chosen": -446.6715393066406, "logps/rejected": -415.11895751953125, "loss": 0.5696, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9336879253387451, "rewards/margins": 0.7759568691253662, "rewards/rejected": -2.7096447944641113, "step": 6920 }, { "epoch": 0.89, "learning_rate": 3.8988237544228745e-07, "logits/chosen": -2.7360055446624756, "logits/rejected": -2.670156955718994, "logps/chosen": -579.4041748046875, "logps/rejected": -456.81280517578125, "loss": 0.6489, "rewards/accuracies": 0.625, "rewards/chosen": -1.9909627437591553, "rewards/margins": 0.6672365069389343, "rewards/rejected": -2.6581990718841553, "step": 6930 }, { "epoch": 0.9, "learning_rate": 3.8964330113799363e-07, "logits/chosen": -2.8855583667755127, "logits/rejected": -2.713587522506714, "logps/chosen": -543.1939697265625, "logps/rejected": -463.63519287109375, "loss": 0.6684, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.3668830394744873, "rewards/margins": 0.4947829246520996, "rewards/rejected": -2.861666202545166, "step": 6940 }, { "epoch": 0.9, "learning_rate": 3.8940422683369986e-07, "logits/chosen": -2.864546775817871, "logits/rejected": -2.681567668914795, "logps/chosen": -600.4056396484375, "logps/rejected": -448.264892578125, "loss": 0.6987, "rewards/accuracies": 0.625, "rewards/chosen": -2.311378002166748, "rewards/margins": 0.39649707078933716, "rewards/rejected": -2.7078750133514404, "step": 6950 }, { "epoch": 0.9, "learning_rate": 3.891651525294061e-07, "logits/chosen": -2.7839736938476562, "logits/rejected": -2.694927930831909, "logps/chosen": -488.63348388671875, "logps/rejected": -356.9848327636719, "loss": 0.7444, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.2556586265563965, "rewards/margins": 0.2671874165534973, "rewards/rejected": -2.52284574508667, "step": 6960 }, { "epoch": 0.9, "learning_rate": 3.889260782251124e-07, "logits/chosen": -2.744133710861206, "logits/rejected": -2.6637229919433594, "logps/chosen": -462.137939453125, "logps/rejected": -393.3202209472656, "loss": 0.6341, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.118533134460449, "rewards/margins": 0.40997132658958435, "rewards/rejected": -2.5285046100616455, "step": 6970 }, { "epoch": 0.9, "learning_rate": 3.8868700392081857e-07, "logits/chosen": -2.823434352874756, "logits/rejected": -2.6879119873046875, "logps/chosen": -549.3412475585938, "logps/rejected": -376.6808166503906, "loss": 0.5106, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0633599758148193, "rewards/margins": 0.8851016163825989, "rewards/rejected": -2.9484612941741943, "step": 6980 }, { "epoch": 0.9, "learning_rate": 3.884479296165248e-07, "logits/chosen": -2.7253305912017822, "logits/rejected": -2.6090359687805176, "logps/chosen": -553.6043701171875, "logps/rejected": -438.865478515625, "loss": 0.8343, "rewards/accuracies": 0.5625, "rewards/chosen": -2.3372323513031006, "rewards/margins": 0.3334355354309082, "rewards/rejected": -2.670668125152588, "step": 6990 }, { "epoch": 0.9, "learning_rate": 3.8820885531223104e-07, "logits/chosen": -2.81173038482666, "logits/rejected": -2.691070795059204, "logps/chosen": -601.0951538085938, "logps/rejected": -495.6241149902344, "loss": 0.709, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2419066429138184, "rewards/margins": 0.47018975019454956, "rewards/rejected": -2.7120964527130127, "step": 7000 }, { "epoch": 0.9, "eval_logits/chosen": -3.095890760421753, "eval_logits/rejected": -3.043271541595459, "eval_logps/chosen": -540.7271728515625, "eval_logps/rejected": -419.5711669921875, "eval_loss": 0.6274821758270264, "eval_rewards/accuracies": 0.6610000133514404, "eval_rewards/chosen": -0.8907455801963806, "eval_rewards/margins": 0.9735068082809448, "eval_rewards/rejected": -1.8642523288726807, "eval_runtime": 277.845, "eval_samples_per_second": 7.198, "eval_steps_per_second": 3.599, "step": 7000 }, { "epoch": 0.9, "learning_rate": 3.879697810079373e-07, "logits/chosen": -2.659806489944458, "logits/rejected": -2.631025791168213, "logps/chosen": -566.8919677734375, "logps/rejected": -429.82025146484375, "loss": 0.5569, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0567190647125244, "rewards/margins": 0.8397935032844543, "rewards/rejected": -2.896512508392334, "step": 7010 }, { "epoch": 0.91, "learning_rate": 3.877307067036435e-07, "logits/chosen": -2.8561949729919434, "logits/rejected": -2.7421305179595947, "logps/chosen": -571.9522705078125, "logps/rejected": -453.6626892089844, "loss": 0.6065, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.342928171157837, "rewards/margins": 0.6015159487724304, "rewards/rejected": -2.944444179534912, "step": 7020 }, { "epoch": 0.91, "learning_rate": 3.874916323993497e-07, "logits/chosen": -2.8306386470794678, "logits/rejected": -2.738603115081787, "logps/chosen": -465.8697204589844, "logps/rejected": -383.8002014160156, "loss": 0.7247, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.9384825229644775, "rewards/margins": 0.4341423511505127, "rewards/rejected": -2.3726248741149902, "step": 7030 }, { "epoch": 0.91, "learning_rate": 3.8725255809505593e-07, "logits/chosen": -2.697080135345459, "logits/rejected": -2.5505480766296387, "logps/chosen": -479.278076171875, "logps/rejected": -374.7202453613281, "loss": 0.6539, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.29748797416687, "rewards/margins": 0.5239706039428711, "rewards/rejected": -2.8214588165283203, "step": 7040 }, { "epoch": 0.91, "learning_rate": 3.8701348379076217e-07, "logits/chosen": -2.750772714614868, "logits/rejected": -2.605304718017578, "logps/chosen": -615.7262573242188, "logps/rejected": -431.2176208496094, "loss": 0.5903, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0624468326568604, "rewards/margins": 0.8423858880996704, "rewards/rejected": -2.904832363128662, "step": 7050 }, { "epoch": 0.91, "learning_rate": 3.867744094864684e-07, "logits/chosen": -2.6541383266448975, "logits/rejected": -2.656114101409912, "logps/chosen": -460.53729248046875, "logps/rejected": -362.3229064941406, "loss": 0.739, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.160126209259033, "rewards/margins": 0.41417980194091797, "rewards/rejected": -2.5743062496185303, "step": 7060 }, { "epoch": 0.91, "learning_rate": 3.8653533518217464e-07, "logits/chosen": -2.785848617553711, "logits/rejected": -2.613088846206665, "logps/chosen": -629.62060546875, "logps/rejected": -481.9634704589844, "loss": 0.6026, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0791642665863037, "rewards/margins": 0.7864478230476379, "rewards/rejected": -2.865612030029297, "step": 7070 }, { "epoch": 0.91, "learning_rate": 3.862962608778808e-07, "logits/chosen": -2.7656943798065186, "logits/rejected": -2.607226848602295, "logps/chosen": -608.1182861328125, "logps/rejected": -457.7933654785156, "loss": 0.7479, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3664934635162354, "rewards/margins": 0.38771137595176697, "rewards/rejected": -2.754204750061035, "step": 7080 }, { "epoch": 0.92, "learning_rate": 3.8605718657358706e-07, "logits/chosen": -2.7757034301757812, "logits/rejected": -2.6596627235412598, "logps/chosen": -552.4000244140625, "logps/rejected": -389.28375244140625, "loss": 0.6497, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.9643714427947998, "rewards/margins": 0.7325254082679749, "rewards/rejected": -2.69689679145813, "step": 7090 }, { "epoch": 0.92, "learning_rate": 3.858181122692933e-07, "logits/chosen": -2.6949009895324707, "logits/rejected": -2.6125659942626953, "logps/chosen": -598.3751220703125, "logps/rejected": -492.7544860839844, "loss": 0.6905, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0622918605804443, "rewards/margins": 0.6952208876609802, "rewards/rejected": -2.7575125694274902, "step": 7100 }, { "epoch": 0.92, "learning_rate": 3.8557903796499953e-07, "logits/chosen": -2.685107469558716, "logits/rejected": -2.5465922355651855, "logps/chosen": -536.4492797851562, "logps/rejected": -382.5140686035156, "loss": 0.5786, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.059360980987549, "rewards/margins": 0.6010545492172241, "rewards/rejected": -2.6604151725769043, "step": 7110 }, { "epoch": 0.92, "learning_rate": 3.853399636607057e-07, "logits/chosen": -2.673583507537842, "logits/rejected": -2.6028902530670166, "logps/chosen": -607.3548583984375, "logps/rejected": -524.6881713867188, "loss": 0.6856, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2321600914001465, "rewards/margins": 0.6213058233261108, "rewards/rejected": -2.853466033935547, "step": 7120 }, { "epoch": 0.92, "learning_rate": 3.8510088935641195e-07, "logits/chosen": -2.9217419624328613, "logits/rejected": -2.824601650238037, "logps/chosen": -519.0184326171875, "logps/rejected": -457.06597900390625, "loss": 0.7252, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.203521966934204, "rewards/margins": 0.40893468260765076, "rewards/rejected": -2.612456798553467, "step": 7130 }, { "epoch": 0.92, "learning_rate": 3.848618150521182e-07, "logits/chosen": -2.653160572052002, "logits/rejected": -2.620842933654785, "logps/chosen": -558.7071533203125, "logps/rejected": -465.615234375, "loss": 0.6755, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1550228595733643, "rewards/margins": 0.6795660257339478, "rewards/rejected": -2.8345885276794434, "step": 7140 }, { "epoch": 0.92, "learning_rate": 3.846227407478244e-07, "logits/chosen": -2.692594289779663, "logits/rejected": -2.6938202381134033, "logps/chosen": -647.3683471679688, "logps/rejected": -529.5086669921875, "loss": 0.7536, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.243464231491089, "rewards/margins": 0.5401769876480103, "rewards/rejected": -2.7836413383483887, "step": 7150 }, { "epoch": 0.92, "learning_rate": 3.8438366644353066e-07, "logits/chosen": -2.768125057220459, "logits/rejected": -2.742870569229126, "logps/chosen": -550.785888671875, "logps/rejected": -487.44500732421875, "loss": 0.822, "rewards/accuracies": 0.5625, "rewards/chosen": -2.243739128112793, "rewards/margins": 0.2778702676296234, "rewards/rejected": -2.5216097831726074, "step": 7160 }, { "epoch": 0.93, "learning_rate": 3.8414459213923684e-07, "logits/chosen": -2.677732467651367, "logits/rejected": -2.666062355041504, "logps/chosen": -514.2194213867188, "logps/rejected": -459.54827880859375, "loss": 0.7781, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3656506538391113, "rewards/margins": 0.21497401595115662, "rewards/rejected": -2.580624580383301, "step": 7170 }, { "epoch": 0.93, "learning_rate": 3.839055178349431e-07, "logits/chosen": -2.6963798999786377, "logits/rejected": -2.625648260116577, "logps/chosen": -582.64794921875, "logps/rejected": -431.19244384765625, "loss": 0.7121, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.178835391998291, "rewards/margins": 0.630666196346283, "rewards/rejected": -2.8095011711120605, "step": 7180 }, { "epoch": 0.93, "learning_rate": 3.836664435306493e-07, "logits/chosen": -2.698270082473755, "logits/rejected": -2.640073299407959, "logps/chosen": -524.3282470703125, "logps/rejected": -415.19049072265625, "loss": 0.761, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.270294189453125, "rewards/margins": 0.4280814230442047, "rewards/rejected": -2.698375701904297, "step": 7190 }, { "epoch": 0.93, "learning_rate": 3.8342736922635555e-07, "logits/chosen": -2.8154077529907227, "logits/rejected": -2.5775234699249268, "logps/chosen": -630.021484375, "logps/rejected": -437.14208984375, "loss": 0.877, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.518329381942749, "rewards/margins": 0.2102235108613968, "rewards/rejected": -2.728553056716919, "step": 7200 }, { "epoch": 0.93, "learning_rate": 3.831882949220618e-07, "logits/chosen": -2.7193145751953125, "logits/rejected": -2.695838451385498, "logps/chosen": -516.5201416015625, "logps/rejected": -417.4178161621094, "loss": 0.7481, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2920191287994385, "rewards/margins": 0.4455041289329529, "rewards/rejected": -2.737523078918457, "step": 7210 }, { "epoch": 0.93, "learning_rate": 3.8294922061776797e-07, "logits/chosen": -2.84614634513855, "logits/rejected": -2.700958728790283, "logps/chosen": -619.039306640625, "logps/rejected": -498.855712890625, "loss": 0.638, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1043107509613037, "rewards/margins": 0.6316980123519897, "rewards/rejected": -2.736008882522583, "step": 7220 }, { "epoch": 0.93, "learning_rate": 3.827101463134742e-07, "logits/chosen": -2.75423526763916, "logits/rejected": -2.7306265830993652, "logps/chosen": -550.446044921875, "logps/rejected": -469.82598876953125, "loss": 0.7397, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.055995464324951, "rewards/margins": 0.48423871397972107, "rewards/rejected": -2.540234088897705, "step": 7230 }, { "epoch": 0.93, "learning_rate": 3.8247107200918044e-07, "logits/chosen": -2.851731061935425, "logits/rejected": -2.7484681606292725, "logps/chosen": -537.4243774414062, "logps/rejected": -395.510498046875, "loss": 0.6951, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1366422176361084, "rewards/margins": 0.5303007364273071, "rewards/rejected": -2.666942834854126, "step": 7240 }, { "epoch": 0.94, "learning_rate": 3.822319977048867e-07, "logits/chosen": -2.7636184692382812, "logits/rejected": -2.685455799102783, "logps/chosen": -508.89117431640625, "logps/rejected": -472.03857421875, "loss": 0.6708, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9541246891021729, "rewards/margins": 0.5360028147697449, "rewards/rejected": -2.4901275634765625, "step": 7250 }, { "epoch": 0.94, "learning_rate": 3.819929234005929e-07, "logits/chosen": -2.733673095703125, "logits/rejected": -2.5425338745117188, "logps/chosen": -546.05224609375, "logps/rejected": -397.50860595703125, "loss": 0.7395, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1300652027130127, "rewards/margins": 0.43674856424331665, "rewards/rejected": -2.5668139457702637, "step": 7260 }, { "epoch": 0.94, "learning_rate": 3.817538490962991e-07, "logits/chosen": -2.7900710105895996, "logits/rejected": -2.7012829780578613, "logps/chosen": -562.6397705078125, "logps/rejected": -413.2688903808594, "loss": 0.7177, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1910266876220703, "rewards/margins": 0.5910761952400208, "rewards/rejected": -2.7821030616760254, "step": 7270 }, { "epoch": 0.94, "learning_rate": 3.8151477479200533e-07, "logits/chosen": -2.6658568382263184, "logits/rejected": -2.527818202972412, "logps/chosen": -588.5960083007812, "logps/rejected": -413.2210388183594, "loss": 0.6292, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.298879861831665, "rewards/margins": 0.6915411353111267, "rewards/rejected": -2.9904208183288574, "step": 7280 }, { "epoch": 0.94, "learning_rate": 3.8127570048771157e-07, "logits/chosen": -2.644355297088623, "logits/rejected": -2.6097073554992676, "logps/chosen": -544.771240234375, "logps/rejected": -481.0550231933594, "loss": 0.7051, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3292737007141113, "rewards/margins": 0.5061517953872681, "rewards/rejected": -2.835425853729248, "step": 7290 }, { "epoch": 0.94, "learning_rate": 3.810366261834178e-07, "logits/chosen": -2.771111488342285, "logits/rejected": -2.667548418045044, "logps/chosen": -543.1357421875, "logps/rejected": -449.2579040527344, "loss": 0.6367, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1774396896362305, "rewards/margins": 0.5417853593826294, "rewards/rejected": -2.7192254066467285, "step": 7300 }, { "epoch": 0.94, "learning_rate": 3.80797551879124e-07, "logits/chosen": -2.8256707191467285, "logits/rejected": -2.621720552444458, "logps/chosen": -535.7460327148438, "logps/rejected": -327.7019958496094, "loss": 0.5513, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0553784370422363, "rewards/margins": 0.9449526071548462, "rewards/rejected": -3.000330924987793, "step": 7310 }, { "epoch": 0.95, "learning_rate": 3.805584775748302e-07, "logits/chosen": -2.6712775230407715, "logits/rejected": -2.597930669784546, "logps/chosen": -602.0726318359375, "logps/rejected": -437.06976318359375, "loss": 0.7516, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.221576690673828, "rewards/margins": 0.5121652483940125, "rewards/rejected": -2.7337417602539062, "step": 7320 }, { "epoch": 0.95, "learning_rate": 3.8031940327053646e-07, "logits/chosen": -2.7682700157165527, "logits/rejected": -2.591465711593628, "logps/chosen": -624.5355224609375, "logps/rejected": -451.8446350097656, "loss": 0.5946, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9830325841903687, "rewards/margins": 0.8766446113586426, "rewards/rejected": -2.8596770763397217, "step": 7330 }, { "epoch": 0.95, "learning_rate": 3.800803289662427e-07, "logits/chosen": -2.7733421325683594, "logits/rejected": -2.650411605834961, "logps/chosen": -578.7686767578125, "logps/rejected": -469.2037048339844, "loss": 0.7393, "rewards/accuracies": 0.625, "rewards/chosen": -2.176969289779663, "rewards/margins": 0.43589258193969727, "rewards/rejected": -2.6128621101379395, "step": 7340 }, { "epoch": 0.95, "learning_rate": 3.7984125466194893e-07, "logits/chosen": -2.837938070297241, "logits/rejected": -2.693556547164917, "logps/chosen": -607.993896484375, "logps/rejected": -494.961669921875, "loss": 0.5336, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.013343334197998, "rewards/margins": 1.098020315170288, "rewards/rejected": -3.111363649368286, "step": 7350 }, { "epoch": 0.95, "learning_rate": 3.796021803576551e-07, "logits/chosen": -2.826921224594116, "logits/rejected": -2.6893155574798584, "logps/chosen": -598.9656982421875, "logps/rejected": -465.95941162109375, "loss": 0.692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0387463569641113, "rewards/margins": 0.5289139151573181, "rewards/rejected": -2.567659854888916, "step": 7360 }, { "epoch": 0.95, "learning_rate": 3.7936310605336135e-07, "logits/chosen": -2.6826260089874268, "logits/rejected": -2.5503013134002686, "logps/chosen": -604.5457153320312, "logps/rejected": -419.83001708984375, "loss": 0.7045, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3402507305145264, "rewards/margins": 0.6033434867858887, "rewards/rejected": -2.943594455718994, "step": 7370 }, { "epoch": 0.95, "learning_rate": 3.791240317490676e-07, "logits/chosen": -2.694833517074585, "logits/rejected": -2.5606608390808105, "logps/chosen": -578.2926025390625, "logps/rejected": -453.772705078125, "loss": 0.7132, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.2359461784362793, "rewards/margins": 0.39623740315437317, "rewards/rejected": -2.632183790206909, "step": 7380 }, { "epoch": 0.95, "learning_rate": 3.788849574447738e-07, "logits/chosen": -2.796381711959839, "logits/rejected": -2.5481021404266357, "logps/chosen": -696.0401611328125, "logps/rejected": -476.9458923339844, "loss": 0.6283, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6323659420013428, "rewards/margins": 0.6961309909820557, "rewards/rejected": -3.3284974098205566, "step": 7390 }, { "epoch": 0.96, "learning_rate": 3.7864588314048006e-07, "logits/chosen": -2.7147715091705322, "logits/rejected": -2.6052651405334473, "logps/chosen": -508.48602294921875, "logps/rejected": -406.919189453125, "loss": 0.7214, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3076138496398926, "rewards/margins": 0.41971874237060547, "rewards/rejected": -2.727332592010498, "step": 7400 }, { "epoch": 0.96, "learning_rate": 3.7840680883618624e-07, "logits/chosen": -2.8235485553741455, "logits/rejected": -2.6445398330688477, "logps/chosen": -556.7811279296875, "logps/rejected": -381.1893005371094, "loss": 0.7933, "rewards/accuracies": 0.5625, "rewards/chosen": -2.313668966293335, "rewards/margins": 0.4421635568141937, "rewards/rejected": -2.7558324337005615, "step": 7410 }, { "epoch": 0.96, "learning_rate": 3.781677345318925e-07, "logits/chosen": -2.7307894229888916, "logits/rejected": -2.6597511768341064, "logps/chosen": -563.1531982421875, "logps/rejected": -457.0926818847656, "loss": 0.7126, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0028223991394043, "rewards/margins": 0.5691893100738525, "rewards/rejected": -2.572011709213257, "step": 7420 }, { "epoch": 0.96, "learning_rate": 3.779286602275987e-07, "logits/chosen": -2.83616042137146, "logits/rejected": -2.6728107929229736, "logps/chosen": -508.9873046875, "logps/rejected": -360.03900146484375, "loss": 0.6549, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.30922269821167, "rewards/margins": 0.5641713738441467, "rewards/rejected": -2.873394012451172, "step": 7430 }, { "epoch": 0.96, "learning_rate": 3.7768958592330495e-07, "logits/chosen": -2.7706892490386963, "logits/rejected": -2.647163152694702, "logps/chosen": -619.3035278320312, "logps/rejected": -449.27288818359375, "loss": 0.7421, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4920413494110107, "rewards/margins": 0.44030675292015076, "rewards/rejected": -2.9323480129241943, "step": 7440 }, { "epoch": 0.96, "learning_rate": 3.7745051161901113e-07, "logits/chosen": -2.7159008979797363, "logits/rejected": -2.6629209518432617, "logps/chosen": -568.381591796875, "logps/rejected": -537.7478637695312, "loss": 0.673, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.226628065109253, "rewards/margins": 0.5411468744277954, "rewards/rejected": -2.767775058746338, "step": 7450 }, { "epoch": 0.96, "learning_rate": 3.7721143731471737e-07, "logits/chosen": -2.7067408561706543, "logits/rejected": -2.6005923748016357, "logps/chosen": -629.8084106445312, "logps/rejected": -429.1446838378906, "loss": 0.6163, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2042603492736816, "rewards/margins": 0.8334742784500122, "rewards/rejected": -3.0377345085144043, "step": 7460 }, { "epoch": 0.96, "learning_rate": 3.769723630104236e-07, "logits/chosen": -2.705695629119873, "logits/rejected": -2.6104259490966797, "logps/chosen": -625.7001342773438, "logps/rejected": -470.0521545410156, "loss": 0.5534, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.3159847259521484, "rewards/margins": 0.9586429595947266, "rewards/rejected": -3.274627685546875, "step": 7470 }, { "epoch": 0.97, "learning_rate": 3.7673328870612984e-07, "logits/chosen": -2.6670753955841064, "logits/rejected": -2.6614201068878174, "logps/chosen": -517.419921875, "logps/rejected": -438.1039123535156, "loss": 0.6991, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.276968002319336, "rewards/margins": 0.5783045887947083, "rewards/rejected": -2.8552727699279785, "step": 7480 }, { "epoch": 0.97, "learning_rate": 3.764942144018361e-07, "logits/chosen": -2.6811907291412354, "logits/rejected": -2.6264216899871826, "logps/chosen": -537.1956176757812, "logps/rejected": -466.15338134765625, "loss": 0.7765, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.80316162109375, "rewards/margins": 0.4065954089164734, "rewards/rejected": -3.209756851196289, "step": 7490 }, { "epoch": 0.97, "learning_rate": 3.7625514009754226e-07, "logits/chosen": -2.815171718597412, "logits/rejected": -2.755178928375244, "logps/chosen": -553.336181640625, "logps/rejected": -429.7198181152344, "loss": 0.6387, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1596391201019287, "rewards/margins": 0.7996801137924194, "rewards/rejected": -2.959319591522217, "step": 7500 }, { "epoch": 0.97, "learning_rate": 3.760160657932485e-07, "logits/chosen": -2.8354201316833496, "logits/rejected": -2.764134168624878, "logps/chosen": -616.9829711914062, "logps/rejected": -480.80560302734375, "loss": 0.7752, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.517368793487549, "rewards/margins": 0.4672721028327942, "rewards/rejected": -2.9846410751342773, "step": 7510 }, { "epoch": 0.97, "learning_rate": 3.7577699148895473e-07, "logits/chosen": -2.7579619884490967, "logits/rejected": -2.618743419647217, "logps/chosen": -608.7033081054688, "logps/rejected": -436.57366943359375, "loss": 0.814, "rewards/accuracies": 0.625, "rewards/chosen": -2.603606939315796, "rewards/margins": 0.4155707359313965, "rewards/rejected": -3.0191779136657715, "step": 7520 }, { "epoch": 0.97, "learning_rate": 3.7553791718466097e-07, "logits/chosen": -2.798241376876831, "logits/rejected": -2.6652417182922363, "logps/chosen": -616.5509643554688, "logps/rejected": -413.59503173828125, "loss": 0.7797, "rewards/accuracies": 0.625, "rewards/chosen": -2.447665214538574, "rewards/margins": 0.3971273899078369, "rewards/rejected": -2.844792604446411, "step": 7530 }, { "epoch": 0.97, "learning_rate": 3.7529884288036725e-07, "logits/chosen": -2.7794108390808105, "logits/rejected": -2.6079201698303223, "logps/chosen": -602.8592529296875, "logps/rejected": -419.6504821777344, "loss": 0.5944, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.17431640625, "rewards/margins": 0.6872979402542114, "rewards/rejected": -2.861614227294922, "step": 7540 }, { "epoch": 0.97, "learning_rate": 3.750597685760734e-07, "logits/chosen": -2.8022496700286865, "logits/rejected": -2.7185263633728027, "logps/chosen": -534.9346923828125, "logps/rejected": -417.17333984375, "loss": 0.6906, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.474186658859253, "rewards/margins": 0.5645660758018494, "rewards/rejected": -3.038752794265747, "step": 7550 }, { "epoch": 0.98, "learning_rate": 3.7482069427177967e-07, "logits/chosen": -2.7089591026306152, "logits/rejected": -2.636620283126831, "logps/chosen": -503.97216796875, "logps/rejected": -432.09600830078125, "loss": 0.6706, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.220872402191162, "rewards/margins": 0.6783632040023804, "rewards/rejected": -2.899235486984253, "step": 7560 }, { "epoch": 0.98, "learning_rate": 3.745816199674859e-07, "logits/chosen": -2.589278221130371, "logits/rejected": -2.658398151397705, "logps/chosen": -475.1808166503906, "logps/rejected": -506.6736755371094, "loss": 0.8418, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.7741408348083496, "rewards/margins": 0.053803443908691406, "rewards/rejected": -2.82794451713562, "step": 7570 }, { "epoch": 0.98, "learning_rate": 3.7434254566319215e-07, "logits/chosen": -2.708195447921753, "logits/rejected": -2.6408493518829346, "logps/chosen": -521.0389404296875, "logps/rejected": -408.1683654785156, "loss": 0.6062, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2296884059906006, "rewards/margins": 0.6200961470603943, "rewards/rejected": -2.8497846126556396, "step": 7580 }, { "epoch": 0.98, "learning_rate": 3.7410347135889833e-07, "logits/chosen": -2.834658145904541, "logits/rejected": -2.661141872406006, "logps/chosen": -606.613037109375, "logps/rejected": -441.71343994140625, "loss": 0.6947, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1037628650665283, "rewards/margins": 0.5196511745452881, "rewards/rejected": -2.6234138011932373, "step": 7590 }, { "epoch": 0.98, "learning_rate": 3.7386439705460456e-07, "logits/chosen": -2.804683208465576, "logits/rejected": -2.831134796142578, "logps/chosen": -526.544189453125, "logps/rejected": -519.4453125, "loss": 0.8675, "rewards/accuracies": 0.5625, "rewards/chosen": -2.4097206592559814, "rewards/margins": 0.32792869210243225, "rewards/rejected": -2.7376492023468018, "step": 7600 }, { "epoch": 0.98, "learning_rate": 3.736253227503108e-07, "logits/chosen": -2.7752578258514404, "logits/rejected": -2.647325038909912, "logps/chosen": -566.7606201171875, "logps/rejected": -421.6597595214844, "loss": 0.8067, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.396003246307373, "rewards/margins": 0.2568088471889496, "rewards/rejected": -2.6528122425079346, "step": 7610 }, { "epoch": 0.98, "learning_rate": 3.7338624844601704e-07, "logits/chosen": -2.7424092292785645, "logits/rejected": -2.664670467376709, "logps/chosen": -596.8222045898438, "logps/rejected": -495.55743408203125, "loss": 0.8137, "rewards/accuracies": 0.5625, "rewards/chosen": -2.5575151443481445, "rewards/margins": 0.34827834367752075, "rewards/rejected": -2.9057936668395996, "step": 7620 }, { "epoch": 0.99, "learning_rate": 3.7314717414172327e-07, "logits/chosen": -2.867964029312134, "logits/rejected": -2.686392068862915, "logps/chosen": -625.32568359375, "logps/rejected": -489.3540954589844, "loss": 0.7331, "rewards/accuracies": 0.625, "rewards/chosen": -2.389726161956787, "rewards/margins": 0.6009480953216553, "rewards/rejected": -2.9906744956970215, "step": 7630 }, { "epoch": 0.99, "learning_rate": 3.7290809983742945e-07, "logits/chosen": -2.849545478820801, "logits/rejected": -2.7013192176818848, "logps/chosen": -588.4034423828125, "logps/rejected": -443.49298095703125, "loss": 0.6481, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2403178215026855, "rewards/margins": 0.6474124193191528, "rewards/rejected": -2.887730360031128, "step": 7640 }, { "epoch": 0.99, "learning_rate": 3.726690255331357e-07, "logits/chosen": -2.79241681098938, "logits/rejected": -2.660113573074341, "logps/chosen": -582.6751708984375, "logps/rejected": -427.72943115234375, "loss": 0.6856, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.347172260284424, "rewards/margins": 0.5881866216659546, "rewards/rejected": -2.9353582859039307, "step": 7650 }, { "epoch": 0.99, "learning_rate": 3.7242995122884193e-07, "logits/chosen": -2.689544200897217, "logits/rejected": -2.5856122970581055, "logps/chosen": -562.1372680664062, "logps/rejected": -488.919921875, "loss": 0.7428, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.61859393119812, "rewards/margins": 0.48222750425338745, "rewards/rejected": -3.100821018218994, "step": 7660 }, { "epoch": 0.99, "learning_rate": 3.7219087692454816e-07, "logits/chosen": -2.7754359245300293, "logits/rejected": -2.6216447353363037, "logps/chosen": -610.7249755859375, "logps/rejected": -428.35516357421875, "loss": 0.6147, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.995113730430603, "rewards/margins": 0.8478872179985046, "rewards/rejected": -2.843001127243042, "step": 7670 }, { "epoch": 0.99, "learning_rate": 3.719518026202544e-07, "logits/chosen": -2.9057424068450928, "logits/rejected": -2.7566421031951904, "logps/chosen": -572.91064453125, "logps/rejected": -453.65545654296875, "loss": 0.5909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1823267936706543, "rewards/margins": 0.7124005556106567, "rewards/rejected": -2.8947272300720215, "step": 7680 }, { "epoch": 0.99, "learning_rate": 3.717127283159606e-07, "logits/chosen": -2.8694560527801514, "logits/rejected": -2.8232688903808594, "logps/chosen": -539.9500732421875, "logps/rejected": -469.13690185546875, "loss": 0.7024, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.304158926010132, "rewards/margins": 0.7175304293632507, "rewards/rejected": -3.0216891765594482, "step": 7690 }, { "epoch": 0.99, "learning_rate": 3.714736540116668e-07, "logits/chosen": -2.6147608757019043, "logits/rejected": -2.6495862007141113, "logps/chosen": -406.0460510253906, "logps/rejected": -426.08819580078125, "loss": 0.8191, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.4224376678466797, "rewards/margins": 0.22151660919189453, "rewards/rejected": -2.6439545154571533, "step": 7700 }, { "epoch": 1.0, "learning_rate": 3.7123457970737305e-07, "logits/chosen": -2.7146573066711426, "logits/rejected": -2.610327959060669, "logps/chosen": -539.4094848632812, "logps/rejected": -436.24945068359375, "loss": 0.5305, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.056718111038208, "rewards/margins": 0.8745146989822388, "rewards/rejected": -2.9312329292297363, "step": 7710 }, { "epoch": 1.0, "learning_rate": 3.709955054030793e-07, "logits/chosen": -2.792050838470459, "logits/rejected": -2.7344748973846436, "logps/chosen": -600.3341064453125, "logps/rejected": -488.42730712890625, "loss": 0.6757, "rewards/accuracies": 0.6875, "rewards/chosen": -2.261371374130249, "rewards/margins": 0.5775443315505981, "rewards/rejected": -2.838916063308716, "step": 7720 }, { "epoch": 1.0, "learning_rate": 3.7075643109878547e-07, "logits/chosen": -2.9027421474456787, "logits/rejected": -2.8036398887634277, "logps/chosen": -582.31787109375, "logps/rejected": -400.2735595703125, "loss": 0.6745, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.267928123474121, "rewards/margins": 0.5876890420913696, "rewards/rejected": -2.855617046356201, "step": 7730 }, { "epoch": 1.0, "learning_rate": 3.705173567944917e-07, "logits/chosen": -2.802525520324707, "logits/rejected": -2.6471428871154785, "logps/chosen": -629.8912353515625, "logps/rejected": -390.04864501953125, "loss": 0.6456, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1601977348327637, "rewards/margins": 0.737832248210907, "rewards/rejected": -2.8980298042297363, "step": 7740 }, { "epoch": 1.0, "learning_rate": 3.7027828249019794e-07, "logits/chosen": -2.720710515975952, "logits/rejected": -2.5515456199645996, "logps/chosen": -681.2289428710938, "logps/rejected": -432.67352294921875, "loss": 0.65, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.321909189224243, "rewards/margins": 0.7805188298225403, "rewards/rejected": -3.1024279594421387, "step": 7750 }, { "epoch": 1.0, "learning_rate": 3.700392081859042e-07, "logits/chosen": -2.803069829940796, "logits/rejected": -2.5748836994171143, "logps/chosen": -599.8126220703125, "logps/rejected": -369.245361328125, "loss": 0.5794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.10430645942688, "rewards/margins": 0.8165141344070435, "rewards/rejected": -2.9208202362060547, "step": 7760 }, { "epoch": 1.0, "learning_rate": 3.698001338816104e-07, "logits/chosen": -2.817808151245117, "logits/rejected": -2.616654872894287, "logps/chosen": -633.9962158203125, "logps/rejected": -442.6092834472656, "loss": 0.6204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1438231468200684, "rewards/margins": 0.6460675001144409, "rewards/rejected": -2.7898902893066406, "step": 7770 }, { "epoch": 1.0, "learning_rate": 3.695610595773166e-07, "logits/chosen": -2.909470558166504, "logits/rejected": -2.6912219524383545, "logps/chosen": -649.1695556640625, "logps/rejected": -449.9266662597656, "loss": 0.6908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.361710786819458, "rewards/margins": 0.6186777353286743, "rewards/rejected": -2.9803884029388428, "step": 7780 }, { "epoch": 1.01, "learning_rate": 3.6932198527302283e-07, "logits/chosen": -2.7612252235412598, "logits/rejected": -2.6574809551239014, "logps/chosen": -602.3273315429688, "logps/rejected": -519.1671142578125, "loss": 0.5345, "rewards/accuracies": 0.75, "rewards/chosen": -2.0332956314086914, "rewards/margins": 0.8997390866279602, "rewards/rejected": -2.9330344200134277, "step": 7790 }, { "epoch": 1.01, "learning_rate": 3.6908291096872907e-07, "logits/chosen": -2.763727903366089, "logits/rejected": -2.676240921020508, "logps/chosen": -517.6400146484375, "logps/rejected": -476.97174072265625, "loss": 0.5482, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.025481700897217, "rewards/margins": 0.8974500894546509, "rewards/rejected": -2.9229319095611572, "step": 7800 }, { "epoch": 1.01, "learning_rate": 3.688438366644353e-07, "logits/chosen": -2.8036653995513916, "logits/rejected": -2.585273027420044, "logps/chosen": -606.5914306640625, "logps/rejected": -448.24072265625, "loss": 0.5398, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9939464330673218, "rewards/margins": 0.8903571963310242, "rewards/rejected": -2.8843040466308594, "step": 7810 }, { "epoch": 1.01, "learning_rate": 3.6860476236014154e-07, "logits/chosen": -2.7779574394226074, "logits/rejected": -2.7651522159576416, "logps/chosen": -497.48663330078125, "logps/rejected": -428.78668212890625, "loss": 0.6989, "rewards/accuracies": 0.625, "rewards/chosen": -2.247124433517456, "rewards/margins": 0.49469009041786194, "rewards/rejected": -2.741814374923706, "step": 7820 }, { "epoch": 1.01, "learning_rate": 3.683656880558477e-07, "logits/chosen": -2.699280023574829, "logits/rejected": -2.6089954376220703, "logps/chosen": -553.1142578125, "logps/rejected": -426.21942138671875, "loss": 0.6612, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.238917827606201, "rewards/margins": 0.6503468751907349, "rewards/rejected": -2.8892645835876465, "step": 7830 }, { "epoch": 1.01, "learning_rate": 3.6812661375155396e-07, "logits/chosen": -2.8094935417175293, "logits/rejected": -2.6411290168762207, "logps/chosen": -614.7215576171875, "logps/rejected": -454.730224609375, "loss": 0.6053, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.019620180130005, "rewards/margins": 0.8206133842468262, "rewards/rejected": -2.84023380279541, "step": 7840 }, { "epoch": 1.01, "learning_rate": 3.678875394472602e-07, "logits/chosen": -2.8755717277526855, "logits/rejected": -2.6879265308380127, "logps/chosen": -591.6857299804688, "logps/rejected": -462.82574462890625, "loss": 0.5973, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.067591667175293, "rewards/margins": 0.8367293477058411, "rewards/rejected": -2.9043209552764893, "step": 7850 }, { "epoch": 1.01, "learning_rate": 3.6764846514296643e-07, "logits/chosen": -2.7682747840881348, "logits/rejected": -2.631370782852173, "logps/chosen": -572.3814697265625, "logps/rejected": -443.2801818847656, "loss": 0.6239, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.142573356628418, "rewards/margins": 0.6403663158416748, "rewards/rejected": -2.7829394340515137, "step": 7860 }, { "epoch": 1.02, "learning_rate": 3.674093908386726e-07, "logits/chosen": -2.736388683319092, "logits/rejected": -2.6591076850891113, "logps/chosen": -569.4114990234375, "logps/rejected": -446.7035217285156, "loss": 0.7481, "rewards/accuracies": 0.625, "rewards/chosen": -2.1216628551483154, "rewards/margins": 0.6385433673858643, "rewards/rejected": -2.7602062225341797, "step": 7870 }, { "epoch": 1.02, "learning_rate": 3.6717031653437885e-07, "logits/chosen": -2.810454845428467, "logits/rejected": -2.776754856109619, "logps/chosen": -676.9915771484375, "logps/rejected": -590.1450805664062, "loss": 0.6409, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.246184825897217, "rewards/margins": 0.7173846364021301, "rewards/rejected": -2.9635696411132812, "step": 7880 }, { "epoch": 1.02, "learning_rate": 3.669312422300851e-07, "logits/chosen": -2.849891185760498, "logits/rejected": -2.7017428874969482, "logps/chosen": -590.9564819335938, "logps/rejected": -514.2528076171875, "loss": 0.6406, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.235018491744995, "rewards/margins": 0.5765985250473022, "rewards/rejected": -2.811616897583008, "step": 7890 }, { "epoch": 1.02, "learning_rate": 3.666921679257913e-07, "logits/chosen": -2.7675395011901855, "logits/rejected": -2.723806381225586, "logps/chosen": -465.74420166015625, "logps/rejected": -386.1535339355469, "loss": 0.7452, "rewards/accuracies": 0.5625, "rewards/chosen": -2.3854000568389893, "rewards/margins": 0.4471573829650879, "rewards/rejected": -2.832557439804077, "step": 7900 }, { "epoch": 1.02, "learning_rate": 3.6645309362149756e-07, "logits/chosen": -2.7394421100616455, "logits/rejected": -2.6747665405273438, "logps/chosen": -528.0010986328125, "logps/rejected": -452.281494140625, "loss": 0.6045, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2913856506347656, "rewards/margins": 0.7383930087089539, "rewards/rejected": -3.0297787189483643, "step": 7910 }, { "epoch": 1.02, "learning_rate": 3.6621401931720374e-07, "logits/chosen": -2.817007064819336, "logits/rejected": -2.5522658824920654, "logps/chosen": -626.2421875, "logps/rejected": -385.91778564453125, "loss": 0.6516, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.4830260276794434, "rewards/margins": 0.5957164764404297, "rewards/rejected": -3.078742504119873, "step": 7920 }, { "epoch": 1.02, "learning_rate": 3.6597494501291e-07, "logits/chosen": -2.6876378059387207, "logits/rejected": -2.6402347087860107, "logps/chosen": -603.7951049804688, "logps/rejected": -467.9532165527344, "loss": 0.6119, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1089653968811035, "rewards/margins": 0.7484790682792664, "rewards/rejected": -2.8574447631835938, "step": 7930 }, { "epoch": 1.03, "learning_rate": 3.657358707086162e-07, "logits/chosen": -2.687373399734497, "logits/rejected": -2.4883580207824707, "logps/chosen": -535.7762451171875, "logps/rejected": -345.2306213378906, "loss": 0.6209, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3018646240234375, "rewards/margins": 0.6957074403762817, "rewards/rejected": -2.997572183609009, "step": 7940 }, { "epoch": 1.03, "learning_rate": 3.6549679640432245e-07, "logits/chosen": -2.6956400871276855, "logits/rejected": -2.610431432723999, "logps/chosen": -553.5438232421875, "logps/rejected": -373.50006103515625, "loss": 0.642, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.232147216796875, "rewards/margins": 0.7081996202468872, "rewards/rejected": -2.940347194671631, "step": 7950 }, { "epoch": 1.03, "learning_rate": 3.652577221000287e-07, "logits/chosen": -2.7029037475585938, "logits/rejected": -2.628688335418701, "logps/chosen": -522.205078125, "logps/rejected": -440.9866638183594, "loss": 0.5199, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9431402683258057, "rewards/margins": 0.7851122617721558, "rewards/rejected": -2.728252410888672, "step": 7960 }, { "epoch": 1.03, "learning_rate": 3.6501864779573487e-07, "logits/chosen": -2.618093252182007, "logits/rejected": -2.393245220184326, "logps/chosen": -518.9527587890625, "logps/rejected": -332.657470703125, "loss": 0.6548, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.242900848388672, "rewards/margins": 0.5277381539344788, "rewards/rejected": -2.770639181137085, "step": 7970 }, { "epoch": 1.03, "learning_rate": 3.647795734914411e-07, "logits/chosen": -2.817502975463867, "logits/rejected": -2.7207248210906982, "logps/chosen": -519.903076171875, "logps/rejected": -389.6810607910156, "loss": 0.5371, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0252573490142822, "rewards/margins": 0.9912310838699341, "rewards/rejected": -3.016488552093506, "step": 7980 }, { "epoch": 1.03, "learning_rate": 3.6454049918714734e-07, "logits/chosen": -2.7580714225769043, "logits/rejected": -2.629835605621338, "logps/chosen": -527.2303466796875, "logps/rejected": -407.8516845703125, "loss": 0.5942, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.323000431060791, "rewards/margins": 0.6178001761436462, "rewards/rejected": -2.940800666809082, "step": 7990 }, { "epoch": 1.03, "learning_rate": 3.643014248828536e-07, "logits/chosen": -2.867985248565674, "logits/rejected": -2.6681923866271973, "logps/chosen": -614.841796875, "logps/rejected": -436.9309997558594, "loss": 0.5569, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1742966175079346, "rewards/margins": 0.9890509843826294, "rewards/rejected": -3.1633477210998535, "step": 8000 }, { "epoch": 1.03, "eval_logits/chosen": -3.076028347015381, "eval_logits/rejected": -3.0148866176605225, "eval_logps/chosen": -541.1722412109375, "eval_logps/rejected": -420.2840270996094, "eval_loss": 0.6324803233146667, "eval_rewards/accuracies": 0.6625000238418579, "eval_rewards/chosen": -0.9352481365203857, "eval_rewards/margins": 1.0002866983413696, "eval_rewards/rejected": -1.9355350732803345, "eval_runtime": 278.3943, "eval_samples_per_second": 7.184, "eval_steps_per_second": 3.592, "step": 8000 }, { "epoch": 1.03, "learning_rate": 3.6406235057855976e-07, "logits/chosen": -2.6481220722198486, "logits/rejected": -2.6236917972564697, "logps/chosen": -465.2994079589844, "logps/rejected": -408.5280456542969, "loss": 0.6772, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0867393016815186, "rewards/margins": 0.5731412172317505, "rewards/rejected": -2.6598806381225586, "step": 8010 }, { "epoch": 1.04, "learning_rate": 3.63823276274266e-07, "logits/chosen": -2.6799259185791016, "logits/rejected": -2.6547579765319824, "logps/chosen": -492.0006408691406, "logps/rejected": -449.2989807128906, "loss": 0.535, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.059708833694458, "rewards/margins": 0.8893237113952637, "rewards/rejected": -2.9490325450897217, "step": 8020 }, { "epoch": 1.04, "learning_rate": 3.6358420196997223e-07, "logits/chosen": -2.6793036460876465, "logits/rejected": -2.5999066829681396, "logps/chosen": -523.5848388671875, "logps/rejected": -423.2562561035156, "loss": 0.6669, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2463343143463135, "rewards/margins": 0.5305004715919495, "rewards/rejected": -2.7768349647521973, "step": 8030 }, { "epoch": 1.04, "learning_rate": 3.6334512766567847e-07, "logits/chosen": -2.795576572418213, "logits/rejected": -2.6994338035583496, "logps/chosen": -606.5733642578125, "logps/rejected": -460.9706115722656, "loss": 0.5247, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.094459056854248, "rewards/margins": 0.9565951228141785, "rewards/rejected": -3.051053762435913, "step": 8040 }, { "epoch": 1.04, "learning_rate": 3.631060533613847e-07, "logits/chosen": -2.7948248386383057, "logits/rejected": -2.641523838043213, "logps/chosen": -563.6425170898438, "logps/rejected": -420.81341552734375, "loss": 0.5875, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8797985315322876, "rewards/margins": 1.0152837038040161, "rewards/rejected": -2.8950819969177246, "step": 8050 }, { "epoch": 1.04, "learning_rate": 3.628669790570909e-07, "logits/chosen": -2.732543468475342, "logits/rejected": -2.6285433769226074, "logps/chosen": -462.47149658203125, "logps/rejected": -356.83251953125, "loss": 0.7423, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.182784080505371, "rewards/margins": 0.34111493825912476, "rewards/rejected": -2.5238988399505615, "step": 8060 }, { "epoch": 1.04, "learning_rate": 3.626279047527971e-07, "logits/chosen": -2.7094407081604004, "logits/rejected": -2.595351457595825, "logps/chosen": -532.6381225585938, "logps/rejected": -443.17059326171875, "loss": 0.5791, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.067004680633545, "rewards/margins": 0.8414853811264038, "rewards/rejected": -2.90848970413208, "step": 8070 }, { "epoch": 1.04, "learning_rate": 3.6238883044850336e-07, "logits/chosen": -2.791987895965576, "logits/rejected": -2.6653378009796143, "logps/chosen": -622.801513671875, "logps/rejected": -474.01824951171875, "loss": 0.5977, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.289249897003174, "rewards/margins": 0.7198301553726196, "rewards/rejected": -3.009079933166504, "step": 8080 }, { "epoch": 1.04, "learning_rate": 3.621497561442096e-07, "logits/chosen": -2.7165236473083496, "logits/rejected": -2.6356053352355957, "logps/chosen": -571.7903442382812, "logps/rejected": -429.815673828125, "loss": 0.5951, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9579614400863647, "rewards/margins": 0.7263658046722412, "rewards/rejected": -2.6843273639678955, "step": 8090 }, { "epoch": 1.05, "learning_rate": 3.6191068183991583e-07, "logits/chosen": -2.7689929008483887, "logits/rejected": -2.692692279815674, "logps/chosen": -559.8029174804688, "logps/rejected": -498.3201599121094, "loss": 0.6904, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.256155490875244, "rewards/margins": 0.5346792936325073, "rewards/rejected": -2.790834903717041, "step": 8100 }, { "epoch": 1.05, "learning_rate": 3.61671607535622e-07, "logits/chosen": -2.7680838108062744, "logits/rejected": -2.691272258758545, "logps/chosen": -502.34161376953125, "logps/rejected": -430.71673583984375, "loss": 0.6283, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.868051528930664, "rewards/margins": 0.5838344693183899, "rewards/rejected": -2.45188570022583, "step": 8110 }, { "epoch": 1.05, "learning_rate": 3.6143253323132825e-07, "logits/chosen": -2.6853322982788086, "logits/rejected": -2.622756242752075, "logps/chosen": -516.8272705078125, "logps/rejected": -417.9344787597656, "loss": 0.7339, "rewards/accuracies": 0.5625, "rewards/chosen": -2.260593891143799, "rewards/margins": 0.5381008982658386, "rewards/rejected": -2.7986950874328613, "step": 8120 }, { "epoch": 1.05, "learning_rate": 3.611934589270345e-07, "logits/chosen": -2.776169776916504, "logits/rejected": -2.6810355186462402, "logps/chosen": -532.3388061523438, "logps/rejected": -423.025146484375, "loss": 0.5258, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9278675317764282, "rewards/margins": 0.8559097051620483, "rewards/rejected": -2.7837774753570557, "step": 8130 }, { "epoch": 1.05, "learning_rate": 3.609543846227408e-07, "logits/chosen": -2.8116018772125244, "logits/rejected": -2.6547608375549316, "logps/chosen": -602.9847412109375, "logps/rejected": -441.57220458984375, "loss": 0.556, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1246941089630127, "rewards/margins": 0.8012843132019043, "rewards/rejected": -2.925978422164917, "step": 8140 }, { "epoch": 1.05, "learning_rate": 3.607153103184469e-07, "logits/chosen": -2.824050188064575, "logits/rejected": -2.719909906387329, "logps/chosen": -510.7701110839844, "logps/rejected": -358.2281188964844, "loss": 0.5951, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.084567070007324, "rewards/margins": 0.5941601991653442, "rewards/rejected": -2.678727388381958, "step": 8150 }, { "epoch": 1.05, "learning_rate": 3.604762360141532e-07, "logits/chosen": -2.8074777126312256, "logits/rejected": -2.6880993843078613, "logps/chosen": -522.7774658203125, "logps/rejected": -414.93115234375, "loss": 0.6235, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2148423194885254, "rewards/margins": 0.6648720502853394, "rewards/rejected": -2.8797144889831543, "step": 8160 }, { "epoch": 1.05, "learning_rate": 3.6023716170985943e-07, "logits/chosen": -2.7433204650878906, "logits/rejected": -2.694549083709717, "logps/chosen": -578.0736083984375, "logps/rejected": -524.2781372070312, "loss": 0.5379, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1955437660217285, "rewards/margins": 0.7980221509933472, "rewards/rejected": -2.9935660362243652, "step": 8170 }, { "epoch": 1.06, "learning_rate": 3.5999808740556566e-07, "logits/chosen": -2.8944427967071533, "logits/rejected": -2.7216227054595947, "logps/chosen": -571.94287109375, "logps/rejected": -407.368896484375, "loss": 0.5075, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9488528966903687, "rewards/margins": 1.0122454166412354, "rewards/rejected": -2.9610981941223145, "step": 8180 }, { "epoch": 1.06, "learning_rate": 3.597590131012719e-07, "logits/chosen": -2.765580415725708, "logits/rejected": -2.6790239810943604, "logps/chosen": -588.2041015625, "logps/rejected": -454.3912048339844, "loss": 0.6114, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.858782410621643, "rewards/margins": 0.5996623039245605, "rewards/rejected": -2.458444356918335, "step": 8190 }, { "epoch": 1.06, "learning_rate": 3.595199387969781e-07, "logits/chosen": -2.8984408378601074, "logits/rejected": -2.7245867252349854, "logps/chosen": -671.08154296875, "logps/rejected": -451.72930908203125, "loss": 0.5229, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9741636514663696, "rewards/margins": 0.9132621884346008, "rewards/rejected": -2.8874258995056152, "step": 8200 }, { "epoch": 1.06, "learning_rate": 3.592808644926843e-07, "logits/chosen": -2.782576084136963, "logits/rejected": -2.739847183227539, "logps/chosen": -525.0756225585938, "logps/rejected": -446.88519287109375, "loss": 0.6326, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.214761972427368, "rewards/margins": 0.5155223608016968, "rewards/rejected": -2.7302842140197754, "step": 8210 }, { "epoch": 1.06, "learning_rate": 3.5904179018839056e-07, "logits/chosen": -2.7928478717803955, "logits/rejected": -2.5963797569274902, "logps/chosen": -570.5167236328125, "logps/rejected": -414.87786865234375, "loss": 0.4917, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9796760082244873, "rewards/margins": 1.005733847618103, "rewards/rejected": -2.985410213470459, "step": 8220 }, { "epoch": 1.06, "learning_rate": 3.588027158840968e-07, "logits/chosen": -2.808419704437256, "logits/rejected": -2.6574623584747314, "logps/chosen": -577.8947143554688, "logps/rejected": -468.12823486328125, "loss": 0.7129, "rewards/accuracies": 0.625, "rewards/chosen": -2.221895694732666, "rewards/margins": 0.5744482278823853, "rewards/rejected": -2.796344041824341, "step": 8230 }, { "epoch": 1.06, "learning_rate": 3.5856364157980303e-07, "logits/chosen": -2.7705814838409424, "logits/rejected": -2.6461076736450195, "logps/chosen": -463.8115234375, "logps/rejected": -383.1347351074219, "loss": 0.537, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.011749744415283, "rewards/margins": 0.8444746732711792, "rewards/rejected": -2.856224536895752, "step": 8240 }, { "epoch": 1.07, "learning_rate": 3.583245672755092e-07, "logits/chosen": -2.7591960430145264, "logits/rejected": -2.602774143218994, "logps/chosen": -494.2841796875, "logps/rejected": -359.8154602050781, "loss": 0.6532, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.391080856323242, "rewards/margins": 0.55195152759552, "rewards/rejected": -2.9430320262908936, "step": 8250 }, { "epoch": 1.07, "learning_rate": 3.5808549297121545e-07, "logits/chosen": -2.7676682472229004, "logits/rejected": -2.631772518157959, "logps/chosen": -526.8746948242188, "logps/rejected": -407.897705078125, "loss": 0.6133, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1919403076171875, "rewards/margins": 0.7172631621360779, "rewards/rejected": -2.90920352935791, "step": 8260 }, { "epoch": 1.07, "learning_rate": 3.578464186669217e-07, "logits/chosen": -2.7191431522369385, "logits/rejected": -2.6137537956237793, "logps/chosen": -514.42529296875, "logps/rejected": -409.2682800292969, "loss": 0.6879, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.117780923843384, "rewards/margins": 0.5547804236412048, "rewards/rejected": -2.6725614070892334, "step": 8270 }, { "epoch": 1.07, "learning_rate": 3.576073443626279e-07, "logits/chosen": -2.8288826942443848, "logits/rejected": -2.8042893409729004, "logps/chosen": -426.9935607910156, "logps/rejected": -384.9145202636719, "loss": 0.7818, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.3076021671295166, "rewards/margins": 0.35874220728874207, "rewards/rejected": -2.666344165802002, "step": 8280 }, { "epoch": 1.07, "learning_rate": 3.573682700583341e-07, "logits/chosen": -2.692255735397339, "logits/rejected": -2.569883346557617, "logps/chosen": -529.7217407226562, "logps/rejected": -382.7051086425781, "loss": 0.5356, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1232800483703613, "rewards/margins": 0.8423713445663452, "rewards/rejected": -2.965651512145996, "step": 8290 }, { "epoch": 1.07, "learning_rate": 3.5712919575404034e-07, "logits/chosen": -2.6514246463775635, "logits/rejected": -2.6552271842956543, "logps/chosen": -472.86248779296875, "logps/rejected": -413.0440368652344, "loss": 0.7138, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.268265962600708, "rewards/margins": 0.5160333514213562, "rewards/rejected": -2.784299373626709, "step": 8300 }, { "epoch": 1.07, "learning_rate": 3.5689012144974657e-07, "logits/chosen": -2.7856640815734863, "logits/rejected": -2.672433376312256, "logps/chosen": -585.810546875, "logps/rejected": -484.795166015625, "loss": 0.6726, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1606106758117676, "rewards/margins": 0.48520058393478394, "rewards/rejected": -2.6458115577697754, "step": 8310 }, { "epoch": 1.07, "learning_rate": 3.566510471454528e-07, "logits/chosen": -2.6851255893707275, "logits/rejected": -2.671164035797119, "logps/chosen": -596.5810546875, "logps/rejected": -544.0640869140625, "loss": 0.5768, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7998450994491577, "rewards/margins": 0.8525261878967285, "rewards/rejected": -2.6523711681365967, "step": 8320 }, { "epoch": 1.08, "learning_rate": 3.5641197284115904e-07, "logits/chosen": -2.7871382236480713, "logits/rejected": -2.633084535598755, "logps/chosen": -519.3449096679688, "logps/rejected": -406.05010986328125, "loss": 0.5603, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0462398529052734, "rewards/margins": 0.79947429895401, "rewards/rejected": -2.8457140922546387, "step": 8330 }, { "epoch": 1.08, "learning_rate": 3.5617289853686523e-07, "logits/chosen": -2.7456164360046387, "logits/rejected": -2.645922899246216, "logps/chosen": -588.01611328125, "logps/rejected": -440.691650390625, "loss": 0.5567, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8947060108184814, "rewards/margins": 0.9385307431221008, "rewards/rejected": -2.8332371711730957, "step": 8340 }, { "epoch": 1.08, "learning_rate": 3.5593382423257146e-07, "logits/chosen": -2.743725061416626, "logits/rejected": -2.5927205085754395, "logps/chosen": -494.4513244628906, "logps/rejected": -335.1656494140625, "loss": 0.5393, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.980179786682129, "rewards/margins": 0.8148461580276489, "rewards/rejected": -2.7950260639190674, "step": 8350 }, { "epoch": 1.08, "learning_rate": 3.556947499282777e-07, "logits/chosen": -2.660625696182251, "logits/rejected": -2.60603666305542, "logps/chosen": -566.09912109375, "logps/rejected": -479.764404296875, "loss": 0.6139, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2231297492980957, "rewards/margins": 0.6036604642868042, "rewards/rejected": -2.8267900943756104, "step": 8360 }, { "epoch": 1.08, "learning_rate": 3.5545567562398394e-07, "logits/chosen": -2.7541027069091797, "logits/rejected": -2.569556713104248, "logps/chosen": -662.298583984375, "logps/rejected": -446.9365234375, "loss": 0.5362, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1801936626434326, "rewards/margins": 1.0049703121185303, "rewards/rejected": -3.185163974761963, "step": 8370 }, { "epoch": 1.08, "learning_rate": 3.5521660131969017e-07, "logits/chosen": -2.676978588104248, "logits/rejected": -2.5654537677764893, "logps/chosen": -525.2160034179688, "logps/rejected": -400.82611083984375, "loss": 0.5989, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0337910652160645, "rewards/margins": 0.6127709150314331, "rewards/rejected": -2.646561861038208, "step": 8380 }, { "epoch": 1.08, "learning_rate": 3.5497752701539635e-07, "logits/chosen": -2.7196102142333984, "logits/rejected": -2.665001153945923, "logps/chosen": -497.00115966796875, "logps/rejected": -430.49237060546875, "loss": 0.7853, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3409266471862793, "rewards/margins": 0.3838806748390198, "rewards/rejected": -2.7248075008392334, "step": 8390 }, { "epoch": 1.08, "learning_rate": 3.547384527111026e-07, "logits/chosen": -2.6575586795806885, "logits/rejected": -2.600104808807373, "logps/chosen": -565.7757568359375, "logps/rejected": -450.50518798828125, "loss": 0.556, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1696932315826416, "rewards/margins": 0.8134137988090515, "rewards/rejected": -2.983107328414917, "step": 8400 }, { "epoch": 1.09, "learning_rate": 3.544993784068088e-07, "logits/chosen": -2.8060669898986816, "logits/rejected": -2.5748610496520996, "logps/chosen": -637.98193359375, "logps/rejected": -428.4849548339844, "loss": 0.6327, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2373697757720947, "rewards/margins": 0.7707465291023254, "rewards/rejected": -3.0081162452697754, "step": 8410 }, { "epoch": 1.09, "learning_rate": 3.5426030410251506e-07, "logits/chosen": -2.80490779876709, "logits/rejected": -2.651207447052002, "logps/chosen": -601.1595458984375, "logps/rejected": -460.8880920410156, "loss": 0.5864, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.3147385120391846, "rewards/margins": 0.7982584238052368, "rewards/rejected": -3.112997055053711, "step": 8420 }, { "epoch": 1.09, "learning_rate": 3.5402122979822125e-07, "logits/chosen": -2.680711030960083, "logits/rejected": -2.5663819313049316, "logps/chosen": -532.2738647460938, "logps/rejected": -425.54351806640625, "loss": 0.6456, "rewards/accuracies": 0.625, "rewards/chosen": -2.1908280849456787, "rewards/margins": 0.577434241771698, "rewards/rejected": -2.7682626247406006, "step": 8430 }, { "epoch": 1.09, "learning_rate": 3.537821554939275e-07, "logits/chosen": -2.6293952465057373, "logits/rejected": -2.5786020755767822, "logps/chosen": -577.8242797851562, "logps/rejected": -466.20281982421875, "loss": 0.5636, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1702637672424316, "rewards/margins": 0.7981858849525452, "rewards/rejected": -2.968449354171753, "step": 8440 }, { "epoch": 1.09, "learning_rate": 3.535430811896337e-07, "logits/chosen": -2.814314365386963, "logits/rejected": -2.6108455657958984, "logps/chosen": -555.4205932617188, "logps/rejected": -371.29248046875, "loss": 0.5367, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.994989037513733, "rewards/margins": 1.0930545330047607, "rewards/rejected": -3.0880439281463623, "step": 8450 }, { "epoch": 1.09, "learning_rate": 3.5330400688533995e-07, "logits/chosen": -2.71536922454834, "logits/rejected": -2.5284767150878906, "logps/chosen": -562.7757568359375, "logps/rejected": -396.5674133300781, "loss": 0.6096, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3174667358398438, "rewards/margins": 0.7840578556060791, "rewards/rejected": -3.101524829864502, "step": 8460 }, { "epoch": 1.09, "learning_rate": 3.530649325810462e-07, "logits/chosen": -2.7495951652526855, "logits/rejected": -2.654435157775879, "logps/chosen": -598.504638671875, "logps/rejected": -448.1190490722656, "loss": 0.5586, "rewards/accuracies": 0.75, "rewards/chosen": -2.215977191925049, "rewards/margins": 0.8764225244522095, "rewards/rejected": -3.092400312423706, "step": 8470 }, { "epoch": 1.09, "learning_rate": 3.5282585827675237e-07, "logits/chosen": -2.7310404777526855, "logits/rejected": -2.6211259365081787, "logps/chosen": -565.7813720703125, "logps/rejected": -433.81951904296875, "loss": 0.6685, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.238098621368408, "rewards/margins": 0.6695972084999084, "rewards/rejected": -2.907695770263672, "step": 8480 }, { "epoch": 1.1, "learning_rate": 3.525867839724586e-07, "logits/chosen": -2.667032241821289, "logits/rejected": -2.480752468109131, "logps/chosen": -659.3543701171875, "logps/rejected": -406.73687744140625, "loss": 0.6541, "rewards/accuracies": 0.6875, "rewards/chosen": -2.108402729034424, "rewards/margins": 0.8510922193527222, "rewards/rejected": -2.9594943523406982, "step": 8490 }, { "epoch": 1.1, "learning_rate": 3.5234770966816484e-07, "logits/chosen": -2.7310640811920166, "logits/rejected": -2.6289947032928467, "logps/chosen": -655.3177490234375, "logps/rejected": -486.898193359375, "loss": 0.6283, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2442915439605713, "rewards/margins": 0.7892856001853943, "rewards/rejected": -3.0335774421691895, "step": 8500 }, { "epoch": 1.1, "learning_rate": 3.521086353638711e-07, "logits/chosen": -2.8458304405212402, "logits/rejected": -2.6628940105438232, "logps/chosen": -571.9680786132812, "logps/rejected": -405.9533996582031, "loss": 0.6971, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1778790950775146, "rewards/margins": 0.5680282711982727, "rewards/rejected": -2.7459073066711426, "step": 8510 }, { "epoch": 1.1, "learning_rate": 3.518695610595773e-07, "logits/chosen": -2.7690436840057373, "logits/rejected": -2.6123459339141846, "logps/chosen": -445.0025329589844, "logps/rejected": -361.46807861328125, "loss": 0.7687, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.3815760612487793, "rewards/margins": 0.25464704632759094, "rewards/rejected": -2.636223316192627, "step": 8520 }, { "epoch": 1.1, "learning_rate": 3.516304867552835e-07, "logits/chosen": -2.73081636428833, "logits/rejected": -2.6617515087127686, "logps/chosen": -542.9581298828125, "logps/rejected": -459.35626220703125, "loss": 0.566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.155132293701172, "rewards/margins": 0.8847547769546509, "rewards/rejected": -3.039886951446533, "step": 8530 }, { "epoch": 1.1, "learning_rate": 3.5139141245098973e-07, "logits/chosen": -2.664684772491455, "logits/rejected": -2.504241466522217, "logps/chosen": -544.9945068359375, "logps/rejected": -419.32257080078125, "loss": 0.6165, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1322693824768066, "rewards/margins": 0.9511917233467102, "rewards/rejected": -3.083461046218872, "step": 8540 }, { "epoch": 1.1, "learning_rate": 3.5115233814669597e-07, "logits/chosen": -2.5985658168792725, "logits/rejected": -2.595747470855713, "logps/chosen": -508.90277099609375, "logps/rejected": -428.82904052734375, "loss": 0.711, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.150609016418457, "rewards/margins": 0.5656641721725464, "rewards/rejected": -2.716273069381714, "step": 8550 }, { "epoch": 1.11, "learning_rate": 3.509132638424022e-07, "logits/chosen": -2.7784485816955566, "logits/rejected": -2.6416306495666504, "logps/chosen": -641.0805053710938, "logps/rejected": -470.63525390625, "loss": 0.6835, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2250723838806152, "rewards/margins": 0.6258837580680847, "rewards/rejected": -2.8509562015533447, "step": 8560 }, { "epoch": 1.11, "learning_rate": 3.5067418953810844e-07, "logits/chosen": -2.738584041595459, "logits/rejected": -2.7029871940612793, "logps/chosen": -578.4978637695312, "logps/rejected": -469.1668395996094, "loss": 0.6339, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1473546028137207, "rewards/margins": 0.7712458372116089, "rewards/rejected": -2.918600559234619, "step": 8570 }, { "epoch": 1.11, "learning_rate": 3.504351152338146e-07, "logits/chosen": -2.69062876701355, "logits/rejected": -2.576200008392334, "logps/chosen": -586.3553466796875, "logps/rejected": -391.77301025390625, "loss": 0.6409, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.084818124771118, "rewards/margins": 0.5308693647384644, "rewards/rejected": -2.615687847137451, "step": 8580 }, { "epoch": 1.11, "learning_rate": 3.5019604092952086e-07, "logits/chosen": -2.683666706085205, "logits/rejected": -2.6074068546295166, "logps/chosen": -499.1240234375, "logps/rejected": -427.42401123046875, "loss": 0.5303, "rewards/accuracies": 0.75, "rewards/chosen": -2.1206648349761963, "rewards/margins": 0.8624746203422546, "rewards/rejected": -2.9831392765045166, "step": 8590 }, { "epoch": 1.11, "learning_rate": 3.499569666252271e-07, "logits/chosen": -2.7685537338256836, "logits/rejected": -2.6816930770874023, "logps/chosen": -479.517578125, "logps/rejected": -389.04046630859375, "loss": 0.5924, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.3234214782714844, "rewards/margins": 0.7927545309066772, "rewards/rejected": -3.116176128387451, "step": 8600 }, { "epoch": 1.11, "learning_rate": 3.4971789232093333e-07, "logits/chosen": -2.6083943843841553, "logits/rejected": -2.5465755462646484, "logps/chosen": -614.2764282226562, "logps/rejected": -513.5477294921875, "loss": 0.5869, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2281506061553955, "rewards/margins": 0.8320953249931335, "rewards/rejected": -3.0602457523345947, "step": 8610 }, { "epoch": 1.11, "learning_rate": 3.494788180166395e-07, "logits/chosen": -2.7632439136505127, "logits/rejected": -2.6268041133880615, "logps/chosen": -556.2453002929688, "logps/rejected": -444.06585693359375, "loss": 0.5371, "rewards/accuracies": 0.75, "rewards/chosen": -2.1543655395507812, "rewards/margins": 0.8871210217475891, "rewards/rejected": -3.0414865016937256, "step": 8620 }, { "epoch": 1.11, "learning_rate": 3.4923974371234575e-07, "logits/chosen": -2.8796944618225098, "logits/rejected": -2.7472941875457764, "logps/chosen": -602.0484008789062, "logps/rejected": -427.35693359375, "loss": 0.68, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.468949794769287, "rewards/margins": 0.4714924395084381, "rewards/rejected": -2.9404423236846924, "step": 8630 }, { "epoch": 1.12, "learning_rate": 3.49000669408052e-07, "logits/chosen": -2.6806352138519287, "logits/rejected": -2.5651159286499023, "logps/chosen": -522.3548583984375, "logps/rejected": -445.8601989746094, "loss": 0.5736, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8545167446136475, "rewards/margins": 0.7690653800964355, "rewards/rejected": -2.623582363128662, "step": 8640 }, { "epoch": 1.12, "learning_rate": 3.487615951037582e-07, "logits/chosen": -2.8115053176879883, "logits/rejected": -2.6135780811309814, "logps/chosen": -511.6341857910156, "logps/rejected": -349.8370361328125, "loss": 0.7614, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3445167541503906, "rewards/margins": 0.45426759123802185, "rewards/rejected": -2.7987842559814453, "step": 8650 }, { "epoch": 1.12, "learning_rate": 3.4852252079946446e-07, "logits/chosen": -2.7980105876922607, "logits/rejected": -2.6605467796325684, "logps/chosen": -606.5411376953125, "logps/rejected": -513.4190673828125, "loss": 0.5882, "rewards/accuracies": 0.6875, "rewards/chosen": -2.31478214263916, "rewards/margins": 0.8335739374160767, "rewards/rejected": -3.1483559608459473, "step": 8660 }, { "epoch": 1.12, "learning_rate": 3.4828344649517064e-07, "logits/chosen": -2.608660936355591, "logits/rejected": -2.5490405559539795, "logps/chosen": -495.24853515625, "logps/rejected": -416.87921142578125, "loss": 0.5867, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.233541488647461, "rewards/margins": 0.6719492673873901, "rewards/rejected": -2.9054906368255615, "step": 8670 }, { "epoch": 1.12, "learning_rate": 3.480443721908769e-07, "logits/chosen": -2.7483632564544678, "logits/rejected": -2.6068332195281982, "logps/chosen": -593.44970703125, "logps/rejected": -423.75677490234375, "loss": 0.6082, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3439645767211914, "rewards/margins": 0.6326743960380554, "rewards/rejected": -2.9766390323638916, "step": 8680 }, { "epoch": 1.12, "learning_rate": 3.478052978865831e-07, "logits/chosen": -2.819648265838623, "logits/rejected": -2.680741786956787, "logps/chosen": -544.6221923828125, "logps/rejected": -433.429443359375, "loss": 0.709, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4022469520568848, "rewards/margins": 0.3909798264503479, "rewards/rejected": -2.793226718902588, "step": 8690 }, { "epoch": 1.12, "learning_rate": 3.4756622358228935e-07, "logits/chosen": -2.6951115131378174, "logits/rejected": -2.5412192344665527, "logps/chosen": -538.8351440429688, "logps/rejected": -411.55706787109375, "loss": 0.6132, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9845552444458008, "rewards/margins": 0.6397794485092163, "rewards/rejected": -2.6243345737457275, "step": 8700 }, { "epoch": 1.12, "learning_rate": 3.473271492779956e-07, "logits/chosen": -2.6192431449890137, "logits/rejected": -2.5358078479766846, "logps/chosen": -525.9937744140625, "logps/rejected": -408.3705749511719, "loss": 0.5324, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.009521961212158, "rewards/margins": 0.9788221120834351, "rewards/rejected": -2.988344430923462, "step": 8710 }, { "epoch": 1.13, "learning_rate": 3.4708807497370177e-07, "logits/chosen": -2.8174593448638916, "logits/rejected": -2.6623101234436035, "logps/chosen": -546.4591064453125, "logps/rejected": -474.93994140625, "loss": 0.6419, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2093324661254883, "rewards/margins": 0.6466624140739441, "rewards/rejected": -2.855994701385498, "step": 8720 }, { "epoch": 1.13, "learning_rate": 3.4684900066940806e-07, "logits/chosen": -2.668396472930908, "logits/rejected": -2.4727413654327393, "logps/chosen": -654.7051391601562, "logps/rejected": -441.8409729003906, "loss": 0.7573, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.272103786468506, "rewards/margins": 0.7041548490524292, "rewards/rejected": -2.9762585163116455, "step": 8730 }, { "epoch": 1.13, "learning_rate": 3.466099263651143e-07, "logits/chosen": -2.8412423133850098, "logits/rejected": -2.662661075592041, "logps/chosen": -528.311279296875, "logps/rejected": -386.05096435546875, "loss": 0.5258, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.235386371612549, "rewards/margins": 1.6470565795898438, "rewards/rejected": -3.8824431896209717, "step": 8740 }, { "epoch": 1.13, "learning_rate": 3.4637085206082053e-07, "logits/chosen": -2.778367280960083, "logits/rejected": -2.6217868328094482, "logps/chosen": -577.7277221679688, "logps/rejected": -475.6748962402344, "loss": 0.6126, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1501784324645996, "rewards/margins": 0.7986355423927307, "rewards/rejected": -2.9488139152526855, "step": 8750 }, { "epoch": 1.13, "learning_rate": 3.461317777565267e-07, "logits/chosen": -2.7931582927703857, "logits/rejected": -2.727447748184204, "logps/chosen": -523.576171875, "logps/rejected": -396.66766357421875, "loss": 0.7213, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.314640760421753, "rewards/margins": 0.5511221289634705, "rewards/rejected": -2.865762948989868, "step": 8760 }, { "epoch": 1.13, "learning_rate": 3.4589270345223295e-07, "logits/chosen": -2.6663737297058105, "logits/rejected": -2.635394811630249, "logps/chosen": -583.8070068359375, "logps/rejected": -508.46734619140625, "loss": 0.6717, "rewards/accuracies": 0.625, "rewards/chosen": -2.392744779586792, "rewards/margins": 0.5664997696876526, "rewards/rejected": -2.959244728088379, "step": 8770 }, { "epoch": 1.13, "learning_rate": 3.456536291479392e-07, "logits/chosen": -2.7086501121520996, "logits/rejected": -2.6188981533050537, "logps/chosen": -608.8511352539062, "logps/rejected": -517.33203125, "loss": 0.6472, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.273730516433716, "rewards/margins": 0.7585776448249817, "rewards/rejected": -3.0323081016540527, "step": 8780 }, { "epoch": 1.13, "learning_rate": 3.454145548436454e-07, "logits/chosen": -2.5673489570617676, "logits/rejected": -2.621546745300293, "logps/chosen": -534.0498046875, "logps/rejected": -530.7503662109375, "loss": 0.6954, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4308571815490723, "rewards/margins": 0.6274678707122803, "rewards/rejected": -3.0583245754241943, "step": 8790 }, { "epoch": 1.14, "learning_rate": 3.4517548053935166e-07, "logits/chosen": -2.6513798236846924, "logits/rejected": -2.63264799118042, "logps/chosen": -550.3482055664062, "logps/rejected": -484.1080017089844, "loss": 0.6095, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1839659214019775, "rewards/margins": 0.6949368715286255, "rewards/rejected": -2.8789024353027344, "step": 8800 }, { "epoch": 1.14, "learning_rate": 3.4493640623505784e-07, "logits/chosen": -2.8524417877197266, "logits/rejected": -2.651901960372925, "logps/chosen": -629.597412109375, "logps/rejected": -420.8883361816406, "loss": 0.5163, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8617900609970093, "rewards/margins": 1.020526647567749, "rewards/rejected": -2.882316827774048, "step": 8810 }, { "epoch": 1.14, "learning_rate": 3.446973319307641e-07, "logits/chosen": -2.734520673751831, "logits/rejected": -2.6300148963928223, "logps/chosen": -505.10711669921875, "logps/rejected": -415.16510009765625, "loss": 0.5453, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0176289081573486, "rewards/margins": 0.8249828219413757, "rewards/rejected": -2.8426120281219482, "step": 8820 }, { "epoch": 1.14, "learning_rate": 3.444582576264703e-07, "logits/chosen": -2.838057518005371, "logits/rejected": -2.6889023780822754, "logps/chosen": -562.8583374023438, "logps/rejected": -375.55023193359375, "loss": 0.5564, "rewards/accuracies": 0.6875, "rewards/chosen": -2.176997423171997, "rewards/margins": 0.9409663081169128, "rewards/rejected": -3.1179633140563965, "step": 8830 }, { "epoch": 1.14, "learning_rate": 3.4421918332217655e-07, "logits/chosen": -2.741302013397217, "logits/rejected": -2.631103515625, "logps/chosen": -580.4947509765625, "logps/rejected": -442.49114990234375, "loss": 0.5293, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.212951183319092, "rewards/margins": 1.005584955215454, "rewards/rejected": -3.2185356616973877, "step": 8840 }, { "epoch": 1.14, "learning_rate": 3.439801090178828e-07, "logits/chosen": -2.6479060649871826, "logits/rejected": -2.6283016204833984, "logps/chosen": -514.1539916992188, "logps/rejected": -482.5419921875, "loss": 0.7807, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3568148612976074, "rewards/margins": 0.41056638956069946, "rewards/rejected": -2.767381191253662, "step": 8850 }, { "epoch": 1.14, "learning_rate": 3.4374103471358897e-07, "logits/chosen": -2.747988224029541, "logits/rejected": -2.725904941558838, "logps/chosen": -537.460205078125, "logps/rejected": -493.1636657714844, "loss": 0.5712, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0998928546905518, "rewards/margins": 0.7993863224983215, "rewards/rejected": -2.8992793560028076, "step": 8860 }, { "epoch": 1.15, "learning_rate": 3.435019604092952e-07, "logits/chosen": -2.7654407024383545, "logits/rejected": -2.680609941482544, "logps/chosen": -614.1395263671875, "logps/rejected": -535.7999267578125, "loss": 0.7201, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2968242168426514, "rewards/margins": 0.6297492980957031, "rewards/rejected": -2.9265732765197754, "step": 8870 }, { "epoch": 1.15, "learning_rate": 3.4326288610500144e-07, "logits/chosen": -2.7755789756774902, "logits/rejected": -2.703195095062256, "logps/chosen": -531.9508056640625, "logps/rejected": -450.68310546875, "loss": 0.656, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.043966770172119, "rewards/margins": 0.5874232649803162, "rewards/rejected": -2.631389856338501, "step": 8880 }, { "epoch": 1.15, "learning_rate": 3.430238118007077e-07, "logits/chosen": -2.6627211570739746, "logits/rejected": -2.57539701461792, "logps/chosen": -518.1597900390625, "logps/rejected": -468.26336669921875, "loss": 0.6212, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0013983249664307, "rewards/margins": 0.7353730201721191, "rewards/rejected": -2.73677134513855, "step": 8890 }, { "epoch": 1.15, "learning_rate": 3.4278473749641386e-07, "logits/chosen": -2.7555365562438965, "logits/rejected": -2.706190586090088, "logps/chosen": -569.80224609375, "logps/rejected": -484.55035400390625, "loss": 0.6463, "rewards/accuracies": 0.6875, "rewards/chosen": -2.282074213027954, "rewards/margins": 0.5158353447914124, "rewards/rejected": -2.797909736633301, "step": 8900 }, { "epoch": 1.15, "learning_rate": 3.425456631921201e-07, "logits/chosen": -2.66076397895813, "logits/rejected": -2.4889724254608154, "logps/chosen": -553.94921875, "logps/rejected": -397.1055603027344, "loss": 0.5966, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.181917905807495, "rewards/margins": 0.9088815450668335, "rewards/rejected": -3.090799570083618, "step": 8910 }, { "epoch": 1.15, "learning_rate": 3.4230658888782633e-07, "logits/chosen": -2.7158169746398926, "logits/rejected": -2.601181983947754, "logps/chosen": -515.0187377929688, "logps/rejected": -410.33123779296875, "loss": 0.6178, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0311524868011475, "rewards/margins": 0.6331676244735718, "rewards/rejected": -2.664320468902588, "step": 8920 }, { "epoch": 1.15, "learning_rate": 3.4206751458353256e-07, "logits/chosen": -2.7455270290374756, "logits/rejected": -2.681690216064453, "logps/chosen": -514.8248291015625, "logps/rejected": -495.89630126953125, "loss": 0.5984, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.265632390975952, "rewards/margins": 0.5986028909683228, "rewards/rejected": -2.8642351627349854, "step": 8930 }, { "epoch": 1.15, "learning_rate": 3.418284402792388e-07, "logits/chosen": -2.7612664699554443, "logits/rejected": -2.5822463035583496, "logps/chosen": -668.5414428710938, "logps/rejected": -512.6603393554688, "loss": 0.7313, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.588947296142578, "rewards/margins": 0.5167784094810486, "rewards/rejected": -3.1057257652282715, "step": 8940 }, { "epoch": 1.16, "learning_rate": 3.41589365974945e-07, "logits/chosen": -2.709428310394287, "logits/rejected": -2.6049935817718506, "logps/chosen": -583.7616577148438, "logps/rejected": -489.6817321777344, "loss": 0.7051, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2422025203704834, "rewards/margins": 0.4584556519985199, "rewards/rejected": -2.700658082962036, "step": 8950 }, { "epoch": 1.16, "learning_rate": 3.413502916706512e-07, "logits/chosen": -2.859423875808716, "logits/rejected": -2.6673755645751953, "logps/chosen": -627.96875, "logps/rejected": -461.95599365234375, "loss": 0.6964, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1677677631378174, "rewards/margins": 0.6106666326522827, "rewards/rejected": -2.7784345149993896, "step": 8960 }, { "epoch": 1.16, "learning_rate": 3.4111121736635746e-07, "logits/chosen": -2.704984188079834, "logits/rejected": -2.666337013244629, "logps/chosen": -540.3207397460938, "logps/rejected": -460.71807861328125, "loss": 0.6942, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.063244581222534, "rewards/margins": 0.738702118396759, "rewards/rejected": -2.8019464015960693, "step": 8970 }, { "epoch": 1.16, "learning_rate": 3.408721430620637e-07, "logits/chosen": -2.7886555194854736, "logits/rejected": -2.605813980102539, "logps/chosen": -589.4595947265625, "logps/rejected": -380.2214050292969, "loss": 0.4949, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.842442512512207, "rewards/margins": 0.8869462013244629, "rewards/rejected": -2.72938871383667, "step": 8980 }, { "epoch": 1.16, "learning_rate": 3.4063306875776993e-07, "logits/chosen": -2.7395431995391846, "logits/rejected": -2.629047393798828, "logps/chosen": -549.7359619140625, "logps/rejected": -464.1791076660156, "loss": 0.5661, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1582720279693604, "rewards/margins": 0.9128656387329102, "rewards/rejected": -3.0711374282836914, "step": 8990 }, { "epoch": 1.16, "learning_rate": 3.403939944534761e-07, "logits/chosen": -2.7325966358184814, "logits/rejected": -2.677659511566162, "logps/chosen": -574.0753173828125, "logps/rejected": -441.6094665527344, "loss": 0.6507, "rewards/accuracies": 0.6875, "rewards/chosen": -2.062699794769287, "rewards/margins": 0.6577935218811035, "rewards/rejected": -2.7204928398132324, "step": 9000 }, { "epoch": 1.16, "eval_logits/chosen": -3.0594563484191895, "eval_logits/rejected": -2.9981322288513184, "eval_logps/chosen": -540.9644165039062, "eval_logps/rejected": -420.2049255371094, "eval_loss": 0.6215416789054871, "eval_rewards/accuracies": 0.6700000166893005, "eval_rewards/chosen": -0.914466381072998, "eval_rewards/margins": 1.0131609439849854, "eval_rewards/rejected": -1.9276273250579834, "eval_runtime": 280.0657, "eval_samples_per_second": 7.141, "eval_steps_per_second": 3.571, "step": 9000 }, { "epoch": 1.16, "learning_rate": 3.4015492014918235e-07, "logits/chosen": -2.6529288291931152, "logits/rejected": -2.505251407623291, "logps/chosen": -558.2811279296875, "logps/rejected": -435.82672119140625, "loss": 0.6226, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1470134258270264, "rewards/margins": 0.6874309182167053, "rewards/rejected": -2.834444522857666, "step": 9010 }, { "epoch": 1.16, "learning_rate": 3.399158458448886e-07, "logits/chosen": -2.878875494003296, "logits/rejected": -2.745487928390503, "logps/chosen": -660.5261840820312, "logps/rejected": -513.0556030273438, "loss": 0.5599, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2522451877593994, "rewards/margins": 0.8265716433525085, "rewards/rejected": -3.078817129135132, "step": 9020 }, { "epoch": 1.17, "learning_rate": 3.396767715405948e-07, "logits/chosen": -2.7590391635894775, "logits/rejected": -2.647611618041992, "logps/chosen": -572.045654296875, "logps/rejected": -478.7655334472656, "loss": 0.5532, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1365160942077637, "rewards/margins": 0.755820631980896, "rewards/rejected": -2.8923370838165283, "step": 9030 }, { "epoch": 1.17, "learning_rate": 3.39437697236301e-07, "logits/chosen": -2.743781089782715, "logits/rejected": -2.6340250968933105, "logps/chosen": -501.44366455078125, "logps/rejected": -364.6751708984375, "loss": 0.5589, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0007030963897705, "rewards/margins": 0.8043910264968872, "rewards/rejected": -2.805093765258789, "step": 9040 }, { "epoch": 1.17, "learning_rate": 3.3919862293200724e-07, "logits/chosen": -2.684966802597046, "logits/rejected": -2.691868543624878, "logps/chosen": -466.89715576171875, "logps/rejected": -456.50970458984375, "loss": 0.7844, "rewards/accuracies": 0.5625, "rewards/chosen": -2.3507649898529053, "rewards/margins": 0.33404025435447693, "rewards/rejected": -2.684805393218994, "step": 9050 }, { "epoch": 1.17, "learning_rate": 3.3895954862771347e-07, "logits/chosen": -2.646955966949463, "logits/rejected": -2.6393046379089355, "logps/chosen": -587.1826171875, "logps/rejected": -494.91619873046875, "loss": 0.7271, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3797965049743652, "rewards/margins": 0.4646824300289154, "rewards/rejected": -2.8444786071777344, "step": 9060 }, { "epoch": 1.17, "learning_rate": 3.387204743234197e-07, "logits/chosen": -2.6429519653320312, "logits/rejected": -2.582624912261963, "logps/chosen": -483.7911682128906, "logps/rejected": -355.65557861328125, "loss": 0.564, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.150204658508301, "rewards/margins": 0.9368387460708618, "rewards/rejected": -3.087043285369873, "step": 9070 }, { "epoch": 1.17, "learning_rate": 3.3848140001912594e-07, "logits/chosen": -2.7972183227539062, "logits/rejected": -2.6656277179718018, "logps/chosen": -553.7890625, "logps/rejected": -434.119873046875, "loss": 0.7475, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.379781484603882, "rewards/margins": 0.45172232389450073, "rewards/rejected": -2.8315038681030273, "step": 9080 }, { "epoch": 1.17, "learning_rate": 3.3824232571483213e-07, "logits/chosen": -2.6429240703582764, "logits/rejected": -2.7071633338928223, "logps/chosen": -544.6856689453125, "logps/rejected": -507.4732360839844, "loss": 0.6669, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.4233317375183105, "rewards/margins": 0.5901135206222534, "rewards/rejected": -3.0134453773498535, "step": 9090 }, { "epoch": 1.17, "learning_rate": 3.3800325141053836e-07, "logits/chosen": -2.6966562271118164, "logits/rejected": -2.6069045066833496, "logps/chosen": -540.0928955078125, "logps/rejected": -466.4312438964844, "loss": 0.6571, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.44891619682312, "rewards/margins": 0.8096848726272583, "rewards/rejected": -3.258600950241089, "step": 9100 }, { "epoch": 1.18, "learning_rate": 3.377641771062446e-07, "logits/chosen": -2.64597225189209, "logits/rejected": -2.532032012939453, "logps/chosen": -562.6595458984375, "logps/rejected": -403.97607421875, "loss": 0.716, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3710174560546875, "rewards/margins": 0.5440124273300171, "rewards/rejected": -2.915030002593994, "step": 9110 }, { "epoch": 1.18, "learning_rate": 3.3752510280195084e-07, "logits/chosen": -2.6773369312286377, "logits/rejected": -2.6379599571228027, "logps/chosen": -546.234619140625, "logps/rejected": -435.75604248046875, "loss": 0.5842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9765567779541016, "rewards/margins": 0.7578718662261963, "rewards/rejected": -2.734428644180298, "step": 9120 }, { "epoch": 1.18, "learning_rate": 3.3728602849765707e-07, "logits/chosen": -2.721524715423584, "logits/rejected": -2.6743557453155518, "logps/chosen": -546.7986450195312, "logps/rejected": -446.807861328125, "loss": 0.82, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.367633581161499, "rewards/margins": 0.30305859446525574, "rewards/rejected": -2.670691967010498, "step": 9130 }, { "epoch": 1.18, "learning_rate": 3.3704695419336325e-07, "logits/chosen": -2.654982566833496, "logits/rejected": -2.6682941913604736, "logps/chosen": -491.1917419433594, "logps/rejected": -460.53302001953125, "loss": 0.6313, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1370182037353516, "rewards/margins": 0.6107877492904663, "rewards/rejected": -2.7478060722351074, "step": 9140 }, { "epoch": 1.18, "learning_rate": 3.368078798890695e-07, "logits/chosen": -2.7031455039978027, "logits/rejected": -2.5852184295654297, "logps/chosen": -551.6412963867188, "logps/rejected": -397.0000915527344, "loss": 0.5918, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0127971172332764, "rewards/margins": 0.7409812211990356, "rewards/rejected": -2.7537784576416016, "step": 9150 }, { "epoch": 1.18, "learning_rate": 3.365688055847757e-07, "logits/chosen": -2.657698154449463, "logits/rejected": -2.47788667678833, "logps/chosen": -502.91912841796875, "logps/rejected": -369.14129638671875, "loss": 0.6556, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0513734817504883, "rewards/margins": 0.8017080426216125, "rewards/rejected": -2.853081703186035, "step": 9160 }, { "epoch": 1.18, "learning_rate": 3.3632973128048196e-07, "logits/chosen": -2.6108179092407227, "logits/rejected": -2.5464818477630615, "logps/chosen": -490.9784240722656, "logps/rejected": -383.6356201171875, "loss": 0.6668, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2147140502929688, "rewards/margins": 0.4965059757232666, "rewards/rejected": -2.7112200260162354, "step": 9170 }, { "epoch": 1.19, "learning_rate": 3.3609065697618815e-07, "logits/chosen": -2.7248358726501465, "logits/rejected": -2.6553239822387695, "logps/chosen": -586.0028076171875, "logps/rejected": -462.4295349121094, "loss": 0.5715, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.136082649230957, "rewards/margins": 0.8848042488098145, "rewards/rejected": -3.0208868980407715, "step": 9180 }, { "epoch": 1.19, "learning_rate": 3.358515826718944e-07, "logits/chosen": -2.626929759979248, "logits/rejected": -2.548552989959717, "logps/chosen": -599.3068237304688, "logps/rejected": -451.03790283203125, "loss": 0.6357, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0413317680358887, "rewards/margins": 0.6591695547103882, "rewards/rejected": -2.7005014419555664, "step": 9190 }, { "epoch": 1.19, "learning_rate": 3.356125083676006e-07, "logits/chosen": -2.729731798171997, "logits/rejected": -2.521811008453369, "logps/chosen": -647.470703125, "logps/rejected": -428.81494140625, "loss": 0.6866, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0469655990600586, "rewards/margins": 0.5972694158554077, "rewards/rejected": -2.644235134124756, "step": 9200 }, { "epoch": 1.19, "learning_rate": 3.3537343406330685e-07, "logits/chosen": -2.7893950939178467, "logits/rejected": -2.67757511138916, "logps/chosen": -492.41558837890625, "logps/rejected": -413.07806396484375, "loss": 0.6038, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.917046308517456, "rewards/margins": 0.6945460438728333, "rewards/rejected": -2.6115925312042236, "step": 9210 }, { "epoch": 1.19, "learning_rate": 3.351343597590131e-07, "logits/chosen": -2.802342414855957, "logits/rejected": -2.710324764251709, "logps/chosen": -553.583984375, "logps/rejected": -451.59228515625, "loss": 0.6232, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2650952339172363, "rewards/margins": 0.6872947812080383, "rewards/rejected": -2.95238995552063, "step": 9220 }, { "epoch": 1.19, "learning_rate": 3.3489528545471927e-07, "logits/chosen": -2.7889323234558105, "logits/rejected": -2.5772953033447266, "logps/chosen": -495.67437744140625, "logps/rejected": -359.51776123046875, "loss": 0.4647, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9896581172943115, "rewards/margins": 1.0595347881317139, "rewards/rejected": -3.0491929054260254, "step": 9230 }, { "epoch": 1.19, "learning_rate": 3.346562111504255e-07, "logits/chosen": -2.73093843460083, "logits/rejected": -2.58838152885437, "logps/chosen": -529.8412475585938, "logps/rejected": -418.05517578125, "loss": 0.5976, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0019614696502686, "rewards/margins": 0.713873028755188, "rewards/rejected": -2.715834379196167, "step": 9240 }, { "epoch": 1.19, "learning_rate": 3.3441713684613174e-07, "logits/chosen": -2.664951801300049, "logits/rejected": -2.509486436843872, "logps/chosen": -567.1980590820312, "logps/rejected": -447.58197021484375, "loss": 0.7882, "rewards/accuracies": 0.625, "rewards/chosen": -2.364701509475708, "rewards/margins": 0.5610717535018921, "rewards/rejected": -2.9257736206054688, "step": 9250 }, { "epoch": 1.2, "learning_rate": 3.34178062541838e-07, "logits/chosen": -2.591764450073242, "logits/rejected": -2.5806257724761963, "logps/chosen": -422.16217041015625, "logps/rejected": -377.5687255859375, "loss": 0.6295, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0153489112854004, "rewards/margins": 0.6109936237335205, "rewards/rejected": -2.626342535018921, "step": 9260 }, { "epoch": 1.2, "learning_rate": 3.339389882375442e-07, "logits/chosen": -2.6956610679626465, "logits/rejected": -2.578744411468506, "logps/chosen": -536.4766845703125, "logps/rejected": -408.15704345703125, "loss": 0.6776, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3814857006073, "rewards/margins": 0.5238484144210815, "rewards/rejected": -2.90533447265625, "step": 9270 }, { "epoch": 1.2, "learning_rate": 3.336999139332504e-07, "logits/chosen": -2.7172296047210693, "logits/rejected": -2.5366663932800293, "logps/chosen": -569.8889770507812, "logps/rejected": -426.76593017578125, "loss": 0.6215, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3922410011291504, "rewards/margins": 0.6470062136650085, "rewards/rejected": -3.0392470359802246, "step": 9280 }, { "epoch": 1.2, "learning_rate": 3.3346083962895663e-07, "logits/chosen": -2.7611794471740723, "logits/rejected": -2.5558533668518066, "logps/chosen": -601.677734375, "logps/rejected": -413.7518005371094, "loss": 0.6111, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0422186851501465, "rewards/margins": 0.7120535969734192, "rewards/rejected": -2.7542724609375, "step": 9290 }, { "epoch": 1.2, "learning_rate": 3.3322176532466287e-07, "logits/chosen": -2.7407310009002686, "logits/rejected": -2.6093268394470215, "logps/chosen": -627.3674926757812, "logps/rejected": -477.10540771484375, "loss": 0.6113, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0898430347442627, "rewards/margins": 0.8006587028503418, "rewards/rejected": -2.8905014991760254, "step": 9300 }, { "epoch": 1.2, "learning_rate": 3.3298269102036916e-07, "logits/chosen": -2.643718719482422, "logits/rejected": -2.564018726348877, "logps/chosen": -575.5028076171875, "logps/rejected": -425.767578125, "loss": 0.6015, "rewards/accuracies": 0.75, "rewards/chosen": -2.0172648429870605, "rewards/margins": 0.9273583292961121, "rewards/rejected": -2.9446232318878174, "step": 9310 }, { "epoch": 1.2, "learning_rate": 3.327436167160753e-07, "logits/chosen": -2.6470630168914795, "logits/rejected": -2.5656909942626953, "logps/chosen": -577.5350952148438, "logps/rejected": -464.36151123046875, "loss": 0.6374, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.173614025115967, "rewards/margins": 0.6899082064628601, "rewards/rejected": -2.8635222911834717, "step": 9320 }, { "epoch": 1.2, "learning_rate": 3.325045424117816e-07, "logits/chosen": -2.584991931915283, "logits/rejected": -2.5342936515808105, "logps/chosen": -505.0516662597656, "logps/rejected": -424.6084899902344, "loss": 0.6402, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.94863760471344, "rewards/margins": 0.5580220222473145, "rewards/rejected": -2.506659507751465, "step": 9330 }, { "epoch": 1.21, "learning_rate": 3.322654681074878e-07, "logits/chosen": -2.7289860248565674, "logits/rejected": -2.5403873920440674, "logps/chosen": -608.3770751953125, "logps/rejected": -418.22149658203125, "loss": 0.6043, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.20831298828125, "rewards/margins": 0.8100987672805786, "rewards/rejected": -3.0184121131896973, "step": 9340 }, { "epoch": 1.21, "learning_rate": 3.3202639380319405e-07, "logits/chosen": -2.5409672260284424, "logits/rejected": -2.5280659198760986, "logps/chosen": -545.6832275390625, "logps/rejected": -425.10302734375, "loss": 0.6542, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.224447250366211, "rewards/margins": 0.6403327584266663, "rewards/rejected": -2.8647799491882324, "step": 9350 }, { "epoch": 1.21, "learning_rate": 3.317873194989003e-07, "logits/chosen": -2.5691323280334473, "logits/rejected": -2.4512505531311035, "logps/chosen": -521.588623046875, "logps/rejected": -397.9109191894531, "loss": 0.7265, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.269090175628662, "rewards/margins": 0.5187544226646423, "rewards/rejected": -2.787844657897949, "step": 9360 }, { "epoch": 1.21, "learning_rate": 3.3154824519460647e-07, "logits/chosen": -2.7955524921417236, "logits/rejected": -2.66363525390625, "logps/chosen": -545.1548461914062, "logps/rejected": -397.63201904296875, "loss": 0.6687, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2323763370513916, "rewards/margins": 0.5144761800765991, "rewards/rejected": -2.746852397918701, "step": 9370 }, { "epoch": 1.21, "learning_rate": 3.313091708903127e-07, "logits/chosen": -2.6430561542510986, "logits/rejected": -2.5552616119384766, "logps/chosen": -566.2005004882812, "logps/rejected": -453.593994140625, "loss": 0.7418, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.309573173522949, "rewards/margins": 0.42470306158065796, "rewards/rejected": -2.734276294708252, "step": 9380 }, { "epoch": 1.21, "learning_rate": 3.3107009658601894e-07, "logits/chosen": -2.714048385620117, "logits/rejected": -2.6350536346435547, "logps/chosen": -466.21466064453125, "logps/rejected": -378.10711669921875, "loss": 0.6777, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1695950031280518, "rewards/margins": 0.6038080453872681, "rewards/rejected": -2.7734029293060303, "step": 9390 }, { "epoch": 1.21, "learning_rate": 3.308310222817252e-07, "logits/chosen": -2.747887134552002, "logits/rejected": -2.484941005706787, "logps/chosen": -633.3187255859375, "logps/rejected": -406.975341796875, "loss": 0.6724, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4394469261169434, "rewards/margins": 0.6349649429321289, "rewards/rejected": -3.074411392211914, "step": 9400 }, { "epoch": 1.21, "learning_rate": 3.305919479774314e-07, "logits/chosen": -2.8015635013580322, "logits/rejected": -2.6130101680755615, "logps/chosen": -655.8599853515625, "logps/rejected": -471.1328125, "loss": 0.6284, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1075119972229004, "rewards/margins": 0.7079107165336609, "rewards/rejected": -2.815422773361206, "step": 9410 }, { "epoch": 1.22, "learning_rate": 3.303528736731376e-07, "logits/chosen": -2.7106571197509766, "logits/rejected": -2.6593661308288574, "logps/chosen": -576.8692016601562, "logps/rejected": -445.24609375, "loss": 0.6595, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1417791843414307, "rewards/margins": 0.5919339656829834, "rewards/rejected": -2.733712911605835, "step": 9420 }, { "epoch": 1.22, "learning_rate": 3.3011379936884383e-07, "logits/chosen": -2.645402431488037, "logits/rejected": -2.553274631500244, "logps/chosen": -585.381103515625, "logps/rejected": -440.69671630859375, "loss": 0.5395, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.125080108642578, "rewards/margins": 0.8061084747314453, "rewards/rejected": -2.9311885833740234, "step": 9430 }, { "epoch": 1.22, "learning_rate": 3.2987472506455007e-07, "logits/chosen": -2.6902060508728027, "logits/rejected": -2.555497169494629, "logps/chosen": -653.2166748046875, "logps/rejected": -496.04766845703125, "loss": 0.7153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0503296852111816, "rewards/margins": 0.5039013624191284, "rewards/rejected": -2.5542304515838623, "step": 9440 }, { "epoch": 1.22, "learning_rate": 3.296356507602563e-07, "logits/chosen": -2.6476261615753174, "logits/rejected": -2.5681252479553223, "logps/chosen": -485.26824951171875, "logps/rejected": -389.0088806152344, "loss": 0.5589, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0241358280181885, "rewards/margins": 0.7126181721687317, "rewards/rejected": -2.7367539405822754, "step": 9450 }, { "epoch": 1.22, "learning_rate": 3.293965764559625e-07, "logits/chosen": -2.7239222526550293, "logits/rejected": -2.613083600997925, "logps/chosen": -588.3970336914062, "logps/rejected": -456.17333984375, "loss": 0.6807, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3382904529571533, "rewards/margins": 0.6569923162460327, "rewards/rejected": -2.9952826499938965, "step": 9460 }, { "epoch": 1.22, "learning_rate": 3.291575021516687e-07, "logits/chosen": -2.728060245513916, "logits/rejected": -2.6265599727630615, "logps/chosen": -551.9659423828125, "logps/rejected": -443.9220275878906, "loss": 0.661, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.981200933456421, "rewards/margins": 0.5524669885635376, "rewards/rejected": -2.533668041229248, "step": 9470 }, { "epoch": 1.22, "learning_rate": 3.2891842784737496e-07, "logits/chosen": -2.5705831050872803, "logits/rejected": -2.475494861602783, "logps/chosen": -452.3827209472656, "logps/rejected": -372.6798400878906, "loss": 0.6074, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0650010108947754, "rewards/margins": 0.686158299446106, "rewards/rejected": -2.7511589527130127, "step": 9480 }, { "epoch": 1.23, "learning_rate": 3.286793535430812e-07, "logits/chosen": -2.677685260772705, "logits/rejected": -2.560645341873169, "logps/chosen": -631.1047973632812, "logps/rejected": -487.4044494628906, "loss": 0.6488, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.363598108291626, "rewards/margins": 0.6946107745170593, "rewards/rejected": -3.05820894241333, "step": 9490 }, { "epoch": 1.23, "learning_rate": 3.2844027923878743e-07, "logits/chosen": -2.7376391887664795, "logits/rejected": -2.6287403106689453, "logps/chosen": -639.5602416992188, "logps/rejected": -478.80816650390625, "loss": 0.6943, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.355435848236084, "rewards/margins": 0.6384981274604797, "rewards/rejected": -2.993934154510498, "step": 9500 }, { "epoch": 1.23, "learning_rate": 3.282012049344936e-07, "logits/chosen": -2.76405668258667, "logits/rejected": -2.60678768157959, "logps/chosen": -645.9998168945312, "logps/rejected": -413.5650329589844, "loss": 0.5696, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0568594932556152, "rewards/margins": 0.892564594745636, "rewards/rejected": -2.9494240283966064, "step": 9510 }, { "epoch": 1.23, "learning_rate": 3.2796213063019985e-07, "logits/chosen": -2.687678337097168, "logits/rejected": -2.5616018772125244, "logps/chosen": -594.0545654296875, "logps/rejected": -467.106689453125, "loss": 0.5689, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.108668565750122, "rewards/margins": 0.8446692228317261, "rewards/rejected": -2.9533379077911377, "step": 9520 }, { "epoch": 1.23, "learning_rate": 3.277230563259061e-07, "logits/chosen": -2.728590726852417, "logits/rejected": -2.6652324199676514, "logps/chosen": -527.3499755859375, "logps/rejected": -389.8302307128906, "loss": 0.6558, "rewards/accuracies": 0.625, "rewards/chosen": -2.119438648223877, "rewards/margins": 0.4907059669494629, "rewards/rejected": -2.61014461517334, "step": 9530 }, { "epoch": 1.23, "learning_rate": 3.274839820216123e-07, "logits/chosen": -2.7017760276794434, "logits/rejected": -2.5397276878356934, "logps/chosen": -553.5303344726562, "logps/rejected": -386.5821838378906, "loss": 0.5771, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9529645442962646, "rewards/margins": 0.9106518626213074, "rewards/rejected": -2.863616466522217, "step": 9540 }, { "epoch": 1.23, "learning_rate": 3.2724490771731856e-07, "logits/chosen": -2.648372173309326, "logits/rejected": -2.5634584426879883, "logps/chosen": -528.8529052734375, "logps/rejected": -425.72015380859375, "loss": 0.6067, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1390538215637207, "rewards/margins": 0.7615028619766235, "rewards/rejected": -2.900556802749634, "step": 9550 }, { "epoch": 1.23, "learning_rate": 3.2700583341302474e-07, "logits/chosen": -2.73722505569458, "logits/rejected": -2.552701711654663, "logps/chosen": -639.3427124023438, "logps/rejected": -469.19976806640625, "loss": 0.5115, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7222576141357422, "rewards/margins": 1.0831111669540405, "rewards/rejected": -2.8053689002990723, "step": 9560 }, { "epoch": 1.24, "learning_rate": 3.26766759108731e-07, "logits/chosen": -2.738949775695801, "logits/rejected": -2.548448085784912, "logps/chosen": -557.432373046875, "logps/rejected": -414.75274658203125, "loss": 0.5615, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.296401023864746, "rewards/margins": 0.7167196273803711, "rewards/rejected": -3.013120651245117, "step": 9570 }, { "epoch": 1.24, "learning_rate": 3.265276848044372e-07, "logits/chosen": -2.7297747135162354, "logits/rejected": -2.6292154788970947, "logps/chosen": -499.04388427734375, "logps/rejected": -386.58477783203125, "loss": 0.502, "rewards/accuracies": 0.75, "rewards/chosen": -2.078878879547119, "rewards/margins": 0.8975990414619446, "rewards/rejected": -2.976477861404419, "step": 9580 }, { "epoch": 1.24, "learning_rate": 3.2628861050014345e-07, "logits/chosen": -2.6680619716644287, "logits/rejected": -2.622779369354248, "logps/chosen": -546.0547485351562, "logps/rejected": -494.8414001464844, "loss": 0.7816, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.2496209144592285, "rewards/margins": 0.18588955700397491, "rewards/rejected": -2.4355101585388184, "step": 9590 }, { "epoch": 1.24, "learning_rate": 3.2604953619584963e-07, "logits/chosen": -2.670281410217285, "logits/rejected": -2.6916775703430176, "logps/chosen": -459.103515625, "logps/rejected": -386.9423522949219, "loss": 0.55, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9232547283172607, "rewards/margins": 0.7715011835098267, "rewards/rejected": -2.694756031036377, "step": 9600 }, { "epoch": 1.24, "learning_rate": 3.2581046189155587e-07, "logits/chosen": -2.567711353302002, "logits/rejected": -2.5096628665924072, "logps/chosen": -531.0281372070312, "logps/rejected": -436.011474609375, "loss": 0.6049, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.207214832305908, "rewards/margins": 0.6991463899612427, "rewards/rejected": -2.9063611030578613, "step": 9610 }, { "epoch": 1.24, "learning_rate": 3.255713875872621e-07, "logits/chosen": -2.7253615856170654, "logits/rejected": -2.6292333602905273, "logps/chosen": -559.6708374023438, "logps/rejected": -464.5552673339844, "loss": 0.7405, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.085524320602417, "rewards/margins": 0.5022348761558533, "rewards/rejected": -2.587759494781494, "step": 9620 }, { "epoch": 1.24, "learning_rate": 3.2533231328296834e-07, "logits/chosen": -2.6560888290405273, "logits/rejected": -2.513685703277588, "logps/chosen": -560.55810546875, "logps/rejected": -404.9812316894531, "loss": 0.6442, "rewards/accuracies": 0.75, "rewards/chosen": -2.065347671508789, "rewards/margins": 0.7077101469039917, "rewards/rejected": -2.773057699203491, "step": 9630 }, { "epoch": 1.24, "learning_rate": 3.250932389786746e-07, "logits/chosen": -2.697622776031494, "logits/rejected": -2.5960748195648193, "logps/chosen": -556.9632568359375, "logps/rejected": -433.9429626464844, "loss": 0.6713, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2506585121154785, "rewards/margins": 0.5418495535850525, "rewards/rejected": -2.7925078868865967, "step": 9640 }, { "epoch": 1.25, "learning_rate": 3.2485416467438076e-07, "logits/chosen": -2.7589826583862305, "logits/rejected": -2.7100372314453125, "logps/chosen": -576.3023681640625, "logps/rejected": -504.6283264160156, "loss": 0.7153, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.202892780303955, "rewards/margins": 0.5527589917182922, "rewards/rejected": -2.7556519508361816, "step": 9650 }, { "epoch": 1.25, "learning_rate": 3.24615090370087e-07, "logits/chosen": -2.6774234771728516, "logits/rejected": -2.6118829250335693, "logps/chosen": -468.18023681640625, "logps/rejected": -357.43988037109375, "loss": 0.6056, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9380528926849365, "rewards/margins": 0.7417635321617126, "rewards/rejected": -2.679816484451294, "step": 9660 }, { "epoch": 1.25, "learning_rate": 3.2437601606579323e-07, "logits/chosen": -2.73968505859375, "logits/rejected": -2.591549873352051, "logps/chosen": -599.1770629882812, "logps/rejected": -458.6421813964844, "loss": 0.5809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0622308254241943, "rewards/margins": 0.6789888143539429, "rewards/rejected": -2.7412195205688477, "step": 9670 }, { "epoch": 1.25, "learning_rate": 3.2413694176149946e-07, "logits/chosen": -2.65706467628479, "logits/rejected": -2.571587085723877, "logps/chosen": -576.1657104492188, "logps/rejected": -471.5946350097656, "loss": 0.5154, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0237975120544434, "rewards/margins": 1.0667165517807007, "rewards/rejected": -3.0905144214630127, "step": 9680 }, { "epoch": 1.25, "learning_rate": 3.238978674572057e-07, "logits/chosen": -2.748629093170166, "logits/rejected": -2.5943045616149902, "logps/chosen": -680.77392578125, "logps/rejected": -494.15142822265625, "loss": 0.6572, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3251960277557373, "rewards/margins": 0.7260445356369019, "rewards/rejected": -3.0512406826019287, "step": 9690 }, { "epoch": 1.25, "learning_rate": 3.236587931529119e-07, "logits/chosen": -2.6268718242645264, "logits/rejected": -2.575216770172119, "logps/chosen": -530.3544921875, "logps/rejected": -441.32476806640625, "loss": 0.521, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0849156379699707, "rewards/margins": 0.9704225659370422, "rewards/rejected": -3.0553383827209473, "step": 9700 }, { "epoch": 1.25, "learning_rate": 3.234197188486181e-07, "logits/chosen": -2.7330880165100098, "logits/rejected": -2.5626015663146973, "logps/chosen": -546.092529296875, "logps/rejected": -423.04486083984375, "loss": 0.7364, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2205357551574707, "rewards/margins": 0.5399960875511169, "rewards/rejected": -2.7605319023132324, "step": 9710 }, { "epoch": 1.25, "learning_rate": 3.2318064454432436e-07, "logits/chosen": -2.6497936248779297, "logits/rejected": -2.576176881790161, "logps/chosen": -506.7890625, "logps/rejected": -431.9772033691406, "loss": 0.5843, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.038032293319702, "rewards/margins": 0.7449958920478821, "rewards/rejected": -2.7830283641815186, "step": 9720 }, { "epoch": 1.26, "learning_rate": 3.229415702400306e-07, "logits/chosen": -2.793416738510132, "logits/rejected": -2.561819076538086, "logps/chosen": -574.5917358398438, "logps/rejected": -420.05072021484375, "loss": 0.653, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1987884044647217, "rewards/margins": 0.5456705689430237, "rewards/rejected": -2.7444586753845215, "step": 9730 }, { "epoch": 1.26, "learning_rate": 3.227024959357368e-07, "logits/chosen": -2.8103299140930176, "logits/rejected": -2.720663070678711, "logps/chosen": -633.3172607421875, "logps/rejected": -503.0668029785156, "loss": 0.6283, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.299976110458374, "rewards/margins": 0.5824490785598755, "rewards/rejected": -2.88242506980896, "step": 9740 }, { "epoch": 1.26, "learning_rate": 3.22463421631443e-07, "logits/chosen": -2.7694177627563477, "logits/rejected": -2.6415390968322754, "logps/chosen": -605.5657958984375, "logps/rejected": -469.2818908691406, "loss": 0.5597, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1363964080810547, "rewards/margins": 1.0054841041564941, "rewards/rejected": -3.141880512237549, "step": 9750 }, { "epoch": 1.26, "learning_rate": 3.2222434732714925e-07, "logits/chosen": -2.8299193382263184, "logits/rejected": -2.7401487827301025, "logps/chosen": -572.8629150390625, "logps/rejected": -446.9300842285156, "loss": 0.5077, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.839870810508728, "rewards/margins": 1.0640804767608643, "rewards/rejected": -2.903951406478882, "step": 9760 }, { "epoch": 1.26, "learning_rate": 3.219852730228555e-07, "logits/chosen": -2.7814602851867676, "logits/rejected": -2.6948704719543457, "logps/chosen": -566.3734130859375, "logps/rejected": -460.9817810058594, "loss": 0.6348, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0052618980407715, "rewards/margins": 0.7159797549247742, "rewards/rejected": -2.7212417125701904, "step": 9770 }, { "epoch": 1.26, "learning_rate": 3.217461987185617e-07, "logits/chosen": -2.6977243423461914, "logits/rejected": -2.556387424468994, "logps/chosen": -572.3264770507812, "logps/rejected": -435.9730529785156, "loss": 0.5162, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.123786449432373, "rewards/margins": 0.9562602043151855, "rewards/rejected": -3.0800468921661377, "step": 9780 }, { "epoch": 1.26, "learning_rate": 3.215071244142679e-07, "logits/chosen": -2.738538980484009, "logits/rejected": -2.6438183784484863, "logps/chosen": -604.4803466796875, "logps/rejected": -470.4649353027344, "loss": 0.6084, "rewards/accuracies": 0.6875, "rewards/chosen": -2.123140335083008, "rewards/margins": 0.7832401394844055, "rewards/rejected": -2.9063804149627686, "step": 9790 }, { "epoch": 1.27, "learning_rate": 3.2126805010997414e-07, "logits/chosen": -2.716648578643799, "logits/rejected": -2.5803520679473877, "logps/chosen": -528.1575317382812, "logps/rejected": -412.1150817871094, "loss": 0.6054, "rewards/accuracies": 0.625, "rewards/chosen": -2.2297005653381348, "rewards/margins": 0.7479394674301147, "rewards/rejected": -2.977640151977539, "step": 9800 }, { "epoch": 1.27, "learning_rate": 3.2102897580568037e-07, "logits/chosen": -2.752058744430542, "logits/rejected": -2.5989434719085693, "logps/chosen": -585.6206665039062, "logps/rejected": -500.41326904296875, "loss": 0.5754, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3788866996765137, "rewards/margins": 0.845761775970459, "rewards/rejected": -3.2246482372283936, "step": 9810 }, { "epoch": 1.27, "learning_rate": 3.207899015013866e-07, "logits/chosen": -2.707167625427246, "logits/rejected": -2.5792205333709717, "logps/chosen": -502.87225341796875, "logps/rejected": -394.7745666503906, "loss": 0.7104, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3096299171447754, "rewards/margins": 0.5030425786972046, "rewards/rejected": -2.8126723766326904, "step": 9820 }, { "epoch": 1.27, "learning_rate": 3.2055082719709284e-07, "logits/chosen": -2.7003891468048096, "logits/rejected": -2.6718380451202393, "logps/chosen": -584.7938842773438, "logps/rejected": -497.08416748046875, "loss": 0.4538, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9138052463531494, "rewards/margins": 1.2024463415145874, "rewards/rejected": -3.1162517070770264, "step": 9830 }, { "epoch": 1.27, "learning_rate": 3.2031175289279903e-07, "logits/chosen": -2.712559938430786, "logits/rejected": -2.649782180786133, "logps/chosen": -531.6632080078125, "logps/rejected": -463.66876220703125, "loss": 0.5361, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1909968852996826, "rewards/margins": 0.7706430554389954, "rewards/rejected": -2.961639881134033, "step": 9840 }, { "epoch": 1.27, "learning_rate": 3.2007267858850526e-07, "logits/chosen": -2.7236621379852295, "logits/rejected": -2.6199772357940674, "logps/chosen": -523.21728515625, "logps/rejected": -425.61187744140625, "loss": 0.6223, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.154067039489746, "rewards/margins": 0.6573672294616699, "rewards/rejected": -2.811434268951416, "step": 9850 }, { "epoch": 1.27, "learning_rate": 3.198336042842115e-07, "logits/chosen": -2.7226505279541016, "logits/rejected": -2.712618350982666, "logps/chosen": -569.1441650390625, "logps/rejected": -528.0308227539062, "loss": 0.7984, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.485469341278076, "rewards/margins": 0.2371480017900467, "rewards/rejected": -2.7226173877716064, "step": 9860 }, { "epoch": 1.27, "learning_rate": 3.1959452997991774e-07, "logits/chosen": -2.656946897506714, "logits/rejected": -2.569685697555542, "logps/chosen": -557.3668212890625, "logps/rejected": -470.60491943359375, "loss": 0.6107, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2463440895080566, "rewards/margins": 0.8239313364028931, "rewards/rejected": -3.07027530670166, "step": 9870 }, { "epoch": 1.28, "learning_rate": 3.1935545567562397e-07, "logits/chosen": -2.6833903789520264, "logits/rejected": -2.6236863136291504, "logps/chosen": -547.2039794921875, "logps/rejected": -463.2782287597656, "loss": 0.6599, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.232163667678833, "rewards/margins": 0.5967535376548767, "rewards/rejected": -2.8289170265197754, "step": 9880 }, { "epoch": 1.28, "learning_rate": 3.1911638137133015e-07, "logits/chosen": -2.6396536827087402, "logits/rejected": -2.595259189605713, "logps/chosen": -569.897216796875, "logps/rejected": -457.96502685546875, "loss": 0.6599, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0468544960021973, "rewards/margins": 0.7000446915626526, "rewards/rejected": -2.746899366378784, "step": 9890 }, { "epoch": 1.28, "learning_rate": 3.188773070670364e-07, "logits/chosen": -2.709758758544922, "logits/rejected": -2.5768091678619385, "logps/chosen": -614.1597900390625, "logps/rejected": -427.79742431640625, "loss": 0.6129, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1472008228302, "rewards/margins": 0.8317820429801941, "rewards/rejected": -2.978982448577881, "step": 9900 }, { "epoch": 1.28, "learning_rate": 3.186382327627427e-07, "logits/chosen": -2.7397725582122803, "logits/rejected": -2.5060982704162598, "logps/chosen": -571.8486938476562, "logps/rejected": -369.94287109375, "loss": 0.6595, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.244694232940674, "rewards/margins": 0.6633360385894775, "rewards/rejected": -2.9080300331115723, "step": 9910 }, { "epoch": 1.28, "learning_rate": 3.183991584584489e-07, "logits/chosen": -2.7950005531311035, "logits/rejected": -2.699486017227173, "logps/chosen": -531.2218627929688, "logps/rejected": -433.81781005859375, "loss": 0.6152, "rewards/accuracies": 0.625, "rewards/chosen": -1.9368892908096313, "rewards/margins": 0.7390422821044922, "rewards/rejected": -2.675931453704834, "step": 9920 }, { "epoch": 1.28, "learning_rate": 3.181600841541551e-07, "logits/chosen": -2.6077630519866943, "logits/rejected": -2.5336122512817383, "logps/chosen": -605.49267578125, "logps/rejected": -450.9217224121094, "loss": 0.6176, "rewards/accuracies": 0.6875, "rewards/chosen": -2.02937650680542, "rewards/margins": 0.8662470579147339, "rewards/rejected": -2.8956236839294434, "step": 9930 }, { "epoch": 1.28, "learning_rate": 3.1792100984986133e-07, "logits/chosen": -2.763308048248291, "logits/rejected": -2.631863832473755, "logps/chosen": -649.1085205078125, "logps/rejected": -450.98065185546875, "loss": 0.5862, "rewards/accuracies": 0.6875, "rewards/chosen": -2.130091905593872, "rewards/margins": 0.833636462688446, "rewards/rejected": -2.963728427886963, "step": 9940 }, { "epoch": 1.28, "learning_rate": 3.1768193554556757e-07, "logits/chosen": -2.8156745433807373, "logits/rejected": -2.624336004257202, "logps/chosen": -540.9461669921875, "logps/rejected": -402.19586181640625, "loss": 0.6668, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0573184490203857, "rewards/margins": 0.6373929381370544, "rewards/rejected": -2.694711446762085, "step": 9950 }, { "epoch": 1.29, "learning_rate": 3.174428612412738e-07, "logits/chosen": -2.6854846477508545, "logits/rejected": -2.5827956199645996, "logps/chosen": -491.09033203125, "logps/rejected": -385.75347900390625, "loss": 0.7313, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1934821605682373, "rewards/margins": 0.37395498156547546, "rewards/rejected": -2.567437171936035, "step": 9960 }, { "epoch": 1.29, "learning_rate": 3.1720378693698004e-07, "logits/chosen": -2.6296286582946777, "logits/rejected": -2.5168895721435547, "logps/chosen": -529.33447265625, "logps/rejected": -447.7083435058594, "loss": 0.6827, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.129586696624756, "rewards/margins": 0.5520305633544922, "rewards/rejected": -2.681617259979248, "step": 9970 }, { "epoch": 1.29, "learning_rate": 3.169647126326862e-07, "logits/chosen": -2.719338893890381, "logits/rejected": -2.533857583999634, "logps/chosen": -638.770263671875, "logps/rejected": -510.25494384765625, "loss": 0.5654, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.087991237640381, "rewards/margins": 0.8503351211547852, "rewards/rejected": -2.938326358795166, "step": 9980 }, { "epoch": 1.29, "learning_rate": 3.1672563832839246e-07, "logits/chosen": -2.7548184394836426, "logits/rejected": -2.6037192344665527, "logps/chosen": -575.9880981445312, "logps/rejected": -491.7601623535156, "loss": 0.6368, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.209806203842163, "rewards/margins": 0.6302379369735718, "rewards/rejected": -2.8400442600250244, "step": 9990 }, { "epoch": 1.29, "learning_rate": 3.164865640240987e-07, "logits/chosen": -2.643841505050659, "logits/rejected": -2.5675501823425293, "logps/chosen": -449.1241760253906, "logps/rejected": -365.3070373535156, "loss": 0.5921, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.121131420135498, "rewards/margins": 0.6771448850631714, "rewards/rejected": -2.79827618598938, "step": 10000 }, { "epoch": 1.29, "eval_logits/chosen": -3.053816795349121, "eval_logits/rejected": -2.990481376647949, "eval_logps/chosen": -541.5154418945312, "eval_logps/rejected": -421.04156494140625, "eval_loss": 0.6201235055923462, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.9695709943771362, "eval_rewards/margins": 1.0417231321334839, "eval_rewards/rejected": -2.01129412651062, "eval_runtime": 278.1831, "eval_samples_per_second": 7.19, "eval_steps_per_second": 3.595, "step": 10000 }, { "epoch": 1.29, "learning_rate": 3.1624748971980493e-07, "logits/chosen": -2.7624897956848145, "logits/rejected": -2.6279239654541016, "logps/chosen": -676.6697387695312, "logps/rejected": -539.3289794921875, "loss": 0.6542, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1224255561828613, "rewards/margins": 0.7078044414520264, "rewards/rejected": -2.8302295207977295, "step": 10010 }, { "epoch": 1.29, "learning_rate": 3.1600841541551117e-07, "logits/chosen": -2.746706962585449, "logits/rejected": -2.6854496002197266, "logps/chosen": -596.2918701171875, "logps/rejected": -484.6265563964844, "loss": 0.6903, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.3214240074157715, "rewards/margins": 0.5043527483940125, "rewards/rejected": -2.8257768154144287, "step": 10020 }, { "epoch": 1.29, "learning_rate": 3.1576934111121735e-07, "logits/chosen": -2.83534574508667, "logits/rejected": -2.759265422821045, "logps/chosen": -530.4229736328125, "logps/rejected": -418.33343505859375, "loss": 0.6714, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.133078098297119, "rewards/margins": 0.4180684983730316, "rewards/rejected": -2.5511467456817627, "step": 10030 }, { "epoch": 1.3, "learning_rate": 3.155302668069236e-07, "logits/chosen": -2.758915662765503, "logits/rejected": -2.7129197120666504, "logps/chosen": -473.7384338378906, "logps/rejected": -368.53564453125, "loss": 0.5912, "rewards/accuracies": 0.6875, "rewards/chosen": -2.090153932571411, "rewards/margins": 0.6507670283317566, "rewards/rejected": -2.7409212589263916, "step": 10040 }, { "epoch": 1.3, "learning_rate": 3.152911925026298e-07, "logits/chosen": -2.721925973892212, "logits/rejected": -2.642956256866455, "logps/chosen": -497.6317443847656, "logps/rejected": -449.35992431640625, "loss": 0.6622, "rewards/accuracies": 0.625, "rewards/chosen": -2.1382555961608887, "rewards/margins": 0.6268779039382935, "rewards/rejected": -2.7651336193084717, "step": 10050 }, { "epoch": 1.3, "learning_rate": 3.1505211819833606e-07, "logits/chosen": -2.762460231781006, "logits/rejected": -2.6736905574798584, "logps/chosen": -473.16107177734375, "logps/rejected": -375.0450744628906, "loss": 0.6537, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1965432167053223, "rewards/margins": 0.5383942723274231, "rewards/rejected": -2.7349371910095215, "step": 10060 }, { "epoch": 1.3, "learning_rate": 3.1481304389404224e-07, "logits/chosen": -2.690469741821289, "logits/rejected": -2.6076226234436035, "logps/chosen": -541.044677734375, "logps/rejected": -423.04449462890625, "loss": 0.7603, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3428282737731934, "rewards/margins": 0.5049716830253601, "rewards/rejected": -2.8478000164031982, "step": 10070 }, { "epoch": 1.3, "learning_rate": 3.145739695897485e-07, "logits/chosen": -2.678448438644409, "logits/rejected": -2.6643810272216797, "logps/chosen": -386.8894958496094, "logps/rejected": -358.72515869140625, "loss": 0.6163, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1258459091186523, "rewards/margins": 0.5675450563430786, "rewards/rejected": -2.6933908462524414, "step": 10080 }, { "epoch": 1.3, "learning_rate": 3.143348952854547e-07, "logits/chosen": -2.754441499710083, "logits/rejected": -2.6892168521881104, "logps/chosen": -415.3655700683594, "logps/rejected": -393.566162109375, "loss": 0.6162, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1535115242004395, "rewards/margins": 0.5386825799942017, "rewards/rejected": -2.6921942234039307, "step": 10090 }, { "epoch": 1.3, "learning_rate": 3.1409582098116095e-07, "logits/chosen": -2.72291898727417, "logits/rejected": -2.6887459754943848, "logps/chosen": -487.4955139160156, "logps/rejected": -419.38201904296875, "loss": 0.5, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9749311208724976, "rewards/margins": 1.0213420391082764, "rewards/rejected": -2.9962730407714844, "step": 10100 }, { "epoch": 1.31, "learning_rate": 3.138567466768672e-07, "logits/chosen": -2.736111879348755, "logits/rejected": -2.643156051635742, "logps/chosen": -523.560546875, "logps/rejected": -422.7015075683594, "loss": 0.6366, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.202235460281372, "rewards/margins": 0.7062669396400452, "rewards/rejected": -2.9085021018981934, "step": 10110 }, { "epoch": 1.31, "learning_rate": 3.1361767237257337e-07, "logits/chosen": -2.8174920082092285, "logits/rejected": -2.5882086753845215, "logps/chosen": -591.3258666992188, "logps/rejected": -441.00250244140625, "loss": 0.6904, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.4145781993865967, "rewards/margins": 0.5760143995285034, "rewards/rejected": -2.9905924797058105, "step": 10120 }, { "epoch": 1.31, "learning_rate": 3.133785980682796e-07, "logits/chosen": -2.6497766971588135, "logits/rejected": -2.584366798400879, "logps/chosen": -521.6788940429688, "logps/rejected": -413.5623474121094, "loss": 0.6041, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.074284553527832, "rewards/margins": 0.8720687031745911, "rewards/rejected": -2.9463531970977783, "step": 10130 }, { "epoch": 1.31, "learning_rate": 3.1313952376398584e-07, "logits/chosen": -2.690887928009033, "logits/rejected": -2.598468780517578, "logps/chosen": -463.8199157714844, "logps/rejected": -410.5873107910156, "loss": 0.6012, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.157554864883423, "rewards/margins": 0.578691840171814, "rewards/rejected": -2.7362473011016846, "step": 10140 }, { "epoch": 1.31, "learning_rate": 3.129004494596921e-07, "logits/chosen": -2.773575782775879, "logits/rejected": -2.5145413875579834, "logps/chosen": -550.9229736328125, "logps/rejected": -345.9684753417969, "loss": 0.599, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2188587188720703, "rewards/margins": 0.6470907926559448, "rewards/rejected": -2.8659493923187256, "step": 10150 }, { "epoch": 1.31, "learning_rate": 3.126613751553983e-07, "logits/chosen": -2.7326884269714355, "logits/rejected": -2.565549373626709, "logps/chosen": -502.4989318847656, "logps/rejected": -406.94610595703125, "loss": 0.692, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.230393886566162, "rewards/margins": 0.6710303425788879, "rewards/rejected": -2.9014244079589844, "step": 10160 }, { "epoch": 1.31, "learning_rate": 3.124223008511045e-07, "logits/chosen": -2.7730050086975098, "logits/rejected": -2.636734962463379, "logps/chosen": -544.1104736328125, "logps/rejected": -386.14031982421875, "loss": 0.5965, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.186814308166504, "rewards/margins": 0.709342896938324, "rewards/rejected": -2.8961572647094727, "step": 10170 }, { "epoch": 1.31, "learning_rate": 3.1218322654681073e-07, "logits/chosen": -2.662792682647705, "logits/rejected": -2.511443614959717, "logps/chosen": -478.7613220214844, "logps/rejected": -385.28607177734375, "loss": 0.6261, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.031578540802002, "rewards/margins": 0.6287914514541626, "rewards/rejected": -2.660370111465454, "step": 10180 }, { "epoch": 1.32, "learning_rate": 3.1194415224251697e-07, "logits/chosen": -2.627713441848755, "logits/rejected": -2.5748000144958496, "logps/chosen": -531.8638916015625, "logps/rejected": -382.9067077636719, "loss": 0.7943, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.2156214714050293, "rewards/margins": 0.2895192503929138, "rewards/rejected": -2.505140542984009, "step": 10190 }, { "epoch": 1.32, "learning_rate": 3.117050779382232e-07, "logits/chosen": -2.7746469974517822, "logits/rejected": -2.7179150581359863, "logps/chosen": -541.4010009765625, "logps/rejected": -400.1529541015625, "loss": 0.6625, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0009264945983887, "rewards/margins": 0.7140669822692871, "rewards/rejected": -2.714993715286255, "step": 10200 }, { "epoch": 1.32, "learning_rate": 3.114660036339294e-07, "logits/chosen": -2.736293077468872, "logits/rejected": -2.600982427597046, "logps/chosen": -610.0956420898438, "logps/rejected": -464.8710021972656, "loss": 0.7561, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.5252251625061035, "rewards/margins": 0.2747662663459778, "rewards/rejected": -2.7999911308288574, "step": 10210 }, { "epoch": 1.32, "learning_rate": 3.112269293296356e-07, "logits/chosen": -2.7739977836608887, "logits/rejected": -2.5401084423065186, "logps/chosen": -589.1558837890625, "logps/rejected": -396.37200927734375, "loss": 0.8295, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.4176878929138184, "rewards/margins": 0.2760433554649353, "rewards/rejected": -2.6937310695648193, "step": 10220 }, { "epoch": 1.32, "learning_rate": 3.1098785502534186e-07, "logits/chosen": -2.65443754196167, "logits/rejected": -2.6124677658081055, "logps/chosen": -528.66015625, "logps/rejected": -450.4681091308594, "loss": 0.5996, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9966280460357666, "rewards/margins": 0.8149410486221313, "rewards/rejected": -2.8115687370300293, "step": 10230 }, { "epoch": 1.32, "learning_rate": 3.107487807210481e-07, "logits/chosen": -2.711016893386841, "logits/rejected": -2.6515729427337646, "logps/chosen": -540.6109619140625, "logps/rejected": -407.7076416015625, "loss": 0.6095, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9697033166885376, "rewards/margins": 0.9809345006942749, "rewards/rejected": -2.9506375789642334, "step": 10240 }, { "epoch": 1.32, "learning_rate": 3.1050970641675433e-07, "logits/chosen": -2.7089009284973145, "logits/rejected": -2.5575947761535645, "logps/chosen": -588.1746826171875, "logps/rejected": -451.74554443359375, "loss": 0.6085, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.115772247314453, "rewards/margins": 0.6505261659622192, "rewards/rejected": -2.766298294067383, "step": 10250 }, { "epoch": 1.32, "learning_rate": 3.102706321124605e-07, "logits/chosen": -2.7325375080108643, "logits/rejected": -2.6823999881744385, "logps/chosen": -496.67230224609375, "logps/rejected": -437.68341064453125, "loss": 0.632, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0494606494903564, "rewards/margins": 0.6825860142707825, "rewards/rejected": -2.732046604156494, "step": 10260 }, { "epoch": 1.33, "learning_rate": 3.1003155780816675e-07, "logits/chosen": -2.71950101852417, "logits/rejected": -2.5174355506896973, "logps/chosen": -647.4984130859375, "logps/rejected": -414.5377502441406, "loss": 0.6506, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3513944149017334, "rewards/margins": 0.6826199293136597, "rewards/rejected": -3.0340147018432617, "step": 10270 }, { "epoch": 1.33, "learning_rate": 3.09792483503873e-07, "logits/chosen": -2.657820224761963, "logits/rejected": -2.6046993732452393, "logps/chosen": -493.2801208496094, "logps/rejected": -439.2547302246094, "loss": 0.598, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0008914470672607, "rewards/margins": 0.651240348815918, "rewards/rejected": -2.6521317958831787, "step": 10280 }, { "epoch": 1.33, "learning_rate": 3.095534091995792e-07, "logits/chosen": -2.7148590087890625, "logits/rejected": -2.565324544906616, "logps/chosen": -603.841552734375, "logps/rejected": -413.0560607910156, "loss": 0.6334, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.439845323562622, "rewards/margins": 0.5489651560783386, "rewards/rejected": -2.9888105392456055, "step": 10290 }, { "epoch": 1.33, "learning_rate": 3.0931433489528546e-07, "logits/chosen": -2.653470993041992, "logits/rejected": -2.524681568145752, "logps/chosen": -547.7688598632812, "logps/rejected": -477.45050048828125, "loss": 0.6551, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.306295394897461, "rewards/margins": 0.4740130305290222, "rewards/rejected": -2.780308723449707, "step": 10300 }, { "epoch": 1.33, "learning_rate": 3.0907526059099164e-07, "logits/chosen": -2.7368381023406982, "logits/rejected": -2.568673610687256, "logps/chosen": -493.3605041503906, "logps/rejected": -399.4215393066406, "loss": 0.5845, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2872161865234375, "rewards/margins": 0.7100191116333008, "rewards/rejected": -2.9972355365753174, "step": 10310 }, { "epoch": 1.33, "learning_rate": 3.088361862866979e-07, "logits/chosen": -2.8279128074645996, "logits/rejected": -2.576908826828003, "logps/chosen": -577.4457397460938, "logps/rejected": -412.4205017089844, "loss": 0.5318, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.193276882171631, "rewards/margins": 0.9867523312568665, "rewards/rejected": -3.1800293922424316, "step": 10320 }, { "epoch": 1.33, "learning_rate": 3.085971119824041e-07, "logits/chosen": -2.6947720050811768, "logits/rejected": -2.5639681816101074, "logps/chosen": -520.9155883789062, "logps/rejected": -434.5235900878906, "loss": 0.6017, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.8997156620025635, "rewards/margins": 0.8187553286552429, "rewards/rejected": -2.718471050262451, "step": 10330 }, { "epoch": 1.33, "learning_rate": 3.0835803767811035e-07, "logits/chosen": -2.693765163421631, "logits/rejected": -2.590970039367676, "logps/chosen": -492.31103515625, "logps/rejected": -373.7989196777344, "loss": 0.626, "rewards/accuracies": 0.625, "rewards/chosen": -2.118722677230835, "rewards/margins": 0.5344741940498352, "rewards/rejected": -2.6531968116760254, "step": 10340 }, { "epoch": 1.34, "learning_rate": 3.0811896337381653e-07, "logits/chosen": -2.723686933517456, "logits/rejected": -2.5741991996765137, "logps/chosen": -577.5592041015625, "logps/rejected": -407.60650634765625, "loss": 0.547, "rewards/accuracies": 0.75, "rewards/chosen": -2.1274020671844482, "rewards/margins": 0.8658388257026672, "rewards/rejected": -2.9932408332824707, "step": 10350 }, { "epoch": 1.34, "learning_rate": 3.0787988906952277e-07, "logits/chosen": -2.797607421875, "logits/rejected": -2.690403461456299, "logps/chosen": -482.43121337890625, "logps/rejected": -379.7496032714844, "loss": 0.7521, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3570587635040283, "rewards/margins": 0.3162350058555603, "rewards/rejected": -2.6732938289642334, "step": 10360 }, { "epoch": 1.34, "learning_rate": 3.07640814765229e-07, "logits/chosen": -2.704955816268921, "logits/rejected": -2.5119309425354004, "logps/chosen": -615.12158203125, "logps/rejected": -411.75909423828125, "loss": 0.6964, "rewards/accuracies": 0.625, "rewards/chosen": -2.4788565635681152, "rewards/margins": 0.531213104724884, "rewards/rejected": -3.0100693702697754, "step": 10370 }, { "epoch": 1.34, "learning_rate": 3.0740174046093524e-07, "logits/chosen": -2.7056219577789307, "logits/rejected": -2.6637730598449707, "logps/chosen": -643.3690795898438, "logps/rejected": -563.8370361328125, "loss": 0.5324, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2350316047668457, "rewards/margins": 1.0305255651474, "rewards/rejected": -3.265556812286377, "step": 10380 }, { "epoch": 1.34, "learning_rate": 3.071626661566415e-07, "logits/chosen": -2.69566011428833, "logits/rejected": -2.547212600708008, "logps/chosen": -589.8518676757812, "logps/rejected": -422.83880615234375, "loss": 0.521, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8516613245010376, "rewards/margins": 1.045674443244934, "rewards/rejected": -2.8973355293273926, "step": 10390 }, { "epoch": 1.34, "learning_rate": 3.0692359185234766e-07, "logits/chosen": -2.720262050628662, "logits/rejected": -2.6369271278381348, "logps/chosen": -593.18017578125, "logps/rejected": -457.751953125, "loss": 0.5074, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9869701862335205, "rewards/margins": 1.0158064365386963, "rewards/rejected": -3.0027763843536377, "step": 10400 }, { "epoch": 1.34, "learning_rate": 3.066845175480539e-07, "logits/chosen": -2.615575075149536, "logits/rejected": -2.501734495162964, "logps/chosen": -543.7977294921875, "logps/rejected": -424.1134338378906, "loss": 0.5545, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.095268726348877, "rewards/margins": 0.8240516781806946, "rewards/rejected": -2.9193203449249268, "step": 10410 }, { "epoch": 1.35, "learning_rate": 3.0644544324376013e-07, "logits/chosen": -2.737541675567627, "logits/rejected": -2.725250720977783, "logps/chosen": -539.9990234375, "logps/rejected": -464.6868591308594, "loss": 0.5248, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.917601227760315, "rewards/margins": 1.016849160194397, "rewards/rejected": -2.934450387954712, "step": 10420 }, { "epoch": 1.35, "learning_rate": 3.0620636893946636e-07, "logits/chosen": -2.7055883407592773, "logits/rejected": -2.5937256813049316, "logps/chosen": -465.0992736816406, "logps/rejected": -374.6890563964844, "loss": 0.6369, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0429625511169434, "rewards/margins": 0.6005575060844421, "rewards/rejected": -2.6435201168060303, "step": 10430 }, { "epoch": 1.35, "learning_rate": 3.059672946351726e-07, "logits/chosen": -2.684584140777588, "logits/rejected": -2.601071834564209, "logps/chosen": -583.7686767578125, "logps/rejected": -482.2657775878906, "loss": 0.6917, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2534735202789307, "rewards/margins": 0.7246580123901367, "rewards/rejected": -2.9781317710876465, "step": 10440 }, { "epoch": 1.35, "learning_rate": 3.057282203308788e-07, "logits/chosen": -2.743751049041748, "logits/rejected": -2.645473003387451, "logps/chosen": -538.7210693359375, "logps/rejected": -401.7384948730469, "loss": 0.5505, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2239060401916504, "rewards/margins": 0.7271556258201599, "rewards/rejected": -2.951061725616455, "step": 10450 }, { "epoch": 1.35, "learning_rate": 3.05489146026585e-07, "logits/chosen": -2.788482666015625, "logits/rejected": -2.6810214519500732, "logps/chosen": -591.0159912109375, "logps/rejected": -454.6720275878906, "loss": 0.4921, "rewards/accuracies": 0.75, "rewards/chosen": -2.173088788986206, "rewards/margins": 1.0209341049194336, "rewards/rejected": -3.1940231323242188, "step": 10460 }, { "epoch": 1.35, "learning_rate": 3.0525007172229126e-07, "logits/chosen": -2.7154977321624756, "logits/rejected": -2.434347629547119, "logps/chosen": -549.8165283203125, "logps/rejected": -319.9741516113281, "loss": 0.6725, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.137228488922119, "rewards/margins": 0.4663693904876709, "rewards/rejected": -2.603597640991211, "step": 10470 }, { "epoch": 1.35, "learning_rate": 3.050109974179975e-07, "logits/chosen": -2.855210781097412, "logits/rejected": -2.654160976409912, "logps/chosen": -635.1910400390625, "logps/rejected": -443.91943359375, "loss": 0.6072, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1489319801330566, "rewards/margins": 0.881773829460144, "rewards/rejected": -3.0307059288024902, "step": 10480 }, { "epoch": 1.35, "learning_rate": 3.047719231137037e-07, "logits/chosen": -2.7116103172302246, "logits/rejected": -2.560417890548706, "logps/chosen": -558.283203125, "logps/rejected": -388.75042724609375, "loss": 0.5152, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9846107959747314, "rewards/margins": 0.9460541009902954, "rewards/rejected": -2.9306650161743164, "step": 10490 }, { "epoch": 1.36, "learning_rate": 3.0453284880940996e-07, "logits/chosen": -2.618685245513916, "logits/rejected": -2.549915075302124, "logps/chosen": -542.04541015625, "logps/rejected": -466.70721435546875, "loss": 0.6212, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1257236003875732, "rewards/margins": 0.7524712085723877, "rewards/rejected": -2.878194808959961, "step": 10500 }, { "epoch": 1.36, "learning_rate": 3.042937745051162e-07, "logits/chosen": -2.684706687927246, "logits/rejected": -2.6089723110198975, "logps/chosen": -549.6314697265625, "logps/rejected": -435.5603942871094, "loss": 0.618, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0412943363189697, "rewards/margins": 0.693703293800354, "rewards/rejected": -2.734997510910034, "step": 10510 }, { "epoch": 1.36, "learning_rate": 3.0405470020082243e-07, "logits/chosen": -2.6646504402160645, "logits/rejected": -2.55857515335083, "logps/chosen": -492.1932678222656, "logps/rejected": -397.1830139160156, "loss": 0.6195, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1146953105926514, "rewards/margins": 0.7341774702072144, "rewards/rejected": -2.8488729000091553, "step": 10520 }, { "epoch": 1.36, "learning_rate": 3.0381562589652867e-07, "logits/chosen": -2.7234880924224854, "logits/rejected": -2.5673398971557617, "logps/chosen": -596.4129638671875, "logps/rejected": -485.886474609375, "loss": 0.6561, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9006855487823486, "rewards/margins": 0.7315076589584351, "rewards/rejected": -2.632193088531494, "step": 10530 }, { "epoch": 1.36, "learning_rate": 3.0357655159223485e-07, "logits/chosen": -2.6943092346191406, "logits/rejected": -2.5618162155151367, "logps/chosen": -550.35498046875, "logps/rejected": -366.1949768066406, "loss": 0.6092, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.087996482849121, "rewards/margins": 0.7218301296234131, "rewards/rejected": -2.8098270893096924, "step": 10540 }, { "epoch": 1.36, "learning_rate": 3.033374772879411e-07, "logits/chosen": -2.7945382595062256, "logits/rejected": -2.6078972816467285, "logps/chosen": -586.1735229492188, "logps/rejected": -429.45050048828125, "loss": 0.5777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2220091819763184, "rewards/margins": 0.8514703512191772, "rewards/rejected": -3.0734798908233643, "step": 10550 }, { "epoch": 1.36, "learning_rate": 3.030984029836473e-07, "logits/chosen": -2.716857671737671, "logits/rejected": -2.7100443840026855, "logps/chosen": -464.3146057128906, "logps/rejected": -407.0299987792969, "loss": 0.534, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9629037380218506, "rewards/margins": 0.8245373964309692, "rewards/rejected": -2.7874410152435303, "step": 10560 }, { "epoch": 1.36, "learning_rate": 3.0285932867935356e-07, "logits/chosen": -2.6882541179656982, "logits/rejected": -2.4634857177734375, "logps/chosen": -634.8557739257812, "logps/rejected": -466.50067138671875, "loss": 0.7059, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3789031505584717, "rewards/margins": 0.6259651184082031, "rewards/rejected": -3.004868507385254, "step": 10570 }, { "epoch": 1.37, "learning_rate": 3.026202543750598e-07, "logits/chosen": -2.6627869606018066, "logits/rejected": -2.5285048484802246, "logps/chosen": -562.4306640625, "logps/rejected": -453.9371032714844, "loss": 0.6209, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1898226737976074, "rewards/margins": 0.7496984601020813, "rewards/rejected": -2.939521312713623, "step": 10580 }, { "epoch": 1.37, "learning_rate": 3.02381180070766e-07, "logits/chosen": -2.7695839405059814, "logits/rejected": -2.667783260345459, "logps/chosen": -497.55938720703125, "logps/rejected": -374.46270751953125, "loss": 0.664, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.129323720932007, "rewards/margins": 0.6752163767814636, "rewards/rejected": -2.8045403957366943, "step": 10590 }, { "epoch": 1.37, "learning_rate": 3.021421057664722e-07, "logits/chosen": -2.727307081222534, "logits/rejected": -2.598890542984009, "logps/chosen": -636.3381958007812, "logps/rejected": -427.74322509765625, "loss": 0.5492, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0144877433776855, "rewards/margins": 1.1002308130264282, "rewards/rejected": -3.114718437194824, "step": 10600 }, { "epoch": 1.37, "learning_rate": 3.0190303146217845e-07, "logits/chosen": -2.7923312187194824, "logits/rejected": -2.75750994682312, "logps/chosen": -579.3289794921875, "logps/rejected": -473.87744140625, "loss": 0.6454, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2579054832458496, "rewards/margins": 0.6967266798019409, "rewards/rejected": -2.95463228225708, "step": 10610 }, { "epoch": 1.37, "learning_rate": 3.016639571578847e-07, "logits/chosen": -2.7674202919006348, "logits/rejected": -2.6435458660125732, "logps/chosen": -582.0269165039062, "logps/rejected": -482.5614318847656, "loss": 0.5142, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0000545978546143, "rewards/margins": 1.1204521656036377, "rewards/rejected": -3.120506763458252, "step": 10620 }, { "epoch": 1.37, "learning_rate": 3.0142488285359087e-07, "logits/chosen": -2.7336456775665283, "logits/rejected": -2.573914051055908, "logps/chosen": -592.2950439453125, "logps/rejected": -371.70806884765625, "loss": 0.6354, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2569777965545654, "rewards/margins": 0.6827982664108276, "rewards/rejected": -2.9397764205932617, "step": 10630 }, { "epoch": 1.37, "learning_rate": 3.011858085492971e-07, "logits/chosen": -2.795811414718628, "logits/rejected": -2.6973774433135986, "logps/chosen": -498.2134704589844, "logps/rejected": -390.2891540527344, "loss": 0.5956, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.211914539337158, "rewards/margins": 0.8375340700149536, "rewards/rejected": -3.0494487285614014, "step": 10640 }, { "epoch": 1.37, "learning_rate": 3.0094673424500334e-07, "logits/chosen": -2.6678719520568848, "logits/rejected": -2.5347046852111816, "logps/chosen": -526.9495849609375, "logps/rejected": -405.8002014160156, "loss": 0.6044, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1543402671813965, "rewards/margins": 0.872983455657959, "rewards/rejected": -3.0273237228393555, "step": 10650 }, { "epoch": 1.38, "learning_rate": 3.007076599407096e-07, "logits/chosen": -2.7525148391723633, "logits/rejected": -2.5101118087768555, "logps/chosen": -553.0772705078125, "logps/rejected": -362.0571594238281, "loss": 0.4229, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9106130599975586, "rewards/margins": 1.259324073791504, "rewards/rejected": -3.1699376106262207, "step": 10660 }, { "epoch": 1.38, "learning_rate": 3.004685856364158e-07, "logits/chosen": -2.54938006401062, "logits/rejected": -2.4424707889556885, "logps/chosen": -509.674072265625, "logps/rejected": -405.89569091796875, "loss": 0.584, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.18745756149292, "rewards/margins": 0.7491198182106018, "rewards/rejected": -2.936577320098877, "step": 10670 }, { "epoch": 1.38, "learning_rate": 3.00229511332122e-07, "logits/chosen": -2.664313793182373, "logits/rejected": -2.5956828594207764, "logps/chosen": -617.6326904296875, "logps/rejected": -480.4095764160156, "loss": 0.5052, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8977289199829102, "rewards/margins": 1.0851078033447266, "rewards/rejected": -2.9828364849090576, "step": 10680 }, { "epoch": 1.38, "learning_rate": 2.9999043702782823e-07, "logits/chosen": -2.614215612411499, "logits/rejected": -2.5714616775512695, "logps/chosen": -547.3966674804688, "logps/rejected": -441.154296875, "loss": 0.6079, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.049140453338623, "rewards/margins": 0.700585663318634, "rewards/rejected": -2.7497262954711914, "step": 10690 }, { "epoch": 1.38, "learning_rate": 2.9975136272353447e-07, "logits/chosen": -2.6272215843200684, "logits/rejected": -2.4533190727233887, "logps/chosen": -589.2410278320312, "logps/rejected": -404.0630798339844, "loss": 0.6559, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.137296438217163, "rewards/margins": 0.6436055898666382, "rewards/rejected": -2.7809019088745117, "step": 10700 }, { "epoch": 1.38, "learning_rate": 2.995122884192407e-07, "logits/chosen": -2.6551003456115723, "logits/rejected": -2.566251039505005, "logps/chosen": -538.2159423828125, "logps/rejected": -469.29193115234375, "loss": 0.5797, "rewards/accuracies": 0.75, "rewards/chosen": -2.1713011264801025, "rewards/margins": 0.8146902918815613, "rewards/rejected": -2.9859917163848877, "step": 10710 }, { "epoch": 1.38, "learning_rate": 2.9927321411494694e-07, "logits/chosen": -2.768150806427002, "logits/rejected": -2.610391139984131, "logps/chosen": -624.9105224609375, "logps/rejected": -494.5921325683594, "loss": 0.5811, "rewards/accuracies": 0.625, "rewards/chosen": -2.231703281402588, "rewards/margins": 1.0089836120605469, "rewards/rejected": -3.2406868934631348, "step": 10720 }, { "epoch": 1.39, "learning_rate": 2.990341398106531e-07, "logits/chosen": -2.5659542083740234, "logits/rejected": -2.5546767711639404, "logps/chosen": -459.66241455078125, "logps/rejected": -422.85235595703125, "loss": 0.5615, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9405063390731812, "rewards/margins": 0.8177895545959473, "rewards/rejected": -2.758296489715576, "step": 10730 }, { "epoch": 1.39, "learning_rate": 2.9879506550635936e-07, "logits/chosen": -2.5365774631500244, "logits/rejected": -2.484078884124756, "logps/chosen": -523.772216796875, "logps/rejected": -425.57696533203125, "loss": 0.6994, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4609920978546143, "rewards/margins": 0.4974699020385742, "rewards/rejected": -2.9584617614746094, "step": 10740 }, { "epoch": 1.39, "learning_rate": 2.985559912020656e-07, "logits/chosen": -2.7046215534210205, "logits/rejected": -2.553785800933838, "logps/chosen": -616.7454223632812, "logps/rejected": -415.82550048828125, "loss": 0.627, "rewards/accuracies": 0.625, "rewards/chosen": -2.4871933460235596, "rewards/margins": 0.7911552786827087, "rewards/rejected": -3.278348445892334, "step": 10750 }, { "epoch": 1.39, "learning_rate": 2.9831691689777183e-07, "logits/chosen": -2.838683605194092, "logits/rejected": -2.653499126434326, "logps/chosen": -559.8011474609375, "logps/rejected": -370.49053955078125, "loss": 0.5999, "rewards/accuracies": 0.625, "rewards/chosen": -2.029978036880493, "rewards/margins": 0.6561917066574097, "rewards/rejected": -2.686169385910034, "step": 10760 }, { "epoch": 1.39, "learning_rate": 2.98077842593478e-07, "logits/chosen": -2.589106798171997, "logits/rejected": -2.6049275398254395, "logps/chosen": -421.10015869140625, "logps/rejected": -391.82940673828125, "loss": 0.6176, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9897346496582031, "rewards/margins": 0.6140792965888977, "rewards/rejected": -2.603814125061035, "step": 10770 }, { "epoch": 1.39, "learning_rate": 2.9783876828918425e-07, "logits/chosen": -2.734149217605591, "logits/rejected": -2.6234397888183594, "logps/chosen": -517.3286743164062, "logps/rejected": -408.5875549316406, "loss": 0.5982, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2150206565856934, "rewards/margins": 0.647689700126648, "rewards/rejected": -2.862710475921631, "step": 10780 }, { "epoch": 1.39, "learning_rate": 2.975996939848905e-07, "logits/chosen": -2.7050788402557373, "logits/rejected": -2.6505534648895264, "logps/chosen": -438.5226135253906, "logps/rejected": -398.8965148925781, "loss": 0.6882, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.326911449432373, "rewards/margins": 0.4988720417022705, "rewards/rejected": -2.8257832527160645, "step": 10790 }, { "epoch": 1.39, "learning_rate": 2.973606196805967e-07, "logits/chosen": -2.683826446533203, "logits/rejected": -2.585042953491211, "logps/chosen": -586.9400634765625, "logps/rejected": -431.1474609375, "loss": 0.5506, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9953315258026123, "rewards/margins": 1.012374758720398, "rewards/rejected": -3.007706642150879, "step": 10800 }, { "epoch": 1.4, "learning_rate": 2.9712154537630296e-07, "logits/chosen": -2.6096079349517822, "logits/rejected": -2.4836621284484863, "logps/chosen": -613.5689697265625, "logps/rejected": -464.1520080566406, "loss": 0.5726, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2577037811279297, "rewards/margins": 0.8429608345031738, "rewards/rejected": -3.1006646156311035, "step": 10810 }, { "epoch": 1.4, "learning_rate": 2.9688247107200914e-07, "logits/chosen": -2.6686089038848877, "logits/rejected": -2.5501937866210938, "logps/chosen": -615.6456909179688, "logps/rejected": -440.4280700683594, "loss": 0.6239, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0816829204559326, "rewards/margins": 0.8807071447372437, "rewards/rejected": -2.962390184402466, "step": 10820 }, { "epoch": 1.4, "learning_rate": 2.966433967677154e-07, "logits/chosen": -2.683460235595703, "logits/rejected": -2.426356792449951, "logps/chosen": -597.2648315429688, "logps/rejected": -382.490478515625, "loss": 0.6626, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2535130977630615, "rewards/margins": 0.7782970070838928, "rewards/rejected": -3.0318100452423096, "step": 10830 }, { "epoch": 1.4, "learning_rate": 2.964043224634216e-07, "logits/chosen": -2.641173839569092, "logits/rejected": -2.599006175994873, "logps/chosen": -555.5729370117188, "logps/rejected": -481.81475830078125, "loss": 0.6092, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.151780843734741, "rewards/margins": 0.7072498202323914, "rewards/rejected": -2.8590307235717773, "step": 10840 }, { "epoch": 1.4, "learning_rate": 2.9616524815912785e-07, "logits/chosen": -2.63862943649292, "logits/rejected": -2.527844190597534, "logps/chosen": -572.0134887695312, "logps/rejected": -465.8799743652344, "loss": 0.5499, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0713181495666504, "rewards/margins": 0.7747901678085327, "rewards/rejected": -2.8461081981658936, "step": 10850 }, { "epoch": 1.4, "learning_rate": 2.959261738548341e-07, "logits/chosen": -2.798729419708252, "logits/rejected": -2.6394941806793213, "logps/chosen": -626.5701293945312, "logps/rejected": -470.9847106933594, "loss": 0.5579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.146097421646118, "rewards/margins": 0.9628686904907227, "rewards/rejected": -3.108966112136841, "step": 10860 }, { "epoch": 1.4, "learning_rate": 2.9568709955054027e-07, "logits/chosen": -2.841723918914795, "logits/rejected": -2.5957627296447754, "logps/chosen": -590.08056640625, "logps/rejected": -403.97674560546875, "loss": 0.5875, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.124901056289673, "rewards/margins": 0.7379071712493896, "rewards/rejected": -2.8628087043762207, "step": 10870 }, { "epoch": 1.4, "learning_rate": 2.954480252462465e-07, "logits/chosen": -2.7564778327941895, "logits/rejected": -2.6341850757598877, "logps/chosen": -589.0924682617188, "logps/rejected": -467.74517822265625, "loss": 0.5093, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9049780368804932, "rewards/margins": 1.2727857828140259, "rewards/rejected": -3.1777639389038086, "step": 10880 }, { "epoch": 1.41, "learning_rate": 2.9520895094195274e-07, "logits/chosen": -2.701427936553955, "logits/rejected": -2.5998101234436035, "logps/chosen": -648.6103515625, "logps/rejected": -488.45587158203125, "loss": 0.6831, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2446460723876953, "rewards/margins": 0.797760009765625, "rewards/rejected": -3.042405843734741, "step": 10890 }, { "epoch": 1.41, "learning_rate": 2.94969876637659e-07, "logits/chosen": -2.628077983856201, "logits/rejected": -2.5660746097564697, "logps/chosen": -590.0511474609375, "logps/rejected": -456.28863525390625, "loss": 0.7221, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.3506011962890625, "rewards/margins": 0.5067347884178162, "rewards/rejected": -2.8573360443115234, "step": 10900 }, { "epoch": 1.41, "learning_rate": 2.9473080233336516e-07, "logits/chosen": -2.7011075019836426, "logits/rejected": -2.5860378742218018, "logps/chosen": -613.5848388671875, "logps/rejected": -487.85760498046875, "loss": 0.6583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3222107887268066, "rewards/margins": 0.6859179139137268, "rewards/rejected": -3.0081286430358887, "step": 10910 }, { "epoch": 1.41, "learning_rate": 2.944917280290714e-07, "logits/chosen": -2.692751407623291, "logits/rejected": -2.6400318145751953, "logps/chosen": -584.7605590820312, "logps/rejected": -469.9977111816406, "loss": 0.7188, "rewards/accuracies": 0.625, "rewards/chosen": -2.597259759902954, "rewards/margins": 0.4090060591697693, "rewards/rejected": -3.006265640258789, "step": 10920 }, { "epoch": 1.41, "learning_rate": 2.9425265372477763e-07, "logits/chosen": -2.7313833236694336, "logits/rejected": -2.6432909965515137, "logps/chosen": -532.857421875, "logps/rejected": -443.9695739746094, "loss": 0.6964, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1344082355499268, "rewards/margins": 0.5116229057312012, "rewards/rejected": -2.646031141281128, "step": 10930 }, { "epoch": 1.41, "learning_rate": 2.9401357942048387e-07, "logits/chosen": -2.721365451812744, "logits/rejected": -2.670377731323242, "logps/chosen": -529.5444946289062, "logps/rejected": -451.90704345703125, "loss": 0.6173, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3352622985839844, "rewards/margins": 0.6421715021133423, "rewards/rejected": -2.977433919906616, "step": 10940 }, { "epoch": 1.41, "learning_rate": 2.937745051161901e-07, "logits/chosen": -2.794375419616699, "logits/rejected": -2.6217923164367676, "logps/chosen": -593.8672485351562, "logps/rejected": -446.8431091308594, "loss": 0.6259, "rewards/accuracies": 0.625, "rewards/chosen": -2.15386700630188, "rewards/margins": 0.8422731161117554, "rewards/rejected": -2.9961400032043457, "step": 10950 }, { "epoch": 1.41, "learning_rate": 2.935354308118963e-07, "logits/chosen": -2.720067024230957, "logits/rejected": -2.6199748516082764, "logps/chosen": -539.1050415039062, "logps/rejected": -433.47113037109375, "loss": 0.5044, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8749878406524658, "rewards/margins": 0.887503981590271, "rewards/rejected": -2.762491464614868, "step": 10960 }, { "epoch": 1.42, "learning_rate": 2.932963565076025e-07, "logits/chosen": -2.6739697456359863, "logits/rejected": -2.5997071266174316, "logps/chosen": -529.6044921875, "logps/rejected": -430.4444274902344, "loss": 0.6668, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1555428504943848, "rewards/margins": 0.5653549432754517, "rewards/rejected": -2.720897912979126, "step": 10970 }, { "epoch": 1.42, "learning_rate": 2.9305728220330876e-07, "logits/chosen": -2.6590707302093506, "logits/rejected": -2.508910655975342, "logps/chosen": -484.39337158203125, "logps/rejected": -359.7242736816406, "loss": 0.6672, "rewards/accuracies": 0.6875, "rewards/chosen": -2.20444917678833, "rewards/margins": 0.5352723002433777, "rewards/rejected": -2.7397215366363525, "step": 10980 }, { "epoch": 1.42, "learning_rate": 2.92818207899015e-07, "logits/chosen": -2.6422696113586426, "logits/rejected": -2.5211896896362305, "logps/chosen": -588.444091796875, "logps/rejected": -459.1827697753906, "loss": 0.6331, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.039494752883911, "rewards/margins": 0.6965292096138, "rewards/rejected": -2.7360241413116455, "step": 10990 }, { "epoch": 1.42, "learning_rate": 2.9257913359472123e-07, "logits/chosen": -2.591325044631958, "logits/rejected": -2.4341018199920654, "logps/chosen": -609.0697021484375, "logps/rejected": -404.6994323730469, "loss": 0.581, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9701364040374756, "rewards/margins": 0.8489642143249512, "rewards/rejected": -2.819100856781006, "step": 11000 }, { "epoch": 1.42, "eval_logits/chosen": -3.0402891635894775, "eval_logits/rejected": -2.9768574237823486, "eval_logps/chosen": -540.6996459960938, "eval_logps/rejected": -420.32904052734375, "eval_loss": 0.623100221157074, "eval_rewards/accuracies": 0.6685000061988831, "eval_rewards/chosen": -0.8879872560501099, "eval_rewards/margins": 1.0520505905151367, "eval_rewards/rejected": -1.9400378465652466, "eval_runtime": 278.6554, "eval_samples_per_second": 7.177, "eval_steps_per_second": 3.589, "step": 11000 }, { "epoch": 1.42, "learning_rate": 2.923400592904274e-07, "logits/chosen": -2.6292436122894287, "logits/rejected": -2.5042624473571777, "logps/chosen": -545.0465087890625, "logps/rejected": -396.43719482421875, "loss": 0.7103, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.217146635055542, "rewards/margins": 0.470756858587265, "rewards/rejected": -2.687903642654419, "step": 11010 }, { "epoch": 1.42, "learning_rate": 2.9210098498613365e-07, "logits/chosen": -2.8059592247009277, "logits/rejected": -2.593705415725708, "logps/chosen": -687.330078125, "logps/rejected": -452.28753662109375, "loss": 0.68, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2432332038879395, "rewards/margins": 0.5491209030151367, "rewards/rejected": -2.792354106903076, "step": 11020 }, { "epoch": 1.42, "learning_rate": 2.918619106818399e-07, "logits/chosen": -2.6476216316223145, "logits/rejected": -2.521864175796509, "logps/chosen": -544.8597412109375, "logps/rejected": -457.69732666015625, "loss": 0.618, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0401430130004883, "rewards/margins": 0.8035091161727905, "rewards/rejected": -2.8436522483825684, "step": 11030 }, { "epoch": 1.43, "learning_rate": 2.916228363775461e-07, "logits/chosen": -2.7144947052001953, "logits/rejected": -2.547685384750366, "logps/chosen": -649.8532104492188, "logps/rejected": -478.9456481933594, "loss": 0.6524, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1976161003112793, "rewards/margins": 0.7529795169830322, "rewards/rejected": -2.9505953788757324, "step": 11040 }, { "epoch": 1.43, "learning_rate": 2.913837620732523e-07, "logits/chosen": -2.7106432914733887, "logits/rejected": -2.576679229736328, "logps/chosen": -578.9241333007812, "logps/rejected": -448.47259521484375, "loss": 0.5913, "rewards/accuracies": 0.625, "rewards/chosen": -1.8260324001312256, "rewards/margins": 0.9317208528518677, "rewards/rejected": -2.7577528953552246, "step": 11050 }, { "epoch": 1.43, "learning_rate": 2.9114468776895854e-07, "logits/chosen": -2.664839267730713, "logits/rejected": -2.5890300273895264, "logps/chosen": -507.83184814453125, "logps/rejected": -451.6893005371094, "loss": 0.7718, "rewards/accuracies": 0.625, "rewards/chosen": -2.2030105590820312, "rewards/margins": 0.4442440867424011, "rewards/rejected": -2.6472549438476562, "step": 11060 }, { "epoch": 1.43, "learning_rate": 2.909056134646648e-07, "logits/chosen": -2.6650753021240234, "logits/rejected": -2.6128792762756348, "logps/chosen": -465.0108947753906, "logps/rejected": -399.46435546875, "loss": 0.7505, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.0261385440826416, "rewards/margins": 0.4828287959098816, "rewards/rejected": -2.508967161178589, "step": 11070 }, { "epoch": 1.43, "learning_rate": 2.9066653916037106e-07, "logits/chosen": -2.7578742504119873, "logits/rejected": -2.582009792327881, "logps/chosen": -535.1209716796875, "logps/rejected": -383.9891662597656, "loss": 0.6454, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2427456378936768, "rewards/margins": 0.6284838914871216, "rewards/rejected": -2.871229410171509, "step": 11080 }, { "epoch": 1.43, "learning_rate": 2.904274648560773e-07, "logits/chosen": -2.7137515544891357, "logits/rejected": -2.632693290710449, "logps/chosen": -522.9850463867188, "logps/rejected": -432.838623046875, "loss": 0.5368, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9566144943237305, "rewards/margins": 0.8328515291213989, "rewards/rejected": -2.78946590423584, "step": 11090 }, { "epoch": 1.43, "learning_rate": 2.901883905517835e-07, "logits/chosen": -2.6775925159454346, "logits/rejected": -2.6228058338165283, "logps/chosen": -486.4085998535156, "logps/rejected": -480.93914794921875, "loss": 0.6423, "rewards/accuracies": 0.6875, "rewards/chosen": -2.049206495285034, "rewards/margins": 0.638129472732544, "rewards/rejected": -2.687335968017578, "step": 11100 }, { "epoch": 1.43, "learning_rate": 2.899493162474897e-07, "logits/chosen": -2.629430055618286, "logits/rejected": -2.5852749347686768, "logps/chosen": -503.8207092285156, "logps/rejected": -401.72552490234375, "loss": 0.6382, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1572859287261963, "rewards/margins": 0.6048597097396851, "rewards/rejected": -2.762145519256592, "step": 11110 }, { "epoch": 1.44, "learning_rate": 2.8971024194319595e-07, "logits/chosen": -2.758589029312134, "logits/rejected": -2.6404097080230713, "logps/chosen": -644.5809326171875, "logps/rejected": -528.3360595703125, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/chosen": -2.1161952018737793, "rewards/margins": 0.6778016090393066, "rewards/rejected": -2.793996572494507, "step": 11120 }, { "epoch": 1.44, "learning_rate": 2.894711676389022e-07, "logits/chosen": -2.664226531982422, "logits/rejected": -2.5324933528900146, "logps/chosen": -544.2955322265625, "logps/rejected": -431.75323486328125, "loss": 0.6211, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4353885650634766, "rewards/margins": 0.5941974520683289, "rewards/rejected": -3.02958607673645, "step": 11130 }, { "epoch": 1.44, "learning_rate": 2.8923209333460843e-07, "logits/chosen": -2.706851005554199, "logits/rejected": -2.5591022968292236, "logps/chosen": -575.1961059570312, "logps/rejected": -420.97412109375, "loss": 0.5724, "rewards/accuracies": 0.6875, "rewards/chosen": -2.215615749359131, "rewards/margins": 0.8120278120040894, "rewards/rejected": -3.0276436805725098, "step": 11140 }, { "epoch": 1.44, "learning_rate": 2.889930190303146e-07, "logits/chosen": -2.600795269012451, "logits/rejected": -2.486445665359497, "logps/chosen": -596.0960693359375, "logps/rejected": -460.18896484375, "loss": 0.6456, "rewards/accuracies": 0.6875, "rewards/chosen": -2.167325496673584, "rewards/margins": 0.6196428537368774, "rewards/rejected": -2.786968231201172, "step": 11150 }, { "epoch": 1.44, "learning_rate": 2.8875394472602085e-07, "logits/chosen": -2.747157335281372, "logits/rejected": -2.5771946907043457, "logps/chosen": -676.0743408203125, "logps/rejected": -475.1204528808594, "loss": 0.6627, "rewards/accuracies": 0.625, "rewards/chosen": -2.0700855255126953, "rewards/margins": 0.6894831657409668, "rewards/rejected": -2.759568691253662, "step": 11160 }, { "epoch": 1.44, "learning_rate": 2.885148704217271e-07, "logits/chosen": -2.4983606338500977, "logits/rejected": -2.4801723957061768, "logps/chosen": -577.3388671875, "logps/rejected": -444.2994079589844, "loss": 0.5773, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0855956077575684, "rewards/margins": 0.7733058333396912, "rewards/rejected": -2.858901262283325, "step": 11170 }, { "epoch": 1.44, "learning_rate": 2.882757961174333e-07, "logits/chosen": -2.6935172080993652, "logits/rejected": -2.4823107719421387, "logps/chosen": -562.7366943359375, "logps/rejected": -405.50701904296875, "loss": 0.7022, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.372982978820801, "rewards/margins": 0.4587990343570709, "rewards/rejected": -2.8317818641662598, "step": 11180 }, { "epoch": 1.44, "learning_rate": 2.8803672181313955e-07, "logits/chosen": -2.651087999343872, "logits/rejected": -2.6033623218536377, "logps/chosen": -516.9682006835938, "logps/rejected": -433.548583984375, "loss": 0.6326, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0161709785461426, "rewards/margins": 0.6548231840133667, "rewards/rejected": -2.670994281768799, "step": 11190 }, { "epoch": 1.45, "learning_rate": 2.8779764750884574e-07, "logits/chosen": -2.731851100921631, "logits/rejected": -2.6249213218688965, "logps/chosen": -583.4534912109375, "logps/rejected": -476.4869689941406, "loss": 0.6771, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.213914632797241, "rewards/margins": 0.5574566125869751, "rewards/rejected": -2.7713711261749268, "step": 11200 }, { "epoch": 1.45, "learning_rate": 2.8755857320455197e-07, "logits/chosen": -2.606905460357666, "logits/rejected": -2.6015777587890625, "logps/chosen": -467.36663818359375, "logps/rejected": -394.4827880859375, "loss": 0.6516, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9285504817962646, "rewards/margins": 0.562516450881958, "rewards/rejected": -2.4910669326782227, "step": 11210 }, { "epoch": 1.45, "learning_rate": 2.873194989002582e-07, "logits/chosen": -2.703287363052368, "logits/rejected": -2.5661559104919434, "logps/chosen": -525.801025390625, "logps/rejected": -353.07940673828125, "loss": 0.7059, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.256702184677124, "rewards/margins": 0.6788700819015503, "rewards/rejected": -2.9355721473693848, "step": 11220 }, { "epoch": 1.45, "learning_rate": 2.8708042459596444e-07, "logits/chosen": -2.6041035652160645, "logits/rejected": -2.466280221939087, "logps/chosen": -534.7881469726562, "logps/rejected": -399.08709716796875, "loss": 0.5557, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9130809307098389, "rewards/margins": 0.8196486234664917, "rewards/rejected": -2.732729434967041, "step": 11230 }, { "epoch": 1.45, "learning_rate": 2.8684135029167063e-07, "logits/chosen": -2.761171817779541, "logits/rejected": -2.625739812850952, "logps/chosen": -607.8204345703125, "logps/rejected": -461.21441650390625, "loss": 0.5028, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.015307664871216, "rewards/margins": 0.9577857255935669, "rewards/rejected": -2.9730935096740723, "step": 11240 }, { "epoch": 1.45, "learning_rate": 2.8660227598737686e-07, "logits/chosen": -2.674787998199463, "logits/rejected": -2.658113479614258, "logps/chosen": -549.5526733398438, "logps/rejected": -518.2247924804688, "loss": 0.6335, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0341267585754395, "rewards/margins": 0.7005693912506104, "rewards/rejected": -2.73469614982605, "step": 11250 }, { "epoch": 1.45, "learning_rate": 2.863632016830831e-07, "logits/chosen": -2.690195322036743, "logits/rejected": -2.639200448989868, "logps/chosen": -504.68597412109375, "logps/rejected": -412.84600830078125, "loss": 0.5867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0757663249969482, "rewards/margins": 0.9094275236129761, "rewards/rejected": -2.9851937294006348, "step": 11260 }, { "epoch": 1.45, "learning_rate": 2.8612412737878933e-07, "logits/chosen": -2.76119065284729, "logits/rejected": -2.5926547050476074, "logps/chosen": -577.3235473632812, "logps/rejected": -370.3683776855469, "loss": 0.6052, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9990593194961548, "rewards/margins": 0.8002057075500488, "rewards/rejected": -2.799264907836914, "step": 11270 }, { "epoch": 1.46, "learning_rate": 2.8588505307449557e-07, "logits/chosen": -2.7562096118927, "logits/rejected": -2.6560635566711426, "logps/chosen": -548.4207763671875, "logps/rejected": -413.5956115722656, "loss": 0.5117, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0242598056793213, "rewards/margins": 1.073714256286621, "rewards/rejected": -3.0979740619659424, "step": 11280 }, { "epoch": 1.46, "learning_rate": 2.8564597877020175e-07, "logits/chosen": -2.6748881340026855, "logits/rejected": -2.606168270111084, "logps/chosen": -637.4816284179688, "logps/rejected": -539.1502685546875, "loss": 0.6105, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0801825523376465, "rewards/margins": 0.5976347327232361, "rewards/rejected": -2.6778175830841064, "step": 11290 }, { "epoch": 1.46, "learning_rate": 2.85406904465908e-07, "logits/chosen": -2.7271981239318848, "logits/rejected": -2.57505464553833, "logps/chosen": -564.2353515625, "logps/rejected": -434.71063232421875, "loss": 0.5699, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.025439739227295, "rewards/margins": 0.8313396573066711, "rewards/rejected": -2.8567795753479004, "step": 11300 }, { "epoch": 1.46, "learning_rate": 2.851678301616142e-07, "logits/chosen": -2.6900265216827393, "logits/rejected": -2.538560152053833, "logps/chosen": -558.0711669921875, "logps/rejected": -425.3587341308594, "loss": 0.5781, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.280109405517578, "rewards/margins": 0.7315221428871155, "rewards/rejected": -3.011631727218628, "step": 11310 }, { "epoch": 1.46, "learning_rate": 2.8492875585732046e-07, "logits/chosen": -2.6895792484283447, "logits/rejected": -2.5696253776550293, "logps/chosen": -547.9337158203125, "logps/rejected": -419.99267578125, "loss": 0.5708, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.207397222518921, "rewards/margins": 0.7170736193656921, "rewards/rejected": -2.924471139907837, "step": 11320 }, { "epoch": 1.46, "learning_rate": 2.846896815530267e-07, "logits/chosen": -2.698927640914917, "logits/rejected": -2.5685834884643555, "logps/chosen": -590.1056518554688, "logps/rejected": -427.6539001464844, "loss": 0.7335, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.301586627960205, "rewards/margins": 0.5403566360473633, "rewards/rejected": -2.8419432640075684, "step": 11330 }, { "epoch": 1.46, "learning_rate": 2.844506072487329e-07, "logits/chosen": -2.6321988105773926, "logits/rejected": -2.5220375061035156, "logps/chosen": -522.5320434570312, "logps/rejected": -460.06805419921875, "loss": 0.6472, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.134852886199951, "rewards/margins": 0.5983940958976746, "rewards/rejected": -2.7332468032836914, "step": 11340 }, { "epoch": 1.47, "learning_rate": 2.842115329444391e-07, "logits/chosen": -2.8580756187438965, "logits/rejected": -2.68438458442688, "logps/chosen": -531.8880004882812, "logps/rejected": -406.4539489746094, "loss": 0.5202, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9310340881347656, "rewards/margins": 0.9528898000717163, "rewards/rejected": -2.8839237689971924, "step": 11350 }, { "epoch": 1.47, "learning_rate": 2.8397245864014535e-07, "logits/chosen": -2.6888914108276367, "logits/rejected": -2.643649101257324, "logps/chosen": -633.4893798828125, "logps/rejected": -521.5072631835938, "loss": 0.5276, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0472464561462402, "rewards/margins": 0.9302901029586792, "rewards/rejected": -2.97753643989563, "step": 11360 }, { "epoch": 1.47, "learning_rate": 2.837333843358516e-07, "logits/chosen": -2.661372661590576, "logits/rejected": -2.580749988555908, "logps/chosen": -511.7660217285156, "logps/rejected": -383.9587097167969, "loss": 0.6757, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.361964702606201, "rewards/margins": 0.460735559463501, "rewards/rejected": -2.822700262069702, "step": 11370 }, { "epoch": 1.47, "learning_rate": 2.8349431003155777e-07, "logits/chosen": -2.790040969848633, "logits/rejected": -2.632415771484375, "logps/chosen": -551.8988037109375, "logps/rejected": -401.14984130859375, "loss": 0.5991, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.046424150466919, "rewards/margins": 0.7005847692489624, "rewards/rejected": -2.747008800506592, "step": 11380 }, { "epoch": 1.47, "learning_rate": 2.83255235727264e-07, "logits/chosen": -2.783144950866699, "logits/rejected": -2.561995029449463, "logps/chosen": -584.3035278320312, "logps/rejected": -392.73687744140625, "loss": 0.6269, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1455743312835693, "rewards/margins": 0.6289796233177185, "rewards/rejected": -2.7745542526245117, "step": 11390 }, { "epoch": 1.47, "learning_rate": 2.8301616142297024e-07, "logits/chosen": -2.645925760269165, "logits/rejected": -2.5777556896209717, "logps/chosen": -489.29425048828125, "logps/rejected": -437.9686584472656, "loss": 0.5948, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.016688823699951, "rewards/margins": 0.7737330794334412, "rewards/rejected": -2.790422201156616, "step": 11400 }, { "epoch": 1.47, "learning_rate": 2.827770871186765e-07, "logits/chosen": -2.731684684753418, "logits/rejected": -2.592357873916626, "logps/chosen": -570.18798828125, "logps/rejected": -421.43341064453125, "loss": 0.728, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.218853712081909, "rewards/margins": 0.5169497728347778, "rewards/rejected": -2.7358036041259766, "step": 11410 }, { "epoch": 1.47, "learning_rate": 2.825380128143827e-07, "logits/chosen": -2.7686376571655273, "logits/rejected": -2.591452121734619, "logps/chosen": -561.201171875, "logps/rejected": -429.84912109375, "loss": 0.5916, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0557210445404053, "rewards/margins": 0.7953118085861206, "rewards/rejected": -2.8510327339172363, "step": 11420 }, { "epoch": 1.48, "learning_rate": 2.822989385100889e-07, "logits/chosen": -2.7946388721466064, "logits/rejected": -2.627768039703369, "logps/chosen": -620.6519775390625, "logps/rejected": -447.1705017089844, "loss": 0.7042, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.224065065383911, "rewards/margins": 0.5937709212303162, "rewards/rejected": -2.817836046218872, "step": 11430 }, { "epoch": 1.48, "learning_rate": 2.8205986420579513e-07, "logits/chosen": -2.516730546951294, "logits/rejected": -2.5594794750213623, "logps/chosen": -569.6987915039062, "logps/rejected": -542.32666015625, "loss": 0.6897, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.134157180786133, "rewards/margins": 0.5113784670829773, "rewards/rejected": -2.645535707473755, "step": 11440 }, { "epoch": 1.48, "learning_rate": 2.8182078990150137e-07, "logits/chosen": -2.8230018615722656, "logits/rejected": -2.662449598312378, "logps/chosen": -595.1641235351562, "logps/rejected": -409.20867919921875, "loss": 0.5208, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.119875431060791, "rewards/margins": 0.9900142550468445, "rewards/rejected": -3.1098899841308594, "step": 11450 }, { "epoch": 1.48, "learning_rate": 2.815817155972076e-07, "logits/chosen": -2.754132032394409, "logits/rejected": -2.6648170948028564, "logps/chosen": -542.38232421875, "logps/rejected": -483.5003967285156, "loss": 0.6113, "rewards/accuracies": 0.6875, "rewards/chosen": -2.090301990509033, "rewards/margins": 0.6552601456642151, "rewards/rejected": -2.7455620765686035, "step": 11460 }, { "epoch": 1.48, "learning_rate": 2.8134264129291384e-07, "logits/chosen": -2.6520943641662598, "logits/rejected": -2.6150355339050293, "logps/chosen": -478.9280700683594, "logps/rejected": -395.6644592285156, "loss": 0.6969, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0732035636901855, "rewards/margins": 0.5422142148017883, "rewards/rejected": -2.615417957305908, "step": 11470 }, { "epoch": 1.48, "learning_rate": 2.8110356698862e-07, "logits/chosen": -2.6382975578308105, "logits/rejected": -2.5712757110595703, "logps/chosen": -479.7979431152344, "logps/rejected": -413.98748779296875, "loss": 0.6026, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1199662685394287, "rewards/margins": 0.6004474759101868, "rewards/rejected": -2.7204136848449707, "step": 11480 }, { "epoch": 1.48, "learning_rate": 2.8086449268432626e-07, "logits/chosen": -2.7754478454589844, "logits/rejected": -2.7525248527526855, "logps/chosen": -435.24456787109375, "logps/rejected": -371.7784729003906, "loss": 0.5966, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9052772521972656, "rewards/margins": 0.7167661786079407, "rewards/rejected": -2.6220431327819824, "step": 11490 }, { "epoch": 1.48, "learning_rate": 2.806254183800325e-07, "logits/chosen": -2.810173273086548, "logits/rejected": -2.7102973461151123, "logps/chosen": -502.8597717285156, "logps/rejected": -385.94500732421875, "loss": 0.5836, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9441654682159424, "rewards/margins": 0.7476539611816406, "rewards/rejected": -2.691819429397583, "step": 11500 }, { "epoch": 1.49, "learning_rate": 2.8038634407573873e-07, "logits/chosen": -2.6204185485839844, "logits/rejected": -2.5342795848846436, "logps/chosen": -513.5599975585938, "logps/rejected": -406.1797790527344, "loss": 0.6948, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0444512367248535, "rewards/margins": 0.48958277702331543, "rewards/rejected": -2.534034013748169, "step": 11510 }, { "epoch": 1.49, "learning_rate": 2.801472697714449e-07, "logits/chosen": -2.6782498359680176, "logits/rejected": -2.623823404312134, "logps/chosen": -534.7537841796875, "logps/rejected": -468.272705078125, "loss": 0.6889, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1503446102142334, "rewards/margins": 0.5835763216018677, "rewards/rejected": -2.7339210510253906, "step": 11520 }, { "epoch": 1.49, "learning_rate": 2.7990819546715115e-07, "logits/chosen": -2.76054048538208, "logits/rejected": -2.582491636276245, "logps/chosen": -598.7380981445312, "logps/rejected": -463.02105712890625, "loss": 0.5543, "rewards/accuracies": 0.75, "rewards/chosen": -2.181612968444824, "rewards/margins": 0.9081758260726929, "rewards/rejected": -3.0897886753082275, "step": 11530 }, { "epoch": 1.49, "learning_rate": 2.796691211628574e-07, "logits/chosen": -2.6592531204223633, "logits/rejected": -2.531485080718994, "logps/chosen": -547.4722290039062, "logps/rejected": -458.2996520996094, "loss": 0.6438, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.147845983505249, "rewards/margins": 0.6673979759216309, "rewards/rejected": -2.815243721008301, "step": 11540 }, { "epoch": 1.49, "learning_rate": 2.794300468585636e-07, "logits/chosen": -2.7819950580596924, "logits/rejected": -2.608642101287842, "logps/chosen": -576.5765991210938, "logps/rejected": -462.03009033203125, "loss": 0.5693, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.146765947341919, "rewards/margins": 0.7048605680465698, "rewards/rejected": -2.851626396179199, "step": 11550 }, { "epoch": 1.49, "learning_rate": 2.7919097255426986e-07, "logits/chosen": -2.7999579906463623, "logits/rejected": -2.698270082473755, "logps/chosen": -542.46630859375, "logps/rejected": -449.6844177246094, "loss": 0.6104, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0964035987854004, "rewards/margins": 0.762248158454895, "rewards/rejected": -2.858652114868164, "step": 11560 }, { "epoch": 1.49, "learning_rate": 2.7895189824997604e-07, "logits/chosen": -2.6830177307128906, "logits/rejected": -2.6510910987854004, "logps/chosen": -429.2451171875, "logps/rejected": -377.74322509765625, "loss": 0.6783, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.120232105255127, "rewards/margins": 0.5908186435699463, "rewards/rejected": -2.7110512256622314, "step": 11570 }, { "epoch": 1.49, "learning_rate": 2.787128239456823e-07, "logits/chosen": -2.654628038406372, "logits/rejected": -2.6358368396759033, "logps/chosen": -536.234130859375, "logps/rejected": -460.618896484375, "loss": 0.6031, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0744199752807617, "rewards/margins": 0.7005216479301453, "rewards/rejected": -2.7749416828155518, "step": 11580 }, { "epoch": 1.5, "learning_rate": 2.784737496413885e-07, "logits/chosen": -2.6546335220336914, "logits/rejected": -2.5726356506347656, "logps/chosen": -564.6192016601562, "logps/rejected": -481.540283203125, "loss": 0.8303, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.2662835121154785, "rewards/margins": 0.345337450504303, "rewards/rejected": -2.6116209030151367, "step": 11590 }, { "epoch": 1.5, "learning_rate": 2.7823467533709475e-07, "logits/chosen": -2.71062970161438, "logits/rejected": -2.5320510864257812, "logps/chosen": -672.5858154296875, "logps/rejected": -541.3973388671875, "loss": 0.6433, "rewards/accuracies": 0.6875, "rewards/chosen": -2.366208791732788, "rewards/margins": 0.673836350440979, "rewards/rejected": -3.0400452613830566, "step": 11600 }, { "epoch": 1.5, "learning_rate": 2.77995601032801e-07, "logits/chosen": -2.751464605331421, "logits/rejected": -2.6457266807556152, "logps/chosen": -528.697265625, "logps/rejected": -432.9903869628906, "loss": 0.6781, "rewards/accuracies": 0.625, "rewards/chosen": -2.3827950954437256, "rewards/margins": 0.5966523885726929, "rewards/rejected": -2.979447364807129, "step": 11610 }, { "epoch": 1.5, "learning_rate": 2.7775652672850717e-07, "logits/chosen": -2.7402260303497314, "logits/rejected": -2.5380630493164062, "logps/chosen": -563.0191040039062, "logps/rejected": -396.2304992675781, "loss": 0.4644, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8729321956634521, "rewards/margins": 1.17349374294281, "rewards/rejected": -3.0464260578155518, "step": 11620 }, { "epoch": 1.5, "learning_rate": 2.775174524242134e-07, "logits/chosen": -2.777366876602173, "logits/rejected": -2.6346275806427, "logps/chosen": -529.8588256835938, "logps/rejected": -445.92266845703125, "loss": 0.5833, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.123976230621338, "rewards/margins": 0.8771451711654663, "rewards/rejected": -3.001121759414673, "step": 11630 }, { "epoch": 1.5, "learning_rate": 2.7727837811991964e-07, "logits/chosen": -2.680905818939209, "logits/rejected": -2.5825963020324707, "logps/chosen": -513.5118408203125, "logps/rejected": -431.99346923828125, "loss": 0.5146, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0100650787353516, "rewards/margins": 0.8836072683334351, "rewards/rejected": -2.893671989440918, "step": 11640 }, { "epoch": 1.5, "learning_rate": 2.770393038156259e-07, "logits/chosen": -2.6743950843811035, "logits/rejected": -2.5790627002716064, "logps/chosen": -509.86883544921875, "logps/rejected": -396.83416748046875, "loss": 0.7535, "rewards/accuracies": 0.625, "rewards/chosen": -2.456385612487793, "rewards/margins": 0.44937315583229065, "rewards/rejected": -2.905759334564209, "step": 11650 }, { "epoch": 1.51, "learning_rate": 2.7680022951133206e-07, "logits/chosen": -2.5494446754455566, "logits/rejected": -2.505265235900879, "logps/chosen": -585.2263793945312, "logps/rejected": -440.904052734375, "loss": 0.5732, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1126067638397217, "rewards/margins": 0.8056829571723938, "rewards/rejected": -2.9182896614074707, "step": 11660 }, { "epoch": 1.51, "learning_rate": 2.765611552070383e-07, "logits/chosen": -2.702704668045044, "logits/rejected": -2.5086476802825928, "logps/chosen": -591.1575927734375, "logps/rejected": -395.02435302734375, "loss": 0.5608, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1308064460754395, "rewards/margins": 0.8584381341934204, "rewards/rejected": -2.9892444610595703, "step": 11670 }, { "epoch": 1.51, "learning_rate": 2.763220809027446e-07, "logits/chosen": -2.6933443546295166, "logits/rejected": -2.632058620452881, "logps/chosen": -505.35382080078125, "logps/rejected": -399.9601135253906, "loss": 0.6314, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.123776912689209, "rewards/margins": 0.7403495907783508, "rewards/rejected": -2.864126682281494, "step": 11680 }, { "epoch": 1.51, "learning_rate": 2.760830065984508e-07, "logits/chosen": -2.6154277324676514, "logits/rejected": -2.466017484664917, "logps/chosen": -631.0084838867188, "logps/rejected": -429.9647521972656, "loss": 0.5115, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.041496992111206, "rewards/margins": 1.0584757328033447, "rewards/rejected": -3.0999724864959717, "step": 11690 }, { "epoch": 1.51, "learning_rate": 2.7584393229415706e-07, "logits/chosen": -2.7732417583465576, "logits/rejected": -2.641115427017212, "logps/chosen": -630.1514892578125, "logps/rejected": -409.96026611328125, "loss": 0.4484, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7013187408447266, "rewards/margins": 1.2464672327041626, "rewards/rejected": -2.9477858543395996, "step": 11700 }, { "epoch": 1.51, "learning_rate": 2.7560485798986324e-07, "logits/chosen": -2.8220055103302, "logits/rejected": -2.600590229034424, "logps/chosen": -557.8449096679688, "logps/rejected": -394.35296630859375, "loss": 0.6006, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0113332271575928, "rewards/margins": 0.8400920033454895, "rewards/rejected": -2.8514249324798584, "step": 11710 }, { "epoch": 1.51, "learning_rate": 2.753657836855695e-07, "logits/chosen": -2.6518971920013428, "logits/rejected": -2.5198559761047363, "logps/chosen": -545.87060546875, "logps/rejected": -380.22833251953125, "loss": 0.5558, "rewards/accuracies": 0.6875, "rewards/chosen": -2.064301013946533, "rewards/margins": 1.0470324754714966, "rewards/rejected": -3.1113336086273193, "step": 11720 }, { "epoch": 1.51, "learning_rate": 2.751267093812757e-07, "logits/chosen": -2.6222195625305176, "logits/rejected": -2.482126235961914, "logps/chosen": -515.9642333984375, "logps/rejected": -413.34228515625, "loss": 0.6112, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.194236993789673, "rewards/margins": 0.8101555705070496, "rewards/rejected": -3.004392623901367, "step": 11730 }, { "epoch": 1.52, "learning_rate": 2.7488763507698195e-07, "logits/chosen": -2.7508208751678467, "logits/rejected": -2.658306121826172, "logps/chosen": -564.2601318359375, "logps/rejected": -506.9287109375, "loss": 0.687, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.380086898803711, "rewards/margins": 0.5278152227401733, "rewards/rejected": -2.907902240753174, "step": 11740 }, { "epoch": 1.52, "learning_rate": 2.746485607726882e-07, "logits/chosen": -2.7501370906829834, "logits/rejected": -2.5863564014434814, "logps/chosen": -516.9188232421875, "logps/rejected": -412.3036193847656, "loss": 0.5765, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2103347778320312, "rewards/margins": 0.9449024200439453, "rewards/rejected": -3.1552374362945557, "step": 11750 }, { "epoch": 1.52, "learning_rate": 2.7440948646839437e-07, "logits/chosen": -2.7223589420318604, "logits/rejected": -2.5686426162719727, "logps/chosen": -518.2640991210938, "logps/rejected": -380.20965576171875, "loss": 0.5768, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9965842962265015, "rewards/margins": 0.706307590007782, "rewards/rejected": -2.7028918266296387, "step": 11760 }, { "epoch": 1.52, "learning_rate": 2.741704121641006e-07, "logits/chosen": -2.760131359100342, "logits/rejected": -2.5684618949890137, "logps/chosen": -571.623046875, "logps/rejected": -399.30401611328125, "loss": 0.7233, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0358128547668457, "rewards/margins": 0.5202886462211609, "rewards/rejected": -2.5561013221740723, "step": 11770 }, { "epoch": 1.52, "learning_rate": 2.7393133785980684e-07, "logits/chosen": -2.618084192276001, "logits/rejected": -2.603959083557129, "logps/chosen": -541.8715209960938, "logps/rejected": -457.46954345703125, "loss": 0.5832, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1736929416656494, "rewards/margins": 0.8564106822013855, "rewards/rejected": -3.0301036834716797, "step": 11780 }, { "epoch": 1.52, "learning_rate": 2.7369226355551307e-07, "logits/chosen": -2.6753954887390137, "logits/rejected": -2.52119779586792, "logps/chosen": -618.2182006835938, "logps/rejected": -502.6859436035156, "loss": 0.697, "rewards/accuracies": 0.625, "rewards/chosen": -2.429870843887329, "rewards/margins": 0.6223932504653931, "rewards/rejected": -3.0522639751434326, "step": 11790 }, { "epoch": 1.52, "learning_rate": 2.7345318925121926e-07, "logits/chosen": -2.778547763824463, "logits/rejected": -2.5538387298583984, "logps/chosen": -658.0452880859375, "logps/rejected": -444.65179443359375, "loss": 0.4707, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.13360857963562, "rewards/margins": 1.0496028661727905, "rewards/rejected": -3.1832118034362793, "step": 11800 }, { "epoch": 1.52, "learning_rate": 2.732141149469255e-07, "logits/chosen": -2.66430401802063, "logits/rejected": -2.582958936691284, "logps/chosen": -555.7869873046875, "logps/rejected": -417.37286376953125, "loss": 0.6658, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2246391773223877, "rewards/margins": 0.5271438360214233, "rewards/rejected": -2.7517831325531006, "step": 11810 }, { "epoch": 1.53, "learning_rate": 2.7297504064263173e-07, "logits/chosen": -2.8295626640319824, "logits/rejected": -2.5453243255615234, "logps/chosen": -601.4797973632812, "logps/rejected": -363.3930358886719, "loss": 0.6031, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.929305076599121, "rewards/margins": 0.7875005602836609, "rewards/rejected": -2.7168056964874268, "step": 11820 }, { "epoch": 1.53, "learning_rate": 2.7273596633833796e-07, "logits/chosen": -2.6963610649108887, "logits/rejected": -2.53182315826416, "logps/chosen": -632.4950561523438, "logps/rejected": -505.4222717285156, "loss": 0.6061, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.16147780418396, "rewards/margins": 0.6822770833969116, "rewards/rejected": -2.843755006790161, "step": 11830 }, { "epoch": 1.53, "learning_rate": 2.724968920340442e-07, "logits/chosen": -2.678232431411743, "logits/rejected": -2.589273691177368, "logps/chosen": -606.8228759765625, "logps/rejected": -457.7847595214844, "loss": 0.6252, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0119621753692627, "rewards/margins": 0.7181410193443298, "rewards/rejected": -2.730103015899658, "step": 11840 }, { "epoch": 1.53, "learning_rate": 2.722578177297504e-07, "logits/chosen": -2.7834737300872803, "logits/rejected": -2.671767234802246, "logps/chosen": -601.6524658203125, "logps/rejected": -534.5066528320312, "loss": 0.6955, "rewards/accuracies": 0.625, "rewards/chosen": -2.1974103450775146, "rewards/margins": 0.5813153982162476, "rewards/rejected": -2.778726100921631, "step": 11850 }, { "epoch": 1.53, "learning_rate": 2.720187434254566e-07, "logits/chosen": -2.791055679321289, "logits/rejected": -2.675025701522827, "logps/chosen": -591.458984375, "logps/rejected": -484.450927734375, "loss": 0.7476, "rewards/accuracies": 0.5625, "rewards/chosen": -2.031890392303467, "rewards/margins": 0.48574668169021606, "rewards/rejected": -2.5176374912261963, "step": 11860 }, { "epoch": 1.53, "learning_rate": 2.7177966912116285e-07, "logits/chosen": -2.775129795074463, "logits/rejected": -2.6082165241241455, "logps/chosen": -586.1519775390625, "logps/rejected": -431.313720703125, "loss": 0.5573, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8600482940673828, "rewards/margins": 0.8787357211112976, "rewards/rejected": -2.7387843132019043, "step": 11870 }, { "epoch": 1.53, "learning_rate": 2.715405948168691e-07, "logits/chosen": -2.600130558013916, "logits/rejected": -2.5383858680725098, "logps/chosen": -508.2626037597656, "logps/rejected": -415.4730529785156, "loss": 0.5826, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9758498668670654, "rewards/margins": 0.9379187822341919, "rewards/rejected": -2.9137685298919678, "step": 11880 }, { "epoch": 1.53, "learning_rate": 2.713015205125753e-07, "logits/chosen": -2.7461323738098145, "logits/rejected": -2.6287453174591064, "logps/chosen": -544.0051879882812, "logps/rejected": -392.6896667480469, "loss": 0.5717, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.8637359142303467, "rewards/margins": 0.7322660684585571, "rewards/rejected": -2.5960021018981934, "step": 11890 }, { "epoch": 1.54, "learning_rate": 2.710624462082815e-07, "logits/chosen": -2.7466881275177, "logits/rejected": -2.518235683441162, "logps/chosen": -586.9876708984375, "logps/rejected": -416.60601806640625, "loss": 0.4695, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9213787317276, "rewards/margins": 1.0765306949615479, "rewards/rejected": -2.9979090690612793, "step": 11900 }, { "epoch": 1.54, "learning_rate": 2.7082337190398775e-07, "logits/chosen": -2.6778464317321777, "logits/rejected": -2.610119104385376, "logps/chosen": -582.5311889648438, "logps/rejected": -479.4261779785156, "loss": 0.5783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0824637413024902, "rewards/margins": 0.7307153940200806, "rewards/rejected": -2.8131790161132812, "step": 11910 }, { "epoch": 1.54, "learning_rate": 2.70584297599694e-07, "logits/chosen": -2.601731538772583, "logits/rejected": -2.4204678535461426, "logps/chosen": -597.9498901367188, "logps/rejected": -435.8639221191406, "loss": 0.6516, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.077671527862549, "rewards/margins": 0.9201186895370483, "rewards/rejected": -2.9977903366088867, "step": 11920 }, { "epoch": 1.54, "learning_rate": 2.703452232954002e-07, "logits/chosen": -2.762784481048584, "logits/rejected": -2.632967233657837, "logps/chosen": -527.6964111328125, "logps/rejected": -460.5862731933594, "loss": 0.5914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1865668296813965, "rewards/margins": 0.7103053331375122, "rewards/rejected": -2.8968722820281982, "step": 11930 }, { "epoch": 1.54, "learning_rate": 2.701061489911064e-07, "logits/chosen": -2.632075786590576, "logits/rejected": -2.4453177452087402, "logps/chosen": -542.4067993164062, "logps/rejected": -394.7138977050781, "loss": 0.6439, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.207373857498169, "rewards/margins": 0.6083763837814331, "rewards/rejected": -2.8157498836517334, "step": 11940 }, { "epoch": 1.54, "learning_rate": 2.6986707468681264e-07, "logits/chosen": -2.6339035034179688, "logits/rejected": -2.527172088623047, "logps/chosen": -464.16387939453125, "logps/rejected": -380.5674743652344, "loss": 0.5277, "rewards/accuracies": 0.6875, "rewards/chosen": -2.08406138420105, "rewards/margins": 0.8726047277450562, "rewards/rejected": -2.9566662311553955, "step": 11950 }, { "epoch": 1.54, "learning_rate": 2.6962800038251887e-07, "logits/chosen": -2.6855342388153076, "logits/rejected": -2.553865909576416, "logps/chosen": -474.8309631347656, "logps/rejected": -348.80181884765625, "loss": 0.6272, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.952074646949768, "rewards/margins": 0.7418515086174011, "rewards/rejected": -2.6939260959625244, "step": 11960 }, { "epoch": 1.55, "learning_rate": 2.693889260782251e-07, "logits/chosen": -2.7582831382751465, "logits/rejected": -2.6980667114257812, "logps/chosen": -517.6364135742188, "logps/rejected": -466.943603515625, "loss": 0.7223, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1356914043426514, "rewards/margins": 0.4422825872898102, "rewards/rejected": -2.5779738426208496, "step": 11970 }, { "epoch": 1.55, "learning_rate": 2.6914985177393134e-07, "logits/chosen": -2.714669704437256, "logits/rejected": -2.558021306991577, "logps/chosen": -589.7595825195312, "logps/rejected": -414.3516540527344, "loss": 0.5556, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.029566526412964, "rewards/margins": 0.8136910200119019, "rewards/rejected": -2.843257427215576, "step": 11980 }, { "epoch": 1.55, "learning_rate": 2.6891077746963753e-07, "logits/chosen": -2.7595839500427246, "logits/rejected": -2.6515302658081055, "logps/chosen": -514.2911376953125, "logps/rejected": -419.24139404296875, "loss": 0.6526, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1493515968322754, "rewards/margins": 0.6181458234786987, "rewards/rejected": -2.7674975395202637, "step": 11990 }, { "epoch": 1.55, "learning_rate": 2.6867170316534376e-07, "logits/chosen": -2.81187105178833, "logits/rejected": -2.4961864948272705, "logps/chosen": -591.9654541015625, "logps/rejected": -385.08843994140625, "loss": 0.6955, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.241931438446045, "rewards/margins": 0.449483722448349, "rewards/rejected": -2.6914148330688477, "step": 12000 }, { "epoch": 1.55, "eval_logits/chosen": -3.0003180503845215, "eval_logits/rejected": -2.9293746948242188, "eval_logps/chosen": -540.3406982421875, "eval_logps/rejected": -420.12945556640625, "eval_loss": 0.6199987530708313, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.852096438407898, "eval_rewards/margins": 1.0679848194122314, "eval_rewards/rejected": -1.9200812578201294, "eval_runtime": 278.252, "eval_samples_per_second": 7.188, "eval_steps_per_second": 3.594, "step": 12000 }, { "epoch": 1.55, "learning_rate": 2.6843262886105e-07, "logits/chosen": -2.696312427520752, "logits/rejected": -2.5566461086273193, "logps/chosen": -486.8111267089844, "logps/rejected": -336.23907470703125, "loss": 0.8011, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.401247501373291, "rewards/margins": 0.28270068764686584, "rewards/rejected": -2.683948040008545, "step": 12010 }, { "epoch": 1.55, "learning_rate": 2.6819355455675623e-07, "logits/chosen": -2.544995069503784, "logits/rejected": -2.451578378677368, "logps/chosen": -480.3182067871094, "logps/rejected": -394.19842529296875, "loss": 0.5096, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9145710468292236, "rewards/margins": 0.9227372407913208, "rewards/rejected": -2.837307929992676, "step": 12020 }, { "epoch": 1.55, "learning_rate": 2.6795448025246247e-07, "logits/chosen": -2.571669101715088, "logits/rejected": -2.5705912113189697, "logps/chosen": -561.4000244140625, "logps/rejected": -470.7666931152344, "loss": 0.7153, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1842923164367676, "rewards/margins": 0.5038815140724182, "rewards/rejected": -2.688173532485962, "step": 12030 }, { "epoch": 1.55, "learning_rate": 2.6771540594816865e-07, "logits/chosen": -2.6572117805480957, "logits/rejected": -2.567671537399292, "logps/chosen": -569.2562866210938, "logps/rejected": -443.0079040527344, "loss": 0.5657, "rewards/accuracies": 0.75, "rewards/chosen": -1.810359239578247, "rewards/margins": 0.9157341718673706, "rewards/rejected": -2.7260935306549072, "step": 12040 }, { "epoch": 1.56, "learning_rate": 2.674763316438749e-07, "logits/chosen": -2.646790027618408, "logits/rejected": -2.6167683601379395, "logps/chosen": -579.9796142578125, "logps/rejected": -463.9654846191406, "loss": 0.5791, "rewards/accuracies": 0.75, "rewards/chosen": -2.152564525604248, "rewards/margins": 0.9101690053939819, "rewards/rejected": -3.0627334117889404, "step": 12050 }, { "epoch": 1.56, "learning_rate": 2.672372573395811e-07, "logits/chosen": -2.717034339904785, "logits/rejected": -2.615161180496216, "logps/chosen": -535.3619384765625, "logps/rejected": -458.1192321777344, "loss": 0.5737, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.01546573638916, "rewards/margins": 0.9175039529800415, "rewards/rejected": -2.9329700469970703, "step": 12060 }, { "epoch": 1.56, "learning_rate": 2.6699818303528736e-07, "logits/chosen": -2.709188938140869, "logits/rejected": -2.54498028755188, "logps/chosen": -470.5482482910156, "logps/rejected": -378.29888916015625, "loss": 0.5519, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.7910325527191162, "rewards/margins": 0.7909413576126099, "rewards/rejected": -2.5819740295410156, "step": 12070 }, { "epoch": 1.56, "learning_rate": 2.6675910873099354e-07, "logits/chosen": -2.5883684158325195, "logits/rejected": -2.45088529586792, "logps/chosen": -569.049560546875, "logps/rejected": -433.2220153808594, "loss": 0.6905, "rewards/accuracies": 0.6875, "rewards/chosen": -2.077678680419922, "rewards/margins": 0.5751469731330872, "rewards/rejected": -2.6528258323669434, "step": 12080 }, { "epoch": 1.56, "learning_rate": 2.665200344266998e-07, "logits/chosen": -2.6858701705932617, "logits/rejected": -2.6090853214263916, "logps/chosen": -536.2083740234375, "logps/rejected": -429.964111328125, "loss": 0.5747, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1758434772491455, "rewards/margins": 0.9057248830795288, "rewards/rejected": -3.0815682411193848, "step": 12090 }, { "epoch": 1.56, "learning_rate": 2.66280960122406e-07, "logits/chosen": -2.6264638900756836, "logits/rejected": -2.476426362991333, "logps/chosen": -559.9469604492188, "logps/rejected": -386.95538330078125, "loss": 0.6047, "rewards/accuracies": 0.6875, "rewards/chosen": -1.797568678855896, "rewards/margins": 0.881226658821106, "rewards/rejected": -2.678795576095581, "step": 12100 }, { "epoch": 1.56, "learning_rate": 2.6604188581811225e-07, "logits/chosen": -2.72599458694458, "logits/rejected": -2.558305025100708, "logps/chosen": -540.5468139648438, "logps/rejected": -421.0648498535156, "loss": 0.7152, "rewards/accuracies": 0.625, "rewards/chosen": -2.295994520187378, "rewards/margins": 0.5020049810409546, "rewards/rejected": -2.797999620437622, "step": 12110 }, { "epoch": 1.56, "learning_rate": 2.658028115138185e-07, "logits/chosen": -2.7908129692077637, "logits/rejected": -2.61263370513916, "logps/chosen": -641.1272583007812, "logps/rejected": -415.2239685058594, "loss": 0.457, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8691660165786743, "rewards/margins": 1.222374677658081, "rewards/rejected": -3.091540575027466, "step": 12120 }, { "epoch": 1.57, "learning_rate": 2.6556373720952467e-07, "logits/chosen": -2.7470171451568604, "logits/rejected": -2.6086556911468506, "logps/chosen": -604.8026733398438, "logps/rejected": -432.2822265625, "loss": 0.6479, "rewards/accuracies": 0.6875, "rewards/chosen": -2.121136426925659, "rewards/margins": 0.6807551383972168, "rewards/rejected": -2.801891565322876, "step": 12130 }, { "epoch": 1.57, "learning_rate": 2.653246629052309e-07, "logits/chosen": -2.6495189666748047, "logits/rejected": -2.5929741859436035, "logps/chosen": -527.6192626953125, "logps/rejected": -418.5760803222656, "loss": 0.5203, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.203660249710083, "rewards/margins": 0.8775337338447571, "rewards/rejected": -3.0811944007873535, "step": 12140 }, { "epoch": 1.57, "learning_rate": 2.6508558860093714e-07, "logits/chosen": -2.6639246940612793, "logits/rejected": -2.5830740928649902, "logps/chosen": -502.3204040527344, "logps/rejected": -411.83380126953125, "loss": 0.5562, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0493369102478027, "rewards/margins": 0.7241997718811035, "rewards/rejected": -2.7735366821289062, "step": 12150 }, { "epoch": 1.57, "learning_rate": 2.648465142966434e-07, "logits/chosen": -2.699286460876465, "logits/rejected": -2.5047004222869873, "logps/chosen": -544.3050537109375, "logps/rejected": -365.7315368652344, "loss": 0.6816, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2261528968811035, "rewards/margins": 0.5066218972206116, "rewards/rejected": -2.7327749729156494, "step": 12160 }, { "epoch": 1.57, "learning_rate": 2.646074399923496e-07, "logits/chosen": -2.724058151245117, "logits/rejected": -2.6293864250183105, "logps/chosen": -666.1663818359375, "logps/rejected": -485.3976135253906, "loss": 0.6366, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.256493330001831, "rewards/margins": 0.8770490884780884, "rewards/rejected": -3.133542537689209, "step": 12170 }, { "epoch": 1.57, "learning_rate": 2.643683656880558e-07, "logits/chosen": -2.6784989833831787, "logits/rejected": -2.513305902481079, "logps/chosen": -500.6884765625, "logps/rejected": -351.2707824707031, "loss": 0.5901, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2756919860839844, "rewards/margins": 0.7240462899208069, "rewards/rejected": -2.9997382164001465, "step": 12180 }, { "epoch": 1.57, "learning_rate": 2.6412929138376203e-07, "logits/chosen": -2.6834723949432373, "logits/rejected": -2.565974473953247, "logps/chosen": -542.7906494140625, "logps/rejected": -422.35552978515625, "loss": 0.6496, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4241111278533936, "rewards/margins": 0.6494240760803223, "rewards/rejected": -3.073535203933716, "step": 12190 }, { "epoch": 1.58, "learning_rate": 2.6389021707946827e-07, "logits/chosen": -2.732577085494995, "logits/rejected": -2.5750863552093506, "logps/chosen": -557.7155151367188, "logps/rejected": -394.00067138671875, "loss": 0.6227, "rewards/accuracies": 0.6875, "rewards/chosen": -2.123528480529785, "rewards/margins": 0.5861825942993164, "rewards/rejected": -2.7097108364105225, "step": 12200 }, { "epoch": 1.58, "learning_rate": 2.636511427751745e-07, "logits/chosen": -2.7252776622772217, "logits/rejected": -2.598353862762451, "logps/chosen": -561.7412719726562, "logps/rejected": -419.45245361328125, "loss": 0.5417, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0695393085479736, "rewards/margins": 0.8744803667068481, "rewards/rejected": -2.9440197944641113, "step": 12210 }, { "epoch": 1.58, "learning_rate": 2.634120684708807e-07, "logits/chosen": -2.62949800491333, "logits/rejected": -2.5175740718841553, "logps/chosen": -546.4586791992188, "logps/rejected": -418.736572265625, "loss": 0.6554, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.2530503273010254, "rewards/margins": 0.5217421054840088, "rewards/rejected": -2.774792194366455, "step": 12220 }, { "epoch": 1.58, "learning_rate": 2.631729941665869e-07, "logits/chosen": -2.8038852214813232, "logits/rejected": -2.6386196613311768, "logps/chosen": -535.0197143554688, "logps/rejected": -376.50982666015625, "loss": 0.6772, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.004904270172119, "rewards/margins": 0.5755138397216797, "rewards/rejected": -2.5804178714752197, "step": 12230 }, { "epoch": 1.58, "learning_rate": 2.6293391986229316e-07, "logits/chosen": -2.4898173809051514, "logits/rejected": -2.515876293182373, "logps/chosen": -424.81884765625, "logps/rejected": -415.26416015625, "loss": 0.6827, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.091580629348755, "rewards/margins": 0.466001033782959, "rewards/rejected": -2.557581663131714, "step": 12240 }, { "epoch": 1.58, "learning_rate": 2.6269484555799945e-07, "logits/chosen": -2.6897640228271484, "logits/rejected": -2.494393825531006, "logps/chosen": -548.6126708984375, "logps/rejected": -401.61602783203125, "loss": 0.5712, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1699726581573486, "rewards/margins": 0.8304969668388367, "rewards/rejected": -3.00046968460083, "step": 12250 }, { "epoch": 1.58, "learning_rate": 2.624557712537057e-07, "logits/chosen": -2.704810380935669, "logits/rejected": -2.611095905303955, "logps/chosen": -527.633056640625, "logps/rejected": -411.7967224121094, "loss": 0.6988, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3009915351867676, "rewards/margins": 0.484356552362442, "rewards/rejected": -2.785348415374756, "step": 12260 }, { "epoch": 1.58, "learning_rate": 2.6221669694941187e-07, "logits/chosen": -2.657951593399048, "logits/rejected": -2.5586516857147217, "logps/chosen": -580.74267578125, "logps/rejected": -411.084228515625, "loss": 0.6326, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.038695812225342, "rewards/margins": 0.73858243227005, "rewards/rejected": -2.777277946472168, "step": 12270 }, { "epoch": 1.59, "learning_rate": 2.619776226451181e-07, "logits/chosen": -2.6401314735412598, "logits/rejected": -2.4851372241973877, "logps/chosen": -567.6277465820312, "logps/rejected": -445.060791015625, "loss": 0.697, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3658833503723145, "rewards/margins": 0.5407739877700806, "rewards/rejected": -2.9066574573516846, "step": 12280 }, { "epoch": 1.59, "learning_rate": 2.6173854834082434e-07, "logits/chosen": -2.707425117492676, "logits/rejected": -2.507030963897705, "logps/chosen": -590.3570556640625, "logps/rejected": -433.48016357421875, "loss": 0.6249, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.8962514400482178, "rewards/margins": 0.7010520696640015, "rewards/rejected": -2.597303628921509, "step": 12290 }, { "epoch": 1.59, "learning_rate": 2.614994740365306e-07, "logits/chosen": -2.645195960998535, "logits/rejected": -2.538331985473633, "logps/chosen": -498.8971252441406, "logps/rejected": -377.4072570800781, "loss": 0.5445, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.8097435235977173, "rewards/margins": 0.9424184560775757, "rewards/rejected": -2.752161979675293, "step": 12300 }, { "epoch": 1.59, "learning_rate": 2.612603997322368e-07, "logits/chosen": -2.675628662109375, "logits/rejected": -2.6306378841400146, "logps/chosen": -516.1077880859375, "logps/rejected": -466.6620178222656, "loss": 0.5963, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.080993175506592, "rewards/margins": 0.9003957509994507, "rewards/rejected": -2.981388807296753, "step": 12310 }, { "epoch": 1.59, "learning_rate": 2.61021325427943e-07, "logits/chosen": -2.7297894954681396, "logits/rejected": -2.5776114463806152, "logps/chosen": -541.6709594726562, "logps/rejected": -425.0167541503906, "loss": 0.5179, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9382946491241455, "rewards/margins": 1.000984787940979, "rewards/rejected": -2.939279556274414, "step": 12320 }, { "epoch": 1.59, "learning_rate": 2.6078225112364923e-07, "logits/chosen": -2.5999715328216553, "logits/rejected": -2.5140082836151123, "logps/chosen": -508.959228515625, "logps/rejected": -390.6808776855469, "loss": 0.5572, "rewards/accuracies": 0.625, "rewards/chosen": -2.063354015350342, "rewards/margins": 0.7154747247695923, "rewards/rejected": -2.7788286209106445, "step": 12330 }, { "epoch": 1.59, "learning_rate": 2.6054317681935547e-07, "logits/chosen": -2.746579170227051, "logits/rejected": -2.516995906829834, "logps/chosen": -635.7223510742188, "logps/rejected": -471.71209716796875, "loss": 0.5625, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0263772010803223, "rewards/margins": 0.842599093914032, "rewards/rejected": -2.868976593017578, "step": 12340 }, { "epoch": 1.59, "learning_rate": 2.603041025150617e-07, "logits/chosen": -2.6552138328552246, "logits/rejected": -2.4888405799865723, "logps/chosen": -561.586669921875, "logps/rejected": -366.2642517089844, "loss": 0.5103, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8403304815292358, "rewards/margins": 1.0093520879745483, "rewards/rejected": -2.849682569503784, "step": 12350 }, { "epoch": 1.6, "learning_rate": 2.600650282107679e-07, "logits/chosen": -2.5801520347595215, "logits/rejected": -2.521721839904785, "logps/chosen": -497.6580505371094, "logps/rejected": -398.5681457519531, "loss": 0.5826, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.119966506958008, "rewards/margins": 0.702819287776947, "rewards/rejected": -2.8227858543395996, "step": 12360 }, { "epoch": 1.6, "learning_rate": 2.598259539064741e-07, "logits/chosen": -2.6797051429748535, "logits/rejected": -2.5421485900878906, "logps/chosen": -527.3258056640625, "logps/rejected": -387.408203125, "loss": 0.5471, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9917442798614502, "rewards/margins": 0.8378100395202637, "rewards/rejected": -2.829554319381714, "step": 12370 }, { "epoch": 1.6, "learning_rate": 2.5958687960218036e-07, "logits/chosen": -2.7934584617614746, "logits/rejected": -2.731083869934082, "logps/chosen": -597.9642944335938, "logps/rejected": -496.4839782714844, "loss": 0.6239, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0652897357940674, "rewards/margins": 0.8149319887161255, "rewards/rejected": -2.8802218437194824, "step": 12380 }, { "epoch": 1.6, "learning_rate": 2.593478052978866e-07, "logits/chosen": -2.7442333698272705, "logits/rejected": -2.5745139122009277, "logps/chosen": -623.8660278320312, "logps/rejected": -455.69976806640625, "loss": 0.5521, "rewards/accuracies": 0.75, "rewards/chosen": -1.9611940383911133, "rewards/margins": 0.9953511953353882, "rewards/rejected": -2.956545114517212, "step": 12390 }, { "epoch": 1.6, "learning_rate": 2.5910873099359283e-07, "logits/chosen": -2.6350574493408203, "logits/rejected": -2.605740785598755, "logps/chosen": -521.631591796875, "logps/rejected": -478.25213623046875, "loss": 0.651, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.265155792236328, "rewards/margins": 0.6436401605606079, "rewards/rejected": -2.9087958335876465, "step": 12400 }, { "epoch": 1.6, "learning_rate": 2.58869656689299e-07, "logits/chosen": -2.643038511276245, "logits/rejected": -2.614166736602783, "logps/chosen": -534.9050903320312, "logps/rejected": -472.9659729003906, "loss": 0.8938, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.5608112812042236, "rewards/margins": 0.13175955414772034, "rewards/rejected": -2.692570447921753, "step": 12410 }, { "epoch": 1.6, "learning_rate": 2.5863058238500525e-07, "logits/chosen": -2.6098690032958984, "logits/rejected": -2.5668892860412598, "logps/chosen": -465.08233642578125, "logps/rejected": -391.39508056640625, "loss": 0.5659, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0697906017303467, "rewards/margins": 0.6603742241859436, "rewards/rejected": -2.7301647663116455, "step": 12420 }, { "epoch": 1.6, "learning_rate": 2.583915080807115e-07, "logits/chosen": -2.7224068641662598, "logits/rejected": -2.5014076232910156, "logps/chosen": -541.765625, "logps/rejected": -384.0108337402344, "loss": 0.6314, "rewards/accuracies": 0.625, "rewards/chosen": -2.178027629852295, "rewards/margins": 0.5418356657028198, "rewards/rejected": -2.7198636531829834, "step": 12430 }, { "epoch": 1.61, "learning_rate": 2.581524337764177e-07, "logits/chosen": -2.6677422523498535, "logits/rejected": -2.54740571975708, "logps/chosen": -616.4039306640625, "logps/rejected": -520.4685668945312, "loss": 0.5296, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.106344223022461, "rewards/margins": 1.0864735841751099, "rewards/rejected": -3.1928181648254395, "step": 12440 }, { "epoch": 1.61, "learning_rate": 2.5791335947212396e-07, "logits/chosen": -2.7925517559051514, "logits/rejected": -2.635443687438965, "logps/chosen": -572.3385620117188, "logps/rejected": -470.35662841796875, "loss": 0.4593, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8095916509628296, "rewards/margins": 1.2342708110809326, "rewards/rejected": -3.043862819671631, "step": 12450 }, { "epoch": 1.61, "learning_rate": 2.5767428516783014e-07, "logits/chosen": -2.5528671741485596, "logits/rejected": -2.452746868133545, "logps/chosen": -534.0164794921875, "logps/rejected": -421.85821533203125, "loss": 0.8049, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4506072998046875, "rewards/margins": 0.3081907033920288, "rewards/rejected": -2.758798122406006, "step": 12460 }, { "epoch": 1.61, "learning_rate": 2.574352108635364e-07, "logits/chosen": -2.711775779724121, "logits/rejected": -2.5477380752563477, "logps/chosen": -547.9949951171875, "logps/rejected": -439.2520446777344, "loss": 0.6157, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1140949726104736, "rewards/margins": 0.758248507976532, "rewards/rejected": -2.8723435401916504, "step": 12470 }, { "epoch": 1.61, "learning_rate": 2.571961365592426e-07, "logits/chosen": -2.674387216567993, "logits/rejected": -2.5945065021514893, "logps/chosen": -580.5152587890625, "logps/rejected": -486.2855529785156, "loss": 0.537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.030757188796997, "rewards/margins": 0.8353780508041382, "rewards/rejected": -2.8661351203918457, "step": 12480 }, { "epoch": 1.61, "learning_rate": 2.5695706225494885e-07, "logits/chosen": -2.6239304542541504, "logits/rejected": -2.4748332500457764, "logps/chosen": -580.6300048828125, "logps/rejected": -419.58392333984375, "loss": 0.6295, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0314924716949463, "rewards/margins": 0.6594904065132141, "rewards/rejected": -2.6909828186035156, "step": 12490 }, { "epoch": 1.61, "learning_rate": 2.567179879506551e-07, "logits/chosen": -2.6527113914489746, "logits/rejected": -2.4850854873657227, "logps/chosen": -614.2982788085938, "logps/rejected": -467.08819580078125, "loss": 0.645, "rewards/accuracies": 0.6875, "rewards/chosen": -2.357635021209717, "rewards/margins": 0.6311579942703247, "rewards/rejected": -2.988792896270752, "step": 12500 }, { "epoch": 1.62, "learning_rate": 2.5647891364636127e-07, "logits/chosen": -2.7818899154663086, "logits/rejected": -2.627197265625, "logps/chosen": -603.3425903320312, "logps/rejected": -459.2454528808594, "loss": 0.5992, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.016525983810425, "rewards/margins": 0.83924800157547, "rewards/rejected": -2.855774164199829, "step": 12510 }, { "epoch": 1.62, "learning_rate": 2.562398393420675e-07, "logits/chosen": -2.738433361053467, "logits/rejected": -2.636167049407959, "logps/chosen": -511.69073486328125, "logps/rejected": -381.7633361816406, "loss": 0.601, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.073178291320801, "rewards/margins": 0.7330199480056763, "rewards/rejected": -2.8061981201171875, "step": 12520 }, { "epoch": 1.62, "learning_rate": 2.5600076503777374e-07, "logits/chosen": -2.684455394744873, "logits/rejected": -2.524665594100952, "logps/chosen": -578.0045166015625, "logps/rejected": -437.93572998046875, "loss": 0.5794, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1796088218688965, "rewards/margins": 0.8749157190322876, "rewards/rejected": -3.0545248985290527, "step": 12530 }, { "epoch": 1.62, "learning_rate": 2.5576169073347997e-07, "logits/chosen": -2.588996410369873, "logits/rejected": -2.4805023670196533, "logps/chosen": -479.2840881347656, "logps/rejected": -434.78125, "loss": 0.6293, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.155663013458252, "rewards/margins": 0.7007865309715271, "rewards/rejected": -2.8564493656158447, "step": 12540 }, { "epoch": 1.62, "learning_rate": 2.5552261642918616e-07, "logits/chosen": -2.595221996307373, "logits/rejected": -2.5961127281188965, "logps/chosen": -464.0071716308594, "logps/rejected": -464.3616638183594, "loss": 0.6347, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0566248893737793, "rewards/margins": 0.7470012307167053, "rewards/rejected": -2.80362606048584, "step": 12550 }, { "epoch": 1.62, "learning_rate": 2.552835421248924e-07, "logits/chosen": -2.6586806774139404, "logits/rejected": -2.483435869216919, "logps/chosen": -613.3964233398438, "logps/rejected": -464.9542541503906, "loss": 0.6264, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.325207233428955, "rewards/margins": 0.7516292929649353, "rewards/rejected": -3.076836347579956, "step": 12560 }, { "epoch": 1.62, "learning_rate": 2.5504446782059863e-07, "logits/chosen": -2.7129573822021484, "logits/rejected": -2.510357618331909, "logps/chosen": -623.711181640625, "logps/rejected": -492.81915283203125, "loss": 0.6405, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2353968620300293, "rewards/margins": 0.8035660982131958, "rewards/rejected": -3.0389628410339355, "step": 12570 }, { "epoch": 1.62, "learning_rate": 2.5480539351630486e-07, "logits/chosen": -2.6941168308258057, "logits/rejected": -2.569653034210205, "logps/chosen": -504.8997497558594, "logps/rejected": -380.450927734375, "loss": 0.57, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9702497720718384, "rewards/margins": 0.8425014615058899, "rewards/rejected": -2.812751054763794, "step": 12580 }, { "epoch": 1.63, "learning_rate": 2.545663192120111e-07, "logits/chosen": -2.5715160369873047, "logits/rejected": -2.4965744018554688, "logps/chosen": -564.6146240234375, "logps/rejected": -474.66912841796875, "loss": 0.5457, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.077573299407959, "rewards/margins": 0.7869274616241455, "rewards/rejected": -2.8645007610321045, "step": 12590 }, { "epoch": 1.63, "learning_rate": 2.543272449077173e-07, "logits/chosen": -2.641329765319824, "logits/rejected": -2.546948194503784, "logps/chosen": -481.76373291015625, "logps/rejected": -429.58392333984375, "loss": 0.5746, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9554258584976196, "rewards/margins": 0.7561285495758057, "rewards/rejected": -2.7115542888641357, "step": 12600 }, { "epoch": 1.63, "learning_rate": 2.540881706034235e-07, "logits/chosen": -2.664785623550415, "logits/rejected": -2.4803595542907715, "logps/chosen": -612.4119873046875, "logps/rejected": -454.088623046875, "loss": 0.5457, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0486080646514893, "rewards/margins": 0.9761778116226196, "rewards/rejected": -3.0247855186462402, "step": 12610 }, { "epoch": 1.63, "learning_rate": 2.5384909629912975e-07, "logits/chosen": -2.8491642475128174, "logits/rejected": -2.597351551055908, "logps/chosen": -627.7946166992188, "logps/rejected": -420.68597412109375, "loss": 0.5181, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0108184814453125, "rewards/margins": 1.07212233543396, "rewards/rejected": -3.0829405784606934, "step": 12620 }, { "epoch": 1.63, "learning_rate": 2.53610021994836e-07, "logits/chosen": -2.616333484649658, "logits/rejected": -2.580686092376709, "logps/chosen": -510.58447265625, "logps/rejected": -392.7334289550781, "loss": 0.652, "rewards/accuracies": 0.625, "rewards/chosen": -2.0972468852996826, "rewards/margins": 0.5475118160247803, "rewards/rejected": -2.644758939743042, "step": 12630 }, { "epoch": 1.63, "learning_rate": 2.533709476905422e-07, "logits/chosen": -2.6136341094970703, "logits/rejected": -2.4755759239196777, "logps/chosen": -598.8626708984375, "logps/rejected": -460.9109802246094, "loss": 0.4619, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9104877710342407, "rewards/margins": 1.1962916851043701, "rewards/rejected": -3.1067793369293213, "step": 12640 }, { "epoch": 1.63, "learning_rate": 2.531318733862484e-07, "logits/chosen": -2.787238836288452, "logits/rejected": -2.6952383518218994, "logps/chosen": -523.99853515625, "logps/rejected": -455.23046875, "loss": 0.5876, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9490394592285156, "rewards/margins": 0.8089573979377747, "rewards/rejected": -2.7579963207244873, "step": 12650 }, { "epoch": 1.63, "learning_rate": 2.5289279908195465e-07, "logits/chosen": -2.6805052757263184, "logits/rejected": -2.562460422515869, "logps/chosen": -508.2455139160156, "logps/rejected": -389.1187744140625, "loss": 0.6498, "rewards/accuracies": 0.6875, "rewards/chosen": -2.129697799682617, "rewards/margins": 0.5650705099105835, "rewards/rejected": -2.694768190383911, "step": 12660 }, { "epoch": 1.64, "learning_rate": 2.526537247776609e-07, "logits/chosen": -2.7713146209716797, "logits/rejected": -2.5526223182678223, "logps/chosen": -610.9960327148438, "logps/rejected": -427.9396057128906, "loss": 0.6687, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2807412147521973, "rewards/margins": 0.5456690788269043, "rewards/rejected": -2.8264102935791016, "step": 12670 }, { "epoch": 1.64, "learning_rate": 2.524146504733671e-07, "logits/chosen": -2.61008882522583, "logits/rejected": -2.536198377609253, "logps/chosen": -498.804931640625, "logps/rejected": -403.7704162597656, "loss": 0.4737, "rewards/accuracies": 0.75, "rewards/chosen": -1.8021876811981201, "rewards/margins": 1.1653025150299072, "rewards/rejected": -2.9674899578094482, "step": 12680 }, { "epoch": 1.64, "learning_rate": 2.521755761690733e-07, "logits/chosen": -2.709280490875244, "logits/rejected": -2.4931302070617676, "logps/chosen": -611.9619140625, "logps/rejected": -424.64111328125, "loss": 0.7679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.442802906036377, "rewards/margins": 0.4783322215080261, "rewards/rejected": -2.921135187149048, "step": 12690 }, { "epoch": 1.64, "learning_rate": 2.5193650186477954e-07, "logits/chosen": -2.682664632797241, "logits/rejected": -2.6622745990753174, "logps/chosen": -557.9137573242188, "logps/rejected": -554.4412841796875, "loss": 0.6523, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.4130873680114746, "rewards/margins": 0.535050094127655, "rewards/rejected": -2.9481377601623535, "step": 12700 }, { "epoch": 1.64, "learning_rate": 2.5169742756048577e-07, "logits/chosen": -2.589298725128174, "logits/rejected": -2.5748531818389893, "logps/chosen": -525.4495849609375, "logps/rejected": -413.4991149902344, "loss": 0.7115, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.3894078731536865, "rewards/margins": 0.6465499997138977, "rewards/rejected": -3.0359580516815186, "step": 12710 }, { "epoch": 1.64, "learning_rate": 2.51458353256192e-07, "logits/chosen": -2.7395083904266357, "logits/rejected": -2.6462550163269043, "logps/chosen": -604.2825927734375, "logps/rejected": -489.14703369140625, "loss": 0.6572, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2456681728363037, "rewards/margins": 0.6195296049118042, "rewards/rejected": -2.8651976585388184, "step": 12720 }, { "epoch": 1.64, "learning_rate": 2.5121927895189824e-07, "logits/chosen": -2.6717209815979004, "logits/rejected": -2.618986129760742, "logps/chosen": -596.3748779296875, "logps/rejected": -477.9872131347656, "loss": 0.7197, "rewards/accuracies": 0.625, "rewards/chosen": -2.2366151809692383, "rewards/margins": 0.5602022409439087, "rewards/rejected": -2.7968173027038574, "step": 12730 }, { "epoch": 1.64, "learning_rate": 2.5098020464760443e-07, "logits/chosen": -2.665523052215576, "logits/rejected": -2.576829433441162, "logps/chosen": -614.5711669921875, "logps/rejected": -489.123291015625, "loss": 0.5668, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1307449340820312, "rewards/margins": 0.782673716545105, "rewards/rejected": -2.9134185314178467, "step": 12740 }, { "epoch": 1.65, "learning_rate": 2.5074113034331066e-07, "logits/chosen": -2.7579445838928223, "logits/rejected": -2.5467472076416016, "logps/chosen": -650.5723876953125, "logps/rejected": -439.9810485839844, "loss": 0.6117, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3482494354248047, "rewards/margins": 0.839749813079834, "rewards/rejected": -3.1879992485046387, "step": 12750 }, { "epoch": 1.65, "learning_rate": 2.505020560390169e-07, "logits/chosen": -2.759192705154419, "logits/rejected": -2.61413836479187, "logps/chosen": -617.0782470703125, "logps/rejected": -461.026611328125, "loss": 0.5874, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1677517890930176, "rewards/margins": 0.8786371350288391, "rewards/rejected": -3.046388864517212, "step": 12760 }, { "epoch": 1.65, "learning_rate": 2.5026298173472313e-07, "logits/chosen": -2.9002203941345215, "logits/rejected": -2.7623450756073, "logps/chosen": -596.36669921875, "logps/rejected": -460.91259765625, "loss": 0.7013, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0314574241638184, "rewards/margins": 0.7234105467796326, "rewards/rejected": -2.7548680305480957, "step": 12770 }, { "epoch": 1.65, "learning_rate": 2.5002390743042937e-07, "logits/chosen": -2.5416481494903564, "logits/rejected": -2.4672303199768066, "logps/chosen": -500.320556640625, "logps/rejected": -465.0869140625, "loss": 0.6706, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1771559715270996, "rewards/margins": 0.4950490891933441, "rewards/rejected": -2.6722054481506348, "step": 12780 }, { "epoch": 1.65, "learning_rate": 2.497848331261356e-07, "logits/chosen": -2.6704535484313965, "logits/rejected": -2.6056008338928223, "logps/chosen": -536.2022705078125, "logps/rejected": -425.7822265625, "loss": 0.6338, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9949554204940796, "rewards/margins": 0.7239019870758057, "rewards/rejected": -2.7188572883605957, "step": 12790 }, { "epoch": 1.65, "learning_rate": 2.495457588218418e-07, "logits/chosen": -2.7778658866882324, "logits/rejected": -2.6461668014526367, "logps/chosen": -503.3868103027344, "logps/rejected": -435.6620178222656, "loss": 0.5475, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0353198051452637, "rewards/margins": 0.8872072100639343, "rewards/rejected": -2.9225268363952637, "step": 12800 }, { "epoch": 1.65, "learning_rate": 2.49306684517548e-07, "logits/chosen": -2.7304420471191406, "logits/rejected": -2.646944284439087, "logps/chosen": -528.7200927734375, "logps/rejected": -398.146240234375, "loss": 0.5712, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9074207544326782, "rewards/margins": 0.7927961349487305, "rewards/rejected": -2.700216770172119, "step": 12810 }, { "epoch": 1.66, "learning_rate": 2.4906761021325426e-07, "logits/chosen": -2.774287462234497, "logits/rejected": -2.611701250076294, "logps/chosen": -592.0068359375, "logps/rejected": -453.9798889160156, "loss": 0.6752, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0031471252441406, "rewards/margins": 0.6041911840438843, "rewards/rejected": -2.6073384284973145, "step": 12820 }, { "epoch": 1.66, "learning_rate": 2.488285359089605e-07, "logits/chosen": -2.7329814434051514, "logits/rejected": -2.6699531078338623, "logps/chosen": -546.6734619140625, "logps/rejected": -444.79241943359375, "loss": 0.6004, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1036906242370605, "rewards/margins": 0.8937233090400696, "rewards/rejected": -2.9974138736724854, "step": 12830 }, { "epoch": 1.66, "learning_rate": 2.485894616046667e-07, "logits/chosen": -2.775211811065674, "logits/rejected": -2.4701457023620605, "logps/chosen": -686.0325927734375, "logps/rejected": -384.564697265625, "loss": 0.6993, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.218986749649048, "rewards/margins": 0.5561274886131287, "rewards/rejected": -2.7751142978668213, "step": 12840 }, { "epoch": 1.66, "learning_rate": 2.4835038730037297e-07, "logits/chosen": -2.643749713897705, "logits/rejected": -2.5661251544952393, "logps/chosen": -594.7081298828125, "logps/rejected": -500.13092041015625, "loss": 0.5926, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0482964515686035, "rewards/margins": 0.8614327311515808, "rewards/rejected": -2.909728765487671, "step": 12850 }, { "epoch": 1.66, "learning_rate": 2.481113129960792e-07, "logits/chosen": -2.7485244274139404, "logits/rejected": -2.5680630207061768, "logps/chosen": -643.3986206054688, "logps/rejected": -449.6837463378906, "loss": 0.5892, "rewards/accuracies": 0.6875, "rewards/chosen": -2.21763277053833, "rewards/margins": 0.9364809989929199, "rewards/rejected": -3.15411376953125, "step": 12860 }, { "epoch": 1.66, "learning_rate": 2.478722386917854e-07, "logits/chosen": -2.5693256855010986, "logits/rejected": -2.540663957595825, "logps/chosen": -509.9010314941406, "logps/rejected": -456.16278076171875, "loss": 0.6012, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1444997787475586, "rewards/margins": 0.8640500903129578, "rewards/rejected": -3.0085501670837402, "step": 12870 }, { "epoch": 1.66, "learning_rate": 2.476331643874916e-07, "logits/chosen": -2.744330883026123, "logits/rejected": -2.6088156700134277, "logps/chosen": -581.7527465820312, "logps/rejected": -417.232177734375, "loss": 0.5734, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2427124977111816, "rewards/margins": 0.709011435508728, "rewards/rejected": -2.951724052429199, "step": 12880 }, { "epoch": 1.66, "learning_rate": 2.4739409008319786e-07, "logits/chosen": -2.7407164573669434, "logits/rejected": -2.6677706241607666, "logps/chosen": -493.47119140625, "logps/rejected": -399.78863525390625, "loss": 0.6732, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.099095582962036, "rewards/margins": 0.5177613496780396, "rewards/rejected": -2.616856813430786, "step": 12890 }, { "epoch": 1.67, "learning_rate": 2.471550157789041e-07, "logits/chosen": -2.7105624675750732, "logits/rejected": -2.5474693775177, "logps/chosen": -616.82373046875, "logps/rejected": -509.306396484375, "loss": 0.6564, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.963193655014038, "rewards/margins": 0.6496317982673645, "rewards/rejected": -2.612825870513916, "step": 12900 }, { "epoch": 1.67, "learning_rate": 2.469159414746103e-07, "logits/chosen": -2.721980571746826, "logits/rejected": -2.6045384407043457, "logps/chosen": -608.2617797851562, "logps/rejected": -466.11370849609375, "loss": 0.5908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1671409606933594, "rewards/margins": 0.8796916007995605, "rewards/rejected": -3.04683256149292, "step": 12910 }, { "epoch": 1.67, "learning_rate": 2.466768671703165e-07, "logits/chosen": -2.61198091506958, "logits/rejected": -2.5694143772125244, "logps/chosen": -468.5250549316406, "logps/rejected": -394.21240234375, "loss": 0.6222, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.117790699005127, "rewards/margins": 0.5856410264968872, "rewards/rejected": -2.7034318447113037, "step": 12920 }, { "epoch": 1.67, "learning_rate": 2.4643779286602275e-07, "logits/chosen": -2.7532317638397217, "logits/rejected": -2.6492514610290527, "logps/chosen": -525.7278442382812, "logps/rejected": -453.96441650390625, "loss": 0.5824, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2550127506256104, "rewards/margins": 0.7526522874832153, "rewards/rejected": -3.0076651573181152, "step": 12930 }, { "epoch": 1.67, "learning_rate": 2.46198718561729e-07, "logits/chosen": -2.7362446784973145, "logits/rejected": -2.573732852935791, "logps/chosen": -535.3280029296875, "logps/rejected": -427.3916015625, "loss": 0.6915, "rewards/accuracies": 0.625, "rewards/chosen": -2.39396333694458, "rewards/margins": 0.6554819345474243, "rewards/rejected": -3.0494449138641357, "step": 12940 }, { "epoch": 1.67, "learning_rate": 2.459596442574352e-07, "logits/chosen": -2.8292860984802246, "logits/rejected": -2.5949769020080566, "logps/chosen": -705.340087890625, "logps/rejected": -455.25506591796875, "loss": 0.6206, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.406667709350586, "rewards/margins": 0.6894325017929077, "rewards/rejected": -3.096100330352783, "step": 12950 }, { "epoch": 1.67, "learning_rate": 2.457205699531414e-07, "logits/chosen": -2.7195868492126465, "logits/rejected": -2.630826950073242, "logps/chosen": -601.6151123046875, "logps/rejected": -483.8651428222656, "loss": 0.684, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3945891857147217, "rewards/margins": 0.4696744382381439, "rewards/rejected": -2.8642640113830566, "step": 12960 }, { "epoch": 1.67, "learning_rate": 2.4548149564884764e-07, "logits/chosen": -2.574599027633667, "logits/rejected": -2.505826473236084, "logps/chosen": -501.8545837402344, "logps/rejected": -417.154052734375, "loss": 0.5988, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0103583335876465, "rewards/margins": 0.7098949551582336, "rewards/rejected": -2.7202532291412354, "step": 12970 }, { "epoch": 1.68, "learning_rate": 2.452424213445539e-07, "logits/chosen": -2.6220269203186035, "logits/rejected": -2.5069034099578857, "logps/chosen": -584.9295043945312, "logps/rejected": -420.13653564453125, "loss": 0.6447, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.189301013946533, "rewards/margins": 0.5566642880439758, "rewards/rejected": -2.7459654808044434, "step": 12980 }, { "epoch": 1.68, "learning_rate": 2.450033470402601e-07, "logits/chosen": -2.7034378051757812, "logits/rejected": -2.6338999271392822, "logps/chosen": -620.224365234375, "logps/rejected": -468.98291015625, "loss": 0.6119, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.349459648132324, "rewards/margins": 0.7583855986595154, "rewards/rejected": -3.1078450679779053, "step": 12990 }, { "epoch": 1.68, "learning_rate": 2.4476427273596635e-07, "logits/chosen": -2.814535617828369, "logits/rejected": -2.664177417755127, "logps/chosen": -546.1422729492188, "logps/rejected": -389.76043701171875, "loss": 0.6388, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.3594861030578613, "rewards/margins": 0.627895176410675, "rewards/rejected": -2.9873807430267334, "step": 13000 }, { "epoch": 1.68, "eval_logits/chosen": -3.0471715927124023, "eval_logits/rejected": -2.983360767364502, "eval_logps/chosen": -541.1925048828125, "eval_logps/rejected": -421.14447021484375, "eval_loss": 0.6220831274986267, "eval_rewards/accuracies": 0.6735000014305115, "eval_rewards/chosen": -0.9372793436050415, "eval_rewards/margins": 1.084301471710205, "eval_rewards/rejected": -2.021580934524536, "eval_runtime": 279.3683, "eval_samples_per_second": 7.159, "eval_steps_per_second": 3.58, "step": 13000 }, { "epoch": 1.68, "learning_rate": 2.4452519843167253e-07, "logits/chosen": -2.843116521835327, "logits/rejected": -2.731076717376709, "logps/chosen": -533.9647216796875, "logps/rejected": -464.26763916015625, "loss": 0.5592, "rewards/accuracies": 0.75, "rewards/chosen": -1.981142282485962, "rewards/margins": 0.9119982719421387, "rewards/rejected": -2.8931400775909424, "step": 13010 }, { "epoch": 1.68, "learning_rate": 2.4428612412737877e-07, "logits/chosen": -2.743621587753296, "logits/rejected": -2.7397077083587646, "logps/chosen": -514.5599365234375, "logps/rejected": -464.5299377441406, "loss": 0.5992, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1455416679382324, "rewards/margins": 0.7247432470321655, "rewards/rejected": -2.8702852725982666, "step": 13020 }, { "epoch": 1.68, "learning_rate": 2.44047049823085e-07, "logits/chosen": -2.727898120880127, "logits/rejected": -2.59954833984375, "logps/chosen": -504.9842834472656, "logps/rejected": -389.328857421875, "loss": 0.6611, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0127878189086914, "rewards/margins": 0.6038355827331543, "rewards/rejected": -2.6166234016418457, "step": 13030 }, { "epoch": 1.68, "learning_rate": 2.4380797551879124e-07, "logits/chosen": -2.679253101348877, "logits/rejected": -2.6497573852539062, "logps/chosen": -562.8079833984375, "logps/rejected": -507.3871154785156, "loss": 0.722, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.2991585731506348, "rewards/margins": 0.4847885072231293, "rewards/rejected": -2.783946990966797, "step": 13040 }, { "epoch": 1.68, "learning_rate": 2.435689012144975e-07, "logits/chosen": -2.7283120155334473, "logits/rejected": -2.561873197555542, "logps/chosen": -632.6639404296875, "logps/rejected": -502.22900390625, "loss": 0.6807, "rewards/accuracies": 0.6875, "rewards/chosen": -2.23158597946167, "rewards/margins": 0.6917856931686401, "rewards/rejected": -2.9233717918395996, "step": 13050 }, { "epoch": 1.69, "learning_rate": 2.4332982691020366e-07, "logits/chosen": -2.7502338886260986, "logits/rejected": -2.6497514247894287, "logps/chosen": -532.8193359375, "logps/rejected": -423.283203125, "loss": 0.5237, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1568961143493652, "rewards/margins": 0.9594618082046509, "rewards/rejected": -3.1163575649261475, "step": 13060 }, { "epoch": 1.69, "learning_rate": 2.430907526059099e-07, "logits/chosen": -2.6896800994873047, "logits/rejected": -2.549112319946289, "logps/chosen": -584.4795532226562, "logps/rejected": -417.72186279296875, "loss": 0.484, "rewards/accuracies": 0.75, "rewards/chosen": -2.026007890701294, "rewards/margins": 0.9636082649230957, "rewards/rejected": -2.9896161556243896, "step": 13070 }, { "epoch": 1.69, "learning_rate": 2.4285167830161613e-07, "logits/chosen": -2.7745792865753174, "logits/rejected": -2.6359264850616455, "logps/chosen": -597.7738647460938, "logps/rejected": -436.399658203125, "loss": 0.6335, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.110456943511963, "rewards/margins": 0.7958669066429138, "rewards/rejected": -2.9063236713409424, "step": 13080 }, { "epoch": 1.69, "learning_rate": 2.4261260399732237e-07, "logits/chosen": -2.6012024879455566, "logits/rejected": -2.5517616271972656, "logps/chosen": -543.3233642578125, "logps/rejected": -493.2937927246094, "loss": 0.6129, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9512264728546143, "rewards/margins": 0.7856655716896057, "rewards/rejected": -2.736891984939575, "step": 13090 }, { "epoch": 1.69, "learning_rate": 2.4237352969302855e-07, "logits/chosen": -2.7756009101867676, "logits/rejected": -2.569042921066284, "logps/chosen": -537.9466552734375, "logps/rejected": -386.5289001464844, "loss": 0.5542, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.381197452545166, "rewards/margins": 0.7127641439437866, "rewards/rejected": -3.093961715698242, "step": 13100 }, { "epoch": 1.69, "learning_rate": 2.421344553887348e-07, "logits/chosen": -2.734475612640381, "logits/rejected": -2.5291848182678223, "logps/chosen": -596.4717407226562, "logps/rejected": -402.29583740234375, "loss": 0.664, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.170060873031616, "rewards/margins": 0.5305428504943848, "rewards/rejected": -2.700603723526001, "step": 13110 }, { "epoch": 1.69, "learning_rate": 2.41895381084441e-07, "logits/chosen": -2.7003941535949707, "logits/rejected": -2.556161642074585, "logps/chosen": -516.8034057617188, "logps/rejected": -369.73858642578125, "loss": 0.6717, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4045448303222656, "rewards/margins": 0.6214269399642944, "rewards/rejected": -3.0259718894958496, "step": 13120 }, { "epoch": 1.7, "learning_rate": 2.4165630678014726e-07, "logits/chosen": -2.7716240882873535, "logits/rejected": -2.626709222793579, "logps/chosen": -577.8142700195312, "logps/rejected": -428.731689453125, "loss": 0.6144, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.489920139312744, "rewards/margins": 0.6478891372680664, "rewards/rejected": -3.1378092765808105, "step": 13130 }, { "epoch": 1.7, "learning_rate": 2.414172324758535e-07, "logits/chosen": -2.705998182296753, "logits/rejected": -2.6658458709716797, "logps/chosen": -519.4610595703125, "logps/rejected": -511.55206298828125, "loss": 0.6741, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3691277503967285, "rewards/margins": 0.5616040229797363, "rewards/rejected": -2.930731773376465, "step": 13140 }, { "epoch": 1.7, "learning_rate": 2.4117815817155973e-07, "logits/chosen": -2.6685173511505127, "logits/rejected": -2.5856480598449707, "logps/chosen": -474.2044372558594, "logps/rejected": -385.66949462890625, "loss": 0.5859, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9904807806015015, "rewards/margins": 0.7908818125724792, "rewards/rejected": -2.781362295150757, "step": 13150 }, { "epoch": 1.7, "learning_rate": 2.4093908386726596e-07, "logits/chosen": -2.5747313499450684, "logits/rejected": -2.555793285369873, "logps/chosen": -544.0556640625, "logps/rejected": -436.11785888671875, "loss": 0.6168, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3406262397766113, "rewards/margins": 0.6785711050033569, "rewards/rejected": -3.0191972255706787, "step": 13160 }, { "epoch": 1.7, "learning_rate": 2.4070000956297215e-07, "logits/chosen": -2.736794948577881, "logits/rejected": -2.6295905113220215, "logps/chosen": -527.4315185546875, "logps/rejected": -381.6240234375, "loss": 0.5825, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1382105350494385, "rewards/margins": 0.6214796900749207, "rewards/rejected": -2.759690046310425, "step": 13170 }, { "epoch": 1.7, "learning_rate": 2.404609352586784e-07, "logits/chosen": -2.7135119438171387, "logits/rejected": -2.5713953971862793, "logps/chosen": -566.9525756835938, "logps/rejected": -405.99481201171875, "loss": 0.6148, "rewards/accuracies": 0.6875, "rewards/chosen": -2.197317600250244, "rewards/margins": 0.8039563894271851, "rewards/rejected": -3.0012741088867188, "step": 13180 }, { "epoch": 1.7, "learning_rate": 2.402218609543846e-07, "logits/chosen": -2.683917284011841, "logits/rejected": -2.600151300430298, "logps/chosen": -569.1538696289062, "logps/rejected": -474.6814880371094, "loss": 0.6139, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.255431652069092, "rewards/margins": 0.7447786331176758, "rewards/rejected": -3.0002105236053467, "step": 13190 }, { "epoch": 1.7, "learning_rate": 2.3998278665009086e-07, "logits/chosen": -2.697774648666382, "logits/rejected": -2.6158947944641113, "logps/chosen": -617.1314697265625, "logps/rejected": -496.3955993652344, "loss": 0.6224, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.039428234100342, "rewards/margins": 0.8512946367263794, "rewards/rejected": -2.8907229900360107, "step": 13200 }, { "epoch": 1.71, "learning_rate": 2.397437123457971e-07, "logits/chosen": -2.674844741821289, "logits/rejected": -2.5379369258880615, "logps/chosen": -532.41357421875, "logps/rejected": -378.4986267089844, "loss": 0.5617, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9153226613998413, "rewards/margins": 0.8890944719314575, "rewards/rejected": -2.804417371749878, "step": 13210 }, { "epoch": 1.71, "learning_rate": 2.395046380415033e-07, "logits/chosen": -2.6186447143554688, "logits/rejected": -2.6513314247131348, "logps/chosen": -480.7500915527344, "logps/rejected": -463.31475830078125, "loss": 0.6021, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0490081310272217, "rewards/margins": 0.7150495648384094, "rewards/rejected": -2.7640576362609863, "step": 13220 }, { "epoch": 1.71, "learning_rate": 2.392655637372095e-07, "logits/chosen": -2.560105800628662, "logits/rejected": -2.4664204120635986, "logps/chosen": -552.4951782226562, "logps/rejected": -475.56927490234375, "loss": 0.4666, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8890094757080078, "rewards/margins": 1.1056289672851562, "rewards/rejected": -2.994638442993164, "step": 13230 }, { "epoch": 1.71, "learning_rate": 2.3902648943291575e-07, "logits/chosen": -2.7276668548583984, "logits/rejected": -2.5828728675842285, "logps/chosen": -523.1748046875, "logps/rejected": -440.8168029785156, "loss": 0.6926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1104824542999268, "rewards/margins": 0.4573639929294586, "rewards/rejected": -2.5678462982177734, "step": 13240 }, { "epoch": 1.71, "learning_rate": 2.38787415128622e-07, "logits/chosen": -2.720494031906128, "logits/rejected": -2.5133118629455566, "logps/chosen": -564.2581787109375, "logps/rejected": -407.7645263671875, "loss": 0.6751, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.4274096488952637, "rewards/margins": 0.42458242177963257, "rewards/rejected": -2.851992130279541, "step": 13250 }, { "epoch": 1.71, "learning_rate": 2.385483408243282e-07, "logits/chosen": -2.609114408493042, "logits/rejected": -2.5369973182678223, "logps/chosen": -502.57183837890625, "logps/rejected": -427.18218994140625, "loss": 0.6448, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1333978176116943, "rewards/margins": 0.6157571077346802, "rewards/rejected": -2.749155044555664, "step": 13260 }, { "epoch": 1.71, "learning_rate": 2.383092665200344e-07, "logits/chosen": -2.6763968467712402, "logits/rejected": -2.536553382873535, "logps/chosen": -524.9608154296875, "logps/rejected": -364.05609130859375, "loss": 0.6681, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.245157480239868, "rewards/margins": 0.6559170484542847, "rewards/rejected": -2.9010744094848633, "step": 13270 }, { "epoch": 1.71, "learning_rate": 2.3807019221574066e-07, "logits/chosen": -2.6831159591674805, "logits/rejected": -2.5590786933898926, "logps/chosen": -557.473876953125, "logps/rejected": -395.73150634765625, "loss": 0.5716, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.031392812728882, "rewards/margins": 0.8719221949577332, "rewards/rejected": -2.9033148288726807, "step": 13280 }, { "epoch": 1.72, "learning_rate": 2.3783111791144687e-07, "logits/chosen": -2.770077705383301, "logits/rejected": -2.621605634689331, "logps/chosen": -568.882568359375, "logps/rejected": -419.2613830566406, "loss": 0.6499, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1732914447784424, "rewards/margins": 0.7103427648544312, "rewards/rejected": -2.883634328842163, "step": 13290 }, { "epoch": 1.72, "learning_rate": 2.375920436071531e-07, "logits/chosen": -2.755906581878662, "logits/rejected": -2.71860671043396, "logps/chosen": -483.71722412109375, "logps/rejected": -409.4564514160156, "loss": 0.6306, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.063603162765503, "rewards/margins": 0.47505220770835876, "rewards/rejected": -2.5386555194854736, "step": 13300 }, { "epoch": 1.72, "learning_rate": 2.3735296930285932e-07, "logits/chosen": -2.612428665161133, "logits/rejected": -2.467618942260742, "logps/chosen": -528.1998291015625, "logps/rejected": -421.1888122558594, "loss": 0.6074, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1029322147369385, "rewards/margins": 0.7280167937278748, "rewards/rejected": -2.830949068069458, "step": 13310 }, { "epoch": 1.72, "learning_rate": 2.3711389499856555e-07, "logits/chosen": -2.7417211532592773, "logits/rejected": -2.5751681327819824, "logps/chosen": -677.2390747070312, "logps/rejected": -517.8243408203125, "loss": 0.666, "rewards/accuracies": 0.625, "rewards/chosen": -2.214176893234253, "rewards/margins": 0.6083287000656128, "rewards/rejected": -2.822505474090576, "step": 13320 }, { "epoch": 1.72, "learning_rate": 2.368748206942718e-07, "logits/chosen": -2.7710797786712646, "logits/rejected": -2.660679340362549, "logps/chosen": -597.0963134765625, "logps/rejected": -472.84326171875, "loss": 0.5696, "rewards/accuracies": 0.75, "rewards/chosen": -2.0586934089660645, "rewards/margins": 0.8770779371261597, "rewards/rejected": -2.9357714653015137, "step": 13330 }, { "epoch": 1.72, "learning_rate": 2.36635746389978e-07, "logits/chosen": -2.854801893234253, "logits/rejected": -2.6773927211761475, "logps/chosen": -558.3410034179688, "logps/rejected": -418.6270446777344, "loss": 0.6654, "rewards/accuracies": 0.625, "rewards/chosen": -2.2379488945007324, "rewards/margins": 0.5865924954414368, "rewards/rejected": -2.8245418071746826, "step": 13340 }, { "epoch": 1.72, "learning_rate": 2.3639667208568424e-07, "logits/chosen": -2.6802313327789307, "logits/rejected": -2.558772563934326, "logps/chosen": -601.0579833984375, "logps/rejected": -477.13250732421875, "loss": 0.616, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0138840675354004, "rewards/margins": 0.7862241864204407, "rewards/rejected": -2.8001084327697754, "step": 13350 }, { "epoch": 1.72, "learning_rate": 2.3615759778139044e-07, "logits/chosen": -2.7709217071533203, "logits/rejected": -2.6902294158935547, "logps/chosen": -530.9627685546875, "logps/rejected": -484.16796875, "loss": 0.5312, "rewards/accuracies": 0.75, "rewards/chosen": -1.7425416707992554, "rewards/margins": 1.0803492069244385, "rewards/rejected": -2.8228909969329834, "step": 13360 }, { "epoch": 1.73, "learning_rate": 2.3591852347709668e-07, "logits/chosen": -2.7074718475341797, "logits/rejected": -2.660646915435791, "logps/chosen": -441.0941467285156, "logps/rejected": -357.1502380371094, "loss": 0.6398, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9632179737091064, "rewards/margins": 0.5784570574760437, "rewards/rejected": -2.541675090789795, "step": 13370 }, { "epoch": 1.73, "learning_rate": 2.356794491728029e-07, "logits/chosen": -2.780268669128418, "logits/rejected": -2.621293306350708, "logps/chosen": -610.0377807617188, "logps/rejected": -426.15142822265625, "loss": 0.6258, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0230565071105957, "rewards/margins": 0.705403208732605, "rewards/rejected": -2.728459596633911, "step": 13380 }, { "epoch": 1.73, "learning_rate": 2.3544037486850913e-07, "logits/chosen": -2.7257468700408936, "logits/rejected": -2.558422327041626, "logps/chosen": -620.8070678710938, "logps/rejected": -476.3687438964844, "loss": 0.6716, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.10471773147583, "rewards/margins": 0.6087031364440918, "rewards/rejected": -2.7134203910827637, "step": 13390 }, { "epoch": 1.73, "learning_rate": 2.3520130056421536e-07, "logits/chosen": -2.753030300140381, "logits/rejected": -2.6552090644836426, "logps/chosen": -465.59423828125, "logps/rejected": -356.2554931640625, "loss": 0.5774, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9981321096420288, "rewards/margins": 0.6535446047782898, "rewards/rejected": -2.651676893234253, "step": 13400 }, { "epoch": 1.73, "learning_rate": 2.3496222625992157e-07, "logits/chosen": -2.8093371391296387, "logits/rejected": -2.6007907390594482, "logps/chosen": -556.6286010742188, "logps/rejected": -369.90692138671875, "loss": 0.5175, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9461190700531006, "rewards/margins": 1.0463508367538452, "rewards/rejected": -2.9924702644348145, "step": 13410 }, { "epoch": 1.73, "learning_rate": 2.347231519556278e-07, "logits/chosen": -2.662209987640381, "logits/rejected": -2.5377607345581055, "logps/chosen": -534.3958129882812, "logps/rejected": -419.62054443359375, "loss": 0.6262, "rewards/accuracies": 0.75, "rewards/chosen": -2.0232625007629395, "rewards/margins": 0.8335606455802917, "rewards/rejected": -2.856823205947876, "step": 13420 }, { "epoch": 1.73, "learning_rate": 2.3448407765133402e-07, "logits/chosen": -2.650001049041748, "logits/rejected": -2.557285785675049, "logps/chosen": -505.84307861328125, "logps/rejected": -403.77130126953125, "loss": 0.5995, "rewards/accuracies": 0.75, "rewards/chosen": -2.116450548171997, "rewards/margins": 0.814712643623352, "rewards/rejected": -2.9311633110046387, "step": 13430 }, { "epoch": 1.74, "learning_rate": 2.3424500334704025e-07, "logits/chosen": -2.593249559402466, "logits/rejected": -2.4228835105895996, "logps/chosen": -522.0611572265625, "logps/rejected": -367.5707092285156, "loss": 0.6265, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.177211046218872, "rewards/margins": 0.6781955361366272, "rewards/rejected": -2.8554067611694336, "step": 13440 }, { "epoch": 1.74, "learning_rate": 2.3400592904274646e-07, "logits/chosen": -2.822549343109131, "logits/rejected": -2.665281295776367, "logps/chosen": -599.9628295898438, "logps/rejected": -486.5040588378906, "loss": 0.5844, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0354437828063965, "rewards/margins": 0.9719996452331543, "rewards/rejected": -3.00744366645813, "step": 13450 }, { "epoch": 1.74, "learning_rate": 2.337668547384527e-07, "logits/chosen": -2.64111328125, "logits/rejected": -2.5181241035461426, "logps/chosen": -592.3386840820312, "logps/rejected": -487.31268310546875, "loss": 0.6915, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.38073468208313, "rewards/margins": 0.5294649600982666, "rewards/rejected": -2.9101996421813965, "step": 13460 }, { "epoch": 1.74, "learning_rate": 2.3352778043415893e-07, "logits/chosen": -2.6603612899780273, "logits/rejected": -2.502009153366089, "logps/chosen": -627.6202392578125, "logps/rejected": -411.4161071777344, "loss": 0.6369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0422699451446533, "rewards/margins": 0.8230811953544617, "rewards/rejected": -2.8653512001037598, "step": 13470 }, { "epoch": 1.74, "learning_rate": 2.3328870612986514e-07, "logits/chosen": -2.6140449047088623, "logits/rejected": -2.5241384506225586, "logps/chosen": -584.3873291015625, "logps/rejected": -423.65826416015625, "loss": 0.6096, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2624077796936035, "rewards/margins": 0.6935142874717712, "rewards/rejected": -2.9559218883514404, "step": 13480 }, { "epoch": 1.74, "learning_rate": 2.3304963182557138e-07, "logits/chosen": -2.656893730163574, "logits/rejected": -2.5909717082977295, "logps/chosen": -525.3053588867188, "logps/rejected": -441.96063232421875, "loss": 0.7252, "rewards/accuracies": 0.625, "rewards/chosen": -2.486201524734497, "rewards/margins": 0.34265363216400146, "rewards/rejected": -2.828855276107788, "step": 13490 }, { "epoch": 1.74, "learning_rate": 2.328105575212776e-07, "logits/chosen": -2.8660545349121094, "logits/rejected": -2.701751947402954, "logps/chosen": -514.0311279296875, "logps/rejected": -390.3356628417969, "loss": 0.7197, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2045693397521973, "rewards/margins": 0.5065322518348694, "rewards/rejected": -2.711101770401001, "step": 13500 }, { "epoch": 1.74, "learning_rate": 2.3257148321698382e-07, "logits/chosen": -2.694340944290161, "logits/rejected": -2.5405988693237305, "logps/chosen": -659.7880859375, "logps/rejected": -556.9647216796875, "loss": 0.6069, "rewards/accuracies": 0.75, "rewards/chosen": -1.9172868728637695, "rewards/margins": 0.8424975275993347, "rewards/rejected": -2.759784460067749, "step": 13510 }, { "epoch": 1.75, "learning_rate": 2.3233240891269003e-07, "logits/chosen": -2.772127628326416, "logits/rejected": -2.727285623550415, "logps/chosen": -484.71429443359375, "logps/rejected": -395.48529052734375, "loss": 0.5972, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.003481388092041, "rewards/margins": 0.6411157250404358, "rewards/rejected": -2.644597053527832, "step": 13520 }, { "epoch": 1.75, "learning_rate": 2.3209333460839627e-07, "logits/chosen": -2.649080514907837, "logits/rejected": -2.4860916137695312, "logps/chosen": -604.0956420898438, "logps/rejected": -450.9315490722656, "loss": 0.5757, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.337480068206787, "rewards/margins": 0.6773591637611389, "rewards/rejected": -3.0148394107818604, "step": 13530 }, { "epoch": 1.75, "learning_rate": 2.318542603041025e-07, "logits/chosen": -2.735496759414673, "logits/rejected": -2.670186758041382, "logps/chosen": -527.8984375, "logps/rejected": -447.6431579589844, "loss": 0.5854, "rewards/accuracies": 0.625, "rewards/chosen": -2.1079235076904297, "rewards/margins": 0.8107849359512329, "rewards/rejected": -2.918708324432373, "step": 13540 }, { "epoch": 1.75, "learning_rate": 2.3161518599980872e-07, "logits/chosen": -2.7088027000427246, "logits/rejected": -2.6062216758728027, "logps/chosen": -685.8440551757812, "logps/rejected": -547.38525390625, "loss": 0.6207, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.934973955154419, "rewards/margins": 0.7810277938842773, "rewards/rejected": -2.7160017490386963, "step": 13550 }, { "epoch": 1.75, "learning_rate": 2.3137611169551495e-07, "logits/chosen": -2.692686080932617, "logits/rejected": -2.634202480316162, "logps/chosen": -538.7937622070312, "logps/rejected": -461.80029296875, "loss": 0.579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.021996021270752, "rewards/margins": 0.870539665222168, "rewards/rejected": -2.89253568649292, "step": 13560 }, { "epoch": 1.75, "learning_rate": 2.3113703739122116e-07, "logits/chosen": -2.6812095642089844, "logits/rejected": -2.5592093467712402, "logps/chosen": -501.3243103027344, "logps/rejected": -392.25244140625, "loss": 0.5347, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9431272745132446, "rewards/margins": 0.8982846140861511, "rewards/rejected": -2.84141206741333, "step": 13570 }, { "epoch": 1.75, "learning_rate": 2.3089796308692742e-07, "logits/chosen": -2.6702752113342285, "logits/rejected": -2.6128287315368652, "logps/chosen": -575.4544067382812, "logps/rejected": -473.810791015625, "loss": 0.5889, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1787867546081543, "rewards/margins": 0.9443605542182922, "rewards/rejected": -3.123147487640381, "step": 13580 }, { "epoch": 1.75, "learning_rate": 2.3065888878263363e-07, "logits/chosen": -2.6925206184387207, "logits/rejected": -2.6726508140563965, "logps/chosen": -502.36126708984375, "logps/rejected": -423.2294006347656, "loss": 0.5227, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.782758116722107, "rewards/margins": 1.0309271812438965, "rewards/rejected": -2.8136849403381348, "step": 13590 }, { "epoch": 1.76, "learning_rate": 2.3041981447833987e-07, "logits/chosen": -2.722705364227295, "logits/rejected": -2.718538999557495, "logps/chosen": -523.2410278320312, "logps/rejected": -440.031982421875, "loss": 0.6901, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.27864670753479, "rewards/margins": 0.6121454238891602, "rewards/rejected": -2.89079213142395, "step": 13600 }, { "epoch": 1.76, "learning_rate": 2.301807401740461e-07, "logits/chosen": -2.6665573120117188, "logits/rejected": -2.6030335426330566, "logps/chosen": -520.740234375, "logps/rejected": -417.841796875, "loss": 0.8026, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2979791164398193, "rewards/margins": 0.41699084639549255, "rewards/rejected": -2.714970111846924, "step": 13610 }, { "epoch": 1.76, "learning_rate": 2.2994166586975231e-07, "logits/chosen": -2.5759482383728027, "logits/rejected": -2.5103652477264404, "logps/chosen": -561.035400390625, "logps/rejected": -460.7577209472656, "loss": 0.6611, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2605929374694824, "rewards/margins": 0.7217822074890137, "rewards/rejected": -2.982375383377075, "step": 13620 }, { "epoch": 1.76, "learning_rate": 2.2970259156545855e-07, "logits/chosen": -2.6690263748168945, "logits/rejected": -2.5591328144073486, "logps/chosen": -527.6405029296875, "logps/rejected": -447.51885986328125, "loss": 0.633, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1828227043151855, "rewards/margins": 0.650978147983551, "rewards/rejected": -2.833801031112671, "step": 13630 }, { "epoch": 1.76, "learning_rate": 2.2946351726116476e-07, "logits/chosen": -2.6750128269195557, "logits/rejected": -2.5879032611846924, "logps/chosen": -458.74078369140625, "logps/rejected": -392.1932067871094, "loss": 0.5792, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.083625316619873, "rewards/margins": 0.7104710340499878, "rewards/rejected": -2.794095993041992, "step": 13640 }, { "epoch": 1.76, "learning_rate": 2.29224442956871e-07, "logits/chosen": -2.65539288520813, "logits/rejected": -2.534996747970581, "logps/chosen": -588.1304931640625, "logps/rejected": -431.87335205078125, "loss": 0.5647, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1976430416107178, "rewards/margins": 0.7976224422454834, "rewards/rejected": -2.9952657222747803, "step": 13650 }, { "epoch": 1.76, "learning_rate": 2.289853686525772e-07, "logits/chosen": -2.7618775367736816, "logits/rejected": -2.62846040725708, "logps/chosen": -573.1077270507812, "logps/rejected": -402.2069091796875, "loss": 0.5956, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.002291679382324, "rewards/margins": 0.7376019954681396, "rewards/rejected": -2.739893674850464, "step": 13660 }, { "epoch": 1.76, "learning_rate": 2.2874629434828344e-07, "logits/chosen": -2.4656004905700684, "logits/rejected": -2.5158379077911377, "logps/chosen": -444.0308532714844, "logps/rejected": -466.7021484375, "loss": 0.633, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.932215929031372, "rewards/margins": 0.6547106504440308, "rewards/rejected": -2.586926221847534, "step": 13670 }, { "epoch": 1.77, "learning_rate": 2.2850722004398968e-07, "logits/chosen": -2.941887140274048, "logits/rejected": -2.7695040702819824, "logps/chosen": -615.2619018554688, "logps/rejected": -481.5726623535156, "loss": 0.6421, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1958155632019043, "rewards/margins": 0.6146937608718872, "rewards/rejected": -2.810509204864502, "step": 13680 }, { "epoch": 1.77, "learning_rate": 2.2826814573969589e-07, "logits/chosen": -2.790844202041626, "logits/rejected": -2.5494484901428223, "logps/chosen": -665.9395141601562, "logps/rejected": -462.80828857421875, "loss": 0.5539, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3472161293029785, "rewards/margins": 0.8353204727172852, "rewards/rejected": -3.1825366020202637, "step": 13690 }, { "epoch": 1.77, "learning_rate": 2.2802907143540212e-07, "logits/chosen": -2.6970438957214355, "logits/rejected": -2.591555118560791, "logps/chosen": -484.62103271484375, "logps/rejected": -365.4180603027344, "loss": 0.5837, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1503567695617676, "rewards/margins": 0.6769029498100281, "rewards/rejected": -2.8272600173950195, "step": 13700 }, { "epoch": 1.77, "learning_rate": 2.2778999713110833e-07, "logits/chosen": -2.787813186645508, "logits/rejected": -2.643146514892578, "logps/chosen": -536.9826049804688, "logps/rejected": -411.76953125, "loss": 0.6134, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.139782428741455, "rewards/margins": 0.8794851303100586, "rewards/rejected": -3.0192673206329346, "step": 13710 }, { "epoch": 1.77, "learning_rate": 2.2755092282681457e-07, "logits/chosen": -2.621987819671631, "logits/rejected": -2.485285997390747, "logps/chosen": -557.8696899414062, "logps/rejected": -406.1325988769531, "loss": 0.5358, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8926910161972046, "rewards/margins": 0.7815249562263489, "rewards/rejected": -2.674215793609619, "step": 13720 }, { "epoch": 1.77, "learning_rate": 2.2731184852252078e-07, "logits/chosen": -2.7243895530700684, "logits/rejected": -2.665116786956787, "logps/chosen": -526.8226928710938, "logps/rejected": -477.2970275878906, "loss": 0.6546, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1130545139312744, "rewards/margins": 0.7062844038009644, "rewards/rejected": -2.81933856010437, "step": 13730 }, { "epoch": 1.77, "learning_rate": 2.27072774218227e-07, "logits/chosen": -2.725771427154541, "logits/rejected": -2.611114025115967, "logps/chosen": -515.8469848632812, "logps/rejected": -414.51373291015625, "loss": 0.5673, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9859082698822021, "rewards/margins": 0.7471264004707336, "rewards/rejected": -2.73303484916687, "step": 13740 }, { "epoch": 1.78, "learning_rate": 2.2683369991393325e-07, "logits/chosen": -2.8726720809936523, "logits/rejected": -2.660761594772339, "logps/chosen": -592.5925903320312, "logps/rejected": -399.27215576171875, "loss": 0.5263, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.065462827682495, "rewards/margins": 1.0319831371307373, "rewards/rejected": -3.0974457263946533, "step": 13750 }, { "epoch": 1.78, "learning_rate": 2.2659462560963946e-07, "logits/chosen": -2.721461534500122, "logits/rejected": -2.5668015480041504, "logps/chosen": -494.5966796875, "logps/rejected": -405.0016174316406, "loss": 0.5599, "rewards/accuracies": 0.6875, "rewards/chosen": -2.044912815093994, "rewards/margins": 0.738488495349884, "rewards/rejected": -2.7834010124206543, "step": 13760 }, { "epoch": 1.78, "learning_rate": 2.263555513053457e-07, "logits/chosen": -2.662806272506714, "logits/rejected": -2.543026924133301, "logps/chosen": -538.8604125976562, "logps/rejected": -428.16497802734375, "loss": 0.5181, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.038835048675537, "rewards/margins": 0.9582756757736206, "rewards/rejected": -2.9971108436584473, "step": 13770 }, { "epoch": 1.78, "learning_rate": 2.261164770010519e-07, "logits/chosen": -2.650080680847168, "logits/rejected": -2.6314187049865723, "logps/chosen": -562.9757080078125, "logps/rejected": -516.5299072265625, "loss": 0.625, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2231967449188232, "rewards/margins": 0.7139509916305542, "rewards/rejected": -2.937147855758667, "step": 13780 }, { "epoch": 1.78, "learning_rate": 2.2587740269675814e-07, "logits/chosen": -2.867398977279663, "logits/rejected": -2.6836273670196533, "logps/chosen": -578.1776123046875, "logps/rejected": -390.1368408203125, "loss": 0.5221, "rewards/accuracies": 0.75, "rewards/chosen": -1.910078763961792, "rewards/margins": 0.9894372820854187, "rewards/rejected": -2.8995163440704346, "step": 13790 }, { "epoch": 1.78, "learning_rate": 2.2563832839246435e-07, "logits/chosen": -2.7248167991638184, "logits/rejected": -2.6133041381835938, "logps/chosen": -546.4743041992188, "logps/rejected": -441.34674072265625, "loss": 0.6659, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.0313544273376465, "rewards/margins": 0.6386906504631042, "rewards/rejected": -2.6700448989868164, "step": 13800 }, { "epoch": 1.78, "learning_rate": 2.2539925408817058e-07, "logits/chosen": -2.700385570526123, "logits/rejected": -2.6011388301849365, "logps/chosen": -572.8375244140625, "logps/rejected": -444.88623046875, "loss": 0.644, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0249037742614746, "rewards/margins": 0.8586069345474243, "rewards/rejected": -2.8835108280181885, "step": 13810 }, { "epoch": 1.78, "learning_rate": 2.2516017978387682e-07, "logits/chosen": -2.776832103729248, "logits/rejected": -2.6107475757598877, "logps/chosen": -559.1094970703125, "logps/rejected": -407.52471923828125, "loss": 0.6535, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.019627094268799, "rewards/margins": 0.6927070617675781, "rewards/rejected": -2.712334156036377, "step": 13820 }, { "epoch": 1.79, "learning_rate": 2.2492110547958303e-07, "logits/chosen": -2.723763942718506, "logits/rejected": -2.574873685836792, "logps/chosen": -608.5477294921875, "logps/rejected": -432.2290954589844, "loss": 0.6166, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0969176292419434, "rewards/margins": 0.8642122149467468, "rewards/rejected": -2.9611294269561768, "step": 13830 }, { "epoch": 1.79, "learning_rate": 2.2468203117528927e-07, "logits/chosen": -2.7337687015533447, "logits/rejected": -2.6040849685668945, "logps/chosen": -566.21337890625, "logps/rejected": -419.02191162109375, "loss": 0.5803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.165825128555298, "rewards/margins": 0.7578860521316528, "rewards/rejected": -2.923711061477661, "step": 13840 }, { "epoch": 1.79, "learning_rate": 2.2444295687099548e-07, "logits/chosen": -2.695847749710083, "logits/rejected": -2.547919988632202, "logps/chosen": -515.0695190429688, "logps/rejected": -374.8662414550781, "loss": 0.603, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1020445823669434, "rewards/margins": 0.8289209604263306, "rewards/rejected": -2.9309659004211426, "step": 13850 }, { "epoch": 1.79, "learning_rate": 2.242038825667017e-07, "logits/chosen": -2.709676742553711, "logits/rejected": -2.6470417976379395, "logps/chosen": -522.2538452148438, "logps/rejected": -409.0666198730469, "loss": 0.6429, "rewards/accuracies": 0.625, "rewards/chosen": -1.9444477558135986, "rewards/margins": 0.6803504228591919, "rewards/rejected": -2.624797821044922, "step": 13860 }, { "epoch": 1.79, "learning_rate": 2.2396480826240792e-07, "logits/chosen": -2.6634976863861084, "logits/rejected": -2.610599994659424, "logps/chosen": -436.04522705078125, "logps/rejected": -365.00048828125, "loss": 0.6212, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1294429302215576, "rewards/margins": 0.5153406858444214, "rewards/rejected": -2.6447839736938477, "step": 13870 }, { "epoch": 1.79, "learning_rate": 2.2372573395811418e-07, "logits/chosen": -2.619751453399658, "logits/rejected": -2.5348925590515137, "logps/chosen": -493.9488830566406, "logps/rejected": -379.12353515625, "loss": 0.6554, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0421926975250244, "rewards/margins": 0.6898266673088074, "rewards/rejected": -2.7320191860198975, "step": 13880 }, { "epoch": 1.79, "learning_rate": 2.2348665965382042e-07, "logits/chosen": -2.7443668842315674, "logits/rejected": -2.616121292114258, "logps/chosen": -556.4512329101562, "logps/rejected": -417.8892517089844, "loss": 0.6043, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.189450979232788, "rewards/margins": 0.8097261190414429, "rewards/rejected": -2.9991772174835205, "step": 13890 }, { "epoch": 1.79, "learning_rate": 2.2324758534952663e-07, "logits/chosen": -2.6348977088928223, "logits/rejected": -2.5456578731536865, "logps/chosen": -519.8370361328125, "logps/rejected": -448.40069580078125, "loss": 0.5913, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.068166494369507, "rewards/margins": 0.6993528604507446, "rewards/rejected": -2.767519474029541, "step": 13900 }, { "epoch": 1.8, "learning_rate": 2.2300851104523286e-07, "logits/chosen": -2.7516729831695557, "logits/rejected": -2.631502866744995, "logps/chosen": -508.69000244140625, "logps/rejected": -428.21246337890625, "loss": 0.6716, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2009778022766113, "rewards/margins": 0.7280235290527344, "rewards/rejected": -2.9290013313293457, "step": 13910 }, { "epoch": 1.8, "learning_rate": 2.2276943674093907e-07, "logits/chosen": -2.6644694805145264, "logits/rejected": -2.527221202850342, "logps/chosen": -535.37451171875, "logps/rejected": -378.77178955078125, "loss": 0.484, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0715365409851074, "rewards/margins": 0.9812078475952148, "rewards/rejected": -3.0527443885803223, "step": 13920 }, { "epoch": 1.8, "learning_rate": 2.225303624366453e-07, "logits/chosen": -2.6592233180999756, "logits/rejected": -2.613982677459717, "logps/chosen": -587.8624877929688, "logps/rejected": -504.18682861328125, "loss": 0.6208, "rewards/accuracies": 0.6875, "rewards/chosen": -2.366647243499756, "rewards/margins": 0.7052031755447388, "rewards/rejected": -3.071850538253784, "step": 13930 }, { "epoch": 1.8, "learning_rate": 2.2229128813235152e-07, "logits/chosen": -2.7415578365325928, "logits/rejected": -2.5916314125061035, "logps/chosen": -630.407470703125, "logps/rejected": -485.6471252441406, "loss": 0.7079, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.211286783218384, "rewards/margins": 0.5394357442855835, "rewards/rejected": -2.7507224082946777, "step": 13940 }, { "epoch": 1.8, "learning_rate": 2.2205221382805776e-07, "logits/chosen": -2.7551445960998535, "logits/rejected": -2.5724663734436035, "logps/chosen": -539.4112548828125, "logps/rejected": -385.8069763183594, "loss": 0.6715, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1931300163269043, "rewards/margins": 0.5927701592445374, "rewards/rejected": -2.785900592803955, "step": 13950 }, { "epoch": 1.8, "learning_rate": 2.21813139523764e-07, "logits/chosen": -2.7736449241638184, "logits/rejected": -2.649688720703125, "logps/chosen": -553.9362182617188, "logps/rejected": -407.94183349609375, "loss": 0.5216, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0220115184783936, "rewards/margins": 1.015019416809082, "rewards/rejected": -3.0370309352874756, "step": 13960 }, { "epoch": 1.8, "learning_rate": 2.215740652194702e-07, "logits/chosen": -2.795726776123047, "logits/rejected": -2.6375460624694824, "logps/chosen": -634.9755859375, "logps/rejected": -443.34375, "loss": 0.6088, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8711719512939453, "rewards/margins": 0.8675785064697266, "rewards/rejected": -2.738750457763672, "step": 13970 }, { "epoch": 1.8, "learning_rate": 2.2133499091517644e-07, "logits/chosen": -2.7990097999572754, "logits/rejected": -2.6674885749816895, "logps/chosen": -527.5956420898438, "logps/rejected": -388.2579650878906, "loss": 0.5381, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.967037558555603, "rewards/margins": 0.8836778402328491, "rewards/rejected": -2.850715160369873, "step": 13980 }, { "epoch": 1.81, "learning_rate": 2.2109591661088265e-07, "logits/chosen": -2.590620994567871, "logits/rejected": -2.592775821685791, "logps/chosen": -596.4307861328125, "logps/rejected": -487.5042419433594, "loss": 0.6237, "rewards/accuracies": 0.6875, "rewards/chosen": -2.114586591720581, "rewards/margins": 0.7939534187316895, "rewards/rejected": -2.9085400104522705, "step": 13990 }, { "epoch": 1.81, "learning_rate": 2.2085684230658888e-07, "logits/chosen": -2.7238001823425293, "logits/rejected": -2.602356433868408, "logps/chosen": -604.1683349609375, "logps/rejected": -442.4443359375, "loss": 0.511, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9413292407989502, "rewards/margins": 0.9283748865127563, "rewards/rejected": -2.869704008102417, "step": 14000 }, { "epoch": 1.81, "eval_logits/chosen": -3.0624704360961914, "eval_logits/rejected": -3.007758617401123, "eval_logps/chosen": -540.3145141601562, "eval_logps/rejected": -420.3077392578125, "eval_loss": 0.6167316436767578, "eval_rewards/accuracies": 0.671500027179718, "eval_rewards/chosen": -0.849477231502533, "eval_rewards/margins": 1.0884329080581665, "eval_rewards/rejected": -1.9379103183746338, "eval_runtime": 279.2277, "eval_samples_per_second": 7.163, "eval_steps_per_second": 3.581, "step": 14000 }, { "epoch": 1.81, "learning_rate": 2.206177680022951e-07, "logits/chosen": -2.7006118297576904, "logits/rejected": -2.627225399017334, "logps/chosen": -553.4785766601562, "logps/rejected": -459.92010498046875, "loss": 0.6628, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1210269927978516, "rewards/margins": 0.6003091931343079, "rewards/rejected": -2.7213363647460938, "step": 14010 }, { "epoch": 1.81, "learning_rate": 2.2037869369800133e-07, "logits/chosen": -2.7567994594573975, "logits/rejected": -2.6656858921051025, "logps/chosen": -477.1485290527344, "logps/rejected": -411.446533203125, "loss": 0.6133, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9493398666381836, "rewards/margins": 0.6444752812385559, "rewards/rejected": -2.5938148498535156, "step": 14020 }, { "epoch": 1.81, "learning_rate": 2.2013961939370756e-07, "logits/chosen": -2.766019582748413, "logits/rejected": -2.6894371509552, "logps/chosen": -582.5353393554688, "logps/rejected": -507.02117919921875, "loss": 0.5569, "rewards/accuracies": 0.75, "rewards/chosen": -2.1207833290100098, "rewards/margins": 0.8069151043891907, "rewards/rejected": -2.9276986122131348, "step": 14030 }, { "epoch": 1.81, "learning_rate": 2.1990054508941377e-07, "logits/chosen": -2.7979166507720947, "logits/rejected": -2.6548781394958496, "logps/chosen": -531.4434204101562, "logps/rejected": -408.61785888671875, "loss": 0.6041, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.260209321975708, "rewards/margins": 0.6946015954017639, "rewards/rejected": -2.9548110961914062, "step": 14040 }, { "epoch": 1.81, "learning_rate": 2.1966147078512e-07, "logits/chosen": -2.733816623687744, "logits/rejected": -2.6415510177612305, "logps/chosen": -508.3985290527344, "logps/rejected": -475.5780334472656, "loss": 0.6296, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.028085708618164, "rewards/margins": 0.8428437113761902, "rewards/rejected": -2.870929718017578, "step": 14050 }, { "epoch": 1.82, "learning_rate": 2.1942239648082622e-07, "logits/chosen": -2.814343214035034, "logits/rejected": -2.723266124725342, "logps/chosen": -454.7928161621094, "logps/rejected": -343.69232177734375, "loss": 0.592, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9459543228149414, "rewards/margins": 0.6064020395278931, "rewards/rejected": -2.552356243133545, "step": 14060 }, { "epoch": 1.82, "learning_rate": 2.1918332217653245e-07, "logits/chosen": -2.7543530464172363, "logits/rejected": -2.6227545738220215, "logps/chosen": -562.65234375, "logps/rejected": -393.61334228515625, "loss": 0.4581, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.0573344230651855, "rewards/margins": 1.1019198894500732, "rewards/rejected": -3.159254550933838, "step": 14070 }, { "epoch": 1.82, "learning_rate": 2.1894424787223866e-07, "logits/chosen": -2.7018208503723145, "logits/rejected": -2.6037449836730957, "logps/chosen": -528.391357421875, "logps/rejected": -375.5564880371094, "loss": 0.5275, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8698030710220337, "rewards/margins": 0.8390668630599976, "rewards/rejected": -2.7088701725006104, "step": 14080 }, { "epoch": 1.82, "learning_rate": 2.187051735679449e-07, "logits/chosen": -2.6323585510253906, "logits/rejected": -2.5494771003723145, "logps/chosen": -637.8290405273438, "logps/rejected": -528.3914184570312, "loss": 0.5311, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0997681617736816, "rewards/margins": 1.0974671840667725, "rewards/rejected": -3.197235345840454, "step": 14090 }, { "epoch": 1.82, "learning_rate": 2.1846609926365114e-07, "logits/chosen": -2.826988935470581, "logits/rejected": -2.5982882976531982, "logps/chosen": -588.9720458984375, "logps/rejected": -442.17291259765625, "loss": 0.7277, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.295078754425049, "rewards/margins": 0.5684062242507935, "rewards/rejected": -2.8634848594665527, "step": 14100 }, { "epoch": 1.82, "learning_rate": 2.1822702495935734e-07, "logits/chosen": -2.778120279312134, "logits/rejected": -2.67885684967041, "logps/chosen": -555.7342529296875, "logps/rejected": -451.9007873535156, "loss": 0.6743, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.275031566619873, "rewards/margins": 0.6098883748054504, "rewards/rejected": -2.8849198818206787, "step": 14110 }, { "epoch": 1.82, "learning_rate": 2.1798795065506358e-07, "logits/chosen": -2.6425280570983887, "logits/rejected": -2.6151676177978516, "logps/chosen": -522.8720092773438, "logps/rejected": -476.5850524902344, "loss": 0.6061, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.917822241783142, "rewards/margins": 0.7824870944023132, "rewards/rejected": -2.7003092765808105, "step": 14120 }, { "epoch": 1.82, "learning_rate": 2.177488763507698e-07, "logits/chosen": -2.706450939178467, "logits/rejected": -2.6572718620300293, "logps/chosen": -562.1246948242188, "logps/rejected": -470.1814880371094, "loss": 0.7336, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.1550636291503906, "rewards/margins": 0.7244311571121216, "rewards/rejected": -2.8794944286346436, "step": 14130 }, { "epoch": 1.83, "learning_rate": 2.1750980204647603e-07, "logits/chosen": -2.813646078109741, "logits/rejected": -2.6976237297058105, "logps/chosen": -560.7418212890625, "logps/rejected": -432.2762756347656, "loss": 0.6378, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0518596172332764, "rewards/margins": 0.7229136824607849, "rewards/rejected": -2.774773359298706, "step": 14140 }, { "epoch": 1.83, "learning_rate": 2.1727072774218224e-07, "logits/chosen": -2.7643790245056152, "logits/rejected": -2.6232454776763916, "logps/chosen": -629.1680908203125, "logps/rejected": -428.55462646484375, "loss": 0.6001, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1629579067230225, "rewards/margins": 0.7262879610061646, "rewards/rejected": -2.8892457485198975, "step": 14150 }, { "epoch": 1.83, "learning_rate": 2.1703165343788847e-07, "logits/chosen": -2.66064715385437, "logits/rejected": -2.575808048248291, "logps/chosen": -520.6550903320312, "logps/rejected": -461.960693359375, "loss": 0.585, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.890249252319336, "rewards/margins": 0.767863392829895, "rewards/rejected": -2.6581127643585205, "step": 14160 }, { "epoch": 1.83, "learning_rate": 2.1679257913359473e-07, "logits/chosen": -2.7266016006469727, "logits/rejected": -2.5979485511779785, "logps/chosen": -658.6280517578125, "logps/rejected": -476.51934814453125, "loss": 0.5583, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.212162494659424, "rewards/margins": 1.0197875499725342, "rewards/rejected": -3.2319495677948, "step": 14170 }, { "epoch": 1.83, "learning_rate": 2.1655350482930094e-07, "logits/chosen": -2.85465931892395, "logits/rejected": -2.701781749725342, "logps/chosen": -584.1262817382812, "logps/rejected": -475.20855712890625, "loss": 0.571, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.091658115386963, "rewards/margins": 0.8460786938667297, "rewards/rejected": -2.937736988067627, "step": 14180 }, { "epoch": 1.83, "learning_rate": 2.1631443052500718e-07, "logits/chosen": -2.6916956901550293, "logits/rejected": -2.6244564056396484, "logps/chosen": -566.2893676757812, "logps/rejected": -424.16851806640625, "loss": 0.5628, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1654882431030273, "rewards/margins": 0.92414391040802, "rewards/rejected": -3.089632034301758, "step": 14190 }, { "epoch": 1.83, "learning_rate": 2.160753562207134e-07, "logits/chosen": -2.8115458488464355, "logits/rejected": -2.5847718715667725, "logps/chosen": -608.2259521484375, "logps/rejected": -386.8984069824219, "loss": 0.5842, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.096287250518799, "rewards/margins": 0.8435249328613281, "rewards/rejected": -2.939812183380127, "step": 14200 }, { "epoch": 1.83, "learning_rate": 2.1583628191641962e-07, "logits/chosen": -2.7011547088623047, "logits/rejected": -2.5488839149475098, "logps/chosen": -651.828857421875, "logps/rejected": -518.2875366210938, "loss": 0.7362, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.174474000930786, "rewards/margins": 0.6112269163131714, "rewards/rejected": -2.785701036453247, "step": 14210 }, { "epoch": 1.84, "learning_rate": 2.1559720761212583e-07, "logits/chosen": -2.6961989402770996, "logits/rejected": -2.695951461791992, "logps/chosen": -494.15625, "logps/rejected": -419.29522705078125, "loss": 0.6502, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9958980083465576, "rewards/margins": 0.6224155426025391, "rewards/rejected": -2.6183135509490967, "step": 14220 }, { "epoch": 1.84, "learning_rate": 2.1535813330783207e-07, "logits/chosen": -2.700378894805908, "logits/rejected": -2.721513509750366, "logps/chosen": -534.3284301757812, "logps/rejected": -476.69805908203125, "loss": 0.5533, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9726272821426392, "rewards/margins": 0.9625275731086731, "rewards/rejected": -2.935154914855957, "step": 14230 }, { "epoch": 1.84, "learning_rate": 2.151190590035383e-07, "logits/chosen": -2.681999921798706, "logits/rejected": -2.657177686691284, "logps/chosen": -577.9228515625, "logps/rejected": -487.68829345703125, "loss": 0.7953, "rewards/accuracies": 0.625, "rewards/chosen": -2.407989025115967, "rewards/margins": 0.4945116937160492, "rewards/rejected": -2.902500629425049, "step": 14240 }, { "epoch": 1.84, "learning_rate": 2.1487998469924452e-07, "logits/chosen": -2.6408181190490723, "logits/rejected": -2.4995341300964355, "logps/chosen": -484.41436767578125, "logps/rejected": -369.58331298828125, "loss": 0.6368, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.100492238998413, "rewards/margins": 0.6782867312431335, "rewards/rejected": -2.7787792682647705, "step": 14250 }, { "epoch": 1.84, "learning_rate": 2.1464091039495075e-07, "logits/chosen": -2.7824294567108154, "logits/rejected": -2.558115005493164, "logps/chosen": -543.7130126953125, "logps/rejected": -418.79803466796875, "loss": 0.698, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.376260995864868, "rewards/margins": 0.5360320210456848, "rewards/rejected": -2.912292957305908, "step": 14260 }, { "epoch": 1.84, "learning_rate": 2.1440183609065696e-07, "logits/chosen": -2.784548759460449, "logits/rejected": -2.568666458129883, "logps/chosen": -623.16943359375, "logps/rejected": -445.4876403808594, "loss": 0.588, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0766549110412598, "rewards/margins": 0.8983839154243469, "rewards/rejected": -2.975038766860962, "step": 14270 }, { "epoch": 1.84, "learning_rate": 2.141627617863632e-07, "logits/chosen": -2.7654154300689697, "logits/rejected": -2.640002965927124, "logps/chosen": -530.043212890625, "logps/rejected": -400.78729248046875, "loss": 0.5808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8321382999420166, "rewards/margins": 0.8644756078720093, "rewards/rejected": -2.6966137886047363, "step": 14280 }, { "epoch": 1.84, "learning_rate": 2.139236874820694e-07, "logits/chosen": -2.7849605083465576, "logits/rejected": -2.625483751296997, "logps/chosen": -547.3504638671875, "logps/rejected": -376.0516662597656, "loss": 0.502, "rewards/accuracies": 0.75, "rewards/chosen": -1.9876991510391235, "rewards/margins": 0.9632689356803894, "rewards/rejected": -2.9509682655334473, "step": 14290 }, { "epoch": 1.85, "learning_rate": 2.1368461317777564e-07, "logits/chosen": -2.7246410846710205, "logits/rejected": -2.5479564666748047, "logps/chosen": -546.68408203125, "logps/rejected": -378.8130798339844, "loss": 0.6992, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0899391174316406, "rewards/margins": 0.5289554595947266, "rewards/rejected": -2.618894577026367, "step": 14300 }, { "epoch": 1.85, "learning_rate": 2.1344553887348188e-07, "logits/chosen": -2.7086689472198486, "logits/rejected": -2.5613129138946533, "logps/chosen": -571.9090576171875, "logps/rejected": -386.5354919433594, "loss": 0.4724, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.780705451965332, "rewards/margins": 1.0674262046813965, "rewards/rejected": -2.8481316566467285, "step": 14310 }, { "epoch": 1.85, "learning_rate": 2.132064645691881e-07, "logits/chosen": -2.838547706604004, "logits/rejected": -2.620670795440674, "logps/chosen": -577.8394165039062, "logps/rejected": -392.9034118652344, "loss": 0.5958, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.007530689239502, "rewards/margins": 0.815445601940155, "rewards/rejected": -2.8229763507843018, "step": 14320 }, { "epoch": 1.85, "learning_rate": 2.1296739026489432e-07, "logits/chosen": -2.8276772499084473, "logits/rejected": -2.713886260986328, "logps/chosen": -516.986328125, "logps/rejected": -389.37738037109375, "loss": 0.6271, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1022331714630127, "rewards/margins": 0.7939386963844299, "rewards/rejected": -2.896171808242798, "step": 14330 }, { "epoch": 1.85, "learning_rate": 2.1272831596060053e-07, "logits/chosen": -2.663684606552124, "logits/rejected": -2.529623031616211, "logps/chosen": -627.421630859375, "logps/rejected": -401.32080078125, "loss": 0.5768, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9428819417953491, "rewards/margins": 0.8233539462089539, "rewards/rejected": -2.766235828399658, "step": 14340 }, { "epoch": 1.85, "learning_rate": 2.1248924165630677e-07, "logits/chosen": -2.804123640060425, "logits/rejected": -2.689652919769287, "logps/chosen": -584.7874755859375, "logps/rejected": -431.55792236328125, "loss": 0.6911, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.266385555267334, "rewards/margins": 0.6001550555229187, "rewards/rejected": -2.8665404319763184, "step": 14350 }, { "epoch": 1.85, "learning_rate": 2.12250167352013e-07, "logits/chosen": -2.6874420642852783, "logits/rejected": -2.562866449356079, "logps/chosen": -540.53857421875, "logps/rejected": -394.6040344238281, "loss": 0.5702, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1208319664001465, "rewards/margins": 0.8872283697128296, "rewards/rejected": -3.0080602169036865, "step": 14360 }, { "epoch": 1.86, "learning_rate": 2.1201109304771921e-07, "logits/chosen": -2.6916420459747314, "logits/rejected": -2.6624159812927246, "logps/chosen": -570.0140991210938, "logps/rejected": -468.94964599609375, "loss": 0.5796, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0882532596588135, "rewards/margins": 0.7853443622589111, "rewards/rejected": -2.8735976219177246, "step": 14370 }, { "epoch": 1.86, "learning_rate": 2.1177201874342545e-07, "logits/chosen": -2.743215799331665, "logits/rejected": -2.634669303894043, "logps/chosen": -555.183837890625, "logps/rejected": -428.31842041015625, "loss": 0.7054, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.170414924621582, "rewards/margins": 0.6738014817237854, "rewards/rejected": -2.8442163467407227, "step": 14380 }, { "epoch": 1.86, "learning_rate": 2.1153294443913166e-07, "logits/chosen": -2.8076913356781006, "logits/rejected": -2.633415937423706, "logps/chosen": -528.8939819335938, "logps/rejected": -375.3627014160156, "loss": 0.6731, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0631418228149414, "rewards/margins": 0.6020824909210205, "rewards/rejected": -2.665224313735962, "step": 14390 }, { "epoch": 1.86, "learning_rate": 2.112938701348379e-07, "logits/chosen": -2.7916207313537598, "logits/rejected": -2.724832534790039, "logps/chosen": -470.98675537109375, "logps/rejected": -406.1842956542969, "loss": 0.5818, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1818573474884033, "rewards/margins": 0.6977201104164124, "rewards/rejected": -2.879577159881592, "step": 14400 }, { "epoch": 1.86, "learning_rate": 2.110547958305441e-07, "logits/chosen": -2.664372444152832, "logits/rejected": -2.5384459495544434, "logps/chosen": -584.155517578125, "logps/rejected": -415.34490966796875, "loss": 0.5434, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9813015460968018, "rewards/margins": 0.8417682647705078, "rewards/rejected": -2.8230700492858887, "step": 14410 }, { "epoch": 1.86, "learning_rate": 2.1081572152625034e-07, "logits/chosen": -2.7367048263549805, "logits/rejected": -2.6596765518188477, "logps/chosen": -492.1371154785156, "logps/rejected": -391.7942199707031, "loss": 0.5794, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.947408676147461, "rewards/margins": 0.8700436353683472, "rewards/rejected": -2.8174521923065186, "step": 14420 }, { "epoch": 1.86, "learning_rate": 2.1057664722195658e-07, "logits/chosen": -2.670516014099121, "logits/rejected": -2.6019325256347656, "logps/chosen": -520.3949584960938, "logps/rejected": -469.93017578125, "loss": 0.6548, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1208572387695312, "rewards/margins": 0.6306872963905334, "rewards/rejected": -2.75154447555542, "step": 14430 }, { "epoch": 1.86, "learning_rate": 2.1033757291766279e-07, "logits/chosen": -2.6885130405426025, "logits/rejected": -2.6030890941619873, "logps/chosen": -635.7813720703125, "logps/rejected": -515.4835205078125, "loss": 0.5158, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.064256429672241, "rewards/margins": 1.1314418315887451, "rewards/rejected": -3.1956984996795654, "step": 14440 }, { "epoch": 1.87, "learning_rate": 2.1009849861336902e-07, "logits/chosen": -2.70436429977417, "logits/rejected": -2.5821125507354736, "logps/chosen": -534.3090209960938, "logps/rejected": -420.77276611328125, "loss": 0.5864, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.170696496963501, "rewards/margins": 0.6599725484848022, "rewards/rejected": -2.8306689262390137, "step": 14450 }, { "epoch": 1.87, "learning_rate": 2.0985942430907526e-07, "logits/chosen": -2.6969807147979736, "logits/rejected": -2.551253080368042, "logps/chosen": -492.5542907714844, "logps/rejected": -338.75640869140625, "loss": 0.577, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9684232473373413, "rewards/margins": 0.8508535623550415, "rewards/rejected": -2.819276809692383, "step": 14460 }, { "epoch": 1.87, "learning_rate": 2.096203500047815e-07, "logits/chosen": -2.6378350257873535, "logits/rejected": -2.5486483573913574, "logps/chosen": -603.9617919921875, "logps/rejected": -474.9522399902344, "loss": 0.6597, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.179482936859131, "rewards/margins": 0.6773698925971985, "rewards/rejected": -2.8568527698516846, "step": 14470 }, { "epoch": 1.87, "learning_rate": 2.093812757004877e-07, "logits/chosen": -2.774358034133911, "logits/rejected": -2.652184247970581, "logps/chosen": -592.8646850585938, "logps/rejected": -450.6553649902344, "loss": 0.6346, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1330084800720215, "rewards/margins": 0.6982041597366333, "rewards/rejected": -2.8312127590179443, "step": 14480 }, { "epoch": 1.87, "learning_rate": 2.0914220139619394e-07, "logits/chosen": -2.6768524646759033, "logits/rejected": -2.657315731048584, "logps/chosen": -591.1275024414062, "logps/rejected": -494.92724609375, "loss": 0.5265, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8344027996063232, "rewards/margins": 0.8776327967643738, "rewards/rejected": -2.712035655975342, "step": 14490 }, { "epoch": 1.87, "learning_rate": 2.0890312709190018e-07, "logits/chosen": -2.765537738800049, "logits/rejected": -2.6438920497894287, "logps/chosen": -575.8434448242188, "logps/rejected": -472.50933837890625, "loss": 0.5596, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7564741373062134, "rewards/margins": 1.079082727432251, "rewards/rejected": -2.835556745529175, "step": 14500 }, { "epoch": 1.87, "learning_rate": 2.0866405278760638e-07, "logits/chosen": -2.7428369522094727, "logits/rejected": -2.6702723503112793, "logps/chosen": -570.8301391601562, "logps/rejected": -470.5765686035156, "loss": 0.7543, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3999271392822266, "rewards/margins": 0.5061328411102295, "rewards/rejected": -2.906060218811035, "step": 14510 }, { "epoch": 1.87, "learning_rate": 2.0842497848331262e-07, "logits/chosen": -2.6263861656188965, "logits/rejected": -2.5644402503967285, "logps/chosen": -595.5992431640625, "logps/rejected": -428.81866455078125, "loss": 0.6909, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9963128566741943, "rewards/margins": 0.5596219897270203, "rewards/rejected": -2.5559351444244385, "step": 14520 }, { "epoch": 1.88, "learning_rate": 2.0818590417901883e-07, "logits/chosen": -2.670475959777832, "logits/rejected": -2.6416707038879395, "logps/chosen": -502.70751953125, "logps/rejected": -442.8829650878906, "loss": 0.6615, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9945971965789795, "rewards/margins": 0.6670731902122498, "rewards/rejected": -2.661670446395874, "step": 14530 }, { "epoch": 1.88, "learning_rate": 2.0794682987472507e-07, "logits/chosen": -2.708738327026367, "logits/rejected": -2.6693692207336426, "logps/chosen": -448.4375, "logps/rejected": -383.41021728515625, "loss": 0.7741, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.2267560958862305, "rewards/margins": 0.22079697251319885, "rewards/rejected": -2.4475531578063965, "step": 14540 }, { "epoch": 1.88, "learning_rate": 2.0770775557043128e-07, "logits/chosen": -2.7436254024505615, "logits/rejected": -2.6222198009490967, "logps/chosen": -586.4843139648438, "logps/rejected": -454.0836486816406, "loss": 0.4939, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9289592504501343, "rewards/margins": 1.1723130941390991, "rewards/rejected": -3.1012725830078125, "step": 14550 }, { "epoch": 1.88, "learning_rate": 2.074686812661375e-07, "logits/chosen": -2.7529029846191406, "logits/rejected": -2.6040027141571045, "logps/chosen": -504.0262145996094, "logps/rejected": -413.13604736328125, "loss": 0.5956, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9772405624389648, "rewards/margins": 0.7930135726928711, "rewards/rejected": -2.770254135131836, "step": 14560 }, { "epoch": 1.88, "learning_rate": 2.0722960696184375e-07, "logits/chosen": -2.776569366455078, "logits/rejected": -2.6149685382843018, "logps/chosen": -572.8392333984375, "logps/rejected": -407.8907165527344, "loss": 0.4466, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0590360164642334, "rewards/margins": 1.0851713418960571, "rewards/rejected": -3.144207239151001, "step": 14570 }, { "epoch": 1.88, "learning_rate": 2.0699053265754996e-07, "logits/chosen": -2.6635704040527344, "logits/rejected": -2.5780069828033447, "logps/chosen": -583.2394409179688, "logps/rejected": -514.2677612304688, "loss": 0.7719, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2521300315856934, "rewards/margins": 0.5388683080673218, "rewards/rejected": -2.7909982204437256, "step": 14580 }, { "epoch": 1.88, "learning_rate": 2.067514583532562e-07, "logits/chosen": -2.7946937084198, "logits/rejected": -2.6629834175109863, "logps/chosen": -582.4759521484375, "logps/rejected": -454.9994201660156, "loss": 0.5771, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.976414442062378, "rewards/margins": 0.916828989982605, "rewards/rejected": -2.8932437896728516, "step": 14590 }, { "epoch": 1.88, "learning_rate": 2.065123840489624e-07, "logits/chosen": -2.651740312576294, "logits/rejected": -2.517385482788086, "logps/chosen": -534.8493041992188, "logps/rejected": -403.84368896484375, "loss": 0.4949, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9481738805770874, "rewards/margins": 1.124685525894165, "rewards/rejected": -3.072859525680542, "step": 14600 }, { "epoch": 1.89, "learning_rate": 2.0627330974466864e-07, "logits/chosen": -2.780062198638916, "logits/rejected": -2.7930994033813477, "logps/chosen": -573.7943725585938, "logps/rejected": -532.2667236328125, "loss": 0.634, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.103400707244873, "rewards/margins": 0.7007867097854614, "rewards/rejected": -2.804187297821045, "step": 14610 }, { "epoch": 1.89, "learning_rate": 2.0603423544037485e-07, "logits/chosen": -2.761960744857788, "logits/rejected": -2.671938419342041, "logps/chosen": -583.5623168945312, "logps/rejected": -462.56005859375, "loss": 0.5763, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9799197912216187, "rewards/margins": 0.8443568348884583, "rewards/rejected": -2.8242766857147217, "step": 14620 }, { "epoch": 1.89, "learning_rate": 2.0579516113608108e-07, "logits/chosen": -2.64892315864563, "logits/rejected": -2.5846219062805176, "logps/chosen": -545.2601318359375, "logps/rejected": -469.6708984375, "loss": 0.5755, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1540284156799316, "rewards/margins": 0.7237850427627563, "rewards/rejected": -2.8778133392333984, "step": 14630 }, { "epoch": 1.89, "learning_rate": 2.0555608683178732e-07, "logits/chosen": -2.7423040866851807, "logits/rejected": -2.6212782859802246, "logps/chosen": -500.50421142578125, "logps/rejected": -390.2088317871094, "loss": 0.6012, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.805373191833496, "rewards/margins": 0.7119220495223999, "rewards/rejected": -2.5172953605651855, "step": 14640 }, { "epoch": 1.89, "learning_rate": 2.0531701252749353e-07, "logits/chosen": -2.706325054168701, "logits/rejected": -2.582672595977783, "logps/chosen": -542.3154907226562, "logps/rejected": -426.00048828125, "loss": 0.6244, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.257037401199341, "rewards/margins": 0.6213769912719727, "rewards/rejected": -2.8784143924713135, "step": 14650 }, { "epoch": 1.89, "learning_rate": 2.0507793822319976e-07, "logits/chosen": -2.7356045246124268, "logits/rejected": -2.5792629718780518, "logps/chosen": -564.7877807617188, "logps/rejected": -403.0102844238281, "loss": 0.505, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9926410913467407, "rewards/margins": 1.0672907829284668, "rewards/rejected": -3.059931516647339, "step": 14660 }, { "epoch": 1.89, "learning_rate": 2.0483886391890597e-07, "logits/chosen": -2.7589449882507324, "logits/rejected": -2.6226253509521484, "logps/chosen": -520.6844482421875, "logps/rejected": -439.40142822265625, "loss": 0.5437, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9251083135604858, "rewards/margins": 0.8791717290878296, "rewards/rejected": -2.8042802810668945, "step": 14670 }, { "epoch": 1.9, "learning_rate": 2.045997896146122e-07, "logits/chosen": -2.769296169281006, "logits/rejected": -2.7370152473449707, "logps/chosen": -517.1736450195312, "logps/rejected": -377.95196533203125, "loss": 0.5408, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.041868209838867, "rewards/margins": 1.010830283164978, "rewards/rejected": -3.0526986122131348, "step": 14680 }, { "epoch": 1.9, "learning_rate": 2.0436071531031842e-07, "logits/chosen": -2.738063097000122, "logits/rejected": -2.5622334480285645, "logps/chosen": -518.1301879882812, "logps/rejected": -416.250732421875, "loss": 0.7355, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.215333938598633, "rewards/margins": 0.5019243955612183, "rewards/rejected": -2.7172584533691406, "step": 14690 }, { "epoch": 1.9, "learning_rate": 2.0412164100602466e-07, "logits/chosen": -2.6389451026916504, "logits/rejected": -2.4463629722595215, "logps/chosen": -575.2347412109375, "logps/rejected": -396.8063659667969, "loss": 0.5662, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0934712886810303, "rewards/margins": 0.8380733728408813, "rewards/rejected": -2.931544780731201, "step": 14700 }, { "epoch": 1.9, "learning_rate": 2.038825667017309e-07, "logits/chosen": -2.8667426109313965, "logits/rejected": -2.7034411430358887, "logps/chosen": -666.693115234375, "logps/rejected": -472.3580627441406, "loss": 0.4919, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9941078424453735, "rewards/margins": 1.0602822303771973, "rewards/rejected": -3.0543899536132812, "step": 14710 }, { "epoch": 1.9, "learning_rate": 2.036434923974371e-07, "logits/chosen": -2.816896438598633, "logits/rejected": -2.673196315765381, "logps/chosen": -649.4793090820312, "logps/rejected": -497.519287109375, "loss": 0.4077, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9412647485733032, "rewards/margins": 1.3977595567703247, "rewards/rejected": -3.339024782180786, "step": 14720 }, { "epoch": 1.9, "learning_rate": 2.0340441809314334e-07, "logits/chosen": -2.6397218704223633, "logits/rejected": -2.5806524753570557, "logps/chosen": -528.6619873046875, "logps/rejected": -427.69769287109375, "loss": 0.5469, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1121127605438232, "rewards/margins": 0.9516986608505249, "rewards/rejected": -3.0638110637664795, "step": 14730 }, { "epoch": 1.9, "learning_rate": 2.0316534378884955e-07, "logits/chosen": -2.827324151992798, "logits/rejected": -2.75024676322937, "logps/chosen": -616.3538818359375, "logps/rejected": -461.82769775390625, "loss": 0.6514, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.146545886993408, "rewards/margins": 0.7455530166625977, "rewards/rejected": -2.892099142074585, "step": 14740 }, { "epoch": 1.9, "learning_rate": 2.029262694845558e-07, "logits/chosen": -2.6778512001037598, "logits/rejected": -2.644958257675171, "logps/chosen": -527.8704223632812, "logps/rejected": -487.34234619140625, "loss": 0.6619, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.921129584312439, "rewards/margins": 0.5654603838920593, "rewards/rejected": -2.4865899085998535, "step": 14750 }, { "epoch": 1.91, "learning_rate": 2.0268719518026202e-07, "logits/chosen": -2.7065556049346924, "logits/rejected": -2.615875720977783, "logps/chosen": -569.4573364257812, "logps/rejected": -520.2298583984375, "loss": 0.6985, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.004481792449951, "rewards/margins": 0.7017321586608887, "rewards/rejected": -2.706214189529419, "step": 14760 }, { "epoch": 1.91, "learning_rate": 2.0244812087596825e-07, "logits/chosen": -2.755030870437622, "logits/rejected": -2.608452320098877, "logps/chosen": -524.837646484375, "logps/rejected": -422.75665283203125, "loss": 0.6605, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.185166597366333, "rewards/margins": 0.6251745820045471, "rewards/rejected": -2.8103411197662354, "step": 14770 }, { "epoch": 1.91, "learning_rate": 2.022090465716745e-07, "logits/chosen": -2.8023414611816406, "logits/rejected": -2.6835262775421143, "logps/chosen": -580.9874267578125, "logps/rejected": -421.8038024902344, "loss": 0.6066, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0531585216522217, "rewards/margins": 0.693474531173706, "rewards/rejected": -2.7466330528259277, "step": 14780 }, { "epoch": 1.91, "learning_rate": 2.019699722673807e-07, "logits/chosen": -2.7638442516326904, "logits/rejected": -2.623547077178955, "logps/chosen": -539.3737182617188, "logps/rejected": -431.498291015625, "loss": 0.6177, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0657057762145996, "rewards/margins": 0.7546465992927551, "rewards/rejected": -2.82035231590271, "step": 14790 }, { "epoch": 1.91, "learning_rate": 2.0173089796308694e-07, "logits/chosen": -2.738569736480713, "logits/rejected": -2.603700637817383, "logps/chosen": -568.007080078125, "logps/rejected": -453.8046875, "loss": 0.5962, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9034526348114014, "rewards/margins": 0.7916036248207092, "rewards/rejected": -2.6950559616088867, "step": 14800 }, { "epoch": 1.91, "learning_rate": 2.0149182365879314e-07, "logits/chosen": -2.6990203857421875, "logits/rejected": -2.573791027069092, "logps/chosen": -551.8687744140625, "logps/rejected": -404.2066955566406, "loss": 0.6755, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1410164833068848, "rewards/margins": 0.5468040704727173, "rewards/rejected": -2.6878204345703125, "step": 14810 }, { "epoch": 1.91, "learning_rate": 2.0125274935449938e-07, "logits/chosen": -2.7082226276397705, "logits/rejected": -2.6613612174987793, "logps/chosen": -547.1222534179688, "logps/rejected": -438.87103271484375, "loss": 0.5639, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1710188388824463, "rewards/margins": 0.8812678456306458, "rewards/rejected": -3.0522866249084473, "step": 14820 }, { "epoch": 1.91, "learning_rate": 2.010136750502056e-07, "logits/chosen": -2.7139809131622314, "logits/rejected": -2.6797542572021484, "logps/chosen": -518.0662231445312, "logps/rejected": -473.88153076171875, "loss": 0.6069, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.059396743774414, "rewards/margins": 0.7454272508621216, "rewards/rejected": -2.8048243522644043, "step": 14830 }, { "epoch": 1.92, "learning_rate": 2.0077460074591183e-07, "logits/chosen": -2.6737473011016846, "logits/rejected": -2.4882373809814453, "logps/chosen": -567.997802734375, "logps/rejected": -384.4302062988281, "loss": 0.6419, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.148343563079834, "rewards/margins": 0.7904902100563049, "rewards/rejected": -2.938833713531494, "step": 14840 }, { "epoch": 1.92, "learning_rate": 2.0053552644161806e-07, "logits/chosen": -2.7375903129577637, "logits/rejected": -2.6321184635162354, "logps/chosen": -568.89306640625, "logps/rejected": -461.52947998046875, "loss": 0.652, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1798644065856934, "rewards/margins": 0.6830139756202698, "rewards/rejected": -2.8628780841827393, "step": 14850 }, { "epoch": 1.92, "learning_rate": 2.0029645213732427e-07, "logits/chosen": -2.822225332260132, "logits/rejected": -2.6313998699188232, "logps/chosen": -608.6297607421875, "logps/rejected": -410.091552734375, "loss": 0.5824, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.4367547035217285, "rewards/margins": 0.797214925289154, "rewards/rejected": -3.2339699268341064, "step": 14860 }, { "epoch": 1.92, "learning_rate": 2.000573778330305e-07, "logits/chosen": -2.8124804496765137, "logits/rejected": -2.622645139694214, "logps/chosen": -682.479248046875, "logps/rejected": -516.0601196289062, "loss": 0.6977, "rewards/accuracies": 0.625, "rewards/chosen": -2.1318180561065674, "rewards/margins": 0.7099889516830444, "rewards/rejected": -2.8418068885803223, "step": 14870 }, { "epoch": 1.92, "learning_rate": 1.9981830352873672e-07, "logits/chosen": -2.7926077842712402, "logits/rejected": -2.725058078765869, "logps/chosen": -550.5626831054688, "logps/rejected": -442.6182556152344, "loss": 0.624, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1881051063537598, "rewards/margins": 0.7397439479827881, "rewards/rejected": -2.9278488159179688, "step": 14880 }, { "epoch": 1.92, "learning_rate": 1.9957922922444295e-07, "logits/chosen": -2.697165012359619, "logits/rejected": -2.592982769012451, "logps/chosen": -585.7525634765625, "logps/rejected": -427.6591796875, "loss": 0.5062, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0995705127716064, "rewards/margins": 1.0029380321502686, "rewards/rejected": -3.102508306503296, "step": 14890 }, { "epoch": 1.92, "learning_rate": 1.9934015492014916e-07, "logits/chosen": -2.796748638153076, "logits/rejected": -2.6403708457946777, "logps/chosen": -569.6085205078125, "logps/rejected": -419.927001953125, "loss": 0.687, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.350451707839966, "rewards/margins": 0.4694640040397644, "rewards/rejected": -2.819915771484375, "step": 14900 }, { "epoch": 1.92, "learning_rate": 1.991010806158554e-07, "logits/chosen": -2.586317539215088, "logits/rejected": -2.5482254028320312, "logps/chosen": -515.4229125976562, "logps/rejected": -424.8426818847656, "loss": 0.599, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9909250736236572, "rewards/margins": 0.7567735910415649, "rewards/rejected": -2.7476987838745117, "step": 14910 }, { "epoch": 1.93, "learning_rate": 1.9886200631156163e-07, "logits/chosen": -2.7699103355407715, "logits/rejected": -2.575268507003784, "logps/chosen": -577.7676391601562, "logps/rejected": -443.09124755859375, "loss": 0.7282, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2999308109283447, "rewards/margins": 0.3697684407234192, "rewards/rejected": -2.669699192047119, "step": 14920 }, { "epoch": 1.93, "learning_rate": 1.9862293200726784e-07, "logits/chosen": -2.838723659515381, "logits/rejected": -2.6984431743621826, "logps/chosen": -566.3756103515625, "logps/rejected": -435.718017578125, "loss": 0.6201, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1778762340545654, "rewards/margins": 0.6877838969230652, "rewards/rejected": -2.8656601905822754, "step": 14930 }, { "epoch": 1.93, "learning_rate": 1.9838385770297408e-07, "logits/chosen": -2.892976760864258, "logits/rejected": -2.670344829559326, "logps/chosen": -552.5631103515625, "logps/rejected": -378.2486267089844, "loss": 0.6701, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0881025791168213, "rewards/margins": 0.6565229296684265, "rewards/rejected": -2.7446255683898926, "step": 14940 }, { "epoch": 1.93, "learning_rate": 1.981447833986803e-07, "logits/chosen": -2.756838798522949, "logits/rejected": -2.6200127601623535, "logps/chosen": -559.6304321289062, "logps/rejected": -492.52056884765625, "loss": 0.5861, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1686902046203613, "rewards/margins": 0.8932687640190125, "rewards/rejected": -3.0619590282440186, "step": 14950 }, { "epoch": 1.93, "learning_rate": 1.9790570909438652e-07, "logits/chosen": -2.7997682094573975, "logits/rejected": -2.5989997386932373, "logps/chosen": -582.345947265625, "logps/rejected": -374.56781005859375, "loss": 0.5623, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1031055450439453, "rewards/margins": 0.7405593395233154, "rewards/rejected": -2.8436648845672607, "step": 14960 }, { "epoch": 1.93, "learning_rate": 1.9766663479009273e-07, "logits/chosen": -2.7255425453186035, "logits/rejected": -2.510983943939209, "logps/chosen": -555.1942138671875, "logps/rejected": -372.0382385253906, "loss": 0.545, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.240068197250366, "rewards/margins": 0.7965300679206848, "rewards/rejected": -3.0365984439849854, "step": 14970 }, { "epoch": 1.93, "learning_rate": 1.9742756048579897e-07, "logits/chosen": -2.801295518875122, "logits/rejected": -2.6307458877563477, "logps/chosen": -614.6251220703125, "logps/rejected": -499.87176513671875, "loss": 0.5902, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.4352879524230957, "rewards/margins": 0.6436265110969543, "rewards/rejected": -3.078914165496826, "step": 14980 }, { "epoch": 1.94, "learning_rate": 1.971884861815052e-07, "logits/chosen": -2.778301954269409, "logits/rejected": -2.721034288406372, "logps/chosen": -539.9666748046875, "logps/rejected": -477.884521484375, "loss": 0.5554, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1083433628082275, "rewards/margins": 0.8919129371643066, "rewards/rejected": -3.000256061553955, "step": 14990 }, { "epoch": 1.94, "learning_rate": 1.9694941187721142e-07, "logits/chosen": -2.7306113243103027, "logits/rejected": -2.636119842529297, "logps/chosen": -543.0123291015625, "logps/rejected": -427.85736083984375, "loss": 0.5239, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9349400997161865, "rewards/margins": 1.0284868478775024, "rewards/rejected": -2.9634270668029785, "step": 15000 }, { "epoch": 1.94, "eval_logits/chosen": -3.0908336639404297, "eval_logits/rejected": -3.0403523445129395, "eval_logps/chosen": -540.7867431640625, "eval_logps/rejected": -420.7780456542969, "eval_loss": 0.6158281564712524, "eval_rewards/accuracies": 0.6775000095367432, "eval_rewards/chosen": -0.8967026472091675, "eval_rewards/margins": 1.0882349014282227, "eval_rewards/rejected": -1.9849377870559692, "eval_runtime": 279.5827, "eval_samples_per_second": 7.154, "eval_steps_per_second": 3.577, "step": 15000 }, { "epoch": 1.94, "learning_rate": 1.9671033757291765e-07, "logits/chosen": -2.904331684112549, "logits/rejected": -2.7667253017425537, "logps/chosen": -551.8350219726562, "logps/rejected": -464.7417907714844, "loss": 0.6226, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.01773738861084, "rewards/margins": 0.6471500396728516, "rewards/rejected": -2.6648871898651123, "step": 15010 }, { "epoch": 1.94, "learning_rate": 1.9647126326862386e-07, "logits/chosen": -2.7318551540374756, "logits/rejected": -2.562401533126831, "logps/chosen": -538.5556640625, "logps/rejected": -385.6553955078125, "loss": 0.7144, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.3654911518096924, "rewards/margins": 0.5445488095283508, "rewards/rejected": -2.9100399017333984, "step": 15020 }, { "epoch": 1.94, "learning_rate": 1.962321889643301e-07, "logits/chosen": -2.706681489944458, "logits/rejected": -2.5878641605377197, "logps/chosen": -546.2606201171875, "logps/rejected": -454.2659606933594, "loss": 0.6204, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2247512340545654, "rewards/margins": 0.6091987490653992, "rewards/rejected": -2.8339498043060303, "step": 15030 }, { "epoch": 1.94, "learning_rate": 1.959931146600363e-07, "logits/chosen": -2.637042760848999, "logits/rejected": -2.6159238815307617, "logps/chosen": -484.1927185058594, "logps/rejected": -433.79852294921875, "loss": 0.5769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1488311290740967, "rewards/margins": 0.8869396448135376, "rewards/rejected": -3.035770893096924, "step": 15040 }, { "epoch": 1.94, "learning_rate": 1.9575404035574257e-07, "logits/chosen": -2.754368543624878, "logits/rejected": -2.6937241554260254, "logps/chosen": -501.2120056152344, "logps/rejected": -408.2104187011719, "loss": 0.6831, "rewards/accuracies": 0.625, "rewards/chosen": -2.078908920288086, "rewards/margins": 0.57866370677948, "rewards/rejected": -2.6575729846954346, "step": 15050 }, { "epoch": 1.94, "learning_rate": 1.955149660514488e-07, "logits/chosen": -2.7284207344055176, "logits/rejected": -2.5595014095306396, "logps/chosen": -564.0391845703125, "logps/rejected": -408.18212890625, "loss": 0.579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.421792984008789, "rewards/margins": 0.7646595239639282, "rewards/rejected": -3.1864523887634277, "step": 15060 }, { "epoch": 1.95, "learning_rate": 1.9527589174715501e-07, "logits/chosen": -2.7342255115509033, "logits/rejected": -2.646972179412842, "logps/chosen": -648.0106201171875, "logps/rejected": -493.51556396484375, "loss": 0.638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.13305401802063, "rewards/margins": 0.7687081098556519, "rewards/rejected": -2.9017622470855713, "step": 15070 }, { "epoch": 1.95, "learning_rate": 1.9503681744286125e-07, "logits/chosen": -2.6708109378814697, "logits/rejected": -2.6009521484375, "logps/chosen": -553.287109375, "logps/rejected": -426.4173889160156, "loss": 0.682, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0988824367523193, "rewards/margins": 0.6061711311340332, "rewards/rejected": -2.7050535678863525, "step": 15080 }, { "epoch": 1.95, "learning_rate": 1.9479774313856746e-07, "logits/chosen": -2.844939708709717, "logits/rejected": -2.6249523162841797, "logps/chosen": -542.7302856445312, "logps/rejected": -415.1686096191406, "loss": 0.5523, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0338034629821777, "rewards/margins": 0.9199355244636536, "rewards/rejected": -2.9537386894226074, "step": 15090 }, { "epoch": 1.95, "learning_rate": 1.945586688342737e-07, "logits/chosen": -2.6809420585632324, "logits/rejected": -2.6662449836730957, "logps/chosen": -485.7569274902344, "logps/rejected": -435.0341796875, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -2.133450984954834, "rewards/margins": 0.8235113024711609, "rewards/rejected": -2.9569621086120605, "step": 15100 }, { "epoch": 1.95, "learning_rate": 1.943195945299799e-07, "logits/chosen": -2.7283565998077393, "logits/rejected": -2.6513314247131348, "logps/chosen": -573.971435546875, "logps/rejected": -470.82421875, "loss": 0.608, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.163905143737793, "rewards/margins": 0.644361138343811, "rewards/rejected": -2.8082661628723145, "step": 15110 }, { "epoch": 1.95, "learning_rate": 1.9408052022568614e-07, "logits/chosen": -2.736923933029175, "logits/rejected": -2.592313766479492, "logps/chosen": -682.5001220703125, "logps/rejected": -515.9844360351562, "loss": 0.4501, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9938275814056396, "rewards/margins": 1.0540533065795898, "rewards/rejected": -3.0478813648223877, "step": 15120 }, { "epoch": 1.95, "learning_rate": 1.9384144592139238e-07, "logits/chosen": -2.7760846614837646, "logits/rejected": -2.771836519241333, "logps/chosen": -528.4393310546875, "logps/rejected": -445.7470703125, "loss": 0.5991, "rewards/accuracies": 0.75, "rewards/chosen": -2.197725534439087, "rewards/margins": 0.888806939125061, "rewards/rejected": -3.0865321159362793, "step": 15130 }, { "epoch": 1.95, "learning_rate": 1.9360237161709859e-07, "logits/chosen": -2.8504021167755127, "logits/rejected": -2.7897186279296875, "logps/chosen": -517.8538208007812, "logps/rejected": -432.841064453125, "loss": 0.6889, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.330322742462158, "rewards/margins": 0.5465334057807922, "rewards/rejected": -2.876856565475464, "step": 15140 }, { "epoch": 1.96, "learning_rate": 1.9336329731280482e-07, "logits/chosen": -2.8110547065734863, "logits/rejected": -2.5961074829101562, "logps/chosen": -627.2508544921875, "logps/rejected": -373.12664794921875, "loss": 0.5451, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9023147821426392, "rewards/margins": 1.0986731052398682, "rewards/rejected": -3.000988245010376, "step": 15150 }, { "epoch": 1.96, "learning_rate": 1.9312422300851103e-07, "logits/chosen": -2.7396857738494873, "logits/rejected": -2.716564178466797, "logps/chosen": -548.2476806640625, "logps/rejected": -461.99493408203125, "loss": 0.7364, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.4312615394592285, "rewards/margins": 0.46439695358276367, "rewards/rejected": -2.895658493041992, "step": 15160 }, { "epoch": 1.96, "learning_rate": 1.9288514870421727e-07, "logits/chosen": -2.59934401512146, "logits/rejected": -2.698207139968872, "logps/chosen": -425.4571228027344, "logps/rejected": -474.2586364746094, "loss": 0.8177, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.0669679641723633, "rewards/margins": 0.22939082980155945, "rewards/rejected": -2.296358823776245, "step": 15170 }, { "epoch": 1.96, "learning_rate": 1.9264607439992348e-07, "logits/chosen": -2.7992606163024902, "logits/rejected": -2.610278606414795, "logps/chosen": -568.5703125, "logps/rejected": -460.09808349609375, "loss": 0.6503, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1719295978546143, "rewards/margins": 0.580925703048706, "rewards/rejected": -2.752855062484741, "step": 15180 }, { "epoch": 1.96, "learning_rate": 1.924070000956297e-07, "logits/chosen": -2.6879665851593018, "logits/rejected": -2.5561585426330566, "logps/chosen": -576.8784790039062, "logps/rejected": -415.511962890625, "loss": 0.7442, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.234666109085083, "rewards/margins": 0.5391125679016113, "rewards/rejected": -2.7737786769866943, "step": 15190 }, { "epoch": 1.96, "learning_rate": 1.9216792579133595e-07, "logits/chosen": -2.754016637802124, "logits/rejected": -2.618269205093384, "logps/chosen": -553.1851196289062, "logps/rejected": -378.22711181640625, "loss": 0.6088, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9560024738311768, "rewards/margins": 0.8285994529724121, "rewards/rejected": -2.784601926803589, "step": 15200 }, { "epoch": 1.96, "learning_rate": 1.9192885148704216e-07, "logits/chosen": -2.7797210216522217, "logits/rejected": -2.6830894947052, "logps/chosen": -649.6876220703125, "logps/rejected": -461.88336181640625, "loss": 0.6029, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1965596675872803, "rewards/margins": 0.6581524610519409, "rewards/rejected": -2.8547120094299316, "step": 15210 }, { "epoch": 1.96, "learning_rate": 1.916897771827484e-07, "logits/chosen": -2.6917710304260254, "logits/rejected": -2.713322162628174, "logps/chosen": -505.3926696777344, "logps/rejected": -420.518798828125, "loss": 0.6904, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1035988330841064, "rewards/margins": 0.5308338403701782, "rewards/rejected": -2.634432554244995, "step": 15220 }, { "epoch": 1.97, "learning_rate": 1.914507028784546e-07, "logits/chosen": -2.671797037124634, "logits/rejected": -2.663745164871216, "logps/chosen": -578.2312622070312, "logps/rejected": -484.2088928222656, "loss": 0.6209, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.209374189376831, "rewards/margins": 0.8207567930221558, "rewards/rejected": -3.0301308631896973, "step": 15230 }, { "epoch": 1.97, "learning_rate": 1.9121162857416084e-07, "logits/chosen": -2.6709001064300537, "logits/rejected": -2.585597038269043, "logps/chosen": -685.36669921875, "logps/rejected": -452.613037109375, "loss": 0.6106, "rewards/accuracies": 0.6875, "rewards/chosen": -2.05204439163208, "rewards/margins": 0.8485413789749146, "rewards/rejected": -2.900585651397705, "step": 15240 }, { "epoch": 1.97, "learning_rate": 1.9097255426986705e-07, "logits/chosen": -2.7660043239593506, "logits/rejected": -2.794801950454712, "logps/chosen": -414.7342834472656, "logps/rejected": -395.2092590332031, "loss": 0.544, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0153355598449707, "rewards/margins": 0.8239368200302124, "rewards/rejected": -2.8392727375030518, "step": 15250 }, { "epoch": 1.97, "learning_rate": 1.9073347996557328e-07, "logits/chosen": -2.7351279258728027, "logits/rejected": -2.6899185180664062, "logps/chosen": -489.79901123046875, "logps/rejected": -444.391357421875, "loss": 0.652, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.11954665184021, "rewards/margins": 0.5372982025146484, "rewards/rejected": -2.6568450927734375, "step": 15260 }, { "epoch": 1.97, "learning_rate": 1.9049440566127952e-07, "logits/chosen": -2.6605563163757324, "logits/rejected": -2.583998203277588, "logps/chosen": -552.3141479492188, "logps/rejected": -451.9276428222656, "loss": 0.6569, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1494953632354736, "rewards/margins": 0.5625936985015869, "rewards/rejected": -2.7120890617370605, "step": 15270 }, { "epoch": 1.97, "learning_rate": 1.9025533135698573e-07, "logits/chosen": -2.794893741607666, "logits/rejected": -2.593043088912964, "logps/chosen": -628.8561401367188, "logps/rejected": -397.3992614746094, "loss": 0.5979, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1880412101745605, "rewards/margins": 0.7627966403961182, "rewards/rejected": -2.9508376121520996, "step": 15280 }, { "epoch": 1.97, "learning_rate": 1.9001625705269197e-07, "logits/chosen": -2.725663423538208, "logits/rejected": -2.6460788249969482, "logps/chosen": -539.8975219726562, "logps/rejected": -408.0265197753906, "loss": 0.6422, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.1858277320861816, "rewards/margins": 0.5714628100395203, "rewards/rejected": -2.7572906017303467, "step": 15290 }, { "epoch": 1.98, "learning_rate": 1.8977718274839818e-07, "logits/chosen": -2.7391412258148193, "logits/rejected": -2.6101436614990234, "logps/chosen": -486.68524169921875, "logps/rejected": -434.8233337402344, "loss": 0.5848, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.173086166381836, "rewards/margins": 0.7685021758079529, "rewards/rejected": -2.9415881633758545, "step": 15300 }, { "epoch": 1.98, "learning_rate": 1.895381084441044e-07, "logits/chosen": -2.6025354862213135, "logits/rejected": -2.535527467727661, "logps/chosen": -544.8760986328125, "logps/rejected": -435.26788330078125, "loss": 0.6211, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2635324001312256, "rewards/margins": 0.6003071665763855, "rewards/rejected": -2.863839626312256, "step": 15310 }, { "epoch": 1.98, "learning_rate": 1.8929903413981062e-07, "logits/chosen": -2.6850714683532715, "logits/rejected": -2.6139614582061768, "logps/chosen": -543.2139282226562, "logps/rejected": -465.26727294921875, "loss": 0.6555, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.131812334060669, "rewards/margins": 0.6519659161567688, "rewards/rejected": -2.783777952194214, "step": 15320 }, { "epoch": 1.98, "learning_rate": 1.8905995983551686e-07, "logits/chosen": -2.738384485244751, "logits/rejected": -2.602332592010498, "logps/chosen": -567.8790893554688, "logps/rejected": -421.7894592285156, "loss": 0.6836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2197515964508057, "rewards/margins": 0.5961961150169373, "rewards/rejected": -2.8159477710723877, "step": 15330 }, { "epoch": 1.98, "learning_rate": 1.8882088553122312e-07, "logits/chosen": -2.6488027572631836, "logits/rejected": -2.552995204925537, "logps/chosen": -544.687255859375, "logps/rejected": -444.35723876953125, "loss": 0.6649, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0770812034606934, "rewards/margins": 0.6476441621780396, "rewards/rejected": -2.7247252464294434, "step": 15340 }, { "epoch": 1.98, "learning_rate": 1.8858181122692933e-07, "logits/chosen": -2.7680444717407227, "logits/rejected": -2.6382060050964355, "logps/chosen": -642.7134399414062, "logps/rejected": -485.05731201171875, "loss": 0.5654, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0646347999572754, "rewards/margins": 0.8024538159370422, "rewards/rejected": -2.867088794708252, "step": 15350 }, { "epoch": 1.98, "learning_rate": 1.8834273692263556e-07, "logits/chosen": -2.818166971206665, "logits/rejected": -2.6438844203948975, "logps/chosen": -554.393310546875, "logps/rejected": -408.43646240234375, "loss": 0.6844, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.3495993614196777, "rewards/margins": 0.5403726696968079, "rewards/rejected": -2.88997220993042, "step": 15360 }, { "epoch": 1.98, "learning_rate": 1.8810366261834177e-07, "logits/chosen": -2.790497064590454, "logits/rejected": -2.5295557975769043, "logps/chosen": -627.2350463867188, "logps/rejected": -433.61083984375, "loss": 0.5896, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.192863702774048, "rewards/margins": 0.7649389505386353, "rewards/rejected": -2.9578022956848145, "step": 15370 }, { "epoch": 1.99, "learning_rate": 1.87864588314048e-07, "logits/chosen": -2.783315658569336, "logits/rejected": -2.628131866455078, "logps/chosen": -693.041748046875, "logps/rejected": -516.5418090820312, "loss": 0.6556, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.097351551055908, "rewards/margins": 0.8854547739028931, "rewards/rejected": -2.9828059673309326, "step": 15380 }, { "epoch": 1.99, "learning_rate": 1.8762551400975422e-07, "logits/chosen": -2.664048433303833, "logits/rejected": -2.5955073833465576, "logps/chosen": -522.1605224609375, "logps/rejected": -441.9852600097656, "loss": 0.5352, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.095315456390381, "rewards/margins": 1.03943932056427, "rewards/rejected": -3.1347544193267822, "step": 15390 }, { "epoch": 1.99, "learning_rate": 1.8738643970546045e-07, "logits/chosen": -2.6712541580200195, "logits/rejected": -2.5261759757995605, "logps/chosen": -546.1666870117188, "logps/rejected": -444.47576904296875, "loss": 0.9341, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.395221710205078, "rewards/margins": 0.3252303898334503, "rewards/rejected": -2.720452070236206, "step": 15400 }, { "epoch": 1.99, "learning_rate": 1.871473654011667e-07, "logits/chosen": -2.7179131507873535, "logits/rejected": -2.675389528274536, "logps/chosen": -520.3854370117188, "logps/rejected": -459.05633544921875, "loss": 0.654, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.16593599319458, "rewards/margins": 0.6745327115058899, "rewards/rejected": -2.840468645095825, "step": 15410 }, { "epoch": 1.99, "learning_rate": 1.869082910968729e-07, "logits/chosen": -2.794487476348877, "logits/rejected": -2.6134533882141113, "logps/chosen": -531.8943481445312, "logps/rejected": -372.5009460449219, "loss": 0.6695, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.321160078048706, "rewards/margins": 0.6636377573013306, "rewards/rejected": -2.984797954559326, "step": 15420 }, { "epoch": 1.99, "learning_rate": 1.8666921679257914e-07, "logits/chosen": -2.7696444988250732, "logits/rejected": -2.689220428466797, "logps/chosen": -557.249267578125, "logps/rejected": -447.14349365234375, "loss": 0.5775, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0019946098327637, "rewards/margins": 0.9136720895767212, "rewards/rejected": -2.9156668186187744, "step": 15430 }, { "epoch": 1.99, "learning_rate": 1.8643014248828535e-07, "logits/chosen": -2.787870407104492, "logits/rejected": -2.74094820022583, "logps/chosen": -533.2545776367188, "logps/rejected": -448.53778076171875, "loss": 0.5263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2650904655456543, "rewards/margins": 0.9451848268508911, "rewards/rejected": -3.210275173187256, "step": 15440 }, { "epoch": 1.99, "learning_rate": 1.8619106818399158e-07, "logits/chosen": -2.68306565284729, "logits/rejected": -2.5840353965759277, "logps/chosen": -562.5986328125, "logps/rejected": -488.94610595703125, "loss": 0.5533, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.301645517349243, "rewards/margins": 0.8350297212600708, "rewards/rejected": -3.1366751194000244, "step": 15450 }, { "epoch": 2.0, "learning_rate": 1.859519938796978e-07, "logits/chosen": -2.703068256378174, "logits/rejected": -2.5126566886901855, "logps/chosen": -637.3009033203125, "logps/rejected": -396.02520751953125, "loss": 0.5928, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2101426124572754, "rewards/margins": 0.9712456464767456, "rewards/rejected": -3.1813883781433105, "step": 15460 }, { "epoch": 2.0, "learning_rate": 1.8571291957540403e-07, "logits/chosen": -2.8112101554870605, "logits/rejected": -2.5580382347106934, "logps/chosen": -637.051025390625, "logps/rejected": -435.1160583496094, "loss": 0.4546, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1700799465179443, "rewards/margins": 1.1894824504852295, "rewards/rejected": -3.3595619201660156, "step": 15470 }, { "epoch": 2.0, "learning_rate": 1.8547384527111026e-07, "logits/chosen": -2.781386137008667, "logits/rejected": -2.748220920562744, "logps/chosen": -565.39990234375, "logps/rejected": -494.01300048828125, "loss": 0.498, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.041114091873169, "rewards/margins": 1.10586416721344, "rewards/rejected": -3.1469781398773193, "step": 15480 }, { "epoch": 2.0, "learning_rate": 1.8523477096681647e-07, "logits/chosen": -2.68202543258667, "logits/rejected": -2.593074083328247, "logps/chosen": -518.6759643554688, "logps/rejected": -387.43316650390625, "loss": 0.651, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1718573570251465, "rewards/margins": 0.7104286551475525, "rewards/rejected": -2.882286310195923, "step": 15490 }, { "epoch": 2.0, "learning_rate": 1.849956966625227e-07, "logits/chosen": -2.8083720207214355, "logits/rejected": -2.6874032020568848, "logps/chosen": -585.5565795898438, "logps/rejected": -452.054931640625, "loss": 0.4761, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.00799298286438, "rewards/margins": 1.1529614925384521, "rewards/rejected": -3.160954713821411, "step": 15500 }, { "epoch": 2.0, "learning_rate": 1.8475662235822892e-07, "logits/chosen": -2.6957526206970215, "logits/rejected": -2.564018964767456, "logps/chosen": -551.4783935546875, "logps/rejected": -413.80450439453125, "loss": 0.5508, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.038095474243164, "rewards/margins": 0.997907280921936, "rewards/rejected": -3.0360031127929688, "step": 15510 }, { "epoch": 2.0, "learning_rate": 1.8451754805393515e-07, "logits/chosen": -2.6180882453918457, "logits/rejected": -2.5672004222869873, "logps/chosen": -553.5213623046875, "logps/rejected": -471.7325744628906, "loss": 0.6329, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.27101469039917, "rewards/margins": 0.6399329900741577, "rewards/rejected": -2.910947799682617, "step": 15520 }, { "epoch": 2.0, "learning_rate": 1.8427847374964136e-07, "logits/chosen": -2.7674551010131836, "logits/rejected": -2.668375015258789, "logps/chosen": -535.994384765625, "logps/rejected": -411.02020263671875, "loss": 0.5585, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0736706256866455, "rewards/margins": 0.839364230632782, "rewards/rejected": -2.9130349159240723, "step": 15530 }, { "epoch": 2.01, "learning_rate": 1.840393994453476e-07, "logits/chosen": -2.824047803878784, "logits/rejected": -2.7181408405303955, "logps/chosen": -620.650390625, "logps/rejected": -459.2146911621094, "loss": 0.5087, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8481073379516602, "rewards/margins": 1.1949684619903564, "rewards/rejected": -3.0430750846862793, "step": 15540 }, { "epoch": 2.01, "learning_rate": 1.8380032514105383e-07, "logits/chosen": -2.6176059246063232, "logits/rejected": -2.657289981842041, "logps/chosen": -482.28363037109375, "logps/rejected": -476.34649658203125, "loss": 0.4862, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.045891284942627, "rewards/margins": 1.1682407855987549, "rewards/rejected": -3.214132308959961, "step": 15550 }, { "epoch": 2.01, "learning_rate": 1.8356125083676004e-07, "logits/chosen": -2.787374973297119, "logits/rejected": -2.6430039405822754, "logps/chosen": -669.14697265625, "logps/rejected": -489.5421447753906, "loss": 0.6547, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2478833198547363, "rewards/margins": 0.5854207873344421, "rewards/rejected": -2.8333041667938232, "step": 15560 }, { "epoch": 2.01, "learning_rate": 1.8332217653246628e-07, "logits/chosen": -2.7048745155334473, "logits/rejected": -2.6300880908966064, "logps/chosen": -516.8692626953125, "logps/rejected": -444.2862854003906, "loss": 0.5581, "rewards/accuracies": 0.6875, "rewards/chosen": -2.028008222579956, "rewards/margins": 0.7495276927947998, "rewards/rejected": -2.7775356769561768, "step": 15570 }, { "epoch": 2.01, "learning_rate": 1.830831022281725e-07, "logits/chosen": -2.734194040298462, "logits/rejected": -2.5961384773254395, "logps/chosen": -472.14898681640625, "logps/rejected": -359.2301330566406, "loss": 0.5864, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.026456594467163, "rewards/margins": 0.7947681546211243, "rewards/rejected": -2.821225166320801, "step": 15580 }, { "epoch": 2.01, "learning_rate": 1.8284402792387873e-07, "logits/chosen": -2.572615385055542, "logits/rejected": -2.6295275688171387, "logps/chosen": -516.1229248046875, "logps/rejected": -476.13409423828125, "loss": 0.5261, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9569463729858398, "rewards/margins": 1.0634241104125977, "rewards/rejected": -3.0203704833984375, "step": 15590 }, { "epoch": 2.01, "learning_rate": 1.8260495361958494e-07, "logits/chosen": -2.71496844291687, "logits/rejected": -2.7046353816986084, "logps/chosen": -533.9068603515625, "logps/rejected": -483.65289306640625, "loss": 0.6578, "rewards/accuracies": 0.6875, "rewards/chosen": -2.16613507270813, "rewards/margins": 0.6807119846343994, "rewards/rejected": -2.8468470573425293, "step": 15600 }, { "epoch": 2.02, "learning_rate": 1.8236587931529117e-07, "logits/chosen": -2.677644729614258, "logits/rejected": -2.7080883979797363, "logps/chosen": -634.175537109375, "logps/rejected": -527.0159301757812, "loss": 0.5284, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9959659576416016, "rewards/margins": 0.9237972497940063, "rewards/rejected": -2.9197630882263184, "step": 15610 }, { "epoch": 2.02, "learning_rate": 1.821268050109974e-07, "logits/chosen": -2.6651995182037354, "logits/rejected": -2.647719144821167, "logps/chosen": -511.4430236816406, "logps/rejected": -405.57257080078125, "loss": 0.5588, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.138380527496338, "rewards/margins": 0.794744610786438, "rewards/rejected": -2.9331250190734863, "step": 15620 }, { "epoch": 2.02, "learning_rate": 1.8188773070670362e-07, "logits/chosen": -2.6327836513519287, "logits/rejected": -2.5379395484924316, "logps/chosen": -495.95751953125, "logps/rejected": -357.56573486328125, "loss": 0.4632, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9054619073867798, "rewards/margins": 1.0294053554534912, "rewards/rejected": -2.9348671436309814, "step": 15630 }, { "epoch": 2.02, "learning_rate": 1.8164865640240988e-07, "logits/chosen": -2.7075321674346924, "logits/rejected": -2.5622401237487793, "logps/chosen": -537.71044921875, "logps/rejected": -419.6671447753906, "loss": 0.5457, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9953196048736572, "rewards/margins": 0.8837642669677734, "rewards/rejected": -2.8790836334228516, "step": 15640 }, { "epoch": 2.02, "learning_rate": 1.814095820981161e-07, "logits/chosen": -2.735572099685669, "logits/rejected": -2.632488489151001, "logps/chosen": -499.60296630859375, "logps/rejected": -366.91888427734375, "loss": 0.4583, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0290274620056152, "rewards/margins": 1.093124270439148, "rewards/rejected": -3.122152090072632, "step": 15650 }, { "epoch": 2.02, "learning_rate": 1.8117050779382232e-07, "logits/chosen": -2.7148470878601074, "logits/rejected": -2.572031021118164, "logps/chosen": -574.8402099609375, "logps/rejected": -404.471435546875, "loss": 0.524, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.153367280960083, "rewards/margins": 0.9606853723526001, "rewards/rejected": -3.1140530109405518, "step": 15660 }, { "epoch": 2.02, "learning_rate": 1.8093143348952856e-07, "logits/chosen": -2.8355088233947754, "logits/rejected": -2.6856696605682373, "logps/chosen": -596.2754516601562, "logps/rejected": -445.82318115234375, "loss": 0.5771, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0335216522216797, "rewards/margins": 0.8939725160598755, "rewards/rejected": -2.9274938106536865, "step": 15670 }, { "epoch": 2.02, "learning_rate": 1.8069235918523477e-07, "logits/chosen": -2.76615571975708, "logits/rejected": -2.691056489944458, "logps/chosen": -575.2119750976562, "logps/rejected": -479.001708984375, "loss": 0.4317, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.200350046157837, "rewards/margins": 1.0588538646697998, "rewards/rejected": -3.2592036724090576, "step": 15680 }, { "epoch": 2.03, "learning_rate": 1.80453284880941e-07, "logits/chosen": -2.681379795074463, "logits/rejected": -2.6567084789276123, "logps/chosen": -519.7257690429688, "logps/rejected": -439.40899658203125, "loss": 0.4438, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0487289428710938, "rewards/margins": 1.0802924633026123, "rewards/rejected": -3.129021406173706, "step": 15690 }, { "epoch": 2.03, "learning_rate": 1.8021421057664721e-07, "logits/chosen": -2.7683074474334717, "logits/rejected": -2.6739306449890137, "logps/chosen": -501.914794921875, "logps/rejected": -386.5697937011719, "loss": 0.4315, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0045716762542725, "rewards/margins": 1.0435471534729004, "rewards/rejected": -3.048119068145752, "step": 15700 }, { "epoch": 2.03, "learning_rate": 1.7997513627235345e-07, "logits/chosen": -2.6987862586975098, "logits/rejected": -2.6118812561035156, "logps/chosen": -637.160400390625, "logps/rejected": -467.319091796875, "loss": 0.6397, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.096198558807373, "rewards/margins": 0.8251880407333374, "rewards/rejected": -2.92138671875, "step": 15710 }, { "epoch": 2.03, "learning_rate": 1.7973606196805966e-07, "logits/chosen": -2.656728744506836, "logits/rejected": -2.5991640090942383, "logps/chosen": -590.7293701171875, "logps/rejected": -499.4239196777344, "loss": 0.5498, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1822962760925293, "rewards/margins": 0.8315328359603882, "rewards/rejected": -3.013829469680786, "step": 15720 }, { "epoch": 2.03, "learning_rate": 1.794969876637659e-07, "logits/chosen": -2.6939547061920166, "logits/rejected": -2.614835262298584, "logps/chosen": -497.91363525390625, "logps/rejected": -405.6688232421875, "loss": 0.5329, "rewards/accuracies": 0.75, "rewards/chosen": -2.1335442066192627, "rewards/margins": 0.8648913502693176, "rewards/rejected": -2.9984354972839355, "step": 15730 }, { "epoch": 2.03, "learning_rate": 1.7925791335947213e-07, "logits/chosen": -2.8543550968170166, "logits/rejected": -2.6410727500915527, "logps/chosen": -582.289794921875, "logps/rejected": -411.839111328125, "loss": 0.6339, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1434664726257324, "rewards/margins": 0.5923014283180237, "rewards/rejected": -2.7357680797576904, "step": 15740 }, { "epoch": 2.03, "learning_rate": 1.7901883905517834e-07, "logits/chosen": -2.777231454849243, "logits/rejected": -2.7012367248535156, "logps/chosen": -490.69488525390625, "logps/rejected": -400.11968994140625, "loss": 0.5435, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0707359313964844, "rewards/margins": 0.9154629707336426, "rewards/rejected": -2.986198902130127, "step": 15750 }, { "epoch": 2.03, "learning_rate": 1.7877976475088458e-07, "logits/chosen": -2.6961593627929688, "logits/rejected": -2.5789828300476074, "logps/chosen": -565.177978515625, "logps/rejected": -421.5916442871094, "loss": 0.5826, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.12176513671875, "rewards/margins": 0.8233081698417664, "rewards/rejected": -2.9450736045837402, "step": 15760 }, { "epoch": 2.04, "learning_rate": 1.785406904465908e-07, "logits/chosen": -2.674232244491577, "logits/rejected": -2.5790908336639404, "logps/chosen": -636.6239013671875, "logps/rejected": -497.1231384277344, "loss": 0.4281, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.7268060445785522, "rewards/margins": 1.1435120105743408, "rewards/rejected": -2.8703179359436035, "step": 15770 }, { "epoch": 2.04, "learning_rate": 1.7830161614229702e-07, "logits/chosen": -2.781517505645752, "logits/rejected": -2.556450366973877, "logps/chosen": -667.3082885742188, "logps/rejected": -436.41229248046875, "loss": 0.4733, "rewards/accuracies": 0.75, "rewards/chosen": -2.174795389175415, "rewards/margins": 1.1704912185668945, "rewards/rejected": -3.3452866077423096, "step": 15780 }, { "epoch": 2.04, "learning_rate": 1.7806254183800323e-07, "logits/chosen": -2.687221050262451, "logits/rejected": -2.53355073928833, "logps/chosen": -510.4129943847656, "logps/rejected": -386.8094177246094, "loss": 0.7025, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.2782578468322754, "rewards/margins": 0.5277835130691528, "rewards/rejected": -2.8060412406921387, "step": 15790 }, { "epoch": 2.04, "learning_rate": 1.7782346753370947e-07, "logits/chosen": -2.734930992126465, "logits/rejected": -2.627690553665161, "logps/chosen": -611.872314453125, "logps/rejected": -482.53045654296875, "loss": 0.4547, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0116147994995117, "rewards/margins": 1.0201506614685059, "rewards/rejected": -3.0317656993865967, "step": 15800 }, { "epoch": 2.04, "learning_rate": 1.775843932294157e-07, "logits/chosen": -2.6368393898010254, "logits/rejected": -2.6522698402404785, "logps/chosen": -544.7159423828125, "logps/rejected": -501.11810302734375, "loss": 0.5519, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2545907497406006, "rewards/margins": 0.8133015632629395, "rewards/rejected": -3.067892551422119, "step": 15810 }, { "epoch": 2.04, "learning_rate": 1.7734531892512191e-07, "logits/chosen": -2.6793935298919678, "logits/rejected": -2.6175355911254883, "logps/chosen": -574.0875854492188, "logps/rejected": -443.4072265625, "loss": 0.5117, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1245293617248535, "rewards/margins": 0.9904994964599609, "rewards/rejected": -3.1150290966033936, "step": 15820 }, { "epoch": 2.04, "learning_rate": 1.7710624462082815e-07, "logits/chosen": -2.7340762615203857, "logits/rejected": -2.6609787940979004, "logps/chosen": -485.25909423828125, "logps/rejected": -397.6617126464844, "loss": 0.5279, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9828331470489502, "rewards/margins": 0.9122559428215027, "rewards/rejected": -2.8950889110565186, "step": 15830 }, { "epoch": 2.04, "learning_rate": 1.7686717031653436e-07, "logits/chosen": -2.6794393062591553, "logits/rejected": -2.6027305126190186, "logps/chosen": -525.37744140625, "logps/rejected": -418.29913330078125, "loss": 0.5446, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0333685874938965, "rewards/margins": 0.8740461468696594, "rewards/rejected": -2.907414674758911, "step": 15840 }, { "epoch": 2.05, "learning_rate": 1.766280960122406e-07, "logits/chosen": -2.683589220046997, "logits/rejected": -2.5894627571105957, "logps/chosen": -465.17169189453125, "logps/rejected": -369.2214660644531, "loss": 0.4747, "rewards/accuracies": 0.75, "rewards/chosen": -1.8485409021377563, "rewards/margins": 0.9833611249923706, "rewards/rejected": -2.831902027130127, "step": 15850 }, { "epoch": 2.05, "learning_rate": 1.763890217079468e-07, "logits/chosen": -2.62750506401062, "logits/rejected": -2.6060898303985596, "logps/chosen": -527.3961181640625, "logps/rejected": -476.9634704589844, "loss": 0.54, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1114864349365234, "rewards/margins": 0.9977298974990845, "rewards/rejected": -3.1092162132263184, "step": 15860 }, { "epoch": 2.05, "learning_rate": 1.7614994740365304e-07, "logits/chosen": -2.7836079597473145, "logits/rejected": -2.616711139678955, "logps/chosen": -622.229248046875, "logps/rejected": -452.88006591796875, "loss": 0.5405, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9618839025497437, "rewards/margins": 0.8679577708244324, "rewards/rejected": -2.8298418521881104, "step": 15870 }, { "epoch": 2.05, "learning_rate": 1.7591087309935928e-07, "logits/chosen": -2.689079999923706, "logits/rejected": -2.6209397315979004, "logps/chosen": -513.9140625, "logps/rejected": -408.5860595703125, "loss": 0.5243, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2193009853363037, "rewards/margins": 0.9371811151504517, "rewards/rejected": -3.156482219696045, "step": 15880 }, { "epoch": 2.05, "learning_rate": 1.7567179879506549e-07, "logits/chosen": -2.7776293754577637, "logits/rejected": -2.7217893600463867, "logps/chosen": -555.6466064453125, "logps/rejected": -498.1510314941406, "loss": 0.4848, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.965294599533081, "rewards/margins": 0.9823633432388306, "rewards/rejected": -2.947658061981201, "step": 15890 }, { "epoch": 2.05, "learning_rate": 1.7543272449077172e-07, "logits/chosen": -2.850172758102417, "logits/rejected": -2.6876988410949707, "logps/chosen": -557.4830322265625, "logps/rejected": -349.8135070800781, "loss": 0.4395, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0056416988372803, "rewards/margins": 1.2684962749481201, "rewards/rejected": -3.2741379737854004, "step": 15900 }, { "epoch": 2.05, "learning_rate": 1.7519365018647793e-07, "logits/chosen": -2.7673733234405518, "logits/rejected": -2.650634288787842, "logps/chosen": -536.7921142578125, "logps/rejected": -443.0262756347656, "loss": 0.4722, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.138801097869873, "rewards/margins": 1.124875783920288, "rewards/rejected": -3.263676881790161, "step": 15910 }, { "epoch": 2.06, "learning_rate": 1.7495457588218417e-07, "logits/chosen": -2.793942928314209, "logits/rejected": -2.677493095397949, "logps/chosen": -639.7744140625, "logps/rejected": -442.54376220703125, "loss": 0.6259, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1716971397399902, "rewards/margins": 0.7870337963104248, "rewards/rejected": -2.958731174468994, "step": 15920 }, { "epoch": 2.06, "learning_rate": 1.747155015778904e-07, "logits/chosen": -2.690372943878174, "logits/rejected": -2.588696002960205, "logps/chosen": -546.7692260742188, "logps/rejected": -413.80169677734375, "loss": 0.5063, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0440680980682373, "rewards/margins": 1.07588791847229, "rewards/rejected": -3.1199560165405273, "step": 15930 }, { "epoch": 2.06, "learning_rate": 1.7447642727359664e-07, "logits/chosen": -2.701991558074951, "logits/rejected": -2.5867526531219482, "logps/chosen": -549.2972412109375, "logps/rejected": -420.2171325683594, "loss": 0.5138, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2470953464508057, "rewards/margins": 0.8302184343338013, "rewards/rejected": -3.0773138999938965, "step": 15940 }, { "epoch": 2.06, "learning_rate": 1.7423735296930287e-07, "logits/chosen": -2.648634433746338, "logits/rejected": -2.5474140644073486, "logps/chosen": -584.4354858398438, "logps/rejected": -491.95263671875, "loss": 0.5065, "rewards/accuracies": 0.6875, "rewards/chosen": -2.137873411178589, "rewards/margins": 0.8986722826957703, "rewards/rejected": -3.036545991897583, "step": 15950 }, { "epoch": 2.06, "learning_rate": 1.7399827866500908e-07, "logits/chosen": -2.752929210662842, "logits/rejected": -2.643355131149292, "logps/chosen": -559.8423461914062, "logps/rejected": -446.47186279296875, "loss": 0.6472, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3712992668151855, "rewards/margins": 0.7220799922943115, "rewards/rejected": -3.093378782272339, "step": 15960 }, { "epoch": 2.06, "learning_rate": 1.7375920436071532e-07, "logits/chosen": -2.772965669631958, "logits/rejected": -2.6733479499816895, "logps/chosen": -635.1339111328125, "logps/rejected": -487.4976501464844, "loss": 0.5304, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2603206634521484, "rewards/margins": 1.0100598335266113, "rewards/rejected": -3.2703804969787598, "step": 15970 }, { "epoch": 2.06, "learning_rate": 1.7352013005642153e-07, "logits/chosen": -2.7220263481140137, "logits/rejected": -2.597092390060425, "logps/chosen": -536.7499389648438, "logps/rejected": -413.26458740234375, "loss": 0.5632, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.175072193145752, "rewards/margins": 0.9740579724311829, "rewards/rejected": -3.149130344390869, "step": 15980 }, { "epoch": 2.06, "learning_rate": 1.7328105575212777e-07, "logits/chosen": -2.724583625793457, "logits/rejected": -2.6171464920043945, "logps/chosen": -533.3798828125, "logps/rejected": -461.22430419921875, "loss": 0.6503, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.262551784515381, "rewards/margins": 0.5835323929786682, "rewards/rejected": -2.846083879470825, "step": 15990 }, { "epoch": 2.07, "learning_rate": 1.7304198144783397e-07, "logits/chosen": -2.8095521926879883, "logits/rejected": -2.656545877456665, "logps/chosen": -508.93817138671875, "logps/rejected": -435.3094177246094, "loss": 0.5769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0757596492767334, "rewards/margins": 0.7738211154937744, "rewards/rejected": -2.8495802879333496, "step": 16000 }, { "epoch": 2.07, "eval_logits/chosen": -3.075199842453003, "eval_logits/rejected": -3.023000717163086, "eval_logps/chosen": -541.5254516601562, "eval_logps/rejected": -421.77862548828125, "eval_loss": 0.6220055818557739, "eval_rewards/accuracies": 0.6694999933242798, "eval_rewards/chosen": -0.9705724120140076, "eval_rewards/margins": 1.1144232749938965, "eval_rewards/rejected": -2.0849955081939697, "eval_runtime": 277.2851, "eval_samples_per_second": 7.213, "eval_steps_per_second": 3.606, "step": 16000 }, { "epoch": 2.07, "learning_rate": 1.728029071435402e-07, "logits/chosen": -2.8086161613464355, "logits/rejected": -2.687558650970459, "logps/chosen": -616.9071044921875, "logps/rejected": -394.99981689453125, "loss": 0.6095, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3158814907073975, "rewards/margins": 0.7273725271224976, "rewards/rejected": -3.0432543754577637, "step": 16010 }, { "epoch": 2.07, "learning_rate": 1.7256383283924645e-07, "logits/chosen": -2.8345398902893066, "logits/rejected": -2.6063992977142334, "logps/chosen": -598.4365844726562, "logps/rejected": -402.7333984375, "loss": 0.5233, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0334553718566895, "rewards/margins": 1.0890676975250244, "rewards/rejected": -3.1225228309631348, "step": 16020 }, { "epoch": 2.07, "learning_rate": 1.7232475853495266e-07, "logits/chosen": -2.6331071853637695, "logits/rejected": -2.6012158393859863, "logps/chosen": -483.7911071777344, "logps/rejected": -370.0927734375, "loss": 0.5599, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0412330627441406, "rewards/margins": 0.7593597173690796, "rewards/rejected": -2.8005928993225098, "step": 16030 }, { "epoch": 2.07, "learning_rate": 1.720856842306589e-07, "logits/chosen": -2.758589029312134, "logits/rejected": -2.7213680744171143, "logps/chosen": -578.4405517578125, "logps/rejected": -492.67645263671875, "loss": 0.5548, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.919842004776001, "rewards/margins": 1.0530675649642944, "rewards/rejected": -2.972909688949585, "step": 16040 }, { "epoch": 2.07, "learning_rate": 1.718466099263651e-07, "logits/chosen": -2.749969482421875, "logits/rejected": -2.704333543777466, "logps/chosen": -456.537841796875, "logps/rejected": -373.49456787109375, "loss": 0.4467, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.848801612854004, "rewards/margins": 1.1036427021026611, "rewards/rejected": -2.952444076538086, "step": 16050 }, { "epoch": 2.07, "learning_rate": 1.7160753562207134e-07, "logits/chosen": -2.7695870399475098, "logits/rejected": -2.620879650115967, "logps/chosen": -601.1954345703125, "logps/rejected": -448.72833251953125, "loss": 0.492, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.12182879447937, "rewards/margins": 1.0582685470581055, "rewards/rejected": -3.1800971031188965, "step": 16060 }, { "epoch": 2.07, "learning_rate": 1.7136846131777755e-07, "logits/chosen": -2.722283124923706, "logits/rejected": -2.54723858833313, "logps/chosen": -481.07958984375, "logps/rejected": -378.11627197265625, "loss": 0.486, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.999991774559021, "rewards/margins": 1.057852029800415, "rewards/rejected": -3.0578436851501465, "step": 16070 }, { "epoch": 2.08, "learning_rate": 1.7112938701348378e-07, "logits/chosen": -2.70440411567688, "logits/rejected": -2.649101734161377, "logps/chosen": -504.87213134765625, "logps/rejected": -434.767822265625, "loss": 0.5001, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0280468463897705, "rewards/margins": 0.9925267100334167, "rewards/rejected": -3.020573616027832, "step": 16080 }, { "epoch": 2.08, "learning_rate": 1.7089031270919002e-07, "logits/chosen": -2.707793712615967, "logits/rejected": -2.594597339630127, "logps/chosen": -551.664306640625, "logps/rejected": -432.9012756347656, "loss": 0.513, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8864014148712158, "rewards/margins": 0.9498047828674316, "rewards/rejected": -2.8362059593200684, "step": 16090 }, { "epoch": 2.08, "learning_rate": 1.7065123840489623e-07, "logits/chosen": -2.7429206371307373, "logits/rejected": -2.619131565093994, "logps/chosen": -493.701416015625, "logps/rejected": -419.808837890625, "loss": 0.553, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1332030296325684, "rewards/margins": 0.8572152256965637, "rewards/rejected": -2.9904181957244873, "step": 16100 }, { "epoch": 2.08, "learning_rate": 1.7041216410060246e-07, "logits/chosen": -2.799999475479126, "logits/rejected": -2.6875479221343994, "logps/chosen": -514.7276611328125, "logps/rejected": -428.8377990722656, "loss": 0.5164, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.8737834692001343, "rewards/margins": 0.9176376461982727, "rewards/rejected": -2.7914211750030518, "step": 16110 }, { "epoch": 2.08, "learning_rate": 1.7017308979630867e-07, "logits/chosen": -2.636103868484497, "logits/rejected": -2.497783660888672, "logps/chosen": -612.6914672851562, "logps/rejected": -429.90380859375, "loss": 0.4968, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.822981834411621, "rewards/margins": 1.0217024087905884, "rewards/rejected": -2.84468412399292, "step": 16120 }, { "epoch": 2.08, "learning_rate": 1.699340154920149e-07, "logits/chosen": -2.6870276927948, "logits/rejected": -2.535398006439209, "logps/chosen": -582.6930541992188, "logps/rejected": -445.21881103515625, "loss": 0.5237, "rewards/accuracies": 0.75, "rewards/chosen": -1.9552295207977295, "rewards/margins": 0.9119974374771118, "rewards/rejected": -2.867227077484131, "step": 16130 }, { "epoch": 2.08, "learning_rate": 1.6969494118772112e-07, "logits/chosen": -2.6820313930511475, "logits/rejected": -2.602262258529663, "logps/chosen": -576.9219970703125, "logps/rejected": -377.14544677734375, "loss": 0.6055, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.178762912750244, "rewards/margins": 0.8606170415878296, "rewards/rejected": -3.039379596710205, "step": 16140 }, { "epoch": 2.08, "learning_rate": 1.6945586688342735e-07, "logits/chosen": -2.831362724304199, "logits/rejected": -2.8109681606292725, "logps/chosen": -602.4659423828125, "logps/rejected": -487.97747802734375, "loss": 0.6706, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.226155996322632, "rewards/margins": 0.656830906867981, "rewards/rejected": -2.8829872608184814, "step": 16150 }, { "epoch": 2.09, "learning_rate": 1.692167925791336e-07, "logits/chosen": -2.7617688179016113, "logits/rejected": -2.653507947921753, "logps/chosen": -459.21685791015625, "logps/rejected": -401.21075439453125, "loss": 0.7379, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.433107376098633, "rewards/margins": 0.41146278381347656, "rewards/rejected": -2.8445706367492676, "step": 16160 }, { "epoch": 2.09, "learning_rate": 1.689777182748398e-07, "logits/chosen": -2.6947083473205566, "logits/rejected": -2.602506399154663, "logps/chosen": -546.8339233398438, "logps/rejected": -459.58343505859375, "loss": 0.6096, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.041780471801758, "rewards/margins": 0.7261396646499634, "rewards/rejected": -2.76792049407959, "step": 16170 }, { "epoch": 2.09, "learning_rate": 1.6873864397054604e-07, "logits/chosen": -2.7262701988220215, "logits/rejected": -2.631704807281494, "logps/chosen": -511.2059631347656, "logps/rejected": -384.7381896972656, "loss": 0.6245, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0403523445129395, "rewards/margins": 0.6813429594039917, "rewards/rejected": -2.7216954231262207, "step": 16180 }, { "epoch": 2.09, "learning_rate": 1.6849956966625225e-07, "logits/chosen": -2.73036527633667, "logits/rejected": -2.625645399093628, "logps/chosen": -475.6647033691406, "logps/rejected": -368.92694091796875, "loss": 0.5551, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9648382663726807, "rewards/margins": 0.7918621301651001, "rewards/rejected": -2.7567005157470703, "step": 16190 }, { "epoch": 2.09, "learning_rate": 1.6826049536195848e-07, "logits/chosen": -2.675591230392456, "logits/rejected": -2.5540244579315186, "logps/chosen": -535.2053833007812, "logps/rejected": -406.88177490234375, "loss": 0.6076, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1591267585754395, "rewards/margins": 0.5804471969604492, "rewards/rejected": -2.7395739555358887, "step": 16200 }, { "epoch": 2.09, "learning_rate": 1.680214210576647e-07, "logits/chosen": -2.7192912101745605, "logits/rejected": -2.5761804580688477, "logps/chosen": -559.2965087890625, "logps/rejected": -415.90069580078125, "loss": 0.676, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.2305145263671875, "rewards/margins": 0.768750786781311, "rewards/rejected": -2.999265670776367, "step": 16210 }, { "epoch": 2.09, "learning_rate": 1.6778234675337095e-07, "logits/chosen": -2.664170265197754, "logits/rejected": -2.5658252239227295, "logps/chosen": -506.4527282714844, "logps/rejected": -395.21966552734375, "loss": 0.5219, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0700531005859375, "rewards/margins": 0.8828436136245728, "rewards/rejected": -2.9528965950012207, "step": 16220 }, { "epoch": 2.1, "learning_rate": 1.675432724490772e-07, "logits/chosen": -2.666821241378784, "logits/rejected": -2.598331928253174, "logps/chosen": -601.6417846679688, "logps/rejected": -510.4458923339844, "loss": 0.6082, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2281346321105957, "rewards/margins": 0.8676369786262512, "rewards/rejected": -3.095771312713623, "step": 16230 }, { "epoch": 2.1, "learning_rate": 1.673041981447834e-07, "logits/chosen": -2.7912425994873047, "logits/rejected": -2.6039390563964844, "logps/chosen": -604.4366455078125, "logps/rejected": -426.8350524902344, "loss": 0.5095, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9113128185272217, "rewards/margins": 1.0060603618621826, "rewards/rejected": -2.9173731803894043, "step": 16240 }, { "epoch": 2.1, "learning_rate": 1.6706512384048963e-07, "logits/chosen": -2.734553813934326, "logits/rejected": -2.6383543014526367, "logps/chosen": -565.6779174804688, "logps/rejected": -433.4234924316406, "loss": 0.5254, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.082515239715576, "rewards/margins": 0.8546460270881653, "rewards/rejected": -2.9371609687805176, "step": 16250 }, { "epoch": 2.1, "learning_rate": 1.6682604953619584e-07, "logits/chosen": -2.7583823204040527, "logits/rejected": -2.6615347862243652, "logps/chosen": -532.72119140625, "logps/rejected": -375.2275390625, "loss": 0.5785, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.9379723072052002, "rewards/margins": 0.7219119668006897, "rewards/rejected": -2.659884214401245, "step": 16260 }, { "epoch": 2.1, "learning_rate": 1.6658697523190208e-07, "logits/chosen": -2.649955987930298, "logits/rejected": -2.6307928562164307, "logps/chosen": -519.1983642578125, "logps/rejected": -445.38677978515625, "loss": 0.6544, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.008338451385498, "rewards/margins": 0.6635834574699402, "rewards/rejected": -2.671921968460083, "step": 16270 }, { "epoch": 2.1, "learning_rate": 1.663479009276083e-07, "logits/chosen": -2.693131923675537, "logits/rejected": -2.604766845703125, "logps/chosen": -550.8753662109375, "logps/rejected": -436.97332763671875, "loss": 0.5188, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1186370849609375, "rewards/margins": 1.0847902297973633, "rewards/rejected": -3.2034270763397217, "step": 16280 }, { "epoch": 2.1, "learning_rate": 1.6610882662331453e-07, "logits/chosen": -2.734262466430664, "logits/rejected": -2.6220996379852295, "logps/chosen": -576.7682495117188, "logps/rejected": -439.152099609375, "loss": 0.6204, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.131986141204834, "rewards/margins": 0.8995087742805481, "rewards/rejected": -3.0314950942993164, "step": 16290 }, { "epoch": 2.1, "learning_rate": 1.6586975231902076e-07, "logits/chosen": -2.6741487979888916, "logits/rejected": -2.5952930450439453, "logps/chosen": -502.18389892578125, "logps/rejected": -422.5003967285156, "loss": 0.5034, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1003715991973877, "rewards/margins": 0.9773193597793579, "rewards/rejected": -3.0776913166046143, "step": 16300 }, { "epoch": 2.11, "learning_rate": 1.6563067801472697e-07, "logits/chosen": -2.7642433643341064, "logits/rejected": -2.704097032546997, "logps/chosen": -563.1353149414062, "logps/rejected": -490.84234619140625, "loss": 0.599, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.325838804244995, "rewards/margins": 0.934483528137207, "rewards/rejected": -3.2603225708007812, "step": 16310 }, { "epoch": 2.11, "learning_rate": 1.653916037104332e-07, "logits/chosen": -2.705960512161255, "logits/rejected": -2.631063938140869, "logps/chosen": -574.1942138671875, "logps/rejected": -459.41168212890625, "loss": 0.5893, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.218607187271118, "rewards/margins": 0.8570125699043274, "rewards/rejected": -3.0756192207336426, "step": 16320 }, { "epoch": 2.11, "learning_rate": 1.6515252940613942e-07, "logits/chosen": -2.6697394847869873, "logits/rejected": -2.5912938117980957, "logps/chosen": -608.7952880859375, "logps/rejected": -510.5113220214844, "loss": 0.6358, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.272548198699951, "rewards/margins": 0.7988362312316895, "rewards/rejected": -3.0713844299316406, "step": 16330 }, { "epoch": 2.11, "learning_rate": 1.6491345510184565e-07, "logits/chosen": -2.721374034881592, "logits/rejected": -2.6715195178985596, "logps/chosen": -550.3895874023438, "logps/rejected": -475.0224609375, "loss": 0.5497, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.995074987411499, "rewards/margins": 0.9175745248794556, "rewards/rejected": -2.912649631500244, "step": 16340 }, { "epoch": 2.11, "learning_rate": 1.6467438079755186e-07, "logits/chosen": -2.7963831424713135, "logits/rejected": -2.6066081523895264, "logps/chosen": -615.7313842773438, "logps/rejected": -371.3451843261719, "loss": 0.4926, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.036390781402588, "rewards/margins": 1.074235200881958, "rewards/rejected": -3.110625982284546, "step": 16350 }, { "epoch": 2.11, "learning_rate": 1.644353064932581e-07, "logits/chosen": -2.665797472000122, "logits/rejected": -2.483412504196167, "logps/chosen": -516.7774658203125, "logps/rejected": -341.2754211425781, "loss": 0.5315, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0544865131378174, "rewards/margins": 0.9580774307250977, "rewards/rejected": -3.012564182281494, "step": 16360 }, { "epoch": 2.11, "learning_rate": 1.6419623218896433e-07, "logits/chosen": -2.6289215087890625, "logits/rejected": -2.6007297039031982, "logps/chosen": -561.44873046875, "logps/rejected": -468.01849365234375, "loss": 0.5599, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.270270347595215, "rewards/margins": 0.7804743647575378, "rewards/rejected": -3.0507450103759766, "step": 16370 }, { "epoch": 2.11, "learning_rate": 1.6395715788467054e-07, "logits/chosen": -2.6789398193359375, "logits/rejected": -2.6473429203033447, "logps/chosen": -578.9848022460938, "logps/rejected": -497.48382568359375, "loss": 0.5591, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.028587818145752, "rewards/margins": 0.9549644589424133, "rewards/rejected": -2.9835522174835205, "step": 16380 }, { "epoch": 2.12, "learning_rate": 1.6371808358037678e-07, "logits/chosen": -2.617952346801758, "logits/rejected": -2.5646979808807373, "logps/chosen": -546.4915771484375, "logps/rejected": -448.3631896972656, "loss": 0.7131, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.352260112762451, "rewards/margins": 0.5163453817367554, "rewards/rejected": -2.868605613708496, "step": 16390 }, { "epoch": 2.12, "learning_rate": 1.63479009276083e-07, "logits/chosen": -2.738330602645874, "logits/rejected": -2.6317780017852783, "logps/chosen": -579.1546630859375, "logps/rejected": -440.7102966308594, "loss": 0.5, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.2503626346588135, "rewards/margins": 0.920239269733429, "rewards/rejected": -3.1706018447875977, "step": 16400 }, { "epoch": 2.12, "learning_rate": 1.6323993497178922e-07, "logits/chosen": -2.802677631378174, "logits/rejected": -2.6815342903137207, "logps/chosen": -547.08935546875, "logps/rejected": -424.97003173828125, "loss": 0.6222, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.125054121017456, "rewards/margins": 0.8309026956558228, "rewards/rejected": -2.9559571743011475, "step": 16410 }, { "epoch": 2.12, "learning_rate": 1.6300086066749543e-07, "logits/chosen": -2.711029529571533, "logits/rejected": -2.5096678733825684, "logps/chosen": -661.3432006835938, "logps/rejected": -415.834228515625, "loss": 0.5122, "rewards/accuracies": 0.6875, "rewards/chosen": -2.300396680831909, "rewards/margins": 0.8556958436965942, "rewards/rejected": -3.156092643737793, "step": 16420 }, { "epoch": 2.12, "learning_rate": 1.6276178636320167e-07, "logits/chosen": -2.787670850753784, "logits/rejected": -2.601104259490967, "logps/chosen": -542.6302490234375, "logps/rejected": -357.02850341796875, "loss": 0.6348, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0595459938049316, "rewards/margins": 0.7526935338973999, "rewards/rejected": -2.812239408493042, "step": 16430 }, { "epoch": 2.12, "learning_rate": 1.625227120589079e-07, "logits/chosen": -2.752171277999878, "logits/rejected": -2.6846184730529785, "logps/chosen": -573.84423828125, "logps/rejected": -442.2362365722656, "loss": 0.6108, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.120007276535034, "rewards/margins": 0.8908340334892273, "rewards/rejected": -3.010841131210327, "step": 16440 }, { "epoch": 2.12, "learning_rate": 1.6228363775461411e-07, "logits/chosen": -2.7027485370635986, "logits/rejected": -2.58756160736084, "logps/chosen": -500.82781982421875, "logps/rejected": -406.00360107421875, "loss": 0.6269, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.165553569793701, "rewards/margins": 0.7895927429199219, "rewards/rejected": -2.955146551132202, "step": 16450 }, { "epoch": 2.12, "learning_rate": 1.6204456345032035e-07, "logits/chosen": -2.802001476287842, "logits/rejected": -2.703766345977783, "logps/chosen": -579.3763427734375, "logps/rejected": -491.35113525390625, "loss": 0.5585, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1245028972625732, "rewards/margins": 0.8010837435722351, "rewards/rejected": -2.925586223602295, "step": 16460 }, { "epoch": 2.13, "learning_rate": 1.6180548914602656e-07, "logits/chosen": -2.706907272338867, "logits/rejected": -2.5591084957122803, "logps/chosen": -618.6683349609375, "logps/rejected": -449.2037048339844, "loss": 0.5537, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.19158673286438, "rewards/margins": 0.9696523547172546, "rewards/rejected": -3.1612391471862793, "step": 16470 }, { "epoch": 2.13, "learning_rate": 1.615664148417328e-07, "logits/chosen": -2.820303440093994, "logits/rejected": -2.660341501235962, "logps/chosen": -627.08349609375, "logps/rejected": -395.683349609375, "loss": 0.5098, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1441593170166016, "rewards/margins": 0.9745954275131226, "rewards/rejected": -3.1187546253204346, "step": 16480 }, { "epoch": 2.13, "learning_rate": 1.61327340537439e-07, "logits/chosen": -2.6425728797912598, "logits/rejected": -2.570401906967163, "logps/chosen": -525.0691528320312, "logps/rejected": -476.0218200683594, "loss": 0.5686, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.162163496017456, "rewards/margins": 0.919187068939209, "rewards/rejected": -3.081350803375244, "step": 16490 }, { "epoch": 2.13, "learning_rate": 1.6108826623314524e-07, "logits/chosen": -2.615257740020752, "logits/rejected": -2.57840895652771, "logps/chosen": -502.36993408203125, "logps/rejected": -505.01971435546875, "loss": 0.5847, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2746071815490723, "rewards/margins": 0.7999303936958313, "rewards/rejected": -3.0745372772216797, "step": 16500 }, { "epoch": 2.13, "learning_rate": 1.608491919288515e-07, "logits/chosen": -2.6740238666534424, "logits/rejected": -2.5667338371276855, "logps/chosen": -591.04248046875, "logps/rejected": -453.3799743652344, "loss": 0.6137, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.182962656021118, "rewards/margins": 0.7746570110321045, "rewards/rejected": -2.9576194286346436, "step": 16510 }, { "epoch": 2.13, "learning_rate": 1.606101176245577e-07, "logits/chosen": -2.723583936691284, "logits/rejected": -2.6327855587005615, "logps/chosen": -511.0516052246094, "logps/rejected": -455.82080078125, "loss": 0.5372, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0211331844329834, "rewards/margins": 0.9670776128768921, "rewards/rejected": -2.988211154937744, "step": 16520 }, { "epoch": 2.13, "learning_rate": 1.6037104332026395e-07, "logits/chosen": -2.7492458820343018, "logits/rejected": -2.629992961883545, "logps/chosen": -561.9869995117188, "logps/rejected": -379.9822998046875, "loss": 0.5224, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0743298530578613, "rewards/margins": 0.9899803996086121, "rewards/rejected": -3.064310073852539, "step": 16530 }, { "epoch": 2.14, "learning_rate": 1.6013196901597016e-07, "logits/chosen": -2.7377114295959473, "logits/rejected": -2.684267044067383, "logps/chosen": -566.3670654296875, "logps/rejected": -450.54742431640625, "loss": 0.5832, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0517537593841553, "rewards/margins": 0.9058197140693665, "rewards/rejected": -2.957573652267456, "step": 16540 }, { "epoch": 2.14, "learning_rate": 1.598928947116764e-07, "logits/chosen": -2.7309987545013428, "logits/rejected": -2.6252734661102295, "logps/chosen": -456.9788513183594, "logps/rejected": -401.64898681640625, "loss": 0.566, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9327328205108643, "rewards/margins": 0.7703275084495544, "rewards/rejected": -2.7030606269836426, "step": 16550 }, { "epoch": 2.14, "learning_rate": 1.596538204073826e-07, "logits/chosen": -2.63218355178833, "logits/rejected": -2.5251455307006836, "logps/chosen": -582.3482055664062, "logps/rejected": -436.753173828125, "loss": 0.611, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.003011465072632, "rewards/margins": 0.7147125005722046, "rewards/rejected": -2.717723846435547, "step": 16560 }, { "epoch": 2.14, "learning_rate": 1.5941474610308884e-07, "logits/chosen": -2.6345925331115723, "logits/rejected": -2.5762012004852295, "logps/chosen": -503.73388671875, "logps/rejected": -373.7250061035156, "loss": 0.6, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1414895057678223, "rewards/margins": 0.6426378488540649, "rewards/rejected": -2.7841269969940186, "step": 16570 }, { "epoch": 2.14, "learning_rate": 1.5917567179879508e-07, "logits/chosen": -2.71567440032959, "logits/rejected": -2.5972537994384766, "logps/chosen": -540.8619384765625, "logps/rejected": -459.818115234375, "loss": 0.5447, "rewards/accuracies": 0.75, "rewards/chosen": -2.1701488494873047, "rewards/margins": 0.8426430821418762, "rewards/rejected": -3.012791872024536, "step": 16580 }, { "epoch": 2.14, "learning_rate": 1.5893659749450129e-07, "logits/chosen": -2.707432270050049, "logits/rejected": -2.631152868270874, "logps/chosen": -556.1505126953125, "logps/rejected": -456.4017028808594, "loss": 0.423, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.109118938446045, "rewards/margins": 1.0782684087753296, "rewards/rejected": -3.187386989593506, "step": 16590 }, { "epoch": 2.14, "learning_rate": 1.5869752319020752e-07, "logits/chosen": -2.713615894317627, "logits/rejected": -2.5857796669006348, "logps/chosen": -502.5967712402344, "logps/rejected": -376.54840087890625, "loss": 0.5905, "rewards/accuracies": 0.6875, "rewards/chosen": -2.182931423187256, "rewards/margins": 0.7395087480545044, "rewards/rejected": -2.9224400520324707, "step": 16600 }, { "epoch": 2.14, "learning_rate": 1.5845844888591373e-07, "logits/chosen": -2.768383502960205, "logits/rejected": -2.5702929496765137, "logps/chosen": -496.262939453125, "logps/rejected": -348.4413146972656, "loss": 0.5709, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.060075283050537, "rewards/margins": 0.7825108766555786, "rewards/rejected": -2.842585802078247, "step": 16610 }, { "epoch": 2.15, "learning_rate": 1.5821937458161997e-07, "logits/chosen": -2.775470495223999, "logits/rejected": -2.5838844776153564, "logps/chosen": -514.2855224609375, "logps/rejected": -370.24212646484375, "loss": 0.5396, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9267747402191162, "rewards/margins": 0.8832618594169617, "rewards/rejected": -2.8100364208221436, "step": 16620 }, { "epoch": 2.15, "learning_rate": 1.5798030027732618e-07, "logits/chosen": -2.771728277206421, "logits/rejected": -2.6278278827667236, "logps/chosen": -628.2802124023438, "logps/rejected": -451.24786376953125, "loss": 0.484, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.916833519935608, "rewards/margins": 1.113802433013916, "rewards/rejected": -3.0306360721588135, "step": 16630 }, { "epoch": 2.15, "learning_rate": 1.577412259730324e-07, "logits/chosen": -2.827669620513916, "logits/rejected": -2.741751194000244, "logps/chosen": -530.7667236328125, "logps/rejected": -429.75640869140625, "loss": 0.5844, "rewards/accuracies": 0.6875, "rewards/chosen": -2.018878936767578, "rewards/margins": 0.8395910263061523, "rewards/rejected": -2.8584699630737305, "step": 16640 }, { "epoch": 2.15, "learning_rate": 1.5750215166873865e-07, "logits/chosen": -2.777764320373535, "logits/rejected": -2.6118998527526855, "logps/chosen": -615.8616333007812, "logps/rejected": -439.7730407714844, "loss": 0.6002, "rewards/accuracies": 0.6875, "rewards/chosen": -2.186412811279297, "rewards/margins": 0.8839647173881531, "rewards/rejected": -3.0703773498535156, "step": 16650 }, { "epoch": 2.15, "learning_rate": 1.5726307736444486e-07, "logits/chosen": -2.723792552947998, "logits/rejected": -2.613330364227295, "logps/chosen": -530.4500732421875, "logps/rejected": -392.3229064941406, "loss": 0.6417, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.231276750564575, "rewards/margins": 0.6926860213279724, "rewards/rejected": -2.9239625930786133, "step": 16660 }, { "epoch": 2.15, "learning_rate": 1.570240030601511e-07, "logits/chosen": -2.7950692176818848, "logits/rejected": -2.6808741092681885, "logps/chosen": -541.8619384765625, "logps/rejected": -414.5777893066406, "loss": 0.5732, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1672706604003906, "rewards/margins": 0.7916160821914673, "rewards/rejected": -2.9588866233825684, "step": 16670 }, { "epoch": 2.15, "learning_rate": 1.567849287558573e-07, "logits/chosen": -2.815643310546875, "logits/rejected": -2.6362643241882324, "logps/chosen": -591.0076904296875, "logps/rejected": -469.06414794921875, "loss": 0.4628, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0213494300842285, "rewards/margins": 1.2727258205413818, "rewards/rejected": -3.2940750122070312, "step": 16680 }, { "epoch": 2.15, "learning_rate": 1.5654585445156354e-07, "logits/chosen": -2.7546544075012207, "logits/rejected": -2.6476926803588867, "logps/chosen": -539.0831298828125, "logps/rejected": -426.28692626953125, "loss": 0.5719, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4240574836730957, "rewards/margins": 0.8661662936210632, "rewards/rejected": -3.2902235984802246, "step": 16690 }, { "epoch": 2.16, "learning_rate": 1.5630678014726975e-07, "logits/chosen": -2.699949026107788, "logits/rejected": -2.747061252593994, "logps/chosen": -506.3782653808594, "logps/rejected": -443.6259765625, "loss": 0.5865, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1723477840423584, "rewards/margins": 0.7540324926376343, "rewards/rejected": -2.9263803958892822, "step": 16700 }, { "epoch": 2.16, "learning_rate": 1.5606770584297598e-07, "logits/chosen": -2.725780963897705, "logits/rejected": -2.6355738639831543, "logps/chosen": -626.8984375, "logps/rejected": -582.2753295898438, "loss": 0.5053, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1343142986297607, "rewards/margins": 0.9682634472846985, "rewards/rejected": -3.1025779247283936, "step": 16710 }, { "epoch": 2.16, "learning_rate": 1.5582863153868222e-07, "logits/chosen": -2.736340045928955, "logits/rejected": -2.594884157180786, "logps/chosen": -484.5732421875, "logps/rejected": -386.2017822265625, "loss": 0.5322, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.172234058380127, "rewards/margins": 0.832785427570343, "rewards/rejected": -3.005018949508667, "step": 16720 }, { "epoch": 2.16, "learning_rate": 1.5558955723438843e-07, "logits/chosen": -2.6608355045318604, "logits/rejected": -2.4963512420654297, "logps/chosen": -707.7124633789062, "logps/rejected": -446.63031005859375, "loss": 0.5096, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0393576622009277, "rewards/margins": 1.066575527191162, "rewards/rejected": -3.105933427810669, "step": 16730 }, { "epoch": 2.16, "learning_rate": 1.5535048293009467e-07, "logits/chosen": -2.7229275703430176, "logits/rejected": -2.670936107635498, "logps/chosen": -484.5625915527344, "logps/rejected": -358.43206787109375, "loss": 0.6256, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3091583251953125, "rewards/margins": 0.7420637607574463, "rewards/rejected": -3.051222324371338, "step": 16740 }, { "epoch": 2.16, "learning_rate": 1.5511140862580087e-07, "logits/chosen": -2.7889955043792725, "logits/rejected": -2.6849026679992676, "logps/chosen": -595.9024658203125, "logps/rejected": -430.9064025878906, "loss": 0.6246, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.204540252685547, "rewards/margins": 0.8263400793075562, "rewards/rejected": -3.0308804512023926, "step": 16750 }, { "epoch": 2.16, "learning_rate": 1.548723343215071e-07, "logits/chosen": -2.7827329635620117, "logits/rejected": -2.7018349170684814, "logps/chosen": -594.4105834960938, "logps/rejected": -442.49267578125, "loss": 0.4557, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8762743473052979, "rewards/margins": 1.11091148853302, "rewards/rejected": -2.9871859550476074, "step": 16760 }, { "epoch": 2.16, "learning_rate": 1.5463326001721332e-07, "logits/chosen": -2.7416257858276367, "logits/rejected": -2.631115674972534, "logps/chosen": -567.6775512695312, "logps/rejected": -415.56414794921875, "loss": 0.5945, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1084089279174805, "rewards/margins": 0.7345203757286072, "rewards/rejected": -2.8429293632507324, "step": 16770 }, { "epoch": 2.17, "learning_rate": 1.5439418571291956e-07, "logits/chosen": -2.8201050758361816, "logits/rejected": -2.721865177154541, "logps/chosen": -623.1201171875, "logps/rejected": -452.3085021972656, "loss": 0.5683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.265455722808838, "rewards/margins": 0.9339459538459778, "rewards/rejected": -3.199401378631592, "step": 16780 }, { "epoch": 2.17, "learning_rate": 1.541551114086258e-07, "logits/chosen": -2.836360454559326, "logits/rejected": -2.7588093280792236, "logps/chosen": -555.0341796875, "logps/rejected": -429.72381591796875, "loss": 0.6539, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0931029319763184, "rewards/margins": 0.5636757612228394, "rewards/rejected": -2.6567788124084473, "step": 16790 }, { "epoch": 2.17, "learning_rate": 1.53916037104332e-07, "logits/chosen": -2.866856336593628, "logits/rejected": -2.7336409091949463, "logps/chosen": -474.0838928222656, "logps/rejected": -348.17913818359375, "loss": 0.6235, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1036558151245117, "rewards/margins": 0.6896656155586243, "rewards/rejected": -2.793321371078491, "step": 16800 }, { "epoch": 2.17, "learning_rate": 1.5367696280003826e-07, "logits/chosen": -2.7176809310913086, "logits/rejected": -2.5552964210510254, "logps/chosen": -472.2915954589844, "logps/rejected": -335.5244140625, "loss": 0.5951, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.235180139541626, "rewards/margins": 0.8736304044723511, "rewards/rejected": -3.1088106632232666, "step": 16810 }, { "epoch": 2.17, "learning_rate": 1.5343788849574447e-07, "logits/chosen": -2.7218480110168457, "logits/rejected": -2.635871410369873, "logps/chosen": -640.1100463867188, "logps/rejected": -460.90814208984375, "loss": 0.545, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0454328060150146, "rewards/margins": 0.962197482585907, "rewards/rejected": -3.0076308250427246, "step": 16820 }, { "epoch": 2.17, "learning_rate": 1.531988141914507e-07, "logits/chosen": -2.7374041080474854, "logits/rejected": -2.6636574268341064, "logps/chosen": -549.2916259765625, "logps/rejected": -443.37957763671875, "loss": 0.553, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0922634601593018, "rewards/margins": 0.8992765545845032, "rewards/rejected": -2.991539478302002, "step": 16830 }, { "epoch": 2.17, "learning_rate": 1.5295973988715692e-07, "logits/chosen": -2.7436630725860596, "logits/rejected": -2.5653414726257324, "logps/chosen": -557.5076904296875, "logps/rejected": -422.809326171875, "loss": 0.5736, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1830005645751953, "rewards/margins": 0.9252142906188965, "rewards/rejected": -3.108214855194092, "step": 16840 }, { "epoch": 2.18, "learning_rate": 1.5272066558286315e-07, "logits/chosen": -2.6683802604675293, "logits/rejected": -2.5421347618103027, "logps/chosen": -563.2977294921875, "logps/rejected": -411.6390075683594, "loss": 0.664, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.4022727012634277, "rewards/margins": 0.7672864198684692, "rewards/rejected": -3.1695590019226074, "step": 16850 }, { "epoch": 2.18, "learning_rate": 1.524815912785694e-07, "logits/chosen": -2.8150956630706787, "logits/rejected": -2.7027268409729004, "logps/chosen": -525.5145263671875, "logps/rejected": -434.523681640625, "loss": 0.654, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1640515327453613, "rewards/margins": 0.5692169070243835, "rewards/rejected": -2.7332687377929688, "step": 16860 }, { "epoch": 2.18, "learning_rate": 1.522425169742756e-07, "logits/chosen": -2.6989009380340576, "logits/rejected": -2.6807408332824707, "logps/chosen": -511.35479736328125, "logps/rejected": -423.1070861816406, "loss": 0.6424, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0139167308807373, "rewards/margins": 0.69932621717453, "rewards/rejected": -2.713243007659912, "step": 16870 }, { "epoch": 2.18, "learning_rate": 1.5200344266998184e-07, "logits/chosen": -2.7337281703948975, "logits/rejected": -2.6881942749023438, "logps/chosen": -483.30572509765625, "logps/rejected": -432.83770751953125, "loss": 0.6942, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9033864736557007, "rewards/margins": 0.6783775091171265, "rewards/rejected": -2.581763982772827, "step": 16880 }, { "epoch": 2.18, "learning_rate": 1.5176436836568805e-07, "logits/chosen": -2.639667510986328, "logits/rejected": -2.536447763442993, "logps/chosen": -605.5472412109375, "logps/rejected": -483.9283142089844, "loss": 0.5825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.102844715118408, "rewards/margins": 0.8031288385391235, "rewards/rejected": -2.905973434448242, "step": 16890 }, { "epoch": 2.18, "learning_rate": 1.5152529406139428e-07, "logits/chosen": -2.72860050201416, "logits/rejected": -2.6292271614074707, "logps/chosen": -504.1354064941406, "logps/rejected": -430.4305725097656, "loss": 0.58, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.013124942779541, "rewards/margins": 0.8530683517456055, "rewards/rejected": -2.8661932945251465, "step": 16900 }, { "epoch": 2.18, "learning_rate": 1.512862197571005e-07, "logits/chosen": -2.740586757659912, "logits/rejected": -2.6668148040771484, "logps/chosen": -493.6337890625, "logps/rejected": -401.72137451171875, "loss": 0.6398, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0502371788024902, "rewards/margins": 0.8940572738647461, "rewards/rejected": -2.9442944526672363, "step": 16910 }, { "epoch": 2.18, "learning_rate": 1.5104714545280673e-07, "logits/chosen": -2.732196569442749, "logits/rejected": -2.6722679138183594, "logps/chosen": -493.73272705078125, "logps/rejected": -433.7527770996094, "loss": 0.5785, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1205010414123535, "rewards/margins": 0.7701288461685181, "rewards/rejected": -2.890630006790161, "step": 16920 }, { "epoch": 2.19, "learning_rate": 1.5080807114851296e-07, "logits/chosen": -2.8419816493988037, "logits/rejected": -2.7324042320251465, "logps/chosen": -570.6292724609375, "logps/rejected": -457.8941345214844, "loss": 0.4964, "rewards/accuracies": 0.75, "rewards/chosen": -2.140106439590454, "rewards/margins": 1.0311899185180664, "rewards/rejected": -3.1712958812713623, "step": 16930 }, { "epoch": 2.19, "learning_rate": 1.5056899684421917e-07, "logits/chosen": -2.8892722129821777, "logits/rejected": -2.6413938999176025, "logps/chosen": -685.365234375, "logps/rejected": -455.5008850097656, "loss": 0.544, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.227689266204834, "rewards/margins": 0.975903332233429, "rewards/rejected": -3.2035927772521973, "step": 16940 }, { "epoch": 2.19, "learning_rate": 1.503299225399254e-07, "logits/chosen": -2.6534273624420166, "logits/rejected": -2.6347854137420654, "logps/chosen": -509.1876525878906, "logps/rejected": -428.21160888671875, "loss": 0.5033, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.192199230194092, "rewards/margins": 0.9645968675613403, "rewards/rejected": -3.1567959785461426, "step": 16950 }, { "epoch": 2.19, "learning_rate": 1.5009084823563162e-07, "logits/chosen": -2.6376259326934814, "logits/rejected": -2.6507809162139893, "logps/chosen": -465.5701599121094, "logps/rejected": -435.52471923828125, "loss": 0.5076, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.144489049911499, "rewards/margins": 0.9154095649719238, "rewards/rejected": -3.059898853302002, "step": 16960 }, { "epoch": 2.19, "learning_rate": 1.4985177393133785e-07, "logits/chosen": -2.66398286819458, "logits/rejected": -2.5294222831726074, "logps/chosen": -584.4539184570312, "logps/rejected": -395.56072998046875, "loss": 0.6024, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3444106578826904, "rewards/margins": 0.8422247767448425, "rewards/rejected": -3.186635732650757, "step": 16970 }, { "epoch": 2.19, "learning_rate": 1.496126996270441e-07, "logits/chosen": -2.6746764183044434, "logits/rejected": -2.614353656768799, "logps/chosen": -545.4656982421875, "logps/rejected": -447.802490234375, "loss": 0.6056, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9600303173065186, "rewards/margins": 0.8703650236129761, "rewards/rejected": -2.830395460128784, "step": 16980 }, { "epoch": 2.19, "learning_rate": 1.493736253227503e-07, "logits/chosen": -2.706740617752075, "logits/rejected": -2.628643274307251, "logps/chosen": -635.7771606445312, "logps/rejected": -486.7479553222656, "loss": 0.5644, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3159983158111572, "rewards/margins": 0.9359525442123413, "rewards/rejected": -3.251951217651367, "step": 16990 }, { "epoch": 2.19, "learning_rate": 1.4913455101845653e-07, "logits/chosen": -2.714263916015625, "logits/rejected": -2.624660015106201, "logps/chosen": -696.0841064453125, "logps/rejected": -514.1287841796875, "loss": 0.407, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.051459550857544, "rewards/margins": 1.3952958583831787, "rewards/rejected": -3.4467556476593018, "step": 17000 }, { "epoch": 2.19, "eval_logits/chosen": -3.0743215084075928, "eval_logits/rejected": -3.0223751068115234, "eval_logps/chosen": -541.240234375, "eval_logps/rejected": -421.51544189453125, "eval_loss": 0.6137406229972839, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.9420534372329712, "eval_rewards/margins": 1.1166281700134277, "eval_rewards/rejected": -2.0586817264556885, "eval_runtime": 279.129, "eval_samples_per_second": 7.165, "eval_steps_per_second": 3.583, "step": 17000 }, { "epoch": 2.2, "learning_rate": 1.4889547671416274e-07, "logits/chosen": -2.791347026824951, "logits/rejected": -2.6571497917175293, "logps/chosen": -563.1116333007812, "logps/rejected": -430.947265625, "loss": 0.6689, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2689478397369385, "rewards/margins": 0.6386265158653259, "rewards/rejected": -2.90757417678833, "step": 17010 }, { "epoch": 2.2, "learning_rate": 1.4865640240986898e-07, "logits/chosen": -2.731213092803955, "logits/rejected": -2.6209194660186768, "logps/chosen": -476.554931640625, "logps/rejected": -391.70343017578125, "loss": 0.583, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.084473133087158, "rewards/margins": 0.8570160865783691, "rewards/rejected": -2.9414896965026855, "step": 17020 }, { "epoch": 2.2, "learning_rate": 1.484173281055752e-07, "logits/chosen": -2.8948638439178467, "logits/rejected": -2.751823902130127, "logps/chosen": -544.0997314453125, "logps/rejected": -431.5458068847656, "loss": 0.5015, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9208581447601318, "rewards/margins": 1.1630054712295532, "rewards/rejected": -3.0838637351989746, "step": 17030 }, { "epoch": 2.2, "learning_rate": 1.4817825380128143e-07, "logits/chosen": -2.714360237121582, "logits/rejected": -2.7135491371154785, "logps/chosen": -560.91162109375, "logps/rejected": -549.8511962890625, "loss": 0.4828, "rewards/accuracies": 0.75, "rewards/chosen": -2.1705708503723145, "rewards/margins": 1.101682424545288, "rewards/rejected": -3.2722535133361816, "step": 17040 }, { "epoch": 2.2, "learning_rate": 1.4793917949698766e-07, "logits/chosen": -2.8129749298095703, "logits/rejected": -2.7447052001953125, "logps/chosen": -682.3733520507812, "logps/rejected": -522.2222900390625, "loss": 0.5643, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9947221279144287, "rewards/margins": 1.0668119192123413, "rewards/rejected": -3.0615339279174805, "step": 17050 }, { "epoch": 2.2, "learning_rate": 1.4770010519269387e-07, "logits/chosen": -2.629300117492676, "logits/rejected": -2.550933837890625, "logps/chosen": -560.85888671875, "logps/rejected": -441.04052734375, "loss": 0.5932, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1102402210235596, "rewards/margins": 0.6992872953414917, "rewards/rejected": -2.8095271587371826, "step": 17060 }, { "epoch": 2.2, "learning_rate": 1.474610308884001e-07, "logits/chosen": -2.680906295776367, "logits/rejected": -2.6130378246307373, "logps/chosen": -463.368896484375, "logps/rejected": -388.18450927734375, "loss": 0.5978, "rewards/accuracies": 0.6875, "rewards/chosen": -2.161681652069092, "rewards/margins": 0.7670055627822876, "rewards/rejected": -2.9286868572235107, "step": 17070 }, { "epoch": 2.21, "learning_rate": 1.4722195658410632e-07, "logits/chosen": -2.730334997177124, "logits/rejected": -2.586808443069458, "logps/chosen": -501.14947509765625, "logps/rejected": -360.92108154296875, "loss": 0.6457, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0735552310943604, "rewards/margins": 0.7419045567512512, "rewards/rejected": -2.815459728240967, "step": 17080 }, { "epoch": 2.21, "learning_rate": 1.4698288227981255e-07, "logits/chosen": -2.694187641143799, "logits/rejected": -2.57794451713562, "logps/chosen": -471.02520751953125, "logps/rejected": -380.6410217285156, "loss": 0.5641, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9680650234222412, "rewards/margins": 0.7425957918167114, "rewards/rejected": -2.7106611728668213, "step": 17090 }, { "epoch": 2.21, "learning_rate": 1.4674380797551876e-07, "logits/chosen": -2.7550058364868164, "logits/rejected": -2.6574339866638184, "logps/chosen": -540.673095703125, "logps/rejected": -420.3252868652344, "loss": 0.5383, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9162174463272095, "rewards/margins": 1.0880573987960815, "rewards/rejected": -3.004274845123291, "step": 17100 }, { "epoch": 2.21, "learning_rate": 1.4650473367122502e-07, "logits/chosen": -2.7554080486297607, "logits/rejected": -2.6407344341278076, "logps/chosen": -520.8427734375, "logps/rejected": -393.31695556640625, "loss": 0.6525, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.10785174369812, "rewards/margins": 0.6136177778244019, "rewards/rejected": -2.7214696407318115, "step": 17110 }, { "epoch": 2.21, "learning_rate": 1.4626565936693126e-07, "logits/chosen": -2.6390938758850098, "logits/rejected": -2.4915239810943604, "logps/chosen": -473.85797119140625, "logps/rejected": -358.3470458984375, "loss": 0.5151, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8803869485855103, "rewards/margins": 0.8629570007324219, "rewards/rejected": -2.7433438301086426, "step": 17120 }, { "epoch": 2.21, "learning_rate": 1.4602658506263747e-07, "logits/chosen": -2.7205841541290283, "logits/rejected": -2.7114880084991455, "logps/chosen": -528.83740234375, "logps/rejected": -504.47027587890625, "loss": 0.4851, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0894598960876465, "rewards/margins": 0.9489137530326843, "rewards/rejected": -3.0383737087249756, "step": 17130 }, { "epoch": 2.21, "learning_rate": 1.457875107583437e-07, "logits/chosen": -2.8427071571350098, "logits/rejected": -2.6865222454071045, "logps/chosen": -650.766845703125, "logps/rejected": -500.0686950683594, "loss": 0.5934, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.288994312286377, "rewards/margins": 0.8620032072067261, "rewards/rejected": -3.1509978771209717, "step": 17140 }, { "epoch": 2.21, "learning_rate": 1.4554843645404991e-07, "logits/chosen": -2.725874423980713, "logits/rejected": -2.6255807876586914, "logps/chosen": -593.0726318359375, "logps/rejected": -462.7613830566406, "loss": 0.4907, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8936436176300049, "rewards/margins": 1.2661027908325195, "rewards/rejected": -3.1597464084625244, "step": 17150 }, { "epoch": 2.22, "learning_rate": 1.4530936214975615e-07, "logits/chosen": -2.751667022705078, "logits/rejected": -2.660606622695923, "logps/chosen": -612.4225463867188, "logps/rejected": -480.5955505371094, "loss": 0.668, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3079240322113037, "rewards/margins": 0.5176312327384949, "rewards/rejected": -2.8255553245544434, "step": 17160 }, { "epoch": 2.22, "learning_rate": 1.4507028784546236e-07, "logits/chosen": -2.886751413345337, "logits/rejected": -2.6781065464019775, "logps/chosen": -606.0753784179688, "logps/rejected": -427.7509765625, "loss": 0.5306, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.043989658355713, "rewards/margins": 0.9161401987075806, "rewards/rejected": -2.960130214691162, "step": 17170 }, { "epoch": 2.22, "learning_rate": 1.448312135411686e-07, "logits/chosen": -2.7756428718566895, "logits/rejected": -2.6548824310302734, "logps/chosen": -578.5237426757812, "logps/rejected": -422.66400146484375, "loss": 0.5184, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8996846675872803, "rewards/margins": 1.024048924446106, "rewards/rejected": -2.9237332344055176, "step": 17180 }, { "epoch": 2.22, "learning_rate": 1.4459213923687483e-07, "logits/chosen": -2.8106772899627686, "logits/rejected": -2.6151320934295654, "logps/chosen": -535.8876342773438, "logps/rejected": -426.87841796875, "loss": 0.5369, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.354304790496826, "rewards/margins": 0.8363629579544067, "rewards/rejected": -3.1906678676605225, "step": 17190 }, { "epoch": 2.22, "learning_rate": 1.4435306493258104e-07, "logits/chosen": -2.7447943687438965, "logits/rejected": -2.6581435203552246, "logps/chosen": -694.7154541015625, "logps/rejected": -514.2162475585938, "loss": 0.5138, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.14786434173584, "rewards/margins": 0.9950195550918579, "rewards/rejected": -3.142883777618408, "step": 17200 }, { "epoch": 2.22, "learning_rate": 1.4411399062828728e-07, "logits/chosen": -2.7278504371643066, "logits/rejected": -2.680445909500122, "logps/chosen": -586.4568481445312, "logps/rejected": -420.3605041503906, "loss": 0.5573, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9250946044921875, "rewards/margins": 0.9467706680297852, "rewards/rejected": -2.871865749359131, "step": 17210 }, { "epoch": 2.22, "learning_rate": 1.4387491632399349e-07, "logits/chosen": -2.650357961654663, "logits/rejected": -2.5488131046295166, "logps/chosen": -563.8385009765625, "logps/rejected": -488.60064697265625, "loss": 0.4698, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0917859077453613, "rewards/margins": 1.0665286779403687, "rewards/rejected": -3.1583147048950195, "step": 17220 }, { "epoch": 2.22, "learning_rate": 1.4363584201969972e-07, "logits/chosen": -2.8409159183502197, "logits/rejected": -2.7406563758850098, "logps/chosen": -503.5477600097656, "logps/rejected": -395.0743713378906, "loss": 0.5651, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.950103759765625, "rewards/margins": 0.7544013261795044, "rewards/rejected": -2.70450496673584, "step": 17230 }, { "epoch": 2.23, "learning_rate": 1.4339676771540593e-07, "logits/chosen": -2.73344349861145, "logits/rejected": -2.5816445350646973, "logps/chosen": -609.5152587890625, "logps/rejected": -413.82159423828125, "loss": 0.5128, "rewards/accuracies": 0.6875, "rewards/chosen": -2.054847240447998, "rewards/margins": 0.9560596346855164, "rewards/rejected": -3.0109074115753174, "step": 17240 }, { "epoch": 2.23, "learning_rate": 1.4315769341111217e-07, "logits/chosen": -2.7316575050354004, "logits/rejected": -2.6699185371398926, "logps/chosen": -521.8712158203125, "logps/rejected": -424.83135986328125, "loss": 0.6847, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2255008220672607, "rewards/margins": 0.6394723653793335, "rewards/rejected": -2.864973545074463, "step": 17250 }, { "epoch": 2.23, "learning_rate": 1.429186191068184e-07, "logits/chosen": -2.7835824489593506, "logits/rejected": -2.6062731742858887, "logps/chosen": -605.68798828125, "logps/rejected": -398.36578369140625, "loss": 0.6217, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2223381996154785, "rewards/margins": 0.7447764873504639, "rewards/rejected": -2.9671149253845215, "step": 17260 }, { "epoch": 2.23, "learning_rate": 1.426795448025246e-07, "logits/chosen": -2.753641128540039, "logits/rejected": -2.6733641624450684, "logps/chosen": -563.7345581054688, "logps/rejected": -419.4165954589844, "loss": 0.5208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2264034748077393, "rewards/margins": 0.8786007761955261, "rewards/rejected": -3.10500431060791, "step": 17270 }, { "epoch": 2.23, "learning_rate": 1.4244047049823085e-07, "logits/chosen": -2.7536864280700684, "logits/rejected": -2.6334431171417236, "logps/chosen": -529.0238037109375, "logps/rejected": -415.62701416015625, "loss": 0.6241, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1009104251861572, "rewards/margins": 0.7106025218963623, "rewards/rejected": -2.8115127086639404, "step": 17280 }, { "epoch": 2.23, "learning_rate": 1.4220139619393706e-07, "logits/chosen": -2.628539562225342, "logits/rejected": -2.4940617084503174, "logps/chosen": -548.7349853515625, "logps/rejected": -416.0895080566406, "loss": 0.5797, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9711040258407593, "rewards/margins": 0.8295114636421204, "rewards/rejected": -2.8006155490875244, "step": 17290 }, { "epoch": 2.23, "learning_rate": 1.419623218896433e-07, "logits/chosen": -2.782414674758911, "logits/rejected": -2.6205005645751953, "logps/chosen": -477.31195068359375, "logps/rejected": -353.16900634765625, "loss": 0.5515, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0443925857543945, "rewards/margins": 0.8653877377510071, "rewards/rejected": -2.909780263900757, "step": 17300 }, { "epoch": 2.23, "learning_rate": 1.417232475853495e-07, "logits/chosen": -2.7323238849639893, "logits/rejected": -2.62831449508667, "logps/chosen": -566.2843017578125, "logps/rejected": -486.4825744628906, "loss": 0.6344, "rewards/accuracies": 0.6875, "rewards/chosen": -2.490797996520996, "rewards/margins": 0.5522103309631348, "rewards/rejected": -3.043008327484131, "step": 17310 }, { "epoch": 2.24, "learning_rate": 1.4148417328105574e-07, "logits/chosen": -2.724459409713745, "logits/rejected": -2.720451831817627, "logps/chosen": -502.95648193359375, "logps/rejected": -462.25115966796875, "loss": 0.6119, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.127639055252075, "rewards/margins": 0.804736316204071, "rewards/rejected": -2.932375431060791, "step": 17320 }, { "epoch": 2.24, "learning_rate": 1.4124509897676198e-07, "logits/chosen": -2.7318294048309326, "logits/rejected": -2.5491201877593994, "logps/chosen": -577.2131958007812, "logps/rejected": -385.74346923828125, "loss": 0.4636, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.1727631092071533, "rewards/margins": 1.0406156778335571, "rewards/rejected": -3.213378429412842, "step": 17330 }, { "epoch": 2.24, "learning_rate": 1.4100602467246819e-07, "logits/chosen": -2.8560094833374023, "logits/rejected": -2.6455845832824707, "logps/chosen": -566.8753662109375, "logps/rejected": -412.1195373535156, "loss": 0.4817, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.944401741027832, "rewards/margins": 0.9791971445083618, "rewards/rejected": -2.923598527908325, "step": 17340 }, { "epoch": 2.24, "learning_rate": 1.4076695036817442e-07, "logits/chosen": -2.747715950012207, "logits/rejected": -2.6265039443969727, "logps/chosen": -697.0675048828125, "logps/rejected": -512.86279296875, "loss": 0.5249, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.001903533935547, "rewards/margins": 1.09589421749115, "rewards/rejected": -3.0977978706359863, "step": 17350 }, { "epoch": 2.24, "learning_rate": 1.4052787606388063e-07, "logits/chosen": -2.7967705726623535, "logits/rejected": -2.635401725769043, "logps/chosen": -589.4223022460938, "logps/rejected": -450.89385986328125, "loss": 0.5282, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.803855299949646, "rewards/margins": 0.9932734370231628, "rewards/rejected": -2.7971291542053223, "step": 17360 }, { "epoch": 2.24, "learning_rate": 1.4028880175958687e-07, "logits/chosen": -2.752685070037842, "logits/rejected": -2.6611618995666504, "logps/chosen": -538.1617431640625, "logps/rejected": -401.6909484863281, "loss": 0.4924, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9690284729003906, "rewards/margins": 0.9667495489120483, "rewards/rejected": -2.9357781410217285, "step": 17370 }, { "epoch": 2.24, "learning_rate": 1.4004972745529308e-07, "logits/chosen": -2.6491057872772217, "logits/rejected": -2.4290273189544678, "logps/chosen": -669.5499267578125, "logps/rejected": -485.24810791015625, "loss": 0.4608, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.073270320892334, "rewards/margins": 1.2655484676361084, "rewards/rejected": -3.3388187885284424, "step": 17380 }, { "epoch": 2.25, "learning_rate": 1.398106531509993e-07, "logits/chosen": -2.878246307373047, "logits/rejected": -2.722047805786133, "logps/chosen": -502.34075927734375, "logps/rejected": -372.5865173339844, "loss": 0.4616, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.704075574874878, "rewards/margins": 1.0881505012512207, "rewards/rejected": -2.792226552963257, "step": 17390 }, { "epoch": 2.25, "learning_rate": 1.3957157884670557e-07, "logits/chosen": -2.7012531757354736, "logits/rejected": -2.5763678550720215, "logps/chosen": -643.441162109375, "logps/rejected": -523.2747802734375, "loss": 0.5399, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2584874629974365, "rewards/margins": 0.9640541076660156, "rewards/rejected": -3.222541332244873, "step": 17400 }, { "epoch": 2.25, "learning_rate": 1.3933250454241178e-07, "logits/chosen": -2.812612533569336, "logits/rejected": -2.628859519958496, "logps/chosen": -567.2840576171875, "logps/rejected": -427.1178283691406, "loss": 0.5467, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9079939126968384, "rewards/margins": 0.9153097867965698, "rewards/rejected": -2.823303699493408, "step": 17410 }, { "epoch": 2.25, "learning_rate": 1.3909343023811802e-07, "logits/chosen": -2.7673420906066895, "logits/rejected": -2.662358045578003, "logps/chosen": -520.4659423828125, "logps/rejected": -374.87225341796875, "loss": 0.6086, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1526026725769043, "rewards/margins": 0.9076792001724243, "rewards/rejected": -3.060281753540039, "step": 17420 }, { "epoch": 2.25, "learning_rate": 1.3885435593382423e-07, "logits/chosen": -2.7564077377319336, "logits/rejected": -2.661900043487549, "logps/chosen": -628.0650634765625, "logps/rejected": -480.18170166015625, "loss": 0.6614, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0088858604431152, "rewards/margins": 0.7653211951255798, "rewards/rejected": -2.77420711517334, "step": 17430 }, { "epoch": 2.25, "learning_rate": 1.3861528162953046e-07, "logits/chosen": -2.6362404823303223, "logits/rejected": -2.546339988708496, "logps/chosen": -502.0399475097656, "logps/rejected": -376.2704772949219, "loss": 0.6203, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2452266216278076, "rewards/margins": 0.6028550863265991, "rewards/rejected": -2.8480818271636963, "step": 17440 }, { "epoch": 2.25, "learning_rate": 1.3837620732523667e-07, "logits/chosen": -2.6713805198669434, "logits/rejected": -2.5081441402435303, "logps/chosen": -535.203369140625, "logps/rejected": -360.86322021484375, "loss": 0.5949, "rewards/accuracies": 0.6875, "rewards/chosen": -2.071092128753662, "rewards/margins": 0.8615267872810364, "rewards/rejected": -2.932619094848633, "step": 17450 }, { "epoch": 2.25, "learning_rate": 1.381371330209429e-07, "logits/chosen": -2.719393253326416, "logits/rejected": -2.683607578277588, "logps/chosen": -558.31396484375, "logps/rejected": -458.9444274902344, "loss": 0.6392, "rewards/accuracies": 0.6875, "rewards/chosen": -2.195060968399048, "rewards/margins": 0.7236794829368591, "rewards/rejected": -2.9187400341033936, "step": 17460 }, { "epoch": 2.26, "learning_rate": 1.3789805871664915e-07, "logits/chosen": -2.7842581272125244, "logits/rejected": -2.714481830596924, "logps/chosen": -485.43988037109375, "logps/rejected": -393.1763000488281, "loss": 0.5957, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9353971481323242, "rewards/margins": 0.819216251373291, "rewards/rejected": -2.7546133995056152, "step": 17470 }, { "epoch": 2.26, "learning_rate": 1.3765898441235536e-07, "logits/chosen": -2.7422754764556885, "logits/rejected": -2.6614675521850586, "logps/chosen": -523.91796875, "logps/rejected": -439.05535888671875, "loss": 0.5377, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.082730770111084, "rewards/margins": 0.8260897397994995, "rewards/rejected": -2.908820390701294, "step": 17480 }, { "epoch": 2.26, "learning_rate": 1.374199101080616e-07, "logits/chosen": -2.835087537765503, "logits/rejected": -2.638486862182617, "logps/chosen": -628.0099487304688, "logps/rejected": -488.23077392578125, "loss": 0.5134, "rewards/accuracies": 0.75, "rewards/chosen": -2.1472928524017334, "rewards/margins": 1.034337043762207, "rewards/rejected": -3.1816296577453613, "step": 17490 }, { "epoch": 2.26, "learning_rate": 1.371808358037678e-07, "logits/chosen": -2.743241786956787, "logits/rejected": -2.6593527793884277, "logps/chosen": -493.58892822265625, "logps/rejected": -406.22601318359375, "loss": 0.6592, "rewards/accuracies": 0.625, "rewards/chosen": -2.237640857696533, "rewards/margins": 0.7385370135307312, "rewards/rejected": -2.97617769241333, "step": 17500 }, { "epoch": 2.26, "learning_rate": 1.3694176149947404e-07, "logits/chosen": -2.812544345855713, "logits/rejected": -2.708090305328369, "logps/chosen": -520.7924194335938, "logps/rejected": -423.50860595703125, "loss": 0.726, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.285693407058716, "rewards/margins": 0.5119642019271851, "rewards/rejected": -2.7976572513580322, "step": 17510 }, { "epoch": 2.26, "learning_rate": 1.3670268719518025e-07, "logits/chosen": -2.7727460861206055, "logits/rejected": -2.66937518119812, "logps/chosen": -521.8733520507812, "logps/rejected": -386.9874572753906, "loss": 0.4686, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9033149480819702, "rewards/margins": 1.0527598857879639, "rewards/rejected": -2.9560744762420654, "step": 17520 }, { "epoch": 2.26, "learning_rate": 1.3646361289088648e-07, "logits/chosen": -2.5592339038848877, "logits/rejected": -2.5600223541259766, "logps/chosen": -559.0127563476562, "logps/rejected": -500.06036376953125, "loss": 0.6779, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.070131540298462, "rewards/margins": 0.9160404205322266, "rewards/rejected": -2.9861714839935303, "step": 17530 }, { "epoch": 2.26, "learning_rate": 1.3622453858659272e-07, "logits/chosen": -2.7234206199645996, "logits/rejected": -2.6016743183135986, "logps/chosen": -554.4006958007812, "logps/rejected": -410.7335510253906, "loss": 0.4097, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.065760612487793, "rewards/margins": 1.1260799169540405, "rewards/rejected": -3.191840648651123, "step": 17540 }, { "epoch": 2.27, "learning_rate": 1.3598546428229893e-07, "logits/chosen": -2.647773265838623, "logits/rejected": -2.6352198123931885, "logps/chosen": -579.6278076171875, "logps/rejected": -522.866455078125, "loss": 0.552, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2360434532165527, "rewards/margins": 0.8822612762451172, "rewards/rejected": -3.11830472946167, "step": 17550 }, { "epoch": 2.27, "learning_rate": 1.3574638997800516e-07, "logits/chosen": -2.776564121246338, "logits/rejected": -2.6653571128845215, "logps/chosen": -573.6260986328125, "logps/rejected": -463.4024963378906, "loss": 0.3966, "rewards/accuracies": 0.8125, "rewards/chosen": -1.732217788696289, "rewards/margins": 1.420806884765625, "rewards/rejected": -3.153024911880493, "step": 17560 }, { "epoch": 2.27, "learning_rate": 1.3550731567371137e-07, "logits/chosen": -2.769207000732422, "logits/rejected": -2.691784381866455, "logps/chosen": -592.6925659179688, "logps/rejected": -461.2679748535156, "loss": 0.5285, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.7780771255493164, "rewards/margins": 0.965909481048584, "rewards/rejected": -2.7439866065979004, "step": 17570 }, { "epoch": 2.27, "learning_rate": 1.352682413694176e-07, "logits/chosen": -2.877732038497925, "logits/rejected": -2.655888795852661, "logps/chosen": -580.6697998046875, "logps/rejected": -368.34893798828125, "loss": 0.4965, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1620781421661377, "rewards/margins": 0.992437481880188, "rewards/rejected": -3.154515504837036, "step": 17580 }, { "epoch": 2.27, "learning_rate": 1.3502916706512382e-07, "logits/chosen": -2.7345757484436035, "logits/rejected": -2.5645508766174316, "logps/chosen": -605.3291015625, "logps/rejected": -479.37353515625, "loss": 0.539, "rewards/accuracies": 0.75, "rewards/chosen": -2.1329236030578613, "rewards/margins": 0.8428758382797241, "rewards/rejected": -2.975799083709717, "step": 17590 }, { "epoch": 2.27, "learning_rate": 1.3479009276083005e-07, "logits/chosen": -2.7260231971740723, "logits/rejected": -2.68196439743042, "logps/chosen": -697.8281860351562, "logps/rejected": -552.338134765625, "loss": 0.6243, "rewards/accuracies": 0.625, "rewards/chosen": -2.142779588699341, "rewards/margins": 0.7041845917701721, "rewards/rejected": -2.846963882446289, "step": 17600 }, { "epoch": 2.27, "learning_rate": 1.345510184565363e-07, "logits/chosen": -2.737125873565674, "logits/rejected": -2.525574207305908, "logps/chosen": -659.692138671875, "logps/rejected": -412.8739318847656, "loss": 0.5098, "rewards/accuracies": 0.75, "rewards/chosen": -2.0846199989318848, "rewards/margins": 1.0560483932495117, "rewards/rejected": -3.1406683921813965, "step": 17610 }, { "epoch": 2.27, "learning_rate": 1.343119441522425e-07, "logits/chosen": -2.6657931804656982, "logits/rejected": -2.6223385334014893, "logps/chosen": -551.8417358398438, "logps/rejected": -476.153564453125, "loss": 0.4454, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8465343713760376, "rewards/margins": 1.207804799079895, "rewards/rejected": -3.0543389320373535, "step": 17620 }, { "epoch": 2.28, "learning_rate": 1.3407286984794874e-07, "logits/chosen": -2.648510694503784, "logits/rejected": -2.545830249786377, "logps/chosen": -598.8138427734375, "logps/rejected": -469.3397521972656, "loss": 0.6435, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1184401512145996, "rewards/margins": 0.8359308242797852, "rewards/rejected": -2.9543709754943848, "step": 17630 }, { "epoch": 2.28, "learning_rate": 1.3383379554365495e-07, "logits/chosen": -2.709369659423828, "logits/rejected": -2.5021812915802, "logps/chosen": -675.8084106445312, "logps/rejected": -456.83013916015625, "loss": 0.5538, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.081084728240967, "rewards/margins": 0.9529293179512024, "rewards/rejected": -3.0340142250061035, "step": 17640 }, { "epoch": 2.28, "learning_rate": 1.3359472123936118e-07, "logits/chosen": -2.8240535259246826, "logits/rejected": -2.701864242553711, "logps/chosen": -574.1658935546875, "logps/rejected": -477.1216735839844, "loss": 0.6069, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1802616119384766, "rewards/margins": 0.8976082801818848, "rewards/rejected": -3.0778698921203613, "step": 17650 }, { "epoch": 2.28, "learning_rate": 1.333556469350674e-07, "logits/chosen": -2.6276440620422363, "logits/rejected": -2.6138086318969727, "logps/chosen": -550.0621337890625, "logps/rejected": -533.9893798828125, "loss": 0.5455, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.192603588104248, "rewards/margins": 0.9384220838546753, "rewards/rejected": -3.131025791168213, "step": 17660 }, { "epoch": 2.28, "learning_rate": 1.3311657263077363e-07, "logits/chosen": -2.7572898864746094, "logits/rejected": -2.6309585571289062, "logps/chosen": -611.0433349609375, "logps/rejected": -432.9873962402344, "loss": 0.5171, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1118850708007812, "rewards/margins": 0.9566381573677063, "rewards/rejected": -3.068523406982422, "step": 17670 }, { "epoch": 2.28, "learning_rate": 1.3287749832647986e-07, "logits/chosen": -2.6916027069091797, "logits/rejected": -2.6116576194763184, "logps/chosen": -492.04754638671875, "logps/rejected": -388.8577880859375, "loss": 0.6338, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.012497901916504, "rewards/margins": 0.7368690371513367, "rewards/rejected": -2.7493667602539062, "step": 17680 }, { "epoch": 2.28, "learning_rate": 1.326384240221861e-07, "logits/chosen": -2.797140121459961, "logits/rejected": -2.7359132766723633, "logps/chosen": -547.1463623046875, "logps/rejected": -400.56683349609375, "loss": 0.4911, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0364904403686523, "rewards/margins": 0.9584361910820007, "rewards/rejected": -2.994926929473877, "step": 17690 }, { "epoch": 2.29, "learning_rate": 1.3239934971789233e-07, "logits/chosen": -2.7679190635681152, "logits/rejected": -2.5590720176696777, "logps/chosen": -556.6360473632812, "logps/rejected": -394.80023193359375, "loss": 0.546, "rewards/accuracies": 0.75, "rewards/chosen": -2.0427491664886475, "rewards/margins": 0.7946333885192871, "rewards/rejected": -2.8373825550079346, "step": 17700 }, { "epoch": 2.29, "learning_rate": 1.3216027541359854e-07, "logits/chosen": -2.6229004859924316, "logits/rejected": -2.566336154937744, "logps/chosen": -377.04388427734375, "logps/rejected": -395.0232238769531, "loss": 0.6397, "rewards/accuracies": 0.625, "rewards/chosen": -1.798643708229065, "rewards/margins": 0.6581300497055054, "rewards/rejected": -2.4567739963531494, "step": 17710 }, { "epoch": 2.29, "learning_rate": 1.3192120110930478e-07, "logits/chosen": -2.7883942127227783, "logits/rejected": -2.6728386878967285, "logps/chosen": -572.455322265625, "logps/rejected": -401.8914489746094, "loss": 0.5454, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9505380392074585, "rewards/margins": 0.8943275213241577, "rewards/rejected": -2.844865560531616, "step": 17720 }, { "epoch": 2.29, "learning_rate": 1.31682126805011e-07, "logits/chosen": -2.7418458461761475, "logits/rejected": -2.693701982498169, "logps/chosen": -429.6761169433594, "logps/rejected": -401.16033935546875, "loss": 0.4791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8482555150985718, "rewards/margins": 0.9176506996154785, "rewards/rejected": -2.7659060955047607, "step": 17730 }, { "epoch": 2.29, "learning_rate": 1.3144305250071722e-07, "logits/chosen": -2.6871306896209717, "logits/rejected": -2.582836866378784, "logps/chosen": -562.8719482421875, "logps/rejected": -379.31689453125, "loss": 0.539, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0977272987365723, "rewards/margins": 0.9055293798446655, "rewards/rejected": -3.0032565593719482, "step": 17740 }, { "epoch": 2.29, "learning_rate": 1.3120397819642346e-07, "logits/chosen": -2.6834025382995605, "logits/rejected": -2.6445891857147217, "logps/chosen": -480.35009765625, "logps/rejected": -417.90716552734375, "loss": 0.6527, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1394104957580566, "rewards/margins": 0.6081194877624512, "rewards/rejected": -2.7475297451019287, "step": 17750 }, { "epoch": 2.29, "learning_rate": 1.3096490389212967e-07, "logits/chosen": -2.758195638656616, "logits/rejected": -2.732114315032959, "logps/chosen": -627.356201171875, "logps/rejected": -496.9607849121094, "loss": 0.4673, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0068514347076416, "rewards/margins": 1.065300464630127, "rewards/rejected": -3.0721518993377686, "step": 17760 }, { "epoch": 2.29, "learning_rate": 1.307258295878359e-07, "logits/chosen": -2.638643264770508, "logits/rejected": -2.667720079421997, "logps/chosen": -475.2254333496094, "logps/rejected": -462.31591796875, "loss": 0.5176, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1409449577331543, "rewards/margins": 0.8761254549026489, "rewards/rejected": -3.0170700550079346, "step": 17770 }, { "epoch": 2.3, "learning_rate": 1.3048675528354212e-07, "logits/chosen": -2.6169657707214355, "logits/rejected": -2.516693115234375, "logps/chosen": -468.77142333984375, "logps/rejected": -381.76666259765625, "loss": 0.6203, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1391007900238037, "rewards/margins": 0.7771346569061279, "rewards/rejected": -2.9162354469299316, "step": 17780 }, { "epoch": 2.3, "learning_rate": 1.3024768097924835e-07, "logits/chosen": -2.7587220668792725, "logits/rejected": -2.6409125328063965, "logps/chosen": -644.5451049804688, "logps/rejected": -474.4126892089844, "loss": 0.5645, "rewards/accuracies": 0.75, "rewards/chosen": -2.1746249198913574, "rewards/margins": 0.8469465374946594, "rewards/rejected": -3.021571636199951, "step": 17790 }, { "epoch": 2.3, "learning_rate": 1.3000860667495456e-07, "logits/chosen": -2.7510006427764893, "logits/rejected": -2.6128108501434326, "logps/chosen": -580.2392578125, "logps/rejected": -487.5181579589844, "loss": 0.6826, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2412919998168945, "rewards/margins": 0.7157723903656006, "rewards/rejected": -2.957064390182495, "step": 17800 }, { "epoch": 2.3, "learning_rate": 1.297695323706608e-07, "logits/chosen": -2.7819130420684814, "logits/rejected": -2.714162588119507, "logps/chosen": -552.365234375, "logps/rejected": -409.9449157714844, "loss": 0.6238, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1091413497924805, "rewards/margins": 0.850338339805603, "rewards/rejected": -2.959479808807373, "step": 17810 }, { "epoch": 2.3, "learning_rate": 1.2953045806636703e-07, "logits/chosen": -2.671103000640869, "logits/rejected": -2.5951664447784424, "logps/chosen": -487.328857421875, "logps/rejected": -431.6280212402344, "loss": 0.5351, "rewards/accuracies": 0.6875, "rewards/chosen": -2.21830153465271, "rewards/margins": 0.9237899780273438, "rewards/rejected": -3.1420915126800537, "step": 17820 }, { "epoch": 2.3, "learning_rate": 1.2929138376207324e-07, "logits/chosen": -2.8268277645111084, "logits/rejected": -2.6247828006744385, "logps/chosen": -502.164306640625, "logps/rejected": -316.6130676269531, "loss": 0.7104, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.1558918952941895, "rewards/margins": 0.543222188949585, "rewards/rejected": -2.6991143226623535, "step": 17830 }, { "epoch": 2.3, "learning_rate": 1.2905230945777948e-07, "logits/chosen": -2.6560044288635254, "logits/rejected": -2.5236876010894775, "logps/chosen": -520.9718627929688, "logps/rejected": -412.3727111816406, "loss": 0.5574, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9706265926361084, "rewards/margins": 0.934573769569397, "rewards/rejected": -2.905200481414795, "step": 17840 }, { "epoch": 2.3, "learning_rate": 1.288132351534857e-07, "logits/chosen": -2.69171404838562, "logits/rejected": -2.568446397781372, "logps/chosen": -523.1221923828125, "logps/rejected": -396.34588623046875, "loss": 0.6128, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.112250804901123, "rewards/margins": 0.7721604108810425, "rewards/rejected": -2.884411334991455, "step": 17850 }, { "epoch": 2.31, "learning_rate": 1.2857416084919192e-07, "logits/chosen": -2.7912793159484863, "logits/rejected": -2.623312473297119, "logps/chosen": -624.1697387695312, "logps/rejected": -434.17181396484375, "loss": 0.6046, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.143242120742798, "rewards/margins": 0.7286003828048706, "rewards/rejected": -2.871842622756958, "step": 17860 }, { "epoch": 2.31, "learning_rate": 1.2833508654489813e-07, "logits/chosen": -2.6912405490875244, "logits/rejected": -2.7199013233184814, "logps/chosen": -520.5380249023438, "logps/rejected": -457.29595947265625, "loss": 0.5478, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0413172245025635, "rewards/margins": 0.734799325466156, "rewards/rejected": -2.7761166095733643, "step": 17870 }, { "epoch": 2.31, "learning_rate": 1.2809601224060437e-07, "logits/chosen": -2.683213472366333, "logits/rejected": -2.6563849449157715, "logps/chosen": -506.18658447265625, "logps/rejected": -422.82891845703125, "loss": 0.7205, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1219944953918457, "rewards/margins": 0.5290063619613647, "rewards/rejected": -2.651000499725342, "step": 17880 }, { "epoch": 2.31, "learning_rate": 1.278569379363106e-07, "logits/chosen": -2.811178207397461, "logits/rejected": -2.6242480278015137, "logps/chosen": -538.0741577148438, "logps/rejected": -374.44378662109375, "loss": 0.5174, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2057085037231445, "rewards/margins": 0.764985203742981, "rewards/rejected": -2.970693588256836, "step": 17890 }, { "epoch": 2.31, "learning_rate": 1.2761786363201681e-07, "logits/chosen": -2.702843427658081, "logits/rejected": -2.65714430809021, "logps/chosen": -553.5977783203125, "logps/rejected": -431.23809814453125, "loss": 0.6078, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1420891284942627, "rewards/margins": 0.6837973594665527, "rewards/rejected": -2.8258867263793945, "step": 17900 }, { "epoch": 2.31, "learning_rate": 1.2737878932772305e-07, "logits/chosen": -2.6942524909973145, "logits/rejected": -2.531686782836914, "logps/chosen": -558.5453491210938, "logps/rejected": -448.601806640625, "loss": 0.6641, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.475594997406006, "rewards/margins": 0.45069795846939087, "rewards/rejected": -2.926293134689331, "step": 17910 }, { "epoch": 2.31, "learning_rate": 1.2713971502342926e-07, "logits/chosen": -2.696373462677002, "logits/rejected": -2.5925681591033936, "logps/chosen": -595.6552734375, "logps/rejected": -449.95477294921875, "loss": 0.5841, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.050493001937866, "rewards/margins": 1.0098097324371338, "rewards/rejected": -3.060302734375, "step": 17920 }, { "epoch": 2.31, "learning_rate": 1.269006407191355e-07, "logits/chosen": -2.831345319747925, "logits/rejected": -2.6070961952209473, "logps/chosen": -602.2533569335938, "logps/rejected": -420.76458740234375, "loss": 0.5333, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8568607568740845, "rewards/margins": 1.0589373111724854, "rewards/rejected": -2.915797710418701, "step": 17930 }, { "epoch": 2.32, "learning_rate": 1.266615664148417e-07, "logits/chosen": -2.7850146293640137, "logits/rejected": -2.629164457321167, "logps/chosen": -532.4837036132812, "logps/rejected": -403.12237548828125, "loss": 0.5187, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9181444644927979, "rewards/margins": 0.9719356298446655, "rewards/rejected": -2.890079975128174, "step": 17940 }, { "epoch": 2.32, "learning_rate": 1.2642249211054794e-07, "logits/chosen": -2.7369604110717773, "logits/rejected": -2.5717649459838867, "logps/chosen": -599.5671997070312, "logps/rejected": -456.1842346191406, "loss": 0.6624, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.272423267364502, "rewards/margins": 0.7478927373886108, "rewards/rejected": -3.0203158855438232, "step": 17950 }, { "epoch": 2.32, "learning_rate": 1.2618341780625418e-07, "logits/chosen": -2.7271411418914795, "logits/rejected": -2.5782666206359863, "logps/chosen": -631.3267822265625, "logps/rejected": -440.5077209472656, "loss": 0.4, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.9898065328598022, "rewards/margins": 1.2623722553253174, "rewards/rejected": -3.252178907394409, "step": 17960 }, { "epoch": 2.32, "learning_rate": 1.2594434350196039e-07, "logits/chosen": -2.793797731399536, "logits/rejected": -2.4952709674835205, "logps/chosen": -636.2952880859375, "logps/rejected": -426.4608459472656, "loss": 0.55, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9941012859344482, "rewards/margins": 0.9746288061141968, "rewards/rejected": -2.9687302112579346, "step": 17970 }, { "epoch": 2.32, "learning_rate": 1.2570526919766665e-07, "logits/chosen": -2.697760581970215, "logits/rejected": -2.561292886734009, "logps/chosen": -562.4002685546875, "logps/rejected": -470.6311950683594, "loss": 0.6205, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.007457971572876, "rewards/margins": 0.8043592572212219, "rewards/rejected": -2.811817169189453, "step": 17980 }, { "epoch": 2.32, "learning_rate": 1.2546619489337286e-07, "logits/chosen": -2.7484755516052246, "logits/rejected": -2.727177143096924, "logps/chosen": -574.6592407226562, "logps/rejected": -478.83392333984375, "loss": 0.4702, "rewards/accuracies": 0.75, "rewards/chosen": -1.937212347984314, "rewards/margins": 1.0626914501190186, "rewards/rejected": -2.999903678894043, "step": 17990 }, { "epoch": 2.32, "learning_rate": 1.252271205890791e-07, "logits/chosen": -2.7246949672698975, "logits/rejected": -2.7158799171447754, "logps/chosen": -530.3668212890625, "logps/rejected": -452.03594970703125, "loss": 0.5732, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9109004735946655, "rewards/margins": 0.7251878380775452, "rewards/rejected": -2.6360886096954346, "step": 18000 }, { "epoch": 2.32, "eval_logits/chosen": -3.0811357498168945, "eval_logits/rejected": -3.029374599456787, "eval_logps/chosen": -540.81689453125, "eval_logps/rejected": -421.0493469238281, "eval_loss": 0.6119464039802551, "eval_rewards/accuracies": 0.6740000247955322, "eval_rewards/chosen": -0.8997114300727844, "eval_rewards/margins": 1.1123578548431396, "eval_rewards/rejected": -2.0120692253112793, "eval_runtime": 283.962, "eval_samples_per_second": 7.043, "eval_steps_per_second": 3.522, "step": 18000 }, { "epoch": 2.33, "learning_rate": 1.249880462847853e-07, "logits/chosen": -2.8923449516296387, "logits/rejected": -2.7819809913635254, "logps/chosen": -441.53607177734375, "logps/rejected": -401.09527587890625, "loss": 0.5932, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.022514820098877, "rewards/margins": 0.7116460800170898, "rewards/rejected": -2.734160900115967, "step": 18010 }, { "epoch": 2.33, "learning_rate": 1.2474897198049154e-07, "logits/chosen": -2.8503706455230713, "logits/rejected": -2.714585781097412, "logps/chosen": -578.5828857421875, "logps/rejected": -375.4177551269531, "loss": 0.5679, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7601251602172852, "rewards/margins": 0.9141169786453247, "rewards/rejected": -2.6742422580718994, "step": 18020 }, { "epoch": 2.33, "learning_rate": 1.2450989767619775e-07, "logits/chosen": -2.774325370788574, "logits/rejected": -2.703886032104492, "logps/chosen": -527.36669921875, "logps/rejected": -425.909423828125, "loss": 0.584, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7773374319076538, "rewards/margins": 1.005260705947876, "rewards/rejected": -2.7825980186462402, "step": 18030 }, { "epoch": 2.33, "learning_rate": 1.2427082337190398e-07, "logits/chosen": -2.7734642028808594, "logits/rejected": -2.6908483505249023, "logps/chosen": -531.37939453125, "logps/rejected": -429.0433654785156, "loss": 0.568, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9250202178955078, "rewards/margins": 0.8367685079574585, "rewards/rejected": -2.761788845062256, "step": 18040 }, { "epoch": 2.33, "learning_rate": 1.2403174906761022e-07, "logits/chosen": -2.6785213947296143, "logits/rejected": -2.6920371055603027, "logps/chosen": -426.17962646484375, "logps/rejected": -407.36273193359375, "loss": 0.5734, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9943106174468994, "rewards/margins": 0.8152728080749512, "rewards/rejected": -2.8095836639404297, "step": 18050 }, { "epoch": 2.33, "learning_rate": 1.2379267476331643e-07, "logits/chosen": -2.633258104324341, "logits/rejected": -2.5883305072784424, "logps/chosen": -492.99456787109375, "logps/rejected": -404.96533203125, "loss": 0.594, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9579575061798096, "rewards/margins": 0.7631974816322327, "rewards/rejected": -2.7211546897888184, "step": 18060 }, { "epoch": 2.33, "learning_rate": 1.2355360045902267e-07, "logits/chosen": -2.721355438232422, "logits/rejected": -2.553715467453003, "logps/chosen": -550.8196411132812, "logps/rejected": -430.7235412597656, "loss": 0.6214, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2859272956848145, "rewards/margins": 0.6095834970474243, "rewards/rejected": -2.8955111503601074, "step": 18070 }, { "epoch": 2.33, "learning_rate": 1.2331452615472888e-07, "logits/chosen": -2.722538471221924, "logits/rejected": -2.591886043548584, "logps/chosen": -567.3790283203125, "logps/rejected": -500.40118408203125, "loss": 0.546, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1073646545410156, "rewards/margins": 0.9191287755966187, "rewards/rejected": -3.026493549346924, "step": 18080 }, { "epoch": 2.34, "learning_rate": 1.230754518504351e-07, "logits/chosen": -2.723886489868164, "logits/rejected": -2.6758437156677246, "logps/chosen": -404.2786865234375, "logps/rejected": -401.70404052734375, "loss": 0.5845, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1312801837921143, "rewards/margins": 0.620576024055481, "rewards/rejected": -2.7518563270568848, "step": 18090 }, { "epoch": 2.34, "learning_rate": 1.2283637754614132e-07, "logits/chosen": -2.8169121742248535, "logits/rejected": -2.6821727752685547, "logps/chosen": -598.6100463867188, "logps/rejected": -456.98419189453125, "loss": 0.6231, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.216229200363159, "rewards/margins": 0.6206780672073364, "rewards/rejected": -2.836907386779785, "step": 18100 }, { "epoch": 2.34, "learning_rate": 1.2259730324184756e-07, "logits/chosen": -2.7950613498687744, "logits/rejected": -2.660870313644409, "logps/chosen": -651.5653686523438, "logps/rejected": -475.58758544921875, "loss": 0.5129, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.086578130722046, "rewards/margins": 0.9453317523002625, "rewards/rejected": -3.0319101810455322, "step": 18110 }, { "epoch": 2.34, "learning_rate": 1.223582289375538e-07, "logits/chosen": -2.756620168685913, "logits/rejected": -2.6284947395324707, "logps/chosen": -553.7334594726562, "logps/rejected": -418.4029846191406, "loss": 0.5689, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.984301209449768, "rewards/margins": 0.7068820595741272, "rewards/rejected": -2.691183567047119, "step": 18120 }, { "epoch": 2.34, "learning_rate": 1.2211915463326003e-07, "logits/chosen": -2.760509729385376, "logits/rejected": -2.6770834922790527, "logps/chosen": -596.8595581054688, "logps/rejected": -478.59393310546875, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.476047992706299, "rewards/margins": 0.5050615072250366, "rewards/rejected": -2.981110095977783, "step": 18130 }, { "epoch": 2.34, "learning_rate": 1.2188008032896624e-07, "logits/chosen": -2.705087184906006, "logits/rejected": -2.6352407932281494, "logps/chosen": -527.650634765625, "logps/rejected": -442.1390075683594, "loss": 0.6581, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.125948429107666, "rewards/margins": 0.5398039817810059, "rewards/rejected": -2.665752410888672, "step": 18140 }, { "epoch": 2.34, "learning_rate": 1.2164100602467247e-07, "logits/chosen": -2.7868094444274902, "logits/rejected": -2.6666648387908936, "logps/chosen": -559.3416137695312, "logps/rejected": -431.93731689453125, "loss": 0.5968, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1470437049865723, "rewards/margins": 0.7408377528190613, "rewards/rejected": -2.8878817558288574, "step": 18150 }, { "epoch": 2.34, "learning_rate": 1.2140193172037868e-07, "logits/chosen": -2.718294858932495, "logits/rejected": -2.5866594314575195, "logps/chosen": -575.2918090820312, "logps/rejected": -376.92742919921875, "loss": 0.6918, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1017584800720215, "rewards/margins": 0.5311309099197388, "rewards/rejected": -2.6328892707824707, "step": 18160 }, { "epoch": 2.35, "learning_rate": 1.2116285741608492e-07, "logits/chosen": -2.7286157608032227, "logits/rejected": -2.5832793712615967, "logps/chosen": -597.1136474609375, "logps/rejected": -411.9754943847656, "loss": 0.607, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.4083683490753174, "rewards/margins": 0.747626006603241, "rewards/rejected": -3.1559946537017822, "step": 18170 }, { "epoch": 2.35, "learning_rate": 1.2092378311179113e-07, "logits/chosen": -2.618670701980591, "logits/rejected": -2.5336239337921143, "logps/chosen": -433.1336975097656, "logps/rejected": -357.1441345214844, "loss": 0.6926, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.119419574737549, "rewards/margins": 0.5039975643157959, "rewards/rejected": -2.623417377471924, "step": 18180 }, { "epoch": 2.35, "learning_rate": 1.2068470880749736e-07, "logits/chosen": -2.8420462608337402, "logits/rejected": -2.6511178016662598, "logps/chosen": -656.552001953125, "logps/rejected": -489.8299865722656, "loss": 0.5485, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9629325866699219, "rewards/margins": 0.7635535597801208, "rewards/rejected": -2.7264864444732666, "step": 18190 }, { "epoch": 2.35, "learning_rate": 1.204456345032036e-07, "logits/chosen": -2.660470485687256, "logits/rejected": -2.624178409576416, "logps/chosen": -576.8260498046875, "logps/rejected": -494.6944885253906, "loss": 0.4599, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0988450050354004, "rewards/margins": 1.1637681722640991, "rewards/rejected": -3.262612819671631, "step": 18200 }, { "epoch": 2.35, "learning_rate": 1.202065601989098e-07, "logits/chosen": -2.700143575668335, "logits/rejected": -2.6040701866149902, "logps/chosen": -641.9421997070312, "logps/rejected": -488.45245361328125, "loss": 0.5291, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.178337574005127, "rewards/margins": 0.963810920715332, "rewards/rejected": -3.142148494720459, "step": 18210 }, { "epoch": 2.35, "learning_rate": 1.1996748589461605e-07, "logits/chosen": -2.6774420738220215, "logits/rejected": -2.5739028453826904, "logps/chosen": -605.2735595703125, "logps/rejected": -539.8345947265625, "loss": 0.6713, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.112640857696533, "rewards/margins": 0.4896061420440674, "rewards/rejected": -2.6022467613220215, "step": 18220 }, { "epoch": 2.35, "learning_rate": 1.1972841159032226e-07, "logits/chosen": -2.7473721504211426, "logits/rejected": -2.6537108421325684, "logps/chosen": -470.1644592285156, "logps/rejected": -395.42279052734375, "loss": 0.5385, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9764471054077148, "rewards/margins": 0.9012539982795715, "rewards/rejected": -2.8777012825012207, "step": 18230 }, { "epoch": 2.35, "learning_rate": 1.194893372860285e-07, "logits/chosen": -2.8270201683044434, "logits/rejected": -2.599907636642456, "logps/chosen": -588.9806518554688, "logps/rejected": -409.8807067871094, "loss": 0.5136, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.850198745727539, "rewards/margins": 0.9937502145767212, "rewards/rejected": -2.8439488410949707, "step": 18240 }, { "epoch": 2.36, "learning_rate": 1.192502629817347e-07, "logits/chosen": -2.7036819458007812, "logits/rejected": -2.6958751678466797, "logps/chosen": -492.36834716796875, "logps/rejected": -444.24298095703125, "loss": 0.569, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1610119342803955, "rewards/margins": 0.8058667182922363, "rewards/rejected": -2.966878890991211, "step": 18250 }, { "epoch": 2.36, "learning_rate": 1.1901118867744095e-07, "logits/chosen": -2.783264636993408, "logits/rejected": -2.599867820739746, "logps/chosen": -565.2562255859375, "logps/rejected": -421.2386169433594, "loss": 0.5243, "rewards/accuracies": 0.75, "rewards/chosen": -1.9033435583114624, "rewards/margins": 0.8907788395881653, "rewards/rejected": -2.7941222190856934, "step": 18260 }, { "epoch": 2.36, "learning_rate": 1.1877211437314717e-07, "logits/chosen": -2.768000841140747, "logits/rejected": -2.728956460952759, "logps/chosen": -522.6976318359375, "logps/rejected": -470.0169372558594, "loss": 0.5889, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0995967388153076, "rewards/margins": 0.6989462971687317, "rewards/rejected": -2.7985429763793945, "step": 18270 }, { "epoch": 2.36, "learning_rate": 1.185330400688534e-07, "logits/chosen": -2.7416794300079346, "logits/rejected": -2.6930301189422607, "logps/chosen": -567.6334228515625, "logps/rejected": -488.8018493652344, "loss": 0.6449, "rewards/accuracies": 0.6875, "rewards/chosen": -2.216688394546509, "rewards/margins": 0.7035412788391113, "rewards/rejected": -2.9202301502227783, "step": 18280 }, { "epoch": 2.36, "learning_rate": 1.1829396576455962e-07, "logits/chosen": -2.667585849761963, "logits/rejected": -2.6302809715270996, "logps/chosen": -556.2423095703125, "logps/rejected": -481.38818359375, "loss": 0.6107, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1348941326141357, "rewards/margins": 0.8188976049423218, "rewards/rejected": -2.953792095184326, "step": 18290 }, { "epoch": 2.36, "learning_rate": 1.1805489146026584e-07, "logits/chosen": -2.898458957672119, "logits/rejected": -2.683457851409912, "logps/chosen": -663.6673583984375, "logps/rejected": -457.0132751464844, "loss": 0.658, "rewards/accuracies": 0.6875, "rewards/chosen": -2.095428466796875, "rewards/margins": 0.7670086622238159, "rewards/rejected": -2.8624367713928223, "step": 18300 }, { "epoch": 2.36, "learning_rate": 1.1781581715597206e-07, "logits/chosen": -2.7365589141845703, "logits/rejected": -2.670358896255493, "logps/chosen": -468.6572265625, "logps/rejected": -375.08343505859375, "loss": 0.4389, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8415069580078125, "rewards/margins": 1.0772149562835693, "rewards/rejected": -2.918722152709961, "step": 18310 }, { "epoch": 2.37, "learning_rate": 1.1757674285167829e-07, "logits/chosen": -2.6263718605041504, "logits/rejected": -2.563478946685791, "logps/chosen": -553.5418090820312, "logps/rejected": -485.7294006347656, "loss": 0.7867, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3067588806152344, "rewards/margins": 0.6255373358726501, "rewards/rejected": -2.9322962760925293, "step": 18320 }, { "epoch": 2.37, "learning_rate": 1.1733766854738452e-07, "logits/chosen": -2.756852626800537, "logits/rejected": -2.573373317718506, "logps/chosen": -552.7741088867188, "logps/rejected": -423.81591796875, "loss": 0.5482, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.18180251121521, "rewards/margins": 0.831924557685852, "rewards/rejected": -3.0137269496917725, "step": 18330 }, { "epoch": 2.37, "learning_rate": 1.1709859424309074e-07, "logits/chosen": -2.758836030960083, "logits/rejected": -2.6026527881622314, "logps/chosen": -612.2305908203125, "logps/rejected": -501.1019592285156, "loss": 0.4816, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.8217933177947998, "rewards/margins": 1.393543004989624, "rewards/rejected": -3.215336561203003, "step": 18340 }, { "epoch": 2.37, "learning_rate": 1.1685951993879698e-07, "logits/chosen": -2.668549060821533, "logits/rejected": -2.6022047996520996, "logps/chosen": -530.0105590820312, "logps/rejected": -363.2291564941406, "loss": 0.5914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8914505243301392, "rewards/margins": 0.7443705797195435, "rewards/rejected": -2.6358211040496826, "step": 18350 }, { "epoch": 2.37, "learning_rate": 1.166204456345032e-07, "logits/chosen": -2.7689714431762695, "logits/rejected": -2.7040162086486816, "logps/chosen": -524.55078125, "logps/rejected": -445.57012939453125, "loss": 0.5705, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0442302227020264, "rewards/margins": 0.8746795654296875, "rewards/rejected": -2.918910264968872, "step": 18360 }, { "epoch": 2.37, "learning_rate": 1.1638137133020943e-07, "logits/chosen": -2.618816375732422, "logits/rejected": -2.553802728652954, "logps/chosen": -508.9988708496094, "logps/rejected": -434.32489013671875, "loss": 0.4752, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.7291361093521118, "rewards/margins": 1.2393696308135986, "rewards/rejected": -2.968505382537842, "step": 18370 }, { "epoch": 2.37, "learning_rate": 1.1614229702591565e-07, "logits/chosen": -2.6680305004119873, "logits/rejected": -2.53090238571167, "logps/chosen": -543.0623779296875, "logps/rejected": -466.78863525390625, "loss": 0.5334, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9561214447021484, "rewards/margins": 0.8342452049255371, "rewards/rejected": -2.7903666496276855, "step": 18380 }, { "epoch": 2.37, "learning_rate": 1.1590322272162187e-07, "logits/chosen": -2.6950199604034424, "logits/rejected": -2.600330352783203, "logps/chosen": -489.0272521972656, "logps/rejected": -368.01904296875, "loss": 0.711, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2048027515411377, "rewards/margins": 0.5560616850852966, "rewards/rejected": -2.760864496231079, "step": 18390 }, { "epoch": 2.38, "learning_rate": 1.1566414841732811e-07, "logits/chosen": -2.757288932800293, "logits/rejected": -2.678021192550659, "logps/chosen": -541.6511840820312, "logps/rejected": -510.17193603515625, "loss": 0.5144, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.252772569656372, "rewards/margins": 1.0191926956176758, "rewards/rejected": -3.2719650268554688, "step": 18400 }, { "epoch": 2.38, "learning_rate": 1.1542507411303433e-07, "logits/chosen": -2.7351646423339844, "logits/rejected": -2.6082708835601807, "logps/chosen": -525.4293823242188, "logps/rejected": -404.83880615234375, "loss": 0.6162, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.9865272045135498, "rewards/margins": 0.7146016359329224, "rewards/rejected": -2.7011287212371826, "step": 18410 }, { "epoch": 2.38, "learning_rate": 1.1518599980874055e-07, "logits/chosen": -2.8497023582458496, "logits/rejected": -2.7128615379333496, "logps/chosen": -635.3630981445312, "logps/rejected": -473.33966064453125, "loss": 0.6047, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9853719472885132, "rewards/margins": 0.8255825042724609, "rewards/rejected": -2.8109543323516846, "step": 18420 }, { "epoch": 2.38, "learning_rate": 1.1494692550444678e-07, "logits/chosen": -2.705901622772217, "logits/rejected": -2.6867098808288574, "logps/chosen": -537.240234375, "logps/rejected": -474.0330505371094, "loss": 0.474, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9884541034698486, "rewards/margins": 0.9524306058883667, "rewards/rejected": -2.940884828567505, "step": 18430 }, { "epoch": 2.38, "learning_rate": 1.14707851200153e-07, "logits/chosen": -2.8135428428649902, "logits/rejected": -2.7417774200439453, "logps/chosen": -526.2879638671875, "logps/rejected": -454.2239685058594, "loss": 0.5686, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1232821941375732, "rewards/margins": 0.7659021615982056, "rewards/rejected": -2.8891844749450684, "step": 18440 }, { "epoch": 2.38, "learning_rate": 1.1446877689585922e-07, "logits/chosen": -2.636058807373047, "logits/rejected": -2.551210641860962, "logps/chosen": -550.9734497070312, "logps/rejected": -401.54254150390625, "loss": 0.4667, "rewards/accuracies": 0.75, "rewards/chosen": -2.047877550125122, "rewards/margins": 1.0843894481658936, "rewards/rejected": -3.1322669982910156, "step": 18450 }, { "epoch": 2.38, "learning_rate": 1.1422970259156544e-07, "logits/chosen": -2.7403512001037598, "logits/rejected": -2.5962584018707275, "logps/chosen": -488.72491455078125, "logps/rejected": -375.81414794921875, "loss": 0.5211, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9715337753295898, "rewards/margins": 0.8851419687271118, "rewards/rejected": -2.856675624847412, "step": 18460 }, { "epoch": 2.38, "learning_rate": 1.1399062828727168e-07, "logits/chosen": -2.816074848175049, "logits/rejected": -2.6240220069885254, "logps/chosen": -544.73291015625, "logps/rejected": -374.03411865234375, "loss": 0.432, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.7238562107086182, "rewards/margins": 1.1418755054473877, "rewards/rejected": -2.865731716156006, "step": 18470 }, { "epoch": 2.39, "learning_rate": 1.137515539829779e-07, "logits/chosen": -2.7353901863098145, "logits/rejected": -2.644482135772705, "logps/chosen": -613.2328491210938, "logps/rejected": -427.405517578125, "loss": 0.5383, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.844082236289978, "rewards/margins": 1.0772284269332886, "rewards/rejected": -2.9213106632232666, "step": 18480 }, { "epoch": 2.39, "learning_rate": 1.1351247967868412e-07, "logits/chosen": -2.831766128540039, "logits/rejected": -2.6757454872131348, "logps/chosen": -607.4423217773438, "logps/rejected": -424.759765625, "loss": 0.6348, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.190485715866089, "rewards/margins": 0.748065173625946, "rewards/rejected": -2.9385509490966797, "step": 18490 }, { "epoch": 2.39, "learning_rate": 1.1327340537439036e-07, "logits/chosen": -2.738823890686035, "logits/rejected": -2.68229603767395, "logps/chosen": -561.7499389648438, "logps/rejected": -460.60601806640625, "loss": 0.6621, "rewards/accuracies": 0.625, "rewards/chosen": -2.448803424835205, "rewards/margins": 0.5808150768280029, "rewards/rejected": -3.029618740081787, "step": 18500 }, { "epoch": 2.39, "learning_rate": 1.1303433107009658e-07, "logits/chosen": -2.8012824058532715, "logits/rejected": -2.6704182624816895, "logps/chosen": -531.5572509765625, "logps/rejected": -401.6275634765625, "loss": 0.5586, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8749282360076904, "rewards/margins": 0.8726751208305359, "rewards/rejected": -2.747602939605713, "step": 18510 }, { "epoch": 2.39, "learning_rate": 1.127952567658028e-07, "logits/chosen": -2.748340129852295, "logits/rejected": -2.7026889324188232, "logps/chosen": -552.3614501953125, "logps/rejected": -439.8231506347656, "loss": 0.5118, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.188060998916626, "rewards/margins": 0.9128303527832031, "rewards/rejected": -3.100891590118408, "step": 18520 }, { "epoch": 2.39, "learning_rate": 1.1255618246150904e-07, "logits/chosen": -2.7736802101135254, "logits/rejected": -2.69936466217041, "logps/chosen": -591.8986206054688, "logps/rejected": -462.55499267578125, "loss": 0.5078, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.950627088546753, "rewards/margins": 0.9581171274185181, "rewards/rejected": -2.9087443351745605, "step": 18530 }, { "epoch": 2.39, "learning_rate": 1.1231710815721526e-07, "logits/chosen": -2.687593936920166, "logits/rejected": -2.566742420196533, "logps/chosen": -599.8703002929688, "logps/rejected": -414.8479919433594, "loss": 0.5223, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.119422435760498, "rewards/margins": 0.9381914138793945, "rewards/rejected": -3.0576140880584717, "step": 18540 }, { "epoch": 2.39, "learning_rate": 1.1207803385292149e-07, "logits/chosen": -2.707552433013916, "logits/rejected": -2.6465816497802734, "logps/chosen": -504.62872314453125, "logps/rejected": -374.102294921875, "loss": 0.4736, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0047049522399902, "rewards/margins": 1.0155727863311768, "rewards/rejected": -3.020277500152588, "step": 18550 }, { "epoch": 2.4, "learning_rate": 1.1183895954862771e-07, "logits/chosen": -2.8225905895233154, "logits/rejected": -2.6566648483276367, "logps/chosen": -625.9871826171875, "logps/rejected": -453.8692932128906, "loss": 0.6449, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0906128883361816, "rewards/margins": 0.7409902811050415, "rewards/rejected": -2.8316028118133545, "step": 18560 }, { "epoch": 2.4, "learning_rate": 1.1159988524433393e-07, "logits/chosen": -2.808833122253418, "logits/rejected": -2.6559770107269287, "logps/chosen": -579.4532470703125, "logps/rejected": -416.6014709472656, "loss": 0.57, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.303123712539673, "rewards/margins": 0.7322690486907959, "rewards/rejected": -3.0353927612304688, "step": 18570 }, { "epoch": 2.4, "learning_rate": 1.1136081094004016e-07, "logits/chosen": -2.734696865081787, "logits/rejected": -2.5887513160705566, "logps/chosen": -654.8181762695312, "logps/rejected": -489.48858642578125, "loss": 0.4424, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.086456298828125, "rewards/margins": 1.3376398086547852, "rewards/rejected": -3.4240963459014893, "step": 18580 }, { "epoch": 2.4, "learning_rate": 1.1112173663574638e-07, "logits/chosen": -2.689396381378174, "logits/rejected": -2.5985798835754395, "logps/chosen": -499.40545654296875, "logps/rejected": -447.6239318847656, "loss": 0.6886, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2742693424224854, "rewards/margins": 0.5694490671157837, "rewards/rejected": -2.8437182903289795, "step": 18590 }, { "epoch": 2.4, "learning_rate": 1.1088266233145261e-07, "logits/chosen": -2.77872896194458, "logits/rejected": -2.667186975479126, "logps/chosen": -462.96929931640625, "logps/rejected": -373.6197509765625, "loss": 0.5311, "rewards/accuracies": 0.75, "rewards/chosen": -2.070814847946167, "rewards/margins": 0.8542320132255554, "rewards/rejected": -2.9250471591949463, "step": 18600 }, { "epoch": 2.4, "learning_rate": 1.1064358802715884e-07, "logits/chosen": -2.8359484672546387, "logits/rejected": -2.6942543983459473, "logps/chosen": -577.6190185546875, "logps/rejected": -407.8101806640625, "loss": 0.5317, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9975967407226562, "rewards/margins": 1.1487822532653809, "rewards/rejected": -3.146378993988037, "step": 18610 }, { "epoch": 2.4, "learning_rate": 1.1040451372286506e-07, "logits/chosen": -2.713413715362549, "logits/rejected": -2.6246695518493652, "logps/chosen": -509.9126892089844, "logps/rejected": -409.60137939453125, "loss": 0.6261, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.098928451538086, "rewards/margins": 0.6916003227233887, "rewards/rejected": -2.7905287742614746, "step": 18620 }, { "epoch": 2.41, "learning_rate": 1.1016543941857128e-07, "logits/chosen": -2.757463216781616, "logits/rejected": -2.6708812713623047, "logps/chosen": -564.2911376953125, "logps/rejected": -464.92803955078125, "loss": 0.6118, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2404558658599854, "rewards/margins": 0.6181926131248474, "rewards/rejected": -2.8586485385894775, "step": 18630 }, { "epoch": 2.41, "learning_rate": 1.099263651142775e-07, "logits/chosen": -2.6490402221679688, "logits/rejected": -2.527176856994629, "logps/chosen": -554.17041015625, "logps/rejected": -395.9151306152344, "loss": 0.5216, "rewards/accuracies": 0.8125, "rewards/chosen": -2.084364175796509, "rewards/margins": 1.0149797201156616, "rewards/rejected": -3.09934401512146, "step": 18640 }, { "epoch": 2.41, "learning_rate": 1.0968729080998374e-07, "logits/chosen": -2.73283052444458, "logits/rejected": -2.6470961570739746, "logps/chosen": -566.7198486328125, "logps/rejected": -441.896484375, "loss": 0.5608, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8103748559951782, "rewards/margins": 1.1343493461608887, "rewards/rejected": -2.9447243213653564, "step": 18650 }, { "epoch": 2.41, "learning_rate": 1.0944821650568996e-07, "logits/chosen": -2.7387542724609375, "logits/rejected": -2.62912917137146, "logps/chosen": -502.10870361328125, "logps/rejected": -403.4027099609375, "loss": 0.5224, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0033748149871826, "rewards/margins": 0.9424899816513062, "rewards/rejected": -2.9458651542663574, "step": 18660 }, { "epoch": 2.41, "learning_rate": 1.092091422013962e-07, "logits/chosen": -2.7266602516174316, "logits/rejected": -2.6428542137145996, "logps/chosen": -583.3487548828125, "logps/rejected": -463.05523681640625, "loss": 0.4029, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9776337146759033, "rewards/margins": 1.3617231845855713, "rewards/rejected": -3.3393568992614746, "step": 18670 }, { "epoch": 2.41, "learning_rate": 1.0897006789710242e-07, "logits/chosen": -2.6263794898986816, "logits/rejected": -2.6880435943603516, "logps/chosen": -472.36126708984375, "logps/rejected": -416.763916015625, "loss": 0.6233, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.248211622238159, "rewards/margins": 0.7002733945846558, "rewards/rejected": -2.9484851360321045, "step": 18680 }, { "epoch": 2.41, "learning_rate": 1.0873099359280864e-07, "logits/chosen": -2.7636938095092773, "logits/rejected": -2.6829121112823486, "logps/chosen": -562.0481567382812, "logps/rejected": -515.7732543945312, "loss": 0.5244, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0632283687591553, "rewards/margins": 1.010774850845337, "rewards/rejected": -3.074002981185913, "step": 18690 }, { "epoch": 2.41, "learning_rate": 1.0849191928851487e-07, "logits/chosen": -2.78576922416687, "logits/rejected": -2.7567169666290283, "logps/chosen": -511.579833984375, "logps/rejected": -461.202880859375, "loss": 0.6138, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2588207721710205, "rewards/margins": 0.7442539930343628, "rewards/rejected": -3.0030744075775146, "step": 18700 }, { "epoch": 2.42, "learning_rate": 1.0825284498422109e-07, "logits/chosen": -2.794729709625244, "logits/rejected": -2.6773030757904053, "logps/chosen": -644.7080688476562, "logps/rejected": -583.9110107421875, "loss": 0.6649, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.209744453430176, "rewards/margins": 0.7097281217575073, "rewards/rejected": -2.9194722175598145, "step": 18710 }, { "epoch": 2.42, "learning_rate": 1.0801377067992731e-07, "logits/chosen": -2.7640950679779053, "logits/rejected": -2.611459970474243, "logps/chosen": -556.0095825195312, "logps/rejected": -396.8251647949219, "loss": 0.6477, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.110701084136963, "rewards/margins": 0.6349871754646301, "rewards/rejected": -2.745687961578369, "step": 18720 }, { "epoch": 2.42, "learning_rate": 1.0777469637563354e-07, "logits/chosen": -2.7216615676879883, "logits/rejected": -2.628232002258301, "logps/chosen": -582.3121337890625, "logps/rejected": -473.7173767089844, "loss": 0.6308, "rewards/accuracies": 0.625, "rewards/chosen": -2.1923422813415527, "rewards/margins": 0.5897038578987122, "rewards/rejected": -2.782045841217041, "step": 18730 }, { "epoch": 2.42, "learning_rate": 1.0753562207133977e-07, "logits/chosen": -2.6769967079162598, "logits/rejected": -2.610077381134033, "logps/chosen": -506.7056579589844, "logps/rejected": -426.99932861328125, "loss": 0.5374, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.9864654541015625, "rewards/margins": 0.9932409524917603, "rewards/rejected": -2.979706287384033, "step": 18740 }, { "epoch": 2.42, "learning_rate": 1.07296547767046e-07, "logits/chosen": -2.8508753776550293, "logits/rejected": -2.7465620040893555, "logps/chosen": -595.6550903320312, "logps/rejected": -485.95025634765625, "loss": 0.5473, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1861610412597656, "rewards/margins": 1.0554287433624268, "rewards/rejected": -3.2415900230407715, "step": 18750 }, { "epoch": 2.42, "learning_rate": 1.0705747346275222e-07, "logits/chosen": -2.897310256958008, "logits/rejected": -2.713268995285034, "logps/chosen": -639.93408203125, "logps/rejected": -470.25946044921875, "loss": 0.6316, "rewards/accuracies": 0.625, "rewards/chosen": -2.309260368347168, "rewards/margins": 0.7452324032783508, "rewards/rejected": -3.054492473602295, "step": 18760 }, { "epoch": 2.42, "learning_rate": 1.0681839915845844e-07, "logits/chosen": -2.7562341690063477, "logits/rejected": -2.660731792449951, "logps/chosen": -557.56591796875, "logps/rejected": -468.6529235839844, "loss": 0.5338, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0268359184265137, "rewards/margins": 1.1473290920257568, "rewards/rejected": -3.1741652488708496, "step": 18770 }, { "epoch": 2.42, "learning_rate": 1.0657932485416466e-07, "logits/chosen": -2.844999313354492, "logits/rejected": -2.6490578651428223, "logps/chosen": -545.1961669921875, "logps/rejected": -338.53631591796875, "loss": 0.6187, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.031782627105713, "rewards/margins": 0.8206151127815247, "rewards/rejected": -2.8523976802825928, "step": 18780 }, { "epoch": 2.43, "learning_rate": 1.063402505498709e-07, "logits/chosen": -2.7417399883270264, "logits/rejected": -2.6307952404022217, "logps/chosen": -513.4461669921875, "logps/rejected": -415.917724609375, "loss": 0.5556, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.069746732711792, "rewards/margins": 0.7575851082801819, "rewards/rejected": -2.827332019805908, "step": 18790 }, { "epoch": 2.43, "learning_rate": 1.0610117624557712e-07, "logits/chosen": -2.8776373863220215, "logits/rejected": -2.741461992263794, "logps/chosen": -529.5042724609375, "logps/rejected": -381.4726867675781, "loss": 0.5008, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0164780616760254, "rewards/margins": 1.085435390472412, "rewards/rejected": -3.1019134521484375, "step": 18800 }, { "epoch": 2.43, "learning_rate": 1.0586210194128336e-07, "logits/chosen": -2.8228306770324707, "logits/rejected": -2.682347536087036, "logps/chosen": -573.9780883789062, "logps/rejected": -434.73895263671875, "loss": 0.6053, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.2783775329589844, "rewards/margins": 0.9150528907775879, "rewards/rejected": -3.1934304237365723, "step": 18810 }, { "epoch": 2.43, "learning_rate": 1.0562302763698958e-07, "logits/chosen": -2.729815721511841, "logits/rejected": -2.6192259788513184, "logps/chosen": -650.429443359375, "logps/rejected": -474.07647705078125, "loss": 0.6464, "rewards/accuracies": 0.625, "rewards/chosen": -2.260633945465088, "rewards/margins": 0.7445657849311829, "rewards/rejected": -3.005199909210205, "step": 18820 }, { "epoch": 2.43, "learning_rate": 1.053839533326958e-07, "logits/chosen": -2.6484768390655518, "logits/rejected": -2.532216787338257, "logps/chosen": -561.2017211914062, "logps/rejected": -413.0906677246094, "loss": 0.583, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1708366870880127, "rewards/margins": 0.9378172755241394, "rewards/rejected": -3.108654022216797, "step": 18830 }, { "epoch": 2.43, "learning_rate": 1.0514487902840202e-07, "logits/chosen": -2.74904727935791, "logits/rejected": -2.6126132011413574, "logps/chosen": -550.2039184570312, "logps/rejected": -400.21514892578125, "loss": 0.5095, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.257091999053955, "rewards/margins": 0.9771696925163269, "rewards/rejected": -3.2342617511749268, "step": 18840 }, { "epoch": 2.43, "learning_rate": 1.0490580472410825e-07, "logits/chosen": -2.873419761657715, "logits/rejected": -2.6893391609191895, "logps/chosen": -552.3407592773438, "logps/rejected": -395.4771423339844, "loss": 0.5199, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.076023817062378, "rewards/margins": 1.0345919132232666, "rewards/rejected": -3.1106154918670654, "step": 18850 }, { "epoch": 2.43, "learning_rate": 1.0466673041981447e-07, "logits/chosen": -2.6537208557128906, "logits/rejected": -2.6292943954467773, "logps/chosen": -486.792236328125, "logps/rejected": -389.5815124511719, "loss": 0.5375, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.9066194295883179, "rewards/margins": 0.9474590420722961, "rewards/rejected": -2.854078769683838, "step": 18860 }, { "epoch": 2.44, "learning_rate": 1.0442765611552069e-07, "logits/chosen": -2.7120559215545654, "logits/rejected": -2.5857977867126465, "logps/chosen": -477.69757080078125, "logps/rejected": -381.6788024902344, "loss": 0.5915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.116589069366455, "rewards/margins": 0.7676935791969299, "rewards/rejected": -2.8842830657958984, "step": 18870 }, { "epoch": 2.44, "learning_rate": 1.0418858181122693e-07, "logits/chosen": -2.6238934993743896, "logits/rejected": -2.5364880561828613, "logps/chosen": -551.9927368164062, "logps/rejected": -454.6644592285156, "loss": 0.4208, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9028370380401611, "rewards/margins": 1.2626004219055176, "rewards/rejected": -3.165437936782837, "step": 18880 }, { "epoch": 2.44, "learning_rate": 1.0394950750693315e-07, "logits/chosen": -2.743945360183716, "logits/rejected": -2.6717352867126465, "logps/chosen": -580.78076171875, "logps/rejected": -409.841064453125, "loss": 0.5504, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9533271789550781, "rewards/margins": 0.9435661435127258, "rewards/rejected": -2.8968935012817383, "step": 18890 }, { "epoch": 2.44, "learning_rate": 1.0371043320263937e-07, "logits/chosen": -2.7699363231658936, "logits/rejected": -2.645638942718506, "logps/chosen": -552.15234375, "logps/rejected": -440.53436279296875, "loss": 0.5563, "rewards/accuracies": 0.6875, "rewards/chosen": -2.069699764251709, "rewards/margins": 0.8378154039382935, "rewards/rejected": -2.907515287399292, "step": 18900 }, { "epoch": 2.44, "learning_rate": 1.034713588983456e-07, "logits/chosen": -2.794978618621826, "logits/rejected": -2.6698226928710938, "logps/chosen": -532.04931640625, "logps/rejected": -384.5025939941406, "loss": 0.6486, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2643189430236816, "rewards/margins": 0.7574759125709534, "rewards/rejected": -3.0217947959899902, "step": 18910 }, { "epoch": 2.44, "learning_rate": 1.0323228459405182e-07, "logits/chosen": -2.6587367057800293, "logits/rejected": -2.5602962970733643, "logps/chosen": -535.7734375, "logps/rejected": -412.240966796875, "loss": 0.4925, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.105694055557251, "rewards/margins": 1.059140920639038, "rewards/rejected": -3.16483473777771, "step": 18920 }, { "epoch": 2.44, "learning_rate": 1.0299321028975804e-07, "logits/chosen": -2.8558738231658936, "logits/rejected": -2.676359176635742, "logps/chosen": -574.6197509765625, "logps/rejected": -428.3680725097656, "loss": 0.5717, "rewards/accuracies": 0.6875, "rewards/chosen": -2.098034620285034, "rewards/margins": 0.7242143750190735, "rewards/rejected": -2.822248935699463, "step": 18930 }, { "epoch": 2.45, "learning_rate": 1.0275413598546428e-07, "logits/chosen": -2.762321949005127, "logits/rejected": -2.619917392730713, "logps/chosen": -582.3015747070312, "logps/rejected": -459.79583740234375, "loss": 0.5642, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.981747031211853, "rewards/margins": 0.8172538876533508, "rewards/rejected": -2.7990007400512695, "step": 18940 }, { "epoch": 2.45, "learning_rate": 1.0251506168117051e-07, "logits/chosen": -2.7442967891693115, "logits/rejected": -2.6530568599700928, "logps/chosen": -469.5162658691406, "logps/rejected": -427.8858947753906, "loss": 0.5917, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.142667293548584, "rewards/margins": 0.7732492089271545, "rewards/rejected": -2.9159164428710938, "step": 18950 }, { "epoch": 2.45, "learning_rate": 1.0227598737687674e-07, "logits/chosen": -2.7266042232513428, "logits/rejected": -2.577867031097412, "logps/chosen": -661.1569213867188, "logps/rejected": -482.1021423339844, "loss": 0.5949, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0357227325439453, "rewards/margins": 1.0267608165740967, "rewards/rejected": -3.062483310699463, "step": 18960 }, { "epoch": 2.45, "learning_rate": 1.0203691307258296e-07, "logits/chosen": -2.8672916889190674, "logits/rejected": -2.733881711959839, "logps/chosen": -561.8352661132812, "logps/rejected": -431.24609375, "loss": 0.5693, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2075884342193604, "rewards/margins": 0.8342339396476746, "rewards/rejected": -3.0418224334716797, "step": 18970 }, { "epoch": 2.45, "learning_rate": 1.0179783876828918e-07, "logits/chosen": -2.8510501384735107, "logits/rejected": -2.6942696571350098, "logps/chosen": -572.39404296875, "logps/rejected": -408.1448974609375, "loss": 0.6219, "rewards/accuracies": 0.625, "rewards/chosen": -2.2338168621063232, "rewards/margins": 0.7116025686264038, "rewards/rejected": -2.9454197883605957, "step": 18980 }, { "epoch": 2.45, "learning_rate": 1.015587644639954e-07, "logits/chosen": -2.7350757122039795, "logits/rejected": -2.5478110313415527, "logps/chosen": -619.7872314453125, "logps/rejected": -397.47686767578125, "loss": 0.5687, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.094386577606201, "rewards/margins": 0.8649903535842896, "rewards/rejected": -2.959376811981201, "step": 18990 }, { "epoch": 2.45, "learning_rate": 1.0131969015970163e-07, "logits/chosen": -2.7962164878845215, "logits/rejected": -2.7617506980895996, "logps/chosen": -484.1336364746094, "logps/rejected": -417.95037841796875, "loss": 0.6627, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.307527542114258, "rewards/margins": 0.5399079322814941, "rewards/rejected": -2.847435474395752, "step": 19000 }, { "epoch": 2.45, "eval_logits/chosen": -3.0864267349243164, "eval_logits/rejected": -3.036318302154541, "eval_logps/chosen": -541.2406616210938, "eval_logps/rejected": -421.577880859375, "eval_loss": 0.6142950654029846, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.942097008228302, "eval_rewards/margins": 1.122825026512146, "eval_rewards/rejected": -2.0649218559265137, "eval_runtime": 288.3772, "eval_samples_per_second": 6.935, "eval_steps_per_second": 3.468, "step": 19000 }, { "epoch": 2.45, "learning_rate": 1.0108061585540785e-07, "logits/chosen": -2.7453596591949463, "logits/rejected": -2.638120412826538, "logps/chosen": -535.0828857421875, "logps/rejected": -397.49859619140625, "loss": 0.5971, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1873068809509277, "rewards/margins": 0.6219242811203003, "rewards/rejected": -2.8092310428619385, "step": 19010 }, { "epoch": 2.46, "learning_rate": 1.0084154155111409e-07, "logits/chosen": -2.6286816596984863, "logits/rejected": -2.5156288146972656, "logps/chosen": -495.105712890625, "logps/rejected": -395.59686279296875, "loss": 0.5779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0490849018096924, "rewards/margins": 0.7636421918869019, "rewards/rejected": -2.8127269744873047, "step": 19020 }, { "epoch": 2.46, "learning_rate": 1.0060246724682031e-07, "logits/chosen": -2.8026814460754395, "logits/rejected": -2.7225558757781982, "logps/chosen": -528.8698120117188, "logps/rejected": -434.31695556640625, "loss": 0.496, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0511631965637207, "rewards/margins": 1.1247062683105469, "rewards/rejected": -3.1758694648742676, "step": 19030 }, { "epoch": 2.46, "learning_rate": 1.0036339294252653e-07, "logits/chosen": -2.7237422466278076, "logits/rejected": -2.603633403778076, "logps/chosen": -499.31146240234375, "logps/rejected": -407.81390380859375, "loss": 0.4999, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8962252140045166, "rewards/margins": 1.05374014377594, "rewards/rejected": -2.949965000152588, "step": 19040 }, { "epoch": 2.46, "learning_rate": 1.0012431863823275e-07, "logits/chosen": -2.743579864501953, "logits/rejected": -2.5663809776306152, "logps/chosen": -625.6654052734375, "logps/rejected": -408.82928466796875, "loss": 0.4552, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.401489734649658, "rewards/margins": 1.0929614305496216, "rewards/rejected": -3.4944510459899902, "step": 19050 }, { "epoch": 2.46, "learning_rate": 9.988524433393898e-08, "logits/chosen": -2.718161106109619, "logits/rejected": -2.6533470153808594, "logps/chosen": -552.1575317382812, "logps/rejected": -416.62738037109375, "loss": 0.727, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.9643466472625732, "rewards/margins": 0.5921102166175842, "rewards/rejected": -2.556457042694092, "step": 19060 }, { "epoch": 2.46, "learning_rate": 9.96461700296452e-08, "logits/chosen": -2.7053287029266357, "logits/rejected": -2.597355365753174, "logps/chosen": -548.4168090820312, "logps/rejected": -479.6275939941406, "loss": 0.5948, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.380770683288574, "rewards/margins": 0.707770824432373, "rewards/rejected": -3.0885415077209473, "step": 19070 }, { "epoch": 2.46, "learning_rate": 9.940709572535142e-08, "logits/chosen": -2.6311798095703125, "logits/rejected": -2.5311262607574463, "logps/chosen": -466.7571716308594, "logps/rejected": -390.60076904296875, "loss": 0.4827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8148744106292725, "rewards/margins": 1.0868048667907715, "rewards/rejected": -2.901679277420044, "step": 19080 }, { "epoch": 2.46, "learning_rate": 9.916802142105767e-08, "logits/chosen": -2.7135818004608154, "logits/rejected": -2.589370012283325, "logps/chosen": -565.0579833984375, "logps/rejected": -424.1441345214844, "loss": 0.6405, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2474706172943115, "rewards/margins": 0.7574498653411865, "rewards/rejected": -3.004920482635498, "step": 19090 }, { "epoch": 2.47, "learning_rate": 9.89289471167639e-08, "logits/chosen": -2.697537660598755, "logits/rejected": -2.6452202796936035, "logps/chosen": -492.76116943359375, "logps/rejected": -409.081298828125, "loss": 0.4785, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2678749561309814, "rewards/margins": 0.9458745121955872, "rewards/rejected": -3.213749408721924, "step": 19100 }, { "epoch": 2.47, "learning_rate": 9.868987281247012e-08, "logits/chosen": -2.7784125804901123, "logits/rejected": -2.6732301712036133, "logps/chosen": -608.0958251953125, "logps/rejected": -528.2266845703125, "loss": 0.5959, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2093348503112793, "rewards/margins": 0.660503625869751, "rewards/rejected": -2.8698384761810303, "step": 19110 }, { "epoch": 2.47, "learning_rate": 9.845079850817634e-08, "logits/chosen": -2.736943483352661, "logits/rejected": -2.586834192276001, "logps/chosen": -529.4918823242188, "logps/rejected": -408.23004150390625, "loss": 0.5127, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0847299098968506, "rewards/margins": 0.973480224609375, "rewards/rejected": -3.058210611343384, "step": 19120 }, { "epoch": 2.47, "learning_rate": 9.821172420388256e-08, "logits/chosen": -2.6957602500915527, "logits/rejected": -2.57033109664917, "logps/chosen": -519.472900390625, "logps/rejected": -436.2103576660156, "loss": 0.4578, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.031879425048828, "rewards/margins": 1.1354581117630005, "rewards/rejected": -3.167337417602539, "step": 19130 }, { "epoch": 2.47, "learning_rate": 9.797264989958878e-08, "logits/chosen": -2.8326315879821777, "logits/rejected": -2.68692946434021, "logps/chosen": -584.4713745117188, "logps/rejected": -452.7989807128906, "loss": 0.6351, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.196289539337158, "rewards/margins": 0.6322949528694153, "rewards/rejected": -2.828584671020508, "step": 19140 }, { "epoch": 2.47, "learning_rate": 9.773357559529501e-08, "logits/chosen": -2.6839935779571533, "logits/rejected": -2.555736541748047, "logps/chosen": -648.3729858398438, "logps/rejected": -482.10736083984375, "loss": 0.5275, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1069962978363037, "rewards/margins": 1.1650108098983765, "rewards/rejected": -3.2720069885253906, "step": 19150 }, { "epoch": 2.47, "learning_rate": 9.749450129100124e-08, "logits/chosen": -2.7242391109466553, "logits/rejected": -2.658827304840088, "logps/chosen": -478.8045959472656, "logps/rejected": -415.1917419433594, "loss": 0.5841, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0619733333587646, "rewards/margins": 0.6292322874069214, "rewards/rejected": -2.6912055015563965, "step": 19160 }, { "epoch": 2.47, "learning_rate": 9.725542698670747e-08, "logits/chosen": -2.6924610137939453, "logits/rejected": -2.490752696990967, "logps/chosen": -602.2577514648438, "logps/rejected": -426.0320739746094, "loss": 0.491, "rewards/accuracies": 0.75, "rewards/chosen": -2.118534564971924, "rewards/margins": 1.0357484817504883, "rewards/rejected": -3.154283046722412, "step": 19170 }, { "epoch": 2.48, "learning_rate": 9.701635268241369e-08, "logits/chosen": -2.783731698989868, "logits/rejected": -2.727703809738159, "logps/chosen": -586.3980712890625, "logps/rejected": -450.3226623535156, "loss": 0.5918, "rewards/accuracies": 0.75, "rewards/chosen": -2.41746187210083, "rewards/margins": 0.888473391532898, "rewards/rejected": -3.3059356212615967, "step": 19180 }, { "epoch": 2.48, "learning_rate": 9.677727837811991e-08, "logits/chosen": -2.805243730545044, "logits/rejected": -2.724181652069092, "logps/chosen": -589.0443115234375, "logps/rejected": -464.52923583984375, "loss": 0.5257, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1196396350860596, "rewards/margins": 0.9018366932868958, "rewards/rejected": -3.0214760303497314, "step": 19190 }, { "epoch": 2.48, "learning_rate": 9.653820407382613e-08, "logits/chosen": -2.713942050933838, "logits/rejected": -2.608776569366455, "logps/chosen": -467.61932373046875, "logps/rejected": -375.341796875, "loss": 0.6357, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.949814796447754, "rewards/margins": 0.5333492159843445, "rewards/rejected": -2.4831643104553223, "step": 19200 }, { "epoch": 2.48, "learning_rate": 9.629912976953236e-08, "logits/chosen": -2.8502869606018066, "logits/rejected": -2.687049388885498, "logps/chosen": -552.6864013671875, "logps/rejected": -400.3572082519531, "loss": 0.5995, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.0848288536071777, "rewards/margins": 0.7618759870529175, "rewards/rejected": -2.8467049598693848, "step": 19210 }, { "epoch": 2.48, "learning_rate": 9.606005546523859e-08, "logits/chosen": -2.7033677101135254, "logits/rejected": -2.6516857147216797, "logps/chosen": -582.6920166015625, "logps/rejected": -456.14361572265625, "loss": 0.4835, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8872907161712646, "rewards/margins": 0.9801012873649597, "rewards/rejected": -2.8673925399780273, "step": 19220 }, { "epoch": 2.48, "learning_rate": 9.582098116094483e-08, "logits/chosen": -2.771594285964966, "logits/rejected": -2.7103400230407715, "logps/chosen": -530.6552734375, "logps/rejected": -428.91552734375, "loss": 0.5503, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.808091163635254, "rewards/margins": 0.9341049194335938, "rewards/rejected": -2.7421958446502686, "step": 19230 }, { "epoch": 2.48, "learning_rate": 9.558190685665105e-08, "logits/chosen": -2.740077257156372, "logits/rejected": -2.659680128097534, "logps/chosen": -544.5023193359375, "logps/rejected": -412.074462890625, "loss": 0.6794, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.289792537689209, "rewards/margins": 0.6105764508247375, "rewards/rejected": -2.9003689289093018, "step": 19240 }, { "epoch": 2.49, "learning_rate": 9.534283255235727e-08, "logits/chosen": -2.7312474250793457, "logits/rejected": -2.5929598808288574, "logps/chosen": -646.4603881835938, "logps/rejected": -509.7591857910156, "loss": 0.4523, "rewards/accuracies": 0.75, "rewards/chosen": -2.0434348583221436, "rewards/margins": 1.3853453397750854, "rewards/rejected": -3.4287807941436768, "step": 19250 }, { "epoch": 2.49, "learning_rate": 9.51037582480635e-08, "logits/chosen": -2.7293596267700195, "logits/rejected": -2.6517107486724854, "logps/chosen": -525.4467163085938, "logps/rejected": -400.6045837402344, "loss": 0.5008, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9504159688949585, "rewards/margins": 0.9475302696228027, "rewards/rejected": -2.897946357727051, "step": 19260 }, { "epoch": 2.49, "learning_rate": 9.486468394376972e-08, "logits/chosen": -2.833207607269287, "logits/rejected": -2.560232162475586, "logps/chosen": -566.4119873046875, "logps/rejected": -350.86114501953125, "loss": 0.5731, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1980044841766357, "rewards/margins": 0.8872610330581665, "rewards/rejected": -3.085265636444092, "step": 19270 }, { "epoch": 2.49, "learning_rate": 9.462560963947594e-08, "logits/chosen": -2.7527127265930176, "logits/rejected": -2.6156537532806396, "logps/chosen": -654.4334106445312, "logps/rejected": -474.046630859375, "loss": 0.5738, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.3083889484405518, "rewards/margins": 0.9375044107437134, "rewards/rejected": -3.2458934783935547, "step": 19280 }, { "epoch": 2.49, "learning_rate": 9.438653533518218e-08, "logits/chosen": -2.797088384628296, "logits/rejected": -2.6702375411987305, "logps/chosen": -498.610595703125, "logps/rejected": -425.49267578125, "loss": 0.5109, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.177082061767578, "rewards/margins": 0.9746294021606445, "rewards/rejected": -3.1517112255096436, "step": 19290 }, { "epoch": 2.49, "learning_rate": 9.41474610308884e-08, "logits/chosen": -2.857417345046997, "logits/rejected": -2.7703232765197754, "logps/chosen": -536.4822387695312, "logps/rejected": -461.37603759765625, "loss": 0.5126, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1349072456359863, "rewards/margins": 0.811766505241394, "rewards/rejected": -2.94667387008667, "step": 19300 }, { "epoch": 2.49, "learning_rate": 9.390838672659462e-08, "logits/chosen": -2.7244954109191895, "logits/rejected": -2.6266109943389893, "logps/chosen": -583.8863525390625, "logps/rejected": -442.72930908203125, "loss": 0.6406, "rewards/accuracies": 0.625, "rewards/chosen": -2.160792112350464, "rewards/margins": 0.7159017324447632, "rewards/rejected": -2.8766939640045166, "step": 19310 }, { "epoch": 2.49, "learning_rate": 9.366931242230085e-08, "logits/chosen": -2.933262348175049, "logits/rejected": -2.800312042236328, "logps/chosen": -561.7196044921875, "logps/rejected": -412.30218505859375, "loss": 0.6602, "rewards/accuracies": 0.625, "rewards/chosen": -2.0325050354003906, "rewards/margins": 0.7086345553398132, "rewards/rejected": -2.7411396503448486, "step": 19320 }, { "epoch": 2.5, "learning_rate": 9.343023811800707e-08, "logits/chosen": -2.734588146209717, "logits/rejected": -2.6187753677368164, "logps/chosen": -492.47076416015625, "logps/rejected": -431.343017578125, "loss": 0.5946, "rewards/accuracies": 0.6875, "rewards/chosen": -2.233388662338257, "rewards/margins": 0.6637065410614014, "rewards/rejected": -2.8970954418182373, "step": 19330 }, { "epoch": 2.5, "learning_rate": 9.319116381371329e-08, "logits/chosen": -2.73128080368042, "logits/rejected": -2.691565990447998, "logps/chosen": -577.3052978515625, "logps/rejected": -461.1766052246094, "loss": 0.5722, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1745028495788574, "rewards/margins": 0.8270454406738281, "rewards/rejected": -3.0015485286712646, "step": 19340 }, { "epoch": 2.5, "learning_rate": 9.295208950941951e-08, "logits/chosen": -2.7340567111968994, "logits/rejected": -2.660076379776001, "logps/chosen": -597.1727905273438, "logps/rejected": -461.70819091796875, "loss": 0.5549, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0808520317077637, "rewards/margins": 0.9161171913146973, "rewards/rejected": -2.99696946144104, "step": 19350 }, { "epoch": 2.5, "learning_rate": 9.271301520512575e-08, "logits/chosen": -2.706535577774048, "logits/rejected": -2.663726568222046, "logps/chosen": -573.455078125, "logps/rejected": -474.17803955078125, "loss": 0.5973, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1959147453308105, "rewards/margins": 0.7170886397361755, "rewards/rejected": -2.913003444671631, "step": 19360 }, { "epoch": 2.5, "learning_rate": 9.247394090083197e-08, "logits/chosen": -2.8288309574127197, "logits/rejected": -2.704955577850342, "logps/chosen": -650.1490478515625, "logps/rejected": -460.93927001953125, "loss": 0.5418, "rewards/accuracies": 0.6875, "rewards/chosen": -2.003643751144409, "rewards/margins": 0.8705998659133911, "rewards/rejected": -2.8742434978485107, "step": 19370 }, { "epoch": 2.5, "learning_rate": 9.223486659653821e-08, "logits/chosen": -2.783207654953003, "logits/rejected": -2.551589250564575, "logps/chosen": -610.6886596679688, "logps/rejected": -395.4952697753906, "loss": 0.5146, "rewards/accuracies": 0.8125, "rewards/chosen": -2.188904285430908, "rewards/margins": 0.9053055047988892, "rewards/rejected": -3.0942094326019287, "step": 19380 }, { "epoch": 2.5, "learning_rate": 9.199579229224443e-08, "logits/chosen": -2.713576555252075, "logits/rejected": -2.6777665615081787, "logps/chosen": -515.0997314453125, "logps/rejected": -469.24542236328125, "loss": 0.5971, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0128273963928223, "rewards/margins": 0.8195663690567017, "rewards/rejected": -2.8323938846588135, "step": 19390 }, { "epoch": 2.5, "learning_rate": 9.175671798795065e-08, "logits/chosen": -2.741227626800537, "logits/rejected": -2.58699369430542, "logps/chosen": -472.35382080078125, "logps/rejected": -347.6202697753906, "loss": 0.5912, "rewards/accuracies": 0.625, "rewards/chosen": -2.065242290496826, "rewards/margins": 0.7492576837539673, "rewards/rejected": -2.814500093460083, "step": 19400 }, { "epoch": 2.51, "learning_rate": 9.151764368365688e-08, "logits/chosen": -2.727964162826538, "logits/rejected": -2.698646068572998, "logps/chosen": -473.4769592285156, "logps/rejected": -419.8558654785156, "loss": 0.6043, "rewards/accuracies": 0.6875, "rewards/chosen": -2.010582447052002, "rewards/margins": 0.6590703129768372, "rewards/rejected": -2.6696529388427734, "step": 19410 }, { "epoch": 2.51, "learning_rate": 9.12785693793631e-08, "logits/chosen": -2.6661224365234375, "logits/rejected": -2.5515589714050293, "logps/chosen": -606.1690673828125, "logps/rejected": -434.35797119140625, "loss": 0.5934, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.256695032119751, "rewards/margins": 0.7265981435775757, "rewards/rejected": -2.983293056488037, "step": 19420 }, { "epoch": 2.51, "learning_rate": 9.103949507506933e-08, "logits/chosen": -2.721308469772339, "logits/rejected": -2.610107898712158, "logps/chosen": -541.8231201171875, "logps/rejected": -443.25933837890625, "loss": 0.681, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.26631498336792, "rewards/margins": 0.5029581189155579, "rewards/rejected": -2.769273281097412, "step": 19430 }, { "epoch": 2.51, "learning_rate": 9.080042077077556e-08, "logits/chosen": -2.846980094909668, "logits/rejected": -2.713671922683716, "logps/chosen": -634.684814453125, "logps/rejected": -450.0335998535156, "loss": 0.5571, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.10371470451355, "rewards/margins": 1.0998656749725342, "rewards/rejected": -3.203580379486084, "step": 19440 }, { "epoch": 2.51, "learning_rate": 9.056134646648178e-08, "logits/chosen": -2.7838871479034424, "logits/rejected": -2.6333909034729004, "logps/chosen": -567.9153442382812, "logps/rejected": -391.12908935546875, "loss": 0.6324, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.005174160003662, "rewards/margins": 0.8125362396240234, "rewards/rejected": -2.8177101612091064, "step": 19450 }, { "epoch": 2.51, "learning_rate": 9.0322272162188e-08, "logits/chosen": -2.641167163848877, "logits/rejected": -2.609036922454834, "logps/chosen": -595.4849853515625, "logps/rejected": -478.7933654785156, "loss": 0.5459, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0051636695861816, "rewards/margins": 0.89605712890625, "rewards/rejected": -2.9012207984924316, "step": 19460 }, { "epoch": 2.51, "learning_rate": 9.008319785789423e-08, "logits/chosen": -2.7458112239837646, "logits/rejected": -2.662261486053467, "logps/chosen": -546.9490966796875, "logps/rejected": -444.7684631347656, "loss": 0.5895, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0238263607025146, "rewards/margins": 0.8268194198608398, "rewards/rejected": -2.8506455421447754, "step": 19470 }, { "epoch": 2.51, "learning_rate": 8.984412355360045e-08, "logits/chosen": -2.7831196784973145, "logits/rejected": -2.6255412101745605, "logps/chosen": -509.7847595214844, "logps/rejected": -383.8675231933594, "loss": 0.6572, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.256472587585449, "rewards/margins": 0.7950586080551147, "rewards/rejected": -3.0515313148498535, "step": 19480 }, { "epoch": 2.52, "learning_rate": 8.960504924930667e-08, "logits/chosen": -2.6629438400268555, "logits/rejected": -2.558317184448242, "logps/chosen": -526.076171875, "logps/rejected": -454.955810546875, "loss": 0.5058, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.04400897026062, "rewards/margins": 1.147899866104126, "rewards/rejected": -3.191908836364746, "step": 19490 }, { "epoch": 2.52, "learning_rate": 8.936597494501291e-08, "logits/chosen": -2.7949838638305664, "logits/rejected": -2.6742937564849854, "logps/chosen": -575.6024780273438, "logps/rejected": -439.37933349609375, "loss": 0.6415, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2122480869293213, "rewards/margins": 0.6578324437141418, "rewards/rejected": -2.8700804710388184, "step": 19500 }, { "epoch": 2.52, "learning_rate": 8.912690064071913e-08, "logits/chosen": -2.8094375133514404, "logits/rejected": -2.6911516189575195, "logps/chosen": -595.48388671875, "logps/rejected": -431.40313720703125, "loss": 0.6355, "rewards/accuracies": 0.6875, "rewards/chosen": -2.136627197265625, "rewards/margins": 0.8402729034423828, "rewards/rejected": -2.9768998622894287, "step": 19510 }, { "epoch": 2.52, "learning_rate": 8.888782633642535e-08, "logits/chosen": -2.638596296310425, "logits/rejected": -2.557359218597412, "logps/chosen": -480.5560607910156, "logps/rejected": -415.08624267578125, "loss": 0.4995, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.872833013534546, "rewards/margins": 1.0110989809036255, "rewards/rejected": -2.883931875228882, "step": 19520 }, { "epoch": 2.52, "learning_rate": 8.864875203213159e-08, "logits/chosen": -2.6252849102020264, "logits/rejected": -2.5162243843078613, "logps/chosen": -518.0851440429688, "logps/rejected": -410.52459716796875, "loss": 0.602, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.92205810546875, "rewards/margins": 0.8406052589416504, "rewards/rejected": -2.7626633644104004, "step": 19530 }, { "epoch": 2.52, "learning_rate": 8.840967772783781e-08, "logits/chosen": -2.6999385356903076, "logits/rejected": -2.654036283493042, "logps/chosen": -526.0579223632812, "logps/rejected": -458.80078125, "loss": 0.5434, "rewards/accuracies": 0.75, "rewards/chosen": -2.101177453994751, "rewards/margins": 0.7211915254592896, "rewards/rejected": -2.822368860244751, "step": 19540 }, { "epoch": 2.52, "learning_rate": 8.817060342354403e-08, "logits/chosen": -2.7223944664001465, "logits/rejected": -2.6948843002319336, "logps/chosen": -495.299072265625, "logps/rejected": -445.8056640625, "loss": 0.6058, "rewards/accuracies": 0.625, "rewards/chosen": -2.060966968536377, "rewards/margins": 0.6391102075576782, "rewards/rejected": -2.7000768184661865, "step": 19550 }, { "epoch": 2.53, "learning_rate": 8.793152911925026e-08, "logits/chosen": -2.6170997619628906, "logits/rejected": -2.5763514041900635, "logps/chosen": -588.0238037109375, "logps/rejected": -484.138916015625, "loss": 0.5627, "rewards/accuracies": 0.75, "rewards/chosen": -2.2103214263916016, "rewards/margins": 0.9559047818183899, "rewards/rejected": -3.166226387023926, "step": 19560 }, { "epoch": 2.53, "learning_rate": 8.769245481495649e-08, "logits/chosen": -2.87080454826355, "logits/rejected": -2.732070207595825, "logps/chosen": -565.0896606445312, "logps/rejected": -391.45831298828125, "loss": 0.6927, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9953094720840454, "rewards/margins": 0.777461051940918, "rewards/rejected": -2.772770643234253, "step": 19570 }, { "epoch": 2.53, "learning_rate": 8.745338051066271e-08, "logits/chosen": -2.730158567428589, "logits/rejected": -2.6659750938415527, "logps/chosen": -496.2366638183594, "logps/rejected": -454.5201110839844, "loss": 0.6124, "rewards/accuracies": 0.625, "rewards/chosen": -2.2531299591064453, "rewards/margins": 0.6298061609268188, "rewards/rejected": -2.8829360008239746, "step": 19580 }, { "epoch": 2.53, "learning_rate": 8.721430620636894e-08, "logits/chosen": -2.742798328399658, "logits/rejected": -2.717815637588501, "logps/chosen": -539.38427734375, "logps/rejected": -486.98681640625, "loss": 0.5655, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2203049659729004, "rewards/margins": 1.0882467031478882, "rewards/rejected": -3.308551788330078, "step": 19590 }, { "epoch": 2.53, "learning_rate": 8.697523190207516e-08, "logits/chosen": -2.7279505729675293, "logits/rejected": -2.5765414237976074, "logps/chosen": -600.21337890625, "logps/rejected": -392.8628845214844, "loss": 0.4511, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9778190851211548, "rewards/margins": 1.2438125610351562, "rewards/rejected": -3.2216315269470215, "step": 19600 }, { "epoch": 2.53, "learning_rate": 8.673615759778138e-08, "logits/chosen": -2.7354507446289062, "logits/rejected": -2.6510863304138184, "logps/chosen": -521.369140625, "logps/rejected": -437.26458740234375, "loss": 0.6308, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3037686347961426, "rewards/margins": 0.6263501048088074, "rewards/rejected": -2.9301185607910156, "step": 19610 }, { "epoch": 2.53, "learning_rate": 8.64970832934876e-08, "logits/chosen": -2.6652257442474365, "logits/rejected": -2.5822081565856934, "logps/chosen": -515.5509033203125, "logps/rejected": -425.26959228515625, "loss": 0.6493, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.382136821746826, "rewards/margins": 0.7624615430831909, "rewards/rejected": -3.1445984840393066, "step": 19620 }, { "epoch": 2.53, "learning_rate": 8.625800898919383e-08, "logits/chosen": -2.5735831260681152, "logits/rejected": -2.5362417697906494, "logps/chosen": -488.832763671875, "logps/rejected": -399.2392272949219, "loss": 0.6898, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1351189613342285, "rewards/margins": 0.5044595003128052, "rewards/rejected": -2.639578342437744, "step": 19630 }, { "epoch": 2.54, "learning_rate": 8.601893468490006e-08, "logits/chosen": -2.837761402130127, "logits/rejected": -2.624983787536621, "logps/chosen": -573.3448486328125, "logps/rejected": -389.21563720703125, "loss": 0.5245, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.8355728387832642, "rewards/margins": 1.1722331047058105, "rewards/rejected": -3.007805824279785, "step": 19640 }, { "epoch": 2.54, "learning_rate": 8.577986038060629e-08, "logits/chosen": -2.677135467529297, "logits/rejected": -2.6875243186950684, "logps/chosen": -570.2850952148438, "logps/rejected": -498.55059814453125, "loss": 0.6212, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2590415477752686, "rewards/margins": 0.8422096371650696, "rewards/rejected": -3.1012511253356934, "step": 19650 }, { "epoch": 2.54, "learning_rate": 8.554078607631251e-08, "logits/chosen": -2.813136577606201, "logits/rejected": -2.6632776260375977, "logps/chosen": -605.7673950195312, "logps/rejected": -477.5635681152344, "loss": 0.6241, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1269116401672363, "rewards/margins": 0.69310063123703, "rewards/rejected": -2.820012331008911, "step": 19660 }, { "epoch": 2.54, "learning_rate": 8.530171177201875e-08, "logits/chosen": -2.784512519836426, "logits/rejected": -2.6365437507629395, "logps/chosen": -524.7754516601562, "logps/rejected": -407.90570068359375, "loss": 0.5893, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.190713405609131, "rewards/margins": 0.7734916806221008, "rewards/rejected": -2.9642045497894287, "step": 19670 }, { "epoch": 2.54, "learning_rate": 8.506263746772497e-08, "logits/chosen": -2.8830418586730957, "logits/rejected": -2.701160430908203, "logps/chosen": -653.6710205078125, "logps/rejected": -409.7144470214844, "loss": 0.4952, "rewards/accuracies": 0.75, "rewards/chosen": -2.231807231903076, "rewards/margins": 1.0981730222702026, "rewards/rejected": -3.329979658126831, "step": 19680 }, { "epoch": 2.54, "learning_rate": 8.482356316343119e-08, "logits/chosen": -2.794722557067871, "logits/rejected": -2.6118931770324707, "logps/chosen": -514.7408447265625, "logps/rejected": -351.73870849609375, "loss": 0.5734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4004383087158203, "rewards/margins": 0.7458927035331726, "rewards/rejected": -3.1463305950164795, "step": 19690 }, { "epoch": 2.54, "learning_rate": 8.458448885913741e-08, "logits/chosen": -2.7994179725646973, "logits/rejected": -2.730623722076416, "logps/chosen": -540.818115234375, "logps/rejected": -475.42529296875, "loss": 0.5693, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.216221332550049, "rewards/margins": 0.8112190365791321, "rewards/rejected": -3.027440309524536, "step": 19700 }, { "epoch": 2.54, "learning_rate": 8.434541455484365e-08, "logits/chosen": -2.7515692710876465, "logits/rejected": -2.670135021209717, "logps/chosen": -537.8304443359375, "logps/rejected": -417.1910095214844, "loss": 0.6141, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9721084833145142, "rewards/margins": 0.7537431716918945, "rewards/rejected": -2.7258517742156982, "step": 19710 }, { "epoch": 2.55, "learning_rate": 8.410634025054987e-08, "logits/chosen": -2.7227604389190674, "logits/rejected": -2.6622025966644287, "logps/chosen": -642.0103149414062, "logps/rejected": -501.3431701660156, "loss": 0.4389, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.866166114807129, "rewards/margins": 1.361885666847229, "rewards/rejected": -3.2280516624450684, "step": 19720 }, { "epoch": 2.55, "learning_rate": 8.38672659462561e-08, "logits/chosen": -2.7032876014709473, "logits/rejected": -2.6221444606781006, "logps/chosen": -612.2120971679688, "logps/rejected": -522.9436645507812, "loss": 0.6134, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2844061851501465, "rewards/margins": 0.7511395215988159, "rewards/rejected": -3.035545825958252, "step": 19730 }, { "epoch": 2.55, "learning_rate": 8.362819164196232e-08, "logits/chosen": -2.6782989501953125, "logits/rejected": -2.621065616607666, "logps/chosen": -516.3025512695312, "logps/rejected": -429.598876953125, "loss": 0.4956, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.180234432220459, "rewards/margins": 0.8979886174201965, "rewards/rejected": -3.0782227516174316, "step": 19740 }, { "epoch": 2.55, "learning_rate": 8.338911733766854e-08, "logits/chosen": -2.788604497909546, "logits/rejected": -2.638683557510376, "logps/chosen": -533.54345703125, "logps/rejected": -377.99505615234375, "loss": 0.5433, "rewards/accuracies": 0.75, "rewards/chosen": -2.2449231147766113, "rewards/margins": 0.7617908120155334, "rewards/rejected": -3.006713628768921, "step": 19750 }, { "epoch": 2.55, "learning_rate": 8.315004303337476e-08, "logits/chosen": -2.7826898097991943, "logits/rejected": -2.6364645957946777, "logps/chosen": -564.8587036132812, "logps/rejected": -444.30291748046875, "loss": 0.5664, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1362195014953613, "rewards/margins": 0.7635947465896606, "rewards/rejected": -2.8998141288757324, "step": 19760 }, { "epoch": 2.55, "learning_rate": 8.291096872908099e-08, "logits/chosen": -2.7295119762420654, "logits/rejected": -2.613769769668579, "logps/chosen": -546.658203125, "logps/rejected": -385.1810302734375, "loss": 0.6838, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1572890281677246, "rewards/margins": 0.6630931496620178, "rewards/rejected": -2.8203823566436768, "step": 19770 }, { "epoch": 2.55, "learning_rate": 8.267189442478722e-08, "logits/chosen": -2.764263391494751, "logits/rejected": -2.6459643840789795, "logps/chosen": -544.748291015625, "logps/rejected": -408.300537109375, "loss": 0.415, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7872852087020874, "rewards/margins": 1.2334710359573364, "rewards/rejected": -3.020756483078003, "step": 19780 }, { "epoch": 2.55, "learning_rate": 8.243282012049344e-08, "logits/chosen": -2.786660671234131, "logits/rejected": -2.6586263179779053, "logps/chosen": -514.6807861328125, "logps/rejected": -383.7185363769531, "loss": 0.5912, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.980470895767212, "rewards/margins": 0.7916682958602905, "rewards/rejected": -2.772139072418213, "step": 19790 }, { "epoch": 2.56, "learning_rate": 8.219374581619967e-08, "logits/chosen": -2.8290367126464844, "logits/rejected": -2.6320176124572754, "logps/chosen": -620.7086791992188, "logps/rejected": -466.367919921875, "loss": 0.6451, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.305182933807373, "rewards/margins": 0.6830007433891296, "rewards/rejected": -2.9881834983825684, "step": 19800 }, { "epoch": 2.56, "learning_rate": 8.195467151190589e-08, "logits/chosen": -2.837343692779541, "logits/rejected": -2.6653871536254883, "logps/chosen": -620.8126831054688, "logps/rejected": -413.3334045410156, "loss": 0.581, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0616512298583984, "rewards/margins": 0.9927474856376648, "rewards/rejected": -3.054398775100708, "step": 19810 }, { "epoch": 2.56, "learning_rate": 8.171559720761213e-08, "logits/chosen": -2.7044918537139893, "logits/rejected": -2.6646385192871094, "logps/chosen": -483.508544921875, "logps/rejected": -405.4223937988281, "loss": 0.6165, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1615867614746094, "rewards/margins": 0.8160349726676941, "rewards/rejected": -2.9776217937469482, "step": 19820 }, { "epoch": 2.56, "learning_rate": 8.147652290331835e-08, "logits/chosen": -2.7310917377471924, "logits/rejected": -2.62265682220459, "logps/chosen": -515.2100219726562, "logps/rejected": -427.76800537109375, "loss": 0.5712, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2089691162109375, "rewards/margins": 0.8554760217666626, "rewards/rejected": -3.0644447803497314, "step": 19830 }, { "epoch": 2.56, "learning_rate": 8.123744859902458e-08, "logits/chosen": -2.654383659362793, "logits/rejected": -2.629499673843384, "logps/chosen": -549.8352661132812, "logps/rejected": -445.9024353027344, "loss": 0.657, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3320765495300293, "rewards/margins": 0.7965047955513, "rewards/rejected": -3.1285810470581055, "step": 19840 }, { "epoch": 2.56, "learning_rate": 8.099837429473081e-08, "logits/chosen": -2.770740270614624, "logits/rejected": -2.698934316635132, "logps/chosen": -538.2140502929688, "logps/rejected": -440.35601806640625, "loss": 0.5477, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.260239362716675, "rewards/margins": 0.7759509682655334, "rewards/rejected": -3.0361900329589844, "step": 19850 }, { "epoch": 2.56, "learning_rate": 8.075929999043703e-08, "logits/chosen": -2.7960362434387207, "logits/rejected": -2.7030398845672607, "logps/chosen": -549.0274658203125, "logps/rejected": -416.88702392578125, "loss": 0.5778, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0380752086639404, "rewards/margins": 0.9983485341072083, "rewards/rejected": -3.036423683166504, "step": 19860 }, { "epoch": 2.57, "learning_rate": 8.052022568614325e-08, "logits/chosen": -2.8313279151916504, "logits/rejected": -2.74375319480896, "logps/chosen": -528.4674682617188, "logps/rejected": -401.803955078125, "loss": 0.4994, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9711229801177979, "rewards/margins": 1.033159613609314, "rewards/rejected": -3.0042824745178223, "step": 19870 }, { "epoch": 2.57, "learning_rate": 8.028115138184947e-08, "logits/chosen": -2.6927034854888916, "logits/rejected": -2.6034111976623535, "logps/chosen": -493.2890625, "logps/rejected": -396.73187255859375, "loss": 0.5607, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8902727365493774, "rewards/margins": 0.8354169726371765, "rewards/rejected": -2.72568941116333, "step": 19880 }, { "epoch": 2.57, "learning_rate": 8.00420770775557e-08, "logits/chosen": -2.8700485229492188, "logits/rejected": -2.7445404529571533, "logps/chosen": -508.646728515625, "logps/rejected": -400.4102783203125, "loss": 0.4761, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0454139709472656, "rewards/margins": 0.991778552532196, "rewards/rejected": -3.0371923446655273, "step": 19890 }, { "epoch": 2.57, "learning_rate": 7.980300277326192e-08, "logits/chosen": -2.6756515502929688, "logits/rejected": -2.5378928184509277, "logps/chosen": -544.6287841796875, "logps/rejected": -421.6639709472656, "loss": 0.5156, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.18326997756958, "rewards/margins": 0.965018093585968, "rewards/rejected": -3.1482880115509033, "step": 19900 }, { "epoch": 2.57, "learning_rate": 7.956392846896816e-08, "logits/chosen": -2.7730417251586914, "logits/rejected": -2.6983985900878906, "logps/chosen": -517.7859497070312, "logps/rejected": -446.0479431152344, "loss": 0.4835, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.032447576522827, "rewards/margins": 0.9366715550422668, "rewards/rejected": -2.9691193103790283, "step": 19910 }, { "epoch": 2.57, "learning_rate": 7.932485416467438e-08, "logits/chosen": -2.6376020908355713, "logits/rejected": -2.6020612716674805, "logps/chosen": -513.735595703125, "logps/rejected": -432.84234619140625, "loss": 0.6054, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.353687047958374, "rewards/margins": 0.7856069803237915, "rewards/rejected": -3.139293909072876, "step": 19920 }, { "epoch": 2.57, "learning_rate": 7.90857798603806e-08, "logits/chosen": -2.7611680030822754, "logits/rejected": -2.653271198272705, "logps/chosen": -579.02490234375, "logps/rejected": -482.99420166015625, "loss": 0.6377, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1960561275482178, "rewards/margins": 0.7972657084465027, "rewards/rejected": -2.9933218955993652, "step": 19930 }, { "epoch": 2.57, "learning_rate": 7.884670555608682e-08, "logits/chosen": -2.7252376079559326, "logits/rejected": -2.5958847999572754, "logps/chosen": -557.7631225585938, "logps/rejected": -427.3646545410156, "loss": 0.5655, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.4340548515319824, "rewards/margins": 0.8677032589912415, "rewards/rejected": -3.3017578125, "step": 19940 }, { "epoch": 2.58, "learning_rate": 7.860763125179305e-08, "logits/chosen": -2.687499523162842, "logits/rejected": -2.5460822582244873, "logps/chosen": -538.8734741210938, "logps/rejected": -388.7666931152344, "loss": 0.5032, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1774325370788574, "rewards/margins": 0.9024175405502319, "rewards/rejected": -3.079850196838379, "step": 19950 }, { "epoch": 2.58, "learning_rate": 7.836855694749927e-08, "logits/chosen": -2.720029354095459, "logits/rejected": -2.5812394618988037, "logps/chosen": -531.2183837890625, "logps/rejected": -373.363037109375, "loss": 0.4902, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0637381076812744, "rewards/margins": 1.0422817468643188, "rewards/rejected": -3.1060194969177246, "step": 19960 }, { "epoch": 2.58, "learning_rate": 7.81294826432055e-08, "logits/chosen": -2.7680702209472656, "logits/rejected": -2.6916797161102295, "logps/chosen": -598.8069458007812, "logps/rejected": -458.9559631347656, "loss": 0.6675, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.1160082817077637, "rewards/margins": 0.804900050163269, "rewards/rejected": -2.9209084510803223, "step": 19970 }, { "epoch": 2.58, "learning_rate": 7.789040833891174e-08, "logits/chosen": -2.7599658966064453, "logits/rejected": -2.6227831840515137, "logps/chosen": -612.0132446289062, "logps/rejected": -439.25555419921875, "loss": 0.5039, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.1322109699249268, "rewards/margins": 1.0790756940841675, "rewards/rejected": -3.2112860679626465, "step": 19980 }, { "epoch": 2.58, "learning_rate": 7.765133403461796e-08, "logits/chosen": -2.662929058074951, "logits/rejected": -2.5714404582977295, "logps/chosen": -546.2092895507812, "logps/rejected": -447.67169189453125, "loss": 0.589, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.967031478881836, "rewards/margins": 0.8456530570983887, "rewards/rejected": -2.8126845359802246, "step": 19990 }, { "epoch": 2.58, "learning_rate": 7.741225973032419e-08, "logits/chosen": -2.711592197418213, "logits/rejected": -2.6578190326690674, "logps/chosen": -434.2452697753906, "logps/rejected": -422.7489318847656, "loss": 0.568, "rewards/accuracies": 0.6875, "rewards/chosen": -2.250537395477295, "rewards/margins": 0.9212640523910522, "rewards/rejected": -3.1718013286590576, "step": 20000 }, { "epoch": 2.58, "eval_logits/chosen": -3.10211443901062, "eval_logits/rejected": -3.0553455352783203, "eval_logps/chosen": -541.498291015625, "eval_logps/rejected": -421.92303466796875, "eval_loss": 0.6162945628166199, "eval_rewards/accuracies": 0.6779999732971191, "eval_rewards/chosen": -0.9678579568862915, "eval_rewards/margins": 1.131583571434021, "eval_rewards/rejected": -2.0994417667388916, "eval_runtime": 282.8189, "eval_samples_per_second": 7.072, "eval_steps_per_second": 3.536, "step": 20000 }, { "epoch": 2.58, "learning_rate": 7.717318542603041e-08, "logits/chosen": -2.58048677444458, "logits/rejected": -2.535534381866455, "logps/chosen": -545.1131591796875, "logps/rejected": -411.5746154785156, "loss": 0.5942, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1914968490600586, "rewards/margins": 0.7437125444412231, "rewards/rejected": -2.9352097511291504, "step": 20010 }, { "epoch": 2.58, "learning_rate": 7.693411112173663e-08, "logits/chosen": -2.7532336711883545, "logits/rejected": -2.5821738243103027, "logps/chosen": -562.4767456054688, "logps/rejected": -427.8396911621094, "loss": 0.5099, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1482608318328857, "rewards/margins": 0.9401863217353821, "rewards/rejected": -3.088447332382202, "step": 20020 }, { "epoch": 2.59, "learning_rate": 7.669503681744285e-08, "logits/chosen": -2.7171592712402344, "logits/rejected": -2.6718506813049316, "logps/chosen": -564.3753051757812, "logps/rejected": -530.491943359375, "loss": 0.5009, "rewards/accuracies": 0.75, "rewards/chosen": -1.984644889831543, "rewards/margins": 0.941302478313446, "rewards/rejected": -2.925947427749634, "step": 20030 }, { "epoch": 2.59, "learning_rate": 7.645596251314908e-08, "logits/chosen": -2.8539557456970215, "logits/rejected": -2.6982486248016357, "logps/chosen": -625.32080078125, "logps/rejected": -450.4046936035156, "loss": 0.6035, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.060279130935669, "rewards/margins": 0.8097130656242371, "rewards/rejected": -2.86999249458313, "step": 20040 }, { "epoch": 2.59, "learning_rate": 7.621688820885531e-08, "logits/chosen": -2.76350736618042, "logits/rejected": -2.603537082672119, "logps/chosen": -596.0537109375, "logps/rejected": -456.53399658203125, "loss": 0.5722, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0273666381835938, "rewards/margins": 0.8294389843940735, "rewards/rejected": -2.8568053245544434, "step": 20050 }, { "epoch": 2.59, "learning_rate": 7.597781390456154e-08, "logits/chosen": -2.8026480674743652, "logits/rejected": -2.5892410278320312, "logps/chosen": -564.7554931640625, "logps/rejected": -436.8197326660156, "loss": 0.5517, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2589149475097656, "rewards/margins": 0.8759633898735046, "rewards/rejected": -3.134878396987915, "step": 20060 }, { "epoch": 2.59, "learning_rate": 7.573873960026776e-08, "logits/chosen": -2.76153302192688, "logits/rejected": -2.5996756553649902, "logps/chosen": -541.7672119140625, "logps/rejected": -389.079345703125, "loss": 0.6317, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3339104652404785, "rewards/margins": 0.803070068359375, "rewards/rejected": -3.1369805335998535, "step": 20070 }, { "epoch": 2.59, "learning_rate": 7.549966529597398e-08, "logits/chosen": -2.698875904083252, "logits/rejected": -2.637056827545166, "logps/chosen": -543.76220703125, "logps/rejected": -461.81915283203125, "loss": 0.5642, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0901553630828857, "rewards/margins": 0.893241286277771, "rewards/rejected": -2.9833970069885254, "step": 20080 }, { "epoch": 2.59, "learning_rate": 7.52605909916802e-08, "logits/chosen": -2.6748006343841553, "logits/rejected": -2.6184887886047363, "logps/chosen": -592.5310668945312, "logps/rejected": -435.484130859375, "loss": 0.4807, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.287785053253174, "rewards/margins": 0.9338000416755676, "rewards/rejected": -3.2215850353240967, "step": 20090 }, { "epoch": 2.59, "learning_rate": 7.502151668738643e-08, "logits/chosen": -2.8487322330474854, "logits/rejected": -2.6629345417022705, "logps/chosen": -574.5551147460938, "logps/rejected": -474.54608154296875, "loss": 0.5258, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.107494592666626, "rewards/margins": 1.0000325441360474, "rewards/rejected": -3.107527017593384, "step": 20100 }, { "epoch": 2.6, "learning_rate": 7.478244238309265e-08, "logits/chosen": -2.7658915519714355, "logits/rejected": -2.7075514793395996, "logps/chosen": -567.2210083007812, "logps/rejected": -510.4366149902344, "loss": 0.5313, "rewards/accuracies": 0.6875, "rewards/chosen": -2.184399366378784, "rewards/margins": 0.9041838645935059, "rewards/rejected": -3.088583469390869, "step": 20110 }, { "epoch": 2.6, "learning_rate": 7.45433680787989e-08, "logits/chosen": -2.7823472023010254, "logits/rejected": -2.609865665435791, "logps/chosen": -567.3945922851562, "logps/rejected": -394.44293212890625, "loss": 0.5457, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.966562271118164, "rewards/margins": 1.03261399269104, "rewards/rejected": -2.999176502227783, "step": 20120 }, { "epoch": 2.6, "learning_rate": 7.430429377450512e-08, "logits/chosen": -2.7485270500183105, "logits/rejected": -2.6357293128967285, "logps/chosen": -516.2210083007812, "logps/rejected": -451.5413513183594, "loss": 0.5324, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2198128700256348, "rewards/margins": 0.9363752603530884, "rewards/rejected": -3.1561882495880127, "step": 20130 }, { "epoch": 2.6, "learning_rate": 7.406521947021134e-08, "logits/chosen": -2.7327942848205566, "logits/rejected": -2.6746561527252197, "logps/chosen": -592.2122802734375, "logps/rejected": -475.91070556640625, "loss": 0.6405, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2599024772644043, "rewards/margins": 0.8137826919555664, "rewards/rejected": -3.0736846923828125, "step": 20140 }, { "epoch": 2.6, "learning_rate": 7.382614516591757e-08, "logits/chosen": -2.85756778717041, "logits/rejected": -2.6686079502105713, "logps/chosen": -636.343017578125, "logps/rejected": -441.345703125, "loss": 0.4592, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9539406299591064, "rewards/margins": 1.1460654735565186, "rewards/rejected": -3.100006103515625, "step": 20150 }, { "epoch": 2.6, "learning_rate": 7.358707086162379e-08, "logits/chosen": -2.73502779006958, "logits/rejected": -2.6130783557891846, "logps/chosen": -560.6268310546875, "logps/rejected": -364.40582275390625, "loss": 0.569, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3605828285217285, "rewards/margins": 0.7637310028076172, "rewards/rejected": -3.1243138313293457, "step": 20160 }, { "epoch": 2.6, "learning_rate": 7.334799655733001e-08, "logits/chosen": -2.6738476753234863, "logits/rejected": -2.632774591445923, "logps/chosen": -477.67840576171875, "logps/rejected": -390.5040283203125, "loss": 0.6122, "rewards/accuracies": 0.625, "rewards/chosen": -2.3125345706939697, "rewards/margins": 0.7728602290153503, "rewards/rejected": -3.0853946208953857, "step": 20170 }, { "epoch": 2.61, "learning_rate": 7.310892225303623e-08, "logits/chosen": -2.7042315006256104, "logits/rejected": -2.628239154815674, "logps/chosen": -573.2144775390625, "logps/rejected": -484.42657470703125, "loss": 0.538, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0199759006500244, "rewards/margins": 0.9842098355293274, "rewards/rejected": -3.004185676574707, "step": 20180 }, { "epoch": 2.61, "learning_rate": 7.286984794874247e-08, "logits/chosen": -2.729602336883545, "logits/rejected": -2.623875379562378, "logps/chosen": -465.4889221191406, "logps/rejected": -381.1214294433594, "loss": 0.6134, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.051485061645508, "rewards/margins": 0.6189576387405396, "rewards/rejected": -2.670442581176758, "step": 20190 }, { "epoch": 2.61, "learning_rate": 7.26307736444487e-08, "logits/chosen": -2.7259175777435303, "logits/rejected": -2.6344716548919678, "logps/chosen": -532.7291870117188, "logps/rejected": -414.81744384765625, "loss": 0.5292, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.120603084564209, "rewards/margins": 0.9770514369010925, "rewards/rejected": -3.097654342651367, "step": 20200 }, { "epoch": 2.61, "learning_rate": 7.239169934015492e-08, "logits/chosen": -2.700655460357666, "logits/rejected": -2.641038656234741, "logps/chosen": -514.1180419921875, "logps/rejected": -430.8834533691406, "loss": 0.6418, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1246652603149414, "rewards/margins": 0.6024655103683472, "rewards/rejected": -2.727130889892578, "step": 20210 }, { "epoch": 2.61, "learning_rate": 7.215262503586114e-08, "logits/chosen": -2.804492950439453, "logits/rejected": -2.6198019981384277, "logps/chosen": -537.6318359375, "logps/rejected": -393.2866516113281, "loss": 0.7157, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -2.274160146713257, "rewards/margins": 0.5301461219787598, "rewards/rejected": -2.8043062686920166, "step": 20220 }, { "epoch": 2.61, "learning_rate": 7.191355073156736e-08, "logits/chosen": -2.7636189460754395, "logits/rejected": -2.7272684574127197, "logps/chosen": -534.3792724609375, "logps/rejected": -493.29803466796875, "loss": 0.637, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3770313262939453, "rewards/margins": 0.5679275393486023, "rewards/rejected": -2.9449589252471924, "step": 20230 }, { "epoch": 2.61, "learning_rate": 7.167447642727358e-08, "logits/chosen": -2.760829448699951, "logits/rejected": -2.6184287071228027, "logps/chosen": -614.7677612304688, "logps/rejected": -436.64227294921875, "loss": 0.5638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1845076084136963, "rewards/margins": 1.0417121648788452, "rewards/rejected": -3.226219892501831, "step": 20240 }, { "epoch": 2.61, "learning_rate": 7.143540212297981e-08, "logits/chosen": -2.7602477073669434, "logits/rejected": -2.643188238143921, "logps/chosen": -501.58770751953125, "logps/rejected": -402.80975341796875, "loss": 0.6143, "rewards/accuracies": 0.6875, "rewards/chosen": -2.138291835784912, "rewards/margins": 0.6577590703964233, "rewards/rejected": -2.796050786972046, "step": 20250 }, { "epoch": 2.62, "learning_rate": 7.119632781868606e-08, "logits/chosen": -2.815068483352661, "logits/rejected": -2.6527163982391357, "logps/chosen": -619.4699096679688, "logps/rejected": -460.52099609375, "loss": 0.5942, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2841923236846924, "rewards/margins": 0.9882233738899231, "rewards/rejected": -3.2724156379699707, "step": 20260 }, { "epoch": 2.62, "learning_rate": 7.095725351439228e-08, "logits/chosen": -2.7488808631896973, "logits/rejected": -2.721906900405884, "logps/chosen": -448.8150329589844, "logps/rejected": -376.7615661621094, "loss": 0.5751, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2252509593963623, "rewards/margins": 0.685677170753479, "rewards/rejected": -2.9109280109405518, "step": 20270 }, { "epoch": 2.62, "learning_rate": 7.07181792100985e-08, "logits/chosen": -2.722198247909546, "logits/rejected": -2.646941900253296, "logps/chosen": -508.60540771484375, "logps/rejected": -385.99859619140625, "loss": 0.5041, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.902894377708435, "rewards/margins": 1.0519171953201294, "rewards/rejected": -2.9548115730285645, "step": 20280 }, { "epoch": 2.62, "learning_rate": 7.047910490580472e-08, "logits/chosen": -2.7232024669647217, "logits/rejected": -2.6263153553009033, "logps/chosen": -495.46258544921875, "logps/rejected": -407.38775634765625, "loss": 0.6675, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2130179405212402, "rewards/margins": 0.6360330581665039, "rewards/rejected": -2.849050998687744, "step": 20290 }, { "epoch": 2.62, "learning_rate": 7.024003060151095e-08, "logits/chosen": -2.727682590484619, "logits/rejected": -2.6821200847625732, "logps/chosen": -508.8401794433594, "logps/rejected": -455.791748046875, "loss": 0.4901, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9955686330795288, "rewards/margins": 0.9041069149971008, "rewards/rejected": -2.8996756076812744, "step": 20300 }, { "epoch": 2.62, "learning_rate": 7.000095629721717e-08, "logits/chosen": -2.684757709503174, "logits/rejected": -2.5143697261810303, "logps/chosen": -647.7650146484375, "logps/rejected": -428.1724548339844, "loss": 0.5171, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1806938648223877, "rewards/margins": 1.0329792499542236, "rewards/rejected": -3.213672637939453, "step": 20310 }, { "epoch": 2.62, "learning_rate": 6.976188199292339e-08, "logits/chosen": -2.8484177589416504, "logits/rejected": -2.7500364780426025, "logps/chosen": -529.6068115234375, "logps/rejected": -420.1022033691406, "loss": 0.556, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0285305976867676, "rewards/margins": 0.8668271899223328, "rewards/rejected": -2.895357847213745, "step": 20320 }, { "epoch": 2.62, "learning_rate": 6.952280768862963e-08, "logits/chosen": -2.6864638328552246, "logits/rejected": -2.6645941734313965, "logps/chosen": -490.4966735839844, "logps/rejected": -422.13409423828125, "loss": 0.5734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3945584297180176, "rewards/margins": 0.7759562134742737, "rewards/rejected": -3.1705145835876465, "step": 20330 }, { "epoch": 2.63, "learning_rate": 6.928373338433585e-08, "logits/chosen": -2.888444185256958, "logits/rejected": -2.7429699897766113, "logps/chosen": -562.9955444335938, "logps/rejected": -429.9044494628906, "loss": 0.5452, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1201624870300293, "rewards/margins": 0.9566506147384644, "rewards/rejected": -3.076813220977783, "step": 20340 }, { "epoch": 2.63, "learning_rate": 6.904465908004207e-08, "logits/chosen": -2.773791790008545, "logits/rejected": -2.6764683723449707, "logps/chosen": -489.1263732910156, "logps/rejected": -456.19329833984375, "loss": 0.5499, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.942823052406311, "rewards/margins": 0.7871388792991638, "rewards/rejected": -2.72996187210083, "step": 20350 }, { "epoch": 2.63, "learning_rate": 6.88055847757483e-08, "logits/chosen": -2.7890517711639404, "logits/rejected": -2.663877010345459, "logps/chosen": -493.3934631347656, "logps/rejected": -364.1424865722656, "loss": 0.5364, "rewards/accuracies": 0.6875, "rewards/chosen": -2.239917278289795, "rewards/margins": 0.971224308013916, "rewards/rejected": -3.211141586303711, "step": 20360 }, { "epoch": 2.63, "learning_rate": 6.856651047145452e-08, "logits/chosen": -2.8548569679260254, "logits/rejected": -2.7869200706481934, "logps/chosen": -516.2193603515625, "logps/rejected": -408.3298034667969, "loss": 0.5257, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.087280750274658, "rewards/margins": 0.8881365656852722, "rewards/rejected": -2.975417375564575, "step": 20370 }, { "epoch": 2.63, "learning_rate": 6.832743616716074e-08, "logits/chosen": -2.7504944801330566, "logits/rejected": -2.6322755813598633, "logps/chosen": -566.8134765625, "logps/rejected": -375.1492919921875, "loss": 0.5403, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.166255474090576, "rewards/margins": 0.8371332287788391, "rewards/rejected": -3.0033891201019287, "step": 20380 }, { "epoch": 2.63, "learning_rate": 6.808836186286696e-08, "logits/chosen": -2.769230365753174, "logits/rejected": -2.6075832843780518, "logps/chosen": -610.7100830078125, "logps/rejected": -426.7613220214844, "loss": 0.6102, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3000082969665527, "rewards/margins": 0.7729376554489136, "rewards/rejected": -3.072946071624756, "step": 20390 }, { "epoch": 2.63, "learning_rate": 6.78492875585732e-08, "logits/chosen": -2.7189364433288574, "logits/rejected": -2.5868337154388428, "logps/chosen": -579.7989501953125, "logps/rejected": -419.15753173828125, "loss": 0.5925, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.188654661178589, "rewards/margins": 0.847296416759491, "rewards/rejected": -3.0359513759613037, "step": 20400 }, { "epoch": 2.63, "learning_rate": 6.761021325427944e-08, "logits/chosen": -2.6525754928588867, "logits/rejected": -2.6224722862243652, "logps/chosen": -532.2655639648438, "logps/rejected": -430.8047790527344, "loss": 0.5393, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5557332038879395, "rewards/margins": 0.8067455291748047, "rewards/rejected": -3.362478733062744, "step": 20410 }, { "epoch": 2.64, "learning_rate": 6.737113894998566e-08, "logits/chosen": -2.6988492012023926, "logits/rejected": -2.5917935371398926, "logps/chosen": -454.0945739746094, "logps/rejected": -394.385009765625, "loss": 0.5462, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0915050506591797, "rewards/margins": 0.8575296401977539, "rewards/rejected": -2.9490349292755127, "step": 20420 }, { "epoch": 2.64, "learning_rate": 6.713206464569188e-08, "logits/chosen": -2.7220027446746826, "logits/rejected": -2.5969901084899902, "logps/chosen": -499.585205078125, "logps/rejected": -341.70477294921875, "loss": 0.4966, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0561680793762207, "rewards/margins": 0.9331196546554565, "rewards/rejected": -2.989287853240967, "step": 20430 }, { "epoch": 2.64, "learning_rate": 6.68929903413981e-08, "logits/chosen": -2.7208123207092285, "logits/rejected": -2.624840021133423, "logps/chosen": -531.0843505859375, "logps/rejected": -400.7716369628906, "loss": 0.5003, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1841483116149902, "rewards/margins": 1.0896903276443481, "rewards/rejected": -3.273838520050049, "step": 20440 }, { "epoch": 2.64, "learning_rate": 6.665391603710433e-08, "logits/chosen": -2.7512941360473633, "logits/rejected": -2.5949552059173584, "logps/chosen": -594.7218627929688, "logps/rejected": -447.5616760253906, "loss": 0.5321, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.255037784576416, "rewards/margins": 0.819412887096405, "rewards/rejected": -3.074450731277466, "step": 20450 }, { "epoch": 2.64, "learning_rate": 6.641484173281055e-08, "logits/chosen": -2.7977068424224854, "logits/rejected": -2.7104268074035645, "logps/chosen": -485.6470642089844, "logps/rejected": -398.79351806640625, "loss": 0.5982, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2987542152404785, "rewards/margins": 0.6853243112564087, "rewards/rejected": -2.9840781688690186, "step": 20460 }, { "epoch": 2.64, "learning_rate": 6.617576742851679e-08, "logits/chosen": -2.771998167037964, "logits/rejected": -2.6623599529266357, "logps/chosen": -640.066650390625, "logps/rejected": -464.6770935058594, "loss": 0.7164, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3423733711242676, "rewards/margins": 0.644699215888977, "rewards/rejected": -2.987072706222534, "step": 20470 }, { "epoch": 2.64, "learning_rate": 6.593669312422301e-08, "logits/chosen": -2.813947916030884, "logits/rejected": -2.770526885986328, "logps/chosen": -534.8347778320312, "logps/rejected": -465.9535217285156, "loss": 0.5467, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2370760440826416, "rewards/margins": 0.8568041920661926, "rewards/rejected": -3.0938804149627686, "step": 20480 }, { "epoch": 2.65, "learning_rate": 6.569761881992923e-08, "logits/chosen": -2.745134115219116, "logits/rejected": -2.66699481010437, "logps/chosen": -599.834228515625, "logps/rejected": -516.6873779296875, "loss": 0.6508, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2986512184143066, "rewards/margins": 0.7460125684738159, "rewards/rejected": -3.044663667678833, "step": 20490 }, { "epoch": 2.65, "learning_rate": 6.545854451563545e-08, "logits/chosen": -2.7293286323547363, "logits/rejected": -2.695268154144287, "logps/chosen": -571.4554443359375, "logps/rejected": -522.669189453125, "loss": 0.537, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.049781560897827, "rewards/margins": 0.9354532957077026, "rewards/rejected": -2.9852347373962402, "step": 20500 }, { "epoch": 2.65, "learning_rate": 6.521947021134168e-08, "logits/chosen": -2.735625982284546, "logits/rejected": -2.5975451469421387, "logps/chosen": -615.19970703125, "logps/rejected": -470.15185546875, "loss": 0.5316, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.141540765762329, "rewards/margins": 0.8948376774787903, "rewards/rejected": -3.0363783836364746, "step": 20510 }, { "epoch": 2.65, "learning_rate": 6.49803959070479e-08, "logits/chosen": -2.7157320976257324, "logits/rejected": -2.5739083290100098, "logps/chosen": -548.6341552734375, "logps/rejected": -367.3426818847656, "loss": 0.4873, "rewards/accuracies": 0.75, "rewards/chosen": -2.0323033332824707, "rewards/margins": 1.015413522720337, "rewards/rejected": -3.0477168560028076, "step": 20520 }, { "epoch": 2.65, "learning_rate": 6.474132160275413e-08, "logits/chosen": -2.7308812141418457, "logits/rejected": -2.63739275932312, "logps/chosen": -694.671630859375, "logps/rejected": -566.5179443359375, "loss": 0.5148, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.8687002658843994, "rewards/margins": 1.1049809455871582, "rewards/rejected": -2.9736809730529785, "step": 20530 }, { "epoch": 2.65, "learning_rate": 6.450224729846036e-08, "logits/chosen": -2.8078999519348145, "logits/rejected": -2.693776845932007, "logps/chosen": -543.9520874023438, "logps/rejected": -466.0179138183594, "loss": 0.5696, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9597079753875732, "rewards/margins": 0.9298309087753296, "rewards/rejected": -2.8895392417907715, "step": 20540 }, { "epoch": 2.65, "learning_rate": 6.426317299416659e-08, "logits/chosen": -2.8796699047088623, "logits/rejected": -2.737924337387085, "logps/chosen": -542.8382568359375, "logps/rejected": -416.4075622558594, "loss": 0.5607, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.028597354888916, "rewards/margins": 0.9532394409179688, "rewards/rejected": -2.981837034225464, "step": 20550 }, { "epoch": 2.65, "learning_rate": 6.402409868987282e-08, "logits/chosen": -2.622140645980835, "logits/rejected": -2.535338878631592, "logps/chosen": -575.5020751953125, "logps/rejected": -438.55389404296875, "loss": 0.5165, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.140826940536499, "rewards/margins": 0.9103878140449524, "rewards/rejected": -3.0512146949768066, "step": 20560 }, { "epoch": 2.66, "learning_rate": 6.378502438557904e-08, "logits/chosen": -2.665372610092163, "logits/rejected": -2.506955623626709, "logps/chosen": -590.3330078125, "logps/rejected": -454.50872802734375, "loss": 0.5194, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0325348377227783, "rewards/margins": 1.1108478307724, "rewards/rejected": -3.1433825492858887, "step": 20570 }, { "epoch": 2.66, "learning_rate": 6.354595008128526e-08, "logits/chosen": -2.7548837661743164, "logits/rejected": -2.598842144012451, "logps/chosen": -586.8526611328125, "logps/rejected": -487.8570251464844, "loss": 0.5192, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2314906120300293, "rewards/margins": 0.7797114849090576, "rewards/rejected": -3.011202096939087, "step": 20580 }, { "epoch": 2.66, "learning_rate": 6.330687577699148e-08, "logits/chosen": -2.7632052898406982, "logits/rejected": -2.612215280532837, "logps/chosen": -598.7017822265625, "logps/rejected": -431.6189880371094, "loss": 0.4484, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9527270793914795, "rewards/margins": 1.087573766708374, "rewards/rejected": -3.0403008460998535, "step": 20590 }, { "epoch": 2.66, "learning_rate": 6.306780147269772e-08, "logits/chosen": -2.695269823074341, "logits/rejected": -2.5729007720947266, "logps/chosen": -534.4168701171875, "logps/rejected": -432.3318786621094, "loss": 0.591, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0979270935058594, "rewards/margins": 0.694442629814148, "rewards/rejected": -2.792369842529297, "step": 20600 }, { "epoch": 2.66, "learning_rate": 6.282872716840394e-08, "logits/chosen": -2.6328928470611572, "logits/rejected": -2.703294515609741, "logps/chosen": -444.57470703125, "logps/rejected": -462.90753173828125, "loss": 0.6414, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1211209297180176, "rewards/margins": 0.6551491022109985, "rewards/rejected": -2.7762703895568848, "step": 20610 }, { "epoch": 2.66, "learning_rate": 6.258965286411017e-08, "logits/chosen": -2.7879927158355713, "logits/rejected": -2.7438414096832275, "logps/chosen": -640.6727294921875, "logps/rejected": -526.5389404296875, "loss": 0.5229, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0222959518432617, "rewards/margins": 1.052459478378296, "rewards/rejected": -3.0747554302215576, "step": 20620 }, { "epoch": 2.66, "learning_rate": 6.235057855981639e-08, "logits/chosen": -2.746555805206299, "logits/rejected": -2.674712657928467, "logps/chosen": -502.89178466796875, "logps/rejected": -463.76336669921875, "loss": 0.5376, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2195980548858643, "rewards/margins": 0.9538418054580688, "rewards/rejected": -3.1734399795532227, "step": 20630 }, { "epoch": 2.66, "learning_rate": 6.211150425552261e-08, "logits/chosen": -2.725111961364746, "logits/rejected": -2.6357028484344482, "logps/chosen": -482.3943786621094, "logps/rejected": -387.34423828125, "loss": 0.5599, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0108370780944824, "rewards/margins": 0.9659849405288696, "rewards/rejected": -2.9768221378326416, "step": 20640 }, { "epoch": 2.67, "learning_rate": 6.187242995122885e-08, "logits/chosen": -2.7087655067443848, "logits/rejected": -2.66461443901062, "logps/chosen": -633.8018798828125, "logps/rejected": -543.4142456054688, "loss": 0.7354, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -2.4222702980041504, "rewards/margins": 0.673761248588562, "rewards/rejected": -3.096031665802002, "step": 20650 }, { "epoch": 2.67, "learning_rate": 6.163335564693507e-08, "logits/chosen": -2.673532009124756, "logits/rejected": -2.6470654010772705, "logps/chosen": -442.94915771484375, "logps/rejected": -385.58392333984375, "loss": 0.5545, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0573153495788574, "rewards/margins": 0.8127815127372742, "rewards/rejected": -2.8700966835021973, "step": 20660 }, { "epoch": 2.67, "learning_rate": 6.139428134264129e-08, "logits/chosen": -2.7198259830474854, "logits/rejected": -2.639678955078125, "logps/chosen": -503.0572204589844, "logps/rejected": -455.56146240234375, "loss": 0.6256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3407952785491943, "rewards/margins": 0.7527520060539246, "rewards/rejected": -3.0935473442077637, "step": 20670 }, { "epoch": 2.67, "learning_rate": 6.115520703834751e-08, "logits/chosen": -2.737208604812622, "logits/rejected": -2.5758800506591797, "logps/chosen": -646.6809692382812, "logps/rejected": -482.4691467285156, "loss": 0.5306, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2277088165283203, "rewards/margins": 0.9166023135185242, "rewards/rejected": -3.14431095123291, "step": 20680 }, { "epoch": 2.67, "learning_rate": 6.091613273405374e-08, "logits/chosen": -2.788022994995117, "logits/rejected": -2.681267023086548, "logps/chosen": -578.447998046875, "logps/rejected": -442.546875, "loss": 0.5384, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2716500759124756, "rewards/margins": 1.0091962814331055, "rewards/rejected": -3.280846357345581, "step": 20690 }, { "epoch": 2.67, "learning_rate": 6.067705842975997e-08, "logits/chosen": -2.721733570098877, "logits/rejected": -2.65287184715271, "logps/chosen": -514.4205322265625, "logps/rejected": -415.262451171875, "loss": 0.4622, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1489052772521973, "rewards/margins": 1.1582605838775635, "rewards/rejected": -3.307166337966919, "step": 20700 }, { "epoch": 2.67, "learning_rate": 6.04379841254662e-08, "logits/chosen": -2.7277352809906006, "logits/rejected": -2.584728956222534, "logps/chosen": -568.3204345703125, "logps/rejected": -432.6529235839844, "loss": 0.4409, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9341728687286377, "rewards/margins": 1.085858941078186, "rewards/rejected": -3.020031690597534, "step": 20710 }, { "epoch": 2.67, "learning_rate": 6.019890982117242e-08, "logits/chosen": -2.709693193435669, "logits/rejected": -2.601060628890991, "logps/chosen": -607.7329711914062, "logps/rejected": -458.6553649902344, "loss": 0.5841, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2203707695007324, "rewards/margins": 0.9767897725105286, "rewards/rejected": -3.197160482406616, "step": 20720 }, { "epoch": 2.68, "learning_rate": 5.995983551687864e-08, "logits/chosen": -2.744611978530884, "logits/rejected": -2.7213430404663086, "logps/chosen": -508.63800048828125, "logps/rejected": -448.96875, "loss": 0.6691, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.373175859451294, "rewards/margins": 0.7623414993286133, "rewards/rejected": -3.1355173587799072, "step": 20730 }, { "epoch": 2.68, "learning_rate": 5.972076121258486e-08, "logits/chosen": -2.8121278285980225, "logits/rejected": -2.630261182785034, "logps/chosen": -583.5321044921875, "logps/rejected": -393.4314270019531, "loss": 0.5152, "rewards/accuracies": 0.75, "rewards/chosen": -2.034723997116089, "rewards/margins": 0.9728208780288696, "rewards/rejected": -3.007544755935669, "step": 20740 }, { "epoch": 2.68, "learning_rate": 5.948168690829109e-08, "logits/chosen": -2.813143253326416, "logits/rejected": -2.6987690925598145, "logps/chosen": -533.2462768554688, "logps/rejected": -446.8487243652344, "loss": 0.5315, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0787737369537354, "rewards/margins": 0.7918688654899597, "rewards/rejected": -2.87064266204834, "step": 20750 }, { "epoch": 2.68, "learning_rate": 5.924261260399732e-08, "logits/chosen": -2.846106767654419, "logits/rejected": -2.6263794898986816, "logps/chosen": -589.1510620117188, "logps/rejected": -406.37835693359375, "loss": 0.5255, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1219074726104736, "rewards/margins": 1.0264341831207275, "rewards/rejected": -3.148341655731201, "step": 20760 }, { "epoch": 2.68, "learning_rate": 5.9003538299703545e-08, "logits/chosen": -2.7400481700897217, "logits/rejected": -2.588573694229126, "logps/chosen": -489.82550048828125, "logps/rejected": -413.10394287109375, "loss": 0.4999, "rewards/accuracies": 0.75, "rewards/chosen": -1.9810489416122437, "rewards/margins": 0.9668706655502319, "rewards/rejected": -2.9479196071624756, "step": 20770 }, { "epoch": 2.68, "learning_rate": 5.876446399540977e-08, "logits/chosen": -2.675541639328003, "logits/rejected": -2.6199092864990234, "logps/chosen": -608.2750854492188, "logps/rejected": -506.7250061035156, "loss": 0.6099, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1981558799743652, "rewards/margins": 0.7831908464431763, "rewards/rejected": -2.981346845626831, "step": 20780 }, { "epoch": 2.68, "learning_rate": 5.8525389691116e-08, "logits/chosen": -2.6780617237091064, "logits/rejected": -2.598933696746826, "logps/chosen": -601.1307373046875, "logps/rejected": -464.4645080566406, "loss": 0.5259, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2667903900146484, "rewards/margins": 1.0080795288085938, "rewards/rejected": -3.274869918823242, "step": 20790 }, { "epoch": 2.69, "learning_rate": 5.828631538682222e-08, "logits/chosen": -2.796125888824463, "logits/rejected": -2.6495745182037354, "logps/chosen": -542.8046264648438, "logps/rejected": -404.8467712402344, "loss": 0.4915, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0429880619049072, "rewards/margins": 0.986619770526886, "rewards/rejected": -3.0296082496643066, "step": 20800 }, { "epoch": 2.69, "learning_rate": 5.804724108252845e-08, "logits/chosen": -2.7537131309509277, "logits/rejected": -2.631397008895874, "logps/chosen": -573.7894287109375, "logps/rejected": -462.89324951171875, "loss": 0.5859, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0336239337921143, "rewards/margins": 0.8789612650871277, "rewards/rejected": -2.9125852584838867, "step": 20810 }, { "epoch": 2.69, "learning_rate": 5.780816677823467e-08, "logits/chosen": -2.812983751296997, "logits/rejected": -2.7151684761047363, "logps/chosen": -501.3306579589844, "logps/rejected": -422.557373046875, "loss": 0.5447, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1311607360839844, "rewards/margins": 0.879931628704071, "rewards/rejected": -3.011091947555542, "step": 20820 }, { "epoch": 2.69, "learning_rate": 5.75690924739409e-08, "logits/chosen": -2.745471477508545, "logits/rejected": -2.660393476486206, "logps/chosen": -496.7964782714844, "logps/rejected": -375.52508544921875, "loss": 0.6427, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2263922691345215, "rewards/margins": 0.6109119653701782, "rewards/rejected": -2.837303876876831, "step": 20830 }, { "epoch": 2.69, "learning_rate": 5.7330018169647124e-08, "logits/chosen": -2.845616579055786, "logits/rejected": -2.722299814224243, "logps/chosen": -584.4522705078125, "logps/rejected": -454.0224609375, "loss": 0.4282, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9734386205673218, "rewards/margins": 1.274522066116333, "rewards/rejected": -3.2479605674743652, "step": 20840 }, { "epoch": 2.69, "learning_rate": 5.7090943865353347e-08, "logits/chosen": -2.8134350776672363, "logits/rejected": -2.634732723236084, "logps/chosen": -650.6622924804688, "logps/rejected": -499.1244201660156, "loss": 0.5527, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0475645065307617, "rewards/margins": 0.8979727029800415, "rewards/rejected": -2.9455370903015137, "step": 20850 }, { "epoch": 2.69, "learning_rate": 5.6851869561059576e-08, "logits/chosen": -2.9011168479919434, "logits/rejected": -2.7518956661224365, "logps/chosen": -539.8963012695312, "logps/rejected": -423.0028381347656, "loss": 0.6598, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2973456382751465, "rewards/margins": 0.6912959814071655, "rewards/rejected": -2.9886417388916016, "step": 20860 }, { "epoch": 2.69, "learning_rate": 5.66127952567658e-08, "logits/chosen": -2.790095567703247, "logits/rejected": -2.723001480102539, "logps/chosen": -574.6844482421875, "logps/rejected": -531.5391845703125, "loss": 0.5653, "rewards/accuracies": 0.75, "rewards/chosen": -2.139416217803955, "rewards/margins": 1.0182627439498901, "rewards/rejected": -3.1576790809631348, "step": 20870 }, { "epoch": 2.7, "learning_rate": 5.637372095247202e-08, "logits/chosen": -2.7388195991516113, "logits/rejected": -2.6551918983459473, "logps/chosen": -477.3580017089844, "logps/rejected": -398.4288635253906, "loss": 0.6074, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2073702812194824, "rewards/margins": 0.6763095855712891, "rewards/rejected": -2.8836798667907715, "step": 20880 }, { "epoch": 2.7, "learning_rate": 5.613464664817825e-08, "logits/chosen": -2.7465245723724365, "logits/rejected": -2.682796001434326, "logps/chosen": -645.2154541015625, "logps/rejected": -475.0125427246094, "loss": 0.5756, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2260959148406982, "rewards/margins": 0.8222591280937195, "rewards/rejected": -3.0483548641204834, "step": 20890 }, { "epoch": 2.7, "learning_rate": 5.589557234388448e-08, "logits/chosen": -2.840226173400879, "logits/rejected": -2.728069543838501, "logps/chosen": -630.8211669921875, "logps/rejected": -458.32354736328125, "loss": 0.5474, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1483569145202637, "rewards/margins": 0.8485687971115112, "rewards/rejected": -2.9969255924224854, "step": 20900 }, { "epoch": 2.7, "learning_rate": 5.56564980395907e-08, "logits/chosen": -2.7356324195861816, "logits/rejected": -2.623387336730957, "logps/chosen": -591.02392578125, "logps/rejected": -430.11248779296875, "loss": 0.4936, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.199228286743164, "rewards/margins": 0.9797752499580383, "rewards/rejected": -3.1790032386779785, "step": 20910 }, { "epoch": 2.7, "learning_rate": 5.5417423735296925e-08, "logits/chosen": -2.8708131313323975, "logits/rejected": -2.732114553451538, "logps/chosen": -527.9807739257812, "logps/rejected": -378.78009033203125, "loss": 0.5956, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2249886989593506, "rewards/margins": 0.7871176600456238, "rewards/rejected": -3.012106418609619, "step": 20920 }, { "epoch": 2.7, "learning_rate": 5.5178349431003155e-08, "logits/chosen": -2.785789966583252, "logits/rejected": -2.6524062156677246, "logps/chosen": -557.2738647460938, "logps/rejected": -463.484375, "loss": 0.5591, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1416049003601074, "rewards/margins": 0.8824504613876343, "rewards/rejected": -3.024055242538452, "step": 20930 }, { "epoch": 2.7, "learning_rate": 5.493927512670938e-08, "logits/chosen": -2.7724008560180664, "logits/rejected": -2.7145979404449463, "logps/chosen": -493.4187927246094, "logps/rejected": -442.60675048828125, "loss": 0.5571, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1085867881774902, "rewards/margins": 0.8615533113479614, "rewards/rejected": -2.9701404571533203, "step": 20940 }, { "epoch": 2.7, "learning_rate": 5.47002008224156e-08, "logits/chosen": -2.675297260284424, "logits/rejected": -2.6415131092071533, "logps/chosen": -558.4443359375, "logps/rejected": -461.90753173828125, "loss": 0.6467, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.177791118621826, "rewards/margins": 0.6180279850959778, "rewards/rejected": -2.79581880569458, "step": 20950 }, { "epoch": 2.71, "learning_rate": 5.4461126518121836e-08, "logits/chosen": -2.6978507041931152, "logits/rejected": -2.7101266384124756, "logps/chosen": -532.1504516601562, "logps/rejected": -453.55596923828125, "loss": 0.6274, "rewards/accuracies": 0.6875, "rewards/chosen": -2.017136812210083, "rewards/margins": 0.7494871020317078, "rewards/rejected": -2.7666239738464355, "step": 20960 }, { "epoch": 2.71, "learning_rate": 5.422205221382806e-08, "logits/chosen": -2.6606853008270264, "logits/rejected": -2.5534942150115967, "logps/chosen": -548.7764282226562, "logps/rejected": -419.83685302734375, "loss": 0.5394, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9725275039672852, "rewards/margins": 0.9792994260787964, "rewards/rejected": -2.951827049255371, "step": 20970 }, { "epoch": 2.71, "learning_rate": 5.398297790953428e-08, "logits/chosen": -2.6665053367614746, "logits/rejected": -2.579775810241699, "logps/chosen": -621.43896484375, "logps/rejected": -517.3624877929688, "loss": 0.5686, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.9856140613555908, "rewards/margins": 0.8391572833061218, "rewards/rejected": -2.8247714042663574, "step": 20980 }, { "epoch": 2.71, "learning_rate": 5.3743903605240504e-08, "logits/chosen": -2.6443111896514893, "logits/rejected": -2.5398664474487305, "logps/chosen": -592.0619506835938, "logps/rejected": -436.65301513671875, "loss": 0.5359, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2007243633270264, "rewards/margins": 0.8338476419448853, "rewards/rejected": -3.034572124481201, "step": 20990 }, { "epoch": 2.71, "learning_rate": 5.350482930094673e-08, "logits/chosen": -2.7433571815490723, "logits/rejected": -2.607135772705078, "logps/chosen": -592.9069213867188, "logps/rejected": -424.14532470703125, "loss": 0.5467, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2487146854400635, "rewards/margins": 0.7909637093544006, "rewards/rejected": -3.0396780967712402, "step": 21000 }, { "epoch": 2.71, "eval_logits/chosen": -3.095684051513672, "eval_logits/rejected": -3.0487513542175293, "eval_logps/chosen": -541.3981323242188, "eval_logps/rejected": -421.7609558105469, "eval_loss": 0.6156454086303711, "eval_rewards/accuracies": 0.6779999732971191, "eval_rewards/chosen": -0.9578377604484558, "eval_rewards/margins": 1.1253948211669922, "eval_rewards/rejected": -2.0832326412200928, "eval_runtime": 287.7925, "eval_samples_per_second": 6.949, "eval_steps_per_second": 3.475, "step": 21000 }, { "epoch": 2.71, "learning_rate": 5.3265754996652956e-08, "logits/chosen": -2.6672141551971436, "logits/rejected": -2.6782822608947754, "logps/chosen": -428.49566650390625, "logps/rejected": -387.0015869140625, "loss": 0.5878, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1474251747131348, "rewards/margins": 0.7178341746330261, "rewards/rejected": -2.8652591705322266, "step": 21010 }, { "epoch": 2.71, "learning_rate": 5.302668069235918e-08, "logits/chosen": -2.753941774368286, "logits/rejected": -2.6817872524261475, "logps/chosen": -608.6551513671875, "logps/rejected": -458.0011291503906, "loss": 0.4834, "rewards/accuracies": 0.75, "rewards/chosen": -2.1836514472961426, "rewards/margins": 1.0523579120635986, "rewards/rejected": -3.236009120941162, "step": 21020 }, { "epoch": 2.71, "learning_rate": 5.2787606388065415e-08, "logits/chosen": -2.756244659423828, "logits/rejected": -2.68424129486084, "logps/chosen": -519.7282104492188, "logps/rejected": -469.91937255859375, "loss": 0.6412, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.409245014190674, "rewards/margins": 0.7120423316955566, "rewards/rejected": -3.1212871074676514, "step": 21030 }, { "epoch": 2.72, "learning_rate": 5.254853208377164e-08, "logits/chosen": -2.713024854660034, "logits/rejected": -2.5437207221984863, "logps/chosen": -572.6878051757812, "logps/rejected": -456.28570556640625, "loss": 0.5789, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0962300300598145, "rewards/margins": 0.7682141065597534, "rewards/rejected": -2.8644440174102783, "step": 21040 }, { "epoch": 2.72, "learning_rate": 5.230945777947786e-08, "logits/chosen": -2.775848150253296, "logits/rejected": -2.6453163623809814, "logps/chosen": -610.787353515625, "logps/rejected": -444.56201171875, "loss": 0.6178, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.254392623901367, "rewards/margins": 0.7245239019393921, "rewards/rejected": -2.9789161682128906, "step": 21050 }, { "epoch": 2.72, "learning_rate": 5.207038347518408e-08, "logits/chosen": -2.6444830894470215, "logits/rejected": -2.5414817333221436, "logps/chosen": -539.5587768554688, "logps/rejected": -407.4660949707031, "loss": 0.5238, "rewards/accuracies": 0.75, "rewards/chosen": -2.0621442794799805, "rewards/margins": 1.0733845233917236, "rewards/rejected": -3.135528564453125, "step": 21060 }, { "epoch": 2.72, "learning_rate": 5.183130917089031e-08, "logits/chosen": -2.705252170562744, "logits/rejected": -2.6840975284576416, "logps/chosen": -514.2559204101562, "logps/rejected": -437.455322265625, "loss": 0.5664, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.128438711166382, "rewards/margins": 0.8526857495307922, "rewards/rejected": -2.9811244010925293, "step": 21070 }, { "epoch": 2.72, "learning_rate": 5.1592234866596535e-08, "logits/chosen": -2.732753276824951, "logits/rejected": -2.655397891998291, "logps/chosen": -598.7138061523438, "logps/rejected": -441.22979736328125, "loss": 0.4961, "rewards/accuracies": 0.75, "rewards/chosen": -2.139415979385376, "rewards/margins": 1.0830332040786743, "rewards/rejected": -3.222449541091919, "step": 21080 }, { "epoch": 2.72, "learning_rate": 5.135316056230276e-08, "logits/chosen": -2.6835689544677734, "logits/rejected": -2.613673448562622, "logps/chosen": -588.1923217773438, "logps/rejected": -468.261962890625, "loss": 0.4312, "rewards/accuracies": 0.8125, "rewards/chosen": -2.241825580596924, "rewards/margins": 1.2878003120422363, "rewards/rejected": -3.529625654220581, "step": 21090 }, { "epoch": 2.72, "learning_rate": 5.111408625800899e-08, "logits/chosen": -2.754114866256714, "logits/rejected": -2.6497366428375244, "logps/chosen": -571.2352294921875, "logps/rejected": -456.37249755859375, "loss": 0.5938, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.241293430328369, "rewards/margins": 0.9558942914009094, "rewards/rejected": -3.197187900543213, "step": 21100 }, { "epoch": 2.73, "learning_rate": 5.0875011953715216e-08, "logits/chosen": -2.79463529586792, "logits/rejected": -2.7496774196624756, "logps/chosen": -545.8467407226562, "logps/rejected": -424.03375244140625, "loss": 0.5473, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.8634954690933228, "rewards/margins": 0.9445453882217407, "rewards/rejected": -2.8080408573150635, "step": 21110 }, { "epoch": 2.73, "learning_rate": 5.063593764942144e-08, "logits/chosen": -2.7113919258117676, "logits/rejected": -2.5412800312042236, "logps/chosen": -663.9293212890625, "logps/rejected": -470.56817626953125, "loss": 0.6248, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.3067891597747803, "rewards/margins": 0.8619557619094849, "rewards/rejected": -3.1687445640563965, "step": 21120 }, { "epoch": 2.73, "learning_rate": 5.039686334512766e-08, "logits/chosen": -2.781095027923584, "logits/rejected": -2.6366777420043945, "logps/chosen": -555.8685913085938, "logps/rejected": -407.17236328125, "loss": 0.5808, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0187220573425293, "rewards/margins": 0.8545681238174438, "rewards/rejected": -2.8732900619506836, "step": 21130 }, { "epoch": 2.73, "learning_rate": 5.015778904083389e-08, "logits/chosen": -2.6885335445404053, "logits/rejected": -2.5691885948181152, "logps/chosen": -601.1829833984375, "logps/rejected": -488.587158203125, "loss": 0.5307, "rewards/accuracies": 0.75, "rewards/chosen": -2.21187686920166, "rewards/margins": 0.9658806920051575, "rewards/rejected": -3.177757501602173, "step": 21140 }, { "epoch": 2.73, "learning_rate": 4.991871473654011e-08, "logits/chosen": -2.7249927520751953, "logits/rejected": -2.6745083332061768, "logps/chosen": -623.9024658203125, "logps/rejected": -500.51104736328125, "loss": 0.4495, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.0785598754882812, "rewards/margins": 1.137793779373169, "rewards/rejected": -3.2163538932800293, "step": 21150 }, { "epoch": 2.73, "learning_rate": 4.9679640432246336e-08, "logits/chosen": -2.687591552734375, "logits/rejected": -2.609360694885254, "logps/chosen": -585.6786499023438, "logps/rejected": -478.0289611816406, "loss": 0.5168, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0133249759674072, "rewards/margins": 1.0080913305282593, "rewards/rejected": -3.021416425704956, "step": 21160 }, { "epoch": 2.73, "learning_rate": 4.9440566127952565e-08, "logits/chosen": -2.759387254714966, "logits/rejected": -2.6928725242614746, "logps/chosen": -509.46331787109375, "logps/rejected": -434.76788330078125, "loss": 0.5522, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9124748706817627, "rewards/margins": 1.0160701274871826, "rewards/rejected": -2.928544759750366, "step": 21170 }, { "epoch": 2.73, "learning_rate": 4.9201491823658794e-08, "logits/chosen": -2.7640252113342285, "logits/rejected": -2.6503782272338867, "logps/chosen": -508.5474548339844, "logps/rejected": -380.4116516113281, "loss": 0.7044, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.3729958534240723, "rewards/margins": 0.574427604675293, "rewards/rejected": -2.9474234580993652, "step": 21180 }, { "epoch": 2.74, "learning_rate": 4.896241751936502e-08, "logits/chosen": -2.802640438079834, "logits/rejected": -2.6859021186828613, "logps/chosen": -713.1144409179688, "logps/rejected": -516.4920654296875, "loss": 0.5238, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3710460662841797, "rewards/margins": 1.0251394510269165, "rewards/rejected": -3.3961849212646484, "step": 21190 }, { "epoch": 2.74, "learning_rate": 4.872334321507124e-08, "logits/chosen": -2.845740795135498, "logits/rejected": -2.7168726921081543, "logps/chosen": -547.513671875, "logps/rejected": -424.12823486328125, "loss": 0.6296, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.209787368774414, "rewards/margins": 0.7491225004196167, "rewards/rejected": -2.958909511566162, "step": 21200 }, { "epoch": 2.74, "learning_rate": 4.848426891077747e-08, "logits/chosen": -2.734006881713867, "logits/rejected": -2.63006591796875, "logps/chosen": -519.8920288085938, "logps/rejected": -440.6629943847656, "loss": 0.499, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9943230152130127, "rewards/margins": 1.0121889114379883, "rewards/rejected": -3.006511688232422, "step": 21210 }, { "epoch": 2.74, "learning_rate": 4.824519460648369e-08, "logits/chosen": -2.7330334186553955, "logits/rejected": -2.590709686279297, "logps/chosen": -501.5227966308594, "logps/rejected": -373.5036315917969, "loss": 0.6081, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.3338727951049805, "rewards/margins": 0.8353793025016785, "rewards/rejected": -3.1692521572113037, "step": 21220 }, { "epoch": 2.74, "learning_rate": 4.8006120302189915e-08, "logits/chosen": -2.69099760055542, "logits/rejected": -2.635894536972046, "logps/chosen": -559.5933227539062, "logps/rejected": -397.25604248046875, "loss": 0.6968, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2548766136169434, "rewards/margins": 0.7350447177886963, "rewards/rejected": -2.9899210929870605, "step": 21230 }, { "epoch": 2.74, "learning_rate": 4.7767045997896144e-08, "logits/chosen": -2.7682106494903564, "logits/rejected": -2.6486504077911377, "logps/chosen": -611.7486572265625, "logps/rejected": -503.5856018066406, "loss": 0.5106, "rewards/accuracies": 0.75, "rewards/chosen": -2.0679476261138916, "rewards/margins": 1.025924563407898, "rewards/rejected": -3.093872308731079, "step": 21240 }, { "epoch": 2.74, "learning_rate": 4.752797169360237e-08, "logits/chosen": -2.667440176010132, "logits/rejected": -2.635460615158081, "logps/chosen": -470.9149475097656, "logps/rejected": -420.7250061035156, "loss": 0.5438, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0345518589019775, "rewards/margins": 0.7702971696853638, "rewards/rejected": -2.8048486709594727, "step": 21250 }, { "epoch": 2.74, "learning_rate": 4.7288897389308596e-08, "logits/chosen": -2.723443031311035, "logits/rejected": -2.6027002334594727, "logps/chosen": -593.1102294921875, "logps/rejected": -506.51806640625, "loss": 0.4478, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.155189037322998, "rewards/margins": 1.1691124439239502, "rewards/rejected": -3.324301242828369, "step": 21260 }, { "epoch": 2.75, "learning_rate": 4.7049823085014825e-08, "logits/chosen": -2.7489914894104004, "logits/rejected": -2.5960583686828613, "logps/chosen": -555.0753784179688, "logps/rejected": -439.23712158203125, "loss": 0.43, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0950992107391357, "rewards/margins": 1.2190052270889282, "rewards/rejected": -3.3141040802001953, "step": 21270 }, { "epoch": 2.75, "learning_rate": 4.681074878072105e-08, "logits/chosen": -2.735377073287964, "logits/rejected": -2.6961379051208496, "logps/chosen": -458.9764709472656, "logps/rejected": -400.32781982421875, "loss": 0.582, "rewards/accuracies": 0.75, "rewards/chosen": -2.0804543495178223, "rewards/margins": 0.8378036618232727, "rewards/rejected": -2.9182581901550293, "step": 21280 }, { "epoch": 2.75, "learning_rate": 4.657167447642727e-08, "logits/chosen": -2.7689261436462402, "logits/rejected": -2.6311135292053223, "logps/chosen": -568.8585815429688, "logps/rejected": -456.680908203125, "loss": 0.5224, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.96181321144104, "rewards/margins": 0.905215859413147, "rewards/rejected": -2.8670291900634766, "step": 21290 }, { "epoch": 2.75, "learning_rate": 4.633260017213349e-08, "logits/chosen": -2.7341055870056152, "logits/rejected": -2.663471221923828, "logps/chosen": -509.9290466308594, "logps/rejected": -406.6053466796875, "loss": 0.4875, "rewards/accuracies": 0.75, "rewards/chosen": -2.0399372577667236, "rewards/margins": 0.8800414800643921, "rewards/rejected": -2.919978618621826, "step": 21300 }, { "epoch": 2.75, "learning_rate": 4.609352586783972e-08, "logits/chosen": -2.6847586631774902, "logits/rejected": -2.592578172683716, "logps/chosen": -583.33935546875, "logps/rejected": -398.4449768066406, "loss": 0.5172, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9561344385147095, "rewards/margins": 0.9285122156143188, "rewards/rejected": -2.8846466541290283, "step": 21310 }, { "epoch": 2.75, "learning_rate": 4.5854451563545945e-08, "logits/chosen": -2.8738579750061035, "logits/rejected": -2.757110595703125, "logps/chosen": -643.6915283203125, "logps/rejected": -450.02056884765625, "loss": 0.684, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2740657329559326, "rewards/margins": 0.7456506490707397, "rewards/rejected": -3.019716501235962, "step": 21320 }, { "epoch": 2.75, "learning_rate": 4.5615377259252174e-08, "logits/chosen": -2.803234815597534, "logits/rejected": -2.6712231636047363, "logps/chosen": -584.456298828125, "logps/rejected": -468.55560302734375, "loss": 0.5611, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.130556106567383, "rewards/margins": 0.836251437664032, "rewards/rejected": -2.9668071269989014, "step": 21330 }, { "epoch": 2.75, "learning_rate": 4.5376302954958404e-08, "logits/chosen": -2.7973380088806152, "logits/rejected": -2.7191007137298584, "logps/chosen": -592.311279296875, "logps/rejected": -500.14508056640625, "loss": 0.5248, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.072017192840576, "rewards/margins": 0.9956508874893188, "rewards/rejected": -3.0676681995391846, "step": 21340 }, { "epoch": 2.76, "learning_rate": 4.5137228650664626e-08, "logits/chosen": -2.8400356769561768, "logits/rejected": -2.7942287921905518, "logps/chosen": -491.01788330078125, "logps/rejected": -429.37640380859375, "loss": 0.523, "rewards/accuracies": 0.75, "rewards/chosen": -1.9472370147705078, "rewards/margins": 0.9626928567886353, "rewards/rejected": -2.9099299907684326, "step": 21350 }, { "epoch": 2.76, "learning_rate": 4.489815434637085e-08, "logits/chosen": -2.732499361038208, "logits/rejected": -2.58589243888855, "logps/chosen": -554.9752807617188, "logps/rejected": -397.0893859863281, "loss": 0.5706, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2619194984436035, "rewards/margins": 0.7188488245010376, "rewards/rejected": -2.9807682037353516, "step": 21360 }, { "epoch": 2.76, "learning_rate": 4.465908004207707e-08, "logits/chosen": -2.7033393383026123, "logits/rejected": -2.605858325958252, "logps/chosen": -549.54150390625, "logps/rejected": -469.97381591796875, "loss": 0.5018, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.080881118774414, "rewards/margins": 1.0473824739456177, "rewards/rejected": -3.128263473510742, "step": 21370 }, { "epoch": 2.76, "learning_rate": 4.44200057377833e-08, "logits/chosen": -2.751629114151001, "logits/rejected": -2.623706102371216, "logps/chosen": -568.0485229492188, "logps/rejected": -498.69647216796875, "loss": 0.57, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.072007656097412, "rewards/margins": 0.6930922269821167, "rewards/rejected": -2.7651000022888184, "step": 21380 }, { "epoch": 2.76, "learning_rate": 4.4180931433489524e-08, "logits/chosen": -2.760624885559082, "logits/rejected": -2.671668291091919, "logps/chosen": -597.6299438476562, "logps/rejected": -484.2980041503906, "loss": 0.5647, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1232597827911377, "rewards/margins": 0.9237775802612305, "rewards/rejected": -3.0470376014709473, "step": 21390 }, { "epoch": 2.76, "learning_rate": 4.394185712919575e-08, "logits/chosen": -2.694864511489868, "logits/rejected": -2.609125852584839, "logps/chosen": -524.2666625976562, "logps/rejected": -446.1717834472656, "loss": 0.611, "rewards/accuracies": 0.75, "rewards/chosen": -2.417829990386963, "rewards/margins": 0.8781687021255493, "rewards/rejected": -3.2959988117218018, "step": 21400 }, { "epoch": 2.76, "learning_rate": 4.370278282490198e-08, "logits/chosen": -2.7819385528564453, "logits/rejected": -2.71964430809021, "logps/chosen": -574.4152221679688, "logps/rejected": -484.91046142578125, "loss": 0.5867, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.185746908187866, "rewards/margins": 0.6776161789894104, "rewards/rejected": -2.8633627891540527, "step": 21410 }, { "epoch": 2.77, "learning_rate": 4.3463708520608205e-08, "logits/chosen": -2.610673427581787, "logits/rejected": -2.549105167388916, "logps/chosen": -591.615478515625, "logps/rejected": -474.7015686035156, "loss": 0.6167, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3139889240264893, "rewards/margins": 0.7721632719039917, "rewards/rejected": -3.0861525535583496, "step": 21420 }, { "epoch": 2.77, "learning_rate": 4.322463421631443e-08, "logits/chosen": -2.7614738941192627, "logits/rejected": -2.7414538860321045, "logps/chosen": -485.76519775390625, "logps/rejected": -461.458740234375, "loss": 0.635, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.447263240814209, "rewards/margins": 0.6107288599014282, "rewards/rejected": -3.0579917430877686, "step": 21430 }, { "epoch": 2.77, "learning_rate": 4.298555991202065e-08, "logits/chosen": -2.667940855026245, "logits/rejected": -2.6007308959960938, "logps/chosen": -513.6617431640625, "logps/rejected": -436.9729919433594, "loss": 0.4875, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.007754325866699, "rewards/margins": 0.906955361366272, "rewards/rejected": -2.9147098064422607, "step": 21440 }, { "epoch": 2.77, "learning_rate": 4.274648560772688e-08, "logits/chosen": -2.759840488433838, "logits/rejected": -2.648364782333374, "logps/chosen": -516.3929443359375, "logps/rejected": -409.6922302246094, "loss": 0.6224, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.432508707046509, "rewards/margins": 0.5610894560813904, "rewards/rejected": -2.993598222732544, "step": 21450 }, { "epoch": 2.77, "learning_rate": 4.25074113034331e-08, "logits/chosen": -2.7230896949768066, "logits/rejected": -2.592761754989624, "logps/chosen": -601.0286865234375, "logps/rejected": -432.390869140625, "loss": 0.6871, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.448432683944702, "rewards/margins": 0.6196225881576538, "rewards/rejected": -3.0680556297302246, "step": 21460 }, { "epoch": 2.77, "learning_rate": 4.226833699913933e-08, "logits/chosen": -2.7112374305725098, "logits/rejected": -2.6866841316223145, "logps/chosen": -498.321044921875, "logps/rejected": -452.35382080078125, "loss": 0.4819, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2036964893341064, "rewards/margins": 1.0479838848114014, "rewards/rejected": -3.2516798973083496, "step": 21470 }, { "epoch": 2.77, "learning_rate": 4.202926269484556e-08, "logits/chosen": -2.749514579772949, "logits/rejected": -2.6539394855499268, "logps/chosen": -559.8648681640625, "logps/rejected": -457.313232421875, "loss": 0.5416, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1152684688568115, "rewards/margins": 0.9188674688339233, "rewards/rejected": -3.0341362953186035, "step": 21480 }, { "epoch": 2.77, "learning_rate": 4.1790188390551784e-08, "logits/chosen": -2.8102245330810547, "logits/rejected": -2.614372730255127, "logps/chosen": -563.4444580078125, "logps/rejected": -384.89984130859375, "loss": 0.5288, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0544345378875732, "rewards/margins": 1.0302765369415283, "rewards/rejected": -3.0847108364105225, "step": 21490 }, { "epoch": 2.78, "learning_rate": 4.1551114086258006e-08, "logits/chosen": -2.8960914611816406, "logits/rejected": -2.7275376319885254, "logps/chosen": -472.61395263671875, "logps/rejected": -339.71905517578125, "loss": 0.5029, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.076967716217041, "rewards/margins": 0.7841293215751648, "rewards/rejected": -2.8610968589782715, "step": 21500 }, { "epoch": 2.78, "learning_rate": 4.131203978196423e-08, "logits/chosen": -2.680697441101074, "logits/rejected": -2.581803321838379, "logps/chosen": -539.7433471679688, "logps/rejected": -421.0703125, "loss": 0.599, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1264069080352783, "rewards/margins": 0.7057081460952759, "rewards/rejected": -2.8321146965026855, "step": 21510 }, { "epoch": 2.78, "learning_rate": 4.107296547767046e-08, "logits/chosen": -2.784619092941284, "logits/rejected": -2.693969964981079, "logps/chosen": -527.597900390625, "logps/rejected": -405.59417724609375, "loss": 0.413, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0479869842529297, "rewards/margins": 1.2374045848846436, "rewards/rejected": -3.2853915691375732, "step": 21520 }, { "epoch": 2.78, "learning_rate": 4.083389117337668e-08, "logits/chosen": -2.8182201385498047, "logits/rejected": -2.767199993133545, "logps/chosen": -537.49072265625, "logps/rejected": -458.1419982910156, "loss": 0.6267, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.980830192565918, "rewards/margins": 0.7947130799293518, "rewards/rejected": -2.775543451309204, "step": 21530 }, { "epoch": 2.78, "learning_rate": 4.0594816869082904e-08, "logits/chosen": -2.7396175861358643, "logits/rejected": -2.6506357192993164, "logps/chosen": -515.2398681640625, "logps/rejected": -383.19036865234375, "loss": 0.4966, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.010211944580078, "rewards/margins": 0.9917654991149902, "rewards/rejected": -3.0019774436950684, "step": 21540 }, { "epoch": 2.78, "learning_rate": 4.035574256478914e-08, "logits/chosen": -2.8209686279296875, "logits/rejected": -2.6702942848205566, "logps/chosen": -538.61083984375, "logps/rejected": -402.03204345703125, "loss": 0.5239, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0954456329345703, "rewards/margins": 0.9372965693473816, "rewards/rejected": -3.032742738723755, "step": 21550 }, { "epoch": 2.78, "learning_rate": 4.011666826049536e-08, "logits/chosen": -2.8134636878967285, "logits/rejected": -2.6993823051452637, "logps/chosen": -571.7305908203125, "logps/rejected": -400.1029052734375, "loss": 0.5234, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.999420404434204, "rewards/margins": 0.9481990933418274, "rewards/rejected": -2.9476194381713867, "step": 21560 }, { "epoch": 2.78, "learning_rate": 3.9877593956201585e-08, "logits/chosen": -2.6991801261901855, "logits/rejected": -2.5825085639953613, "logps/chosen": -646.5841064453125, "logps/rejected": -510.2052307128906, "loss": 0.7025, "rewards/accuracies": 0.625, "rewards/chosen": -2.4458868503570557, "rewards/margins": 0.5633024573326111, "rewards/rejected": -3.0091891288757324, "step": 21570 }, { "epoch": 2.79, "learning_rate": 3.963851965190781e-08, "logits/chosen": -2.6902308464050293, "logits/rejected": -2.6870856285095215, "logps/chosen": -490.4690856933594, "logps/rejected": -401.5585021972656, "loss": 0.6786, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.162412643432617, "rewards/margins": 0.5091531872749329, "rewards/rejected": -2.671565532684326, "step": 21580 }, { "epoch": 2.79, "learning_rate": 3.939944534761404e-08, "logits/chosen": -2.643575668334961, "logits/rejected": -2.587273359298706, "logps/chosen": -493.1355895996094, "logps/rejected": -428.3147888183594, "loss": 0.5161, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9901386499404907, "rewards/margins": 0.8933164477348328, "rewards/rejected": -2.883455276489258, "step": 21590 }, { "epoch": 2.79, "learning_rate": 3.916037104332026e-08, "logits/chosen": -2.8683290481567383, "logits/rejected": -2.6430513858795166, "logps/chosen": -557.0247802734375, "logps/rejected": -380.4261474609375, "loss": 0.5313, "rewards/accuracies": 0.75, "rewards/chosen": -2.0754783153533936, "rewards/margins": 0.8815975189208984, "rewards/rejected": -2.957075834274292, "step": 21600 }, { "epoch": 2.79, "learning_rate": 3.892129673902648e-08, "logits/chosen": -2.8653757572174072, "logits/rejected": -2.7579989433288574, "logps/chosen": -646.7388305664062, "logps/rejected": -496.6056213378906, "loss": 0.4702, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.210871934890747, "rewards/margins": 1.1028343439102173, "rewards/rejected": -3.313706159591675, "step": 21610 }, { "epoch": 2.79, "learning_rate": 3.868222243473272e-08, "logits/chosen": -2.660731077194214, "logits/rejected": -2.549304246902466, "logps/chosen": -584.2027587890625, "logps/rejected": -454.0039978027344, "loss": 0.6245, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.167471408843994, "rewards/margins": 0.7251186966896057, "rewards/rejected": -2.892589807510376, "step": 21620 }, { "epoch": 2.79, "learning_rate": 3.844314813043894e-08, "logits/chosen": -2.807565927505493, "logits/rejected": -2.6567306518554688, "logps/chosen": -591.2916259765625, "logps/rejected": -429.25421142578125, "loss": 0.5081, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.987784743309021, "rewards/margins": 0.9004994630813599, "rewards/rejected": -2.88828444480896, "step": 21630 }, { "epoch": 2.79, "learning_rate": 3.8204073826145164e-08, "logits/chosen": -2.8546230792999268, "logits/rejected": -2.7245235443115234, "logps/chosen": -553.2108154296875, "logps/rejected": -435.57257080078125, "loss": 0.5205, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.051305055618286, "rewards/margins": 0.9190382957458496, "rewards/rejected": -2.9703431129455566, "step": 21640 }, { "epoch": 2.79, "learning_rate": 3.796499952185139e-08, "logits/chosen": -2.7026290893554688, "logits/rejected": -2.622476100921631, "logps/chosen": -531.5872802734375, "logps/rejected": -405.1729736328125, "loss": 0.5863, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0756399631500244, "rewards/margins": 0.7139464616775513, "rewards/rejected": -2.789586305618286, "step": 21650 }, { "epoch": 2.8, "learning_rate": 3.7725925217557616e-08, "logits/chosen": -2.7953922748565674, "logits/rejected": -2.7000091075897217, "logps/chosen": -567.6405639648438, "logps/rejected": -405.66119384765625, "loss": 0.6545, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.228628158569336, "rewards/margins": 0.623184084892273, "rewards/rejected": -2.8518126010894775, "step": 21660 }, { "epoch": 2.8, "learning_rate": 3.748685091326384e-08, "logits/chosen": -2.730039119720459, "logits/rejected": -2.6700024604797363, "logps/chosen": -567.6444702148438, "logps/rejected": -448.27032470703125, "loss": 0.5487, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.1933062076568604, "rewards/margins": 0.8278648257255554, "rewards/rejected": -3.0211706161499023, "step": 21670 }, { "epoch": 2.8, "learning_rate": 3.724777660897006e-08, "logits/chosen": -2.68229079246521, "logits/rejected": -2.646972179412842, "logps/chosen": -579.8462524414062, "logps/rejected": -483.55975341796875, "loss": 0.5528, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1829135417938232, "rewards/margins": 0.8382862210273743, "rewards/rejected": -3.021200180053711, "step": 21680 }, { "epoch": 2.8, "learning_rate": 3.70087023046763e-08, "logits/chosen": -2.7795541286468506, "logits/rejected": -2.6636781692504883, "logps/chosen": -563.762451171875, "logps/rejected": -452.3440856933594, "loss": 0.5833, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2674553394317627, "rewards/margins": 0.7335184812545776, "rewards/rejected": -3.000974178314209, "step": 21690 }, { "epoch": 2.8, "learning_rate": 3.676962800038252e-08, "logits/chosen": -2.8189897537231445, "logits/rejected": -2.676356077194214, "logps/chosen": -610.4939575195312, "logps/rejected": -460.98565673828125, "loss": 0.5886, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.0750114917755127, "rewards/margins": 1.043499231338501, "rewards/rejected": -3.118511199951172, "step": 21700 }, { "epoch": 2.8, "learning_rate": 3.653055369608874e-08, "logits/chosen": -2.8344295024871826, "logits/rejected": -2.646240472793579, "logps/chosen": -559.7704467773438, "logps/rejected": -395.26123046875, "loss": 0.615, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.074414014816284, "rewards/margins": 0.730993390083313, "rewards/rejected": -2.8054072856903076, "step": 21710 }, { "epoch": 2.8, "learning_rate": 3.629147939179497e-08, "logits/chosen": -2.7491984367370605, "logits/rejected": -2.6440231800079346, "logps/chosen": -580.1607666015625, "logps/rejected": -436.67523193359375, "loss": 0.7267, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.10831880569458, "rewards/margins": 0.6767742037773132, "rewards/rejected": -2.785092830657959, "step": 21720 }, { "epoch": 2.81, "learning_rate": 3.6052405087501194e-08, "logits/chosen": -2.7871932983398438, "logits/rejected": -2.707390546798706, "logps/chosen": -611.74169921875, "logps/rejected": -469.4375915527344, "loss": 0.556, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0354318618774414, "rewards/margins": 0.9587257504463196, "rewards/rejected": -2.994157314300537, "step": 21730 }, { "epoch": 2.81, "learning_rate": 3.581333078320742e-08, "logits/chosen": -2.7771143913269043, "logits/rejected": -2.657731533050537, "logps/chosen": -616.1072998046875, "logps/rejected": -477.33978271484375, "loss": 0.5196, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0934672355651855, "rewards/margins": 0.8751052618026733, "rewards/rejected": -2.9685726165771484, "step": 21740 }, { "epoch": 2.81, "learning_rate": 3.557425647891364e-08, "logits/chosen": -2.646632432937622, "logits/rejected": -2.6114909648895264, "logps/chosen": -512.1536865234375, "logps/rejected": -403.3301086425781, "loss": 0.5594, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.2090296745300293, "rewards/margins": 0.9432616233825684, "rewards/rejected": -3.1522915363311768, "step": 21750 }, { "epoch": 2.81, "learning_rate": 3.533518217461987e-08, "logits/chosen": -2.637253999710083, "logits/rejected": -2.5638949871063232, "logps/chosen": -640.2151489257812, "logps/rejected": -495.4400329589844, "loss": 0.6101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0017056465148926, "rewards/margins": 1.0382990837097168, "rewards/rejected": -3.0400044918060303, "step": 21760 }, { "epoch": 2.81, "learning_rate": 3.50961078703261e-08, "logits/chosen": -2.693859577178955, "logits/rejected": -2.7278940677642822, "logps/chosen": -508.235595703125, "logps/rejected": -407.91412353515625, "loss": 0.4606, "rewards/accuracies": 0.8125, "rewards/chosen": -2.016421318054199, "rewards/margins": 0.9859814643859863, "rewards/rejected": -3.0024027824401855, "step": 21770 }, { "epoch": 2.81, "learning_rate": 3.485703356603232e-08, "logits/chosen": -2.7911105155944824, "logits/rejected": -2.7296924591064453, "logps/chosen": -539.1861572265625, "logps/rejected": -441.57489013671875, "loss": 0.6021, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.175570011138916, "rewards/margins": 0.7200402021408081, "rewards/rejected": -2.8956103324890137, "step": 21780 }, { "epoch": 2.81, "learning_rate": 3.461795926173855e-08, "logits/chosen": -2.7588276863098145, "logits/rejected": -2.697885036468506, "logps/chosen": -536.0328369140625, "logps/rejected": -434.6564025878906, "loss": 0.547, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0017001628875732, "rewards/margins": 0.9462043642997742, "rewards/rejected": -2.9479050636291504, "step": 21790 }, { "epoch": 2.81, "learning_rate": 3.437888495744477e-08, "logits/chosen": -2.7165074348449707, "logits/rejected": -2.590960741043091, "logps/chosen": -538.3447875976562, "logps/rejected": -408.6548767089844, "loss": 0.559, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.0948262214660645, "rewards/margins": 0.7793906331062317, "rewards/rejected": -2.8742167949676514, "step": 21800 }, { "epoch": 2.82, "learning_rate": 3.4139810653150996e-08, "logits/chosen": -2.836743116378784, "logits/rejected": -2.6892189979553223, "logps/chosen": -679.185302734375, "logps/rejected": -534.2720336914062, "loss": 0.5109, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1791439056396484, "rewards/margins": 1.1346940994262695, "rewards/rejected": -3.313838243484497, "step": 21810 }, { "epoch": 2.82, "learning_rate": 3.390073634885722e-08, "logits/chosen": -2.783402442932129, "logits/rejected": -2.629812717437744, "logps/chosen": -593.1658935546875, "logps/rejected": -444.46356201171875, "loss": 0.5157, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.0904078483581543, "rewards/margins": 0.9244979023933411, "rewards/rejected": -3.0149059295654297, "step": 21820 }, { "epoch": 2.82, "learning_rate": 3.366166204456345e-08, "logits/chosen": -2.688310384750366, "logits/rejected": -2.6654720306396484, "logps/chosen": -655.5106201171875, "logps/rejected": -550.3829345703125, "loss": 0.6376, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.285926103591919, "rewards/margins": 0.7727882266044617, "rewards/rejected": -3.0587143898010254, "step": 21830 }, { "epoch": 2.82, "learning_rate": 3.342258774026968e-08, "logits/chosen": -2.736732006072998, "logits/rejected": -2.6818292140960693, "logps/chosen": -541.7520751953125, "logps/rejected": -504.85723876953125, "loss": 0.5784, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2941195964813232, "rewards/margins": 0.754474937915802, "rewards/rejected": -3.0485947132110596, "step": 21840 }, { "epoch": 2.82, "learning_rate": 3.31835134359759e-08, "logits/chosen": -2.755852460861206, "logits/rejected": -2.5792078971862793, "logps/chosen": -619.4446411132812, "logps/rejected": -400.2401123046875, "loss": 0.5274, "rewards/accuracies": 0.75, "rewards/chosen": -2.1177732944488525, "rewards/margins": 0.9071356058120728, "rewards/rejected": -3.0249087810516357, "step": 21850 }, { "epoch": 2.82, "learning_rate": 3.294443913168213e-08, "logits/chosen": -2.738196849822998, "logits/rejected": -2.7087810039520264, "logps/chosen": -583.9071044921875, "logps/rejected": -465.009765625, "loss": 0.6621, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.200425624847412, "rewards/margins": 0.5988479852676392, "rewards/rejected": -2.799273729324341, "step": 21860 }, { "epoch": 2.82, "learning_rate": 3.270536482738835e-08, "logits/chosen": -2.7645835876464844, "logits/rejected": -2.7532410621643066, "logps/chosen": -496.404296875, "logps/rejected": -404.0264892578125, "loss": 0.6016, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.146214008331299, "rewards/margins": 0.803233802318573, "rewards/rejected": -2.9494478702545166, "step": 21870 }, { "epoch": 2.82, "learning_rate": 3.2466290523094574e-08, "logits/chosen": -2.7496371269226074, "logits/rejected": -2.6051201820373535, "logps/chosen": -593.3040161132812, "logps/rejected": -441.140380859375, "loss": 0.5613, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0769152641296387, "rewards/margins": 1.0415043830871582, "rewards/rejected": -3.118419885635376, "step": 21880 }, { "epoch": 2.83, "learning_rate": 3.22272162188008e-08, "logits/chosen": -2.8452401161193848, "logits/rejected": -2.774423122406006, "logps/chosen": -545.5921020507812, "logps/rejected": -457.8946838378906, "loss": 0.4953, "rewards/accuracies": 0.75, "rewards/chosen": -2.1308178901672363, "rewards/margins": 1.0220333337783813, "rewards/rejected": -3.152851104736328, "step": 21890 }, { "epoch": 2.83, "learning_rate": 3.1988141914507026e-08, "logits/chosen": -2.66451358795166, "logits/rejected": -2.5607786178588867, "logps/chosen": -516.1190185546875, "logps/rejected": -444.803955078125, "loss": 0.4944, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.882811188697815, "rewards/margins": 1.1388180255889893, "rewards/rejected": -3.0216293334960938, "step": 21900 }, { "epoch": 2.83, "learning_rate": 3.1749067610213256e-08, "logits/chosen": -2.814075231552124, "logits/rejected": -2.7223658561706543, "logps/chosen": -555.6741943359375, "logps/rejected": -444.52972412109375, "loss": 0.4746, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.084926128387451, "rewards/margins": 1.1020702123641968, "rewards/rejected": -3.1869969367980957, "step": 21910 }, { "epoch": 2.83, "learning_rate": 3.150999330591948e-08, "logits/chosen": -2.829713821411133, "logits/rejected": -2.656283378601074, "logps/chosen": -644.0105590820312, "logps/rejected": -454.19268798828125, "loss": 0.4927, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.3181560039520264, "rewards/margins": 1.1258795261383057, "rewards/rejected": -3.444035768508911, "step": 21920 }, { "epoch": 2.83, "learning_rate": 3.127091900162571e-08, "logits/chosen": -2.687398672103882, "logits/rejected": -2.5577964782714844, "logps/chosen": -582.93505859375, "logps/rejected": -446.449951171875, "loss": 0.5616, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.193493127822876, "rewards/margins": 0.8157742619514465, "rewards/rejected": -3.0092673301696777, "step": 21930 }, { "epoch": 2.83, "learning_rate": 3.103184469733193e-08, "logits/chosen": -2.6776633262634277, "logits/rejected": -2.607487678527832, "logps/chosen": -456.45404052734375, "logps/rejected": -398.55352783203125, "loss": 0.5815, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0777840614318848, "rewards/margins": 0.7266828417778015, "rewards/rejected": -2.804466724395752, "step": 21940 }, { "epoch": 2.83, "learning_rate": 3.079277039303815e-08, "logits/chosen": -2.734220027923584, "logits/rejected": -2.6100382804870605, "logps/chosen": -529.9268798828125, "logps/rejected": -369.7830810546875, "loss": 0.4862, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0406908988952637, "rewards/margins": 0.9870848655700684, "rewards/rejected": -3.027776002883911, "step": 21950 }, { "epoch": 2.84, "learning_rate": 3.055369608874438e-08, "logits/chosen": -2.708181858062744, "logits/rejected": -2.6217544078826904, "logps/chosen": -460.97808837890625, "logps/rejected": -389.06793212890625, "loss": 0.5626, "rewards/accuracies": 0.6875, "rewards/chosen": -2.363992214202881, "rewards/margins": 0.8000141382217407, "rewards/rejected": -3.164006471633911, "step": 21960 }, { "epoch": 2.84, "learning_rate": 3.0314621784450605e-08, "logits/chosen": -2.7591021060943604, "logits/rejected": -2.6540989875793457, "logps/chosen": -546.0713500976562, "logps/rejected": -411.20947265625, "loss": 0.4809, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.8087974786758423, "rewards/margins": 1.1041569709777832, "rewards/rejected": -2.912954092025757, "step": 21970 }, { "epoch": 2.84, "learning_rate": 3.007554748015683e-08, "logits/chosen": -2.693333387374878, "logits/rejected": -2.567300319671631, "logps/chosen": -648.9979248046875, "logps/rejected": -449.4981994628906, "loss": 0.5358, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.1162915229797363, "rewards/margins": 0.8814433813095093, "rewards/rejected": -2.997734785079956, "step": 21980 }, { "epoch": 2.84, "learning_rate": 2.983647317586306e-08, "logits/chosen": -2.7945737838745117, "logits/rejected": -2.6685662269592285, "logps/chosen": -555.0767822265625, "logps/rejected": -485.2850646972656, "loss": 0.6354, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9970756769180298, "rewards/margins": 0.7649778127670288, "rewards/rejected": -2.7620532512664795, "step": 21990 }, { "epoch": 2.84, "learning_rate": 2.959739887156928e-08, "logits/chosen": -2.7221245765686035, "logits/rejected": -2.5956082344055176, "logps/chosen": -544.46923828125, "logps/rejected": -422.44976806640625, "loss": 0.4785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1273391246795654, "rewards/margins": 0.9979120492935181, "rewards/rejected": -3.125251293182373, "step": 22000 }, { "epoch": 2.84, "eval_logits/chosen": -3.1020314693450928, "eval_logits/rejected": -3.0553903579711914, "eval_logps/chosen": -541.3470458984375, "eval_logps/rejected": -421.7462463378906, "eval_loss": 0.6159974932670593, "eval_rewards/accuracies": 0.6754999756813049, "eval_rewards/chosen": -0.9527357220649719, "eval_rewards/margins": 1.1290234327316284, "eval_rewards/rejected": -2.081759214401245, "eval_runtime": 281.9275, "eval_samples_per_second": 7.094, "eval_steps_per_second": 3.547, "step": 22000 }, { "epoch": 2.84, "learning_rate": 2.935832456727551e-08, "logits/chosen": -2.751905679702759, "logits/rejected": -2.563225269317627, "logps/chosen": -582.2379150390625, "logps/rejected": -437.15277099609375, "loss": 0.5061, "rewards/accuracies": 0.75, "rewards/chosen": -2.1809427738189697, "rewards/margins": 1.0257179737091064, "rewards/rejected": -3.2066612243652344, "step": 22010 }, { "epoch": 2.84, "learning_rate": 2.9119250262981735e-08, "logits/chosen": -2.734994411468506, "logits/rejected": -2.661513090133667, "logps/chosen": -611.4901733398438, "logps/rejected": -430.17633056640625, "loss": 0.5906, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1310079097747803, "rewards/margins": 0.7529563903808594, "rewards/rejected": -2.8839645385742188, "step": 22020 }, { "epoch": 2.84, "learning_rate": 2.8880175958687958e-08, "logits/chosen": -2.7602713108062744, "logits/rejected": -2.618887424468994, "logps/chosen": -624.5811157226562, "logps/rejected": -460.9051818847656, "loss": 0.566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.1249377727508545, "rewards/margins": 0.8838149905204773, "rewards/rejected": -3.0087528228759766, "step": 22030 }, { "epoch": 2.85, "learning_rate": 2.8641101654394187e-08, "logits/chosen": -2.6063244342803955, "logits/rejected": -2.485351085662842, "logps/chosen": -563.7271728515625, "logps/rejected": -372.6198425292969, "loss": 0.5439, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0089311599731445, "rewards/margins": 0.854773998260498, "rewards/rejected": -2.8637051582336426, "step": 22040 }, { "epoch": 2.85, "learning_rate": 2.840202735010041e-08, "logits/chosen": -2.8142457008361816, "logits/rejected": -2.702099561691284, "logps/chosen": -584.0242309570312, "logps/rejected": -493.3175354003906, "loss": 0.5659, "rewards/accuracies": 0.6875, "rewards/chosen": -2.28169846534729, "rewards/margins": 0.9593378305435181, "rewards/rejected": -3.2410361766815186, "step": 22050 }, { "epoch": 2.85, "learning_rate": 2.8162953045806636e-08, "logits/chosen": -2.7385478019714355, "logits/rejected": -2.5982816219329834, "logps/chosen": -691.8253173828125, "logps/rejected": -501.54974365234375, "loss": 0.5693, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1922783851623535, "rewards/margins": 0.8933612108230591, "rewards/rejected": -3.085639476776123, "step": 22060 }, { "epoch": 2.85, "learning_rate": 2.7923878741512858e-08, "logits/chosen": -2.8820858001708984, "logits/rejected": -2.788679599761963, "logps/chosen": -506.3414001464844, "logps/rejected": -432.5939025878906, "loss": 0.5731, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2064902782440186, "rewards/margins": 0.773489236831665, "rewards/rejected": -2.9799792766571045, "step": 22070 }, { "epoch": 2.85, "learning_rate": 2.7684804437219088e-08, "logits/chosen": -2.8171005249023438, "logits/rejected": -2.734105110168457, "logps/chosen": -575.2208862304688, "logps/rejected": -478.799072265625, "loss": 0.5236, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.967767357826233, "rewards/margins": 1.1035248041152954, "rewards/rejected": -3.0712921619415283, "step": 22080 }, { "epoch": 2.85, "learning_rate": 2.7445730132925314e-08, "logits/chosen": -2.590095043182373, "logits/rejected": -2.553342580795288, "logps/chosen": -500.953857421875, "logps/rejected": -480.08233642578125, "loss": 0.5119, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.0692148208618164, "rewards/margins": 1.110059380531311, "rewards/rejected": -3.179274320602417, "step": 22090 }, { "epoch": 2.85, "learning_rate": 2.7206655828631536e-08, "logits/chosen": -2.819361448287964, "logits/rejected": -2.733581304550171, "logps/chosen": -615.5157470703125, "logps/rejected": -513.7431030273438, "loss": 0.6098, "rewards/accuracies": 0.6875, "rewards/chosen": -2.195374011993408, "rewards/margins": 0.798495888710022, "rewards/rejected": -2.9938695430755615, "step": 22100 }, { "epoch": 2.85, "learning_rate": 2.6967581524337762e-08, "logits/chosen": -2.715075969696045, "logits/rejected": -2.561818838119507, "logps/chosen": -668.8834838867188, "logps/rejected": -437.7088317871094, "loss": 0.4737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2535037994384766, "rewards/margins": 1.044085144996643, "rewards/rejected": -3.297589063644409, "step": 22110 }, { "epoch": 2.86, "learning_rate": 2.6728507220043988e-08, "logits/chosen": -2.7237370014190674, "logits/rejected": -2.590407609939575, "logps/chosen": -544.8870849609375, "logps/rejected": -435.65185546875, "loss": 0.6638, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.953251600265503, "rewards/margins": 0.8562033772468567, "rewards/rejected": -2.809455156326294, "step": 22120 }, { "epoch": 2.86, "learning_rate": 2.6489432915750214e-08, "logits/chosen": -2.686290740966797, "logits/rejected": -2.5923244953155518, "logps/chosen": -501.91595458984375, "logps/rejected": -385.5447082519531, "loss": 0.576, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.895776391029358, "rewards/margins": 1.095608115196228, "rewards/rejected": -2.991384267807007, "step": 22130 }, { "epoch": 2.86, "learning_rate": 2.6250358611456437e-08, "logits/chosen": -2.830379009246826, "logits/rejected": -2.726768970489502, "logps/chosen": -655.44189453125, "logps/rejected": -516.0288696289062, "loss": 0.416, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.8898169994354248, "rewards/margins": 1.2703403234481812, "rewards/rejected": -3.1601569652557373, "step": 22140 }, { "epoch": 2.86, "learning_rate": 2.6011284307162666e-08, "logits/chosen": -2.7817327976226807, "logits/rejected": -2.631178617477417, "logps/chosen": -556.93310546875, "logps/rejected": -415.2560119628906, "loss": 0.7437, "rewards/accuracies": 0.625, "rewards/chosen": -2.4036777019500732, "rewards/margins": 0.5116479396820068, "rewards/rejected": -2.91532564163208, "step": 22150 }, { "epoch": 2.86, "learning_rate": 2.5772210002868892e-08, "logits/chosen": -2.6783783435821533, "logits/rejected": -2.5094494819641113, "logps/chosen": -525.7401123046875, "logps/rejected": -409.0415344238281, "loss": 0.5213, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.233010768890381, "rewards/margins": 0.9561686515808105, "rewards/rejected": -3.1891791820526123, "step": 22160 }, { "epoch": 2.86, "learning_rate": 2.5533135698575115e-08, "logits/chosen": -2.8272929191589355, "logits/rejected": -2.6785831451416016, "logps/chosen": -542.6396484375, "logps/rejected": -447.5962829589844, "loss": 0.4272, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.9244760274887085, "rewards/margins": 1.1764806509017944, "rewards/rejected": -3.100956439971924, "step": 22170 }, { "epoch": 2.86, "learning_rate": 2.529406139428134e-08, "logits/chosen": -2.8319625854492188, "logits/rejected": -2.7835075855255127, "logps/chosen": -517.2247924804688, "logps/rejected": -443.81494140625, "loss": 0.5551, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.021439790725708, "rewards/margins": 0.7949774861335754, "rewards/rejected": -2.8164172172546387, "step": 22180 }, { "epoch": 2.86, "learning_rate": 2.5054987089987567e-08, "logits/chosen": -2.7201757431030273, "logits/rejected": -2.6557486057281494, "logps/chosen": -551.548828125, "logps/rejected": -470.38800048828125, "loss": 0.5848, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.3345463275909424, "rewards/margins": 0.720977783203125, "rewards/rejected": -3.0555241107940674, "step": 22190 }, { "epoch": 2.87, "learning_rate": 2.4815912785693793e-08, "logits/chosen": -2.6916444301605225, "logits/rejected": -2.5981037616729736, "logps/chosen": -544.8480224609375, "logps/rejected": -471.14434814453125, "loss": 0.6098, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0843253135681152, "rewards/margins": 0.7419224977493286, "rewards/rejected": -2.8262476921081543, "step": 22200 }, { "epoch": 2.87, "learning_rate": 2.457683848140002e-08, "logits/chosen": -2.788146495819092, "logits/rejected": -2.711883068084717, "logps/chosen": -580.1127319335938, "logps/rejected": -449.0526428222656, "loss": 0.5696, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.102550506591797, "rewards/margins": 0.9378700256347656, "rewards/rejected": -3.0404205322265625, "step": 22210 }, { "epoch": 2.87, "learning_rate": 2.433776417710624e-08, "logits/chosen": -2.7277679443359375, "logits/rejected": -2.48179030418396, "logps/chosen": -548.4950561523438, "logps/rejected": -301.3655700683594, "loss": 0.4879, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.0525598526000977, "rewards/margins": 0.9702574610710144, "rewards/rejected": -3.022817373275757, "step": 22220 }, { "epoch": 2.87, "learning_rate": 2.409868987281247e-08, "logits/chosen": -2.7205352783203125, "logits/rejected": -2.5719640254974365, "logps/chosen": -568.1842041015625, "logps/rejected": -429.27197265625, "loss": 0.6328, "rewards/accuracies": 0.6875, "rewards/chosen": -2.133622407913208, "rewards/margins": 0.8150887489318848, "rewards/rejected": -2.9487111568450928, "step": 22230 }, { "epoch": 2.87, "learning_rate": 2.3859615568518694e-08, "logits/chosen": -2.8951199054718018, "logits/rejected": -2.7433040142059326, "logps/chosen": -605.8839111328125, "logps/rejected": -494.4755859375, "loss": 0.46, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -2.1131832599639893, "rewards/margins": 1.3232513666152954, "rewards/rejected": -3.436434268951416, "step": 22240 }, { "epoch": 2.87, "learning_rate": 2.362054126422492e-08, "logits/chosen": -2.6752662658691406, "logits/rejected": -2.565187692642212, "logps/chosen": -564.8192138671875, "logps/rejected": -420.2792053222656, "loss": 0.5271, "rewards/accuracies": 0.75, "rewards/chosen": -1.9488937854766846, "rewards/margins": 1.0460008382797241, "rewards/rejected": -2.994894504547119, "step": 22250 }, { "epoch": 2.87, "learning_rate": 2.3381466959931146e-08, "logits/chosen": -2.775446653366089, "logits/rejected": -2.662724733352661, "logps/chosen": -584.2286376953125, "logps/rejected": -466.640625, "loss": 0.6593, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0058717727661133, "rewards/margins": 0.6351572871208191, "rewards/rejected": -2.641028881072998, "step": 22260 }, { "epoch": 2.88, "learning_rate": 2.314239265563737e-08, "logits/chosen": -2.7452869415283203, "logits/rejected": -2.571958065032959, "logps/chosen": -623.5223388671875, "logps/rejected": -463.250244140625, "loss": 0.5663, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1787190437316895, "rewards/margins": 1.0526316165924072, "rewards/rejected": -3.2313506603240967, "step": 22270 }, { "epoch": 2.88, "learning_rate": 2.2903318351343597e-08, "logits/chosen": -2.7915923595428467, "logits/rejected": -2.573070526123047, "logps/chosen": -680.19970703125, "logps/rejected": -463.7076721191406, "loss": 0.5763, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2510266304016113, "rewards/margins": 0.9555074572563171, "rewards/rejected": -3.206533908843994, "step": 22280 }, { "epoch": 2.88, "learning_rate": 2.266424404704982e-08, "logits/chosen": -2.7429561614990234, "logits/rejected": -2.598703622817993, "logps/chosen": -531.0595092773438, "logps/rejected": -377.20892333984375, "loss": 0.7047, "rewards/accuracies": 0.6875, "rewards/chosen": -2.314326524734497, "rewards/margins": 0.5100107192993164, "rewards/rejected": -2.8243372440338135, "step": 22290 }, { "epoch": 2.88, "learning_rate": 2.242516974275605e-08, "logits/chosen": -2.789294481277466, "logits/rejected": -2.6329054832458496, "logps/chosen": -609.6475830078125, "logps/rejected": -400.1282043457031, "loss": 0.5276, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.046454668045044, "rewards/margins": 1.104714035987854, "rewards/rejected": -3.1511688232421875, "step": 22300 }, { "epoch": 2.88, "learning_rate": 2.2186095438462272e-08, "logits/chosen": -2.7430243492126465, "logits/rejected": -2.6427738666534424, "logps/chosen": -516.6419677734375, "logps/rejected": -360.03955078125, "loss": 0.4659, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.184757709503174, "rewards/margins": 1.1236366033554077, "rewards/rejected": -3.308394193649292, "step": 22310 }, { "epoch": 2.88, "learning_rate": 2.1947021134168498e-08, "logits/chosen": -2.772968292236328, "logits/rejected": -2.6650071144104004, "logps/chosen": -467.67822265625, "logps/rejected": -367.9386291503906, "loss": 0.6217, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.3191237449645996, "rewards/margins": 0.6588178873062134, "rewards/rejected": -2.9779415130615234, "step": 22320 }, { "epoch": 2.88, "learning_rate": 2.1707946829874724e-08, "logits/chosen": -2.8351731300354004, "logits/rejected": -2.7507712841033936, "logps/chosen": -585.9022216796875, "logps/rejected": -458.1552734375, "loss": 0.5525, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0498459339141846, "rewards/margins": 0.8901686668395996, "rewards/rejected": -2.940014362335205, "step": 22330 }, { "epoch": 2.88, "learning_rate": 2.146887252558095e-08, "logits/chosen": -2.854032516479492, "logits/rejected": -2.711367130279541, "logps/chosen": -597.1256713867188, "logps/rejected": -449.55859375, "loss": 0.4783, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1706395149230957, "rewards/margins": 1.1405237913131714, "rewards/rejected": -3.3111634254455566, "step": 22340 }, { "epoch": 2.89, "learning_rate": 2.1229798221287176e-08, "logits/chosen": -2.7863662242889404, "logits/rejected": -2.6252760887145996, "logps/chosen": -549.2486572265625, "logps/rejected": -376.4411926269531, "loss": 0.4387, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.8027633428573608, "rewards/margins": 1.1687531471252441, "rewards/rejected": -2.9715161323547363, "step": 22350 }, { "epoch": 2.89, "learning_rate": 2.09907239169934e-08, "logits/chosen": -2.7130849361419678, "logits/rejected": -2.622483730316162, "logps/chosen": -640.7130126953125, "logps/rejected": -505.56756591796875, "loss": 0.5006, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9211032390594482, "rewards/margins": 1.1196386814117432, "rewards/rejected": -3.0407419204711914, "step": 22360 }, { "epoch": 2.89, "learning_rate": 2.0751649612699628e-08, "logits/chosen": -2.844942569732666, "logits/rejected": -2.704404592514038, "logps/chosen": -603.1237182617188, "logps/rejected": -455.8411560058594, "loss": 0.5371, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.2081151008605957, "rewards/margins": 0.8575957417488098, "rewards/rejected": -3.0657107830047607, "step": 22370 }, { "epoch": 2.89, "learning_rate": 2.051257530840585e-08, "logits/chosen": -2.800182819366455, "logits/rejected": -2.6517891883850098, "logps/chosen": -609.5850830078125, "logps/rejected": -505.7694396972656, "loss": 0.7207, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.5156772136688232, "rewards/margins": 0.48678818345069885, "rewards/rejected": -3.00246524810791, "step": 22380 }, { "epoch": 2.89, "learning_rate": 2.0273501004112077e-08, "logits/chosen": -2.7427992820739746, "logits/rejected": -2.6733736991882324, "logps/chosen": -492.91339111328125, "logps/rejected": -376.6744384765625, "loss": 0.6335, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2321364879608154, "rewards/margins": 0.8347914814949036, "rewards/rejected": -3.066927909851074, "step": 22390 }, { "epoch": 2.89, "learning_rate": 2.0034426699818303e-08, "logits/chosen": -2.8142073154449463, "logits/rejected": -2.628429412841797, "logps/chosen": -598.38330078125, "logps/rejected": -388.561279296875, "loss": 0.5858, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.328626871109009, "rewards/margins": 0.7725290060043335, "rewards/rejected": -3.1011557579040527, "step": 22400 }, { "epoch": 2.89, "learning_rate": 1.979535239552453e-08, "logits/chosen": -2.7400126457214355, "logits/rejected": -2.5492777824401855, "logps/chosen": -668.4337768554688, "logps/rejected": -463.1839904785156, "loss": 0.4852, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.769083023071289, "rewards/margins": 1.1015936136245728, "rewards/rejected": -2.870676279067993, "step": 22410 }, { "epoch": 2.89, "learning_rate": 1.9556278091230755e-08, "logits/chosen": -2.8941538333892822, "logits/rejected": -2.7284934520721436, "logps/chosen": -552.1417846679688, "logps/rejected": -436.273681640625, "loss": 0.5807, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2773146629333496, "rewards/margins": 0.7342174649238586, "rewards/rejected": -3.0115323066711426, "step": 22420 }, { "epoch": 2.9, "learning_rate": 1.9317203786936977e-08, "logits/chosen": -2.7631123065948486, "logits/rejected": -2.6940293312072754, "logps/chosen": -492.384521484375, "logps/rejected": -405.2735595703125, "loss": 0.5571, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2548701763153076, "rewards/margins": 0.7760655283927917, "rewards/rejected": -3.0309360027313232, "step": 22430 }, { "epoch": 2.9, "learning_rate": 1.9078129482643203e-08, "logits/chosen": -2.8506007194519043, "logits/rejected": -2.672518491744995, "logps/chosen": -556.2369384765625, "logps/rejected": -427.92620849609375, "loss": 0.5825, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1352500915527344, "rewards/margins": 0.7340695261955261, "rewards/rejected": -2.869319438934326, "step": 22440 }, { "epoch": 2.9, "learning_rate": 1.883905517834943e-08, "logits/chosen": -2.661135196685791, "logits/rejected": -2.565200090408325, "logps/chosen": -649.8948974609375, "logps/rejected": -507.47186279296875, "loss": 0.6007, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.340184450149536, "rewards/margins": 1.101552963256836, "rewards/rejected": -3.441737413406372, "step": 22450 }, { "epoch": 2.9, "learning_rate": 1.8599980874055655e-08, "logits/chosen": -2.7388083934783936, "logits/rejected": -2.6469366550445557, "logps/chosen": -465.7628479003906, "logps/rejected": -323.75848388671875, "loss": 0.5324, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1681103706359863, "rewards/margins": 0.7843239307403564, "rewards/rejected": -2.9524343013763428, "step": 22460 }, { "epoch": 2.9, "learning_rate": 1.836090656976188e-08, "logits/chosen": -2.7237119674682617, "logits/rejected": -2.719088077545166, "logps/chosen": -473.483642578125, "logps/rejected": -437.2723693847656, "loss": 0.6012, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2560641765594482, "rewards/margins": 0.7707743644714355, "rewards/rejected": -3.026838779449463, "step": 22470 }, { "epoch": 2.9, "learning_rate": 1.8121832265468107e-08, "logits/chosen": -2.882495164871216, "logits/rejected": -2.7046191692352295, "logps/chosen": -562.3390502929688, "logps/rejected": -384.97906494140625, "loss": 0.5677, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.0282745361328125, "rewards/margins": 0.9978607892990112, "rewards/rejected": -3.0261356830596924, "step": 22480 }, { "epoch": 2.9, "learning_rate": 1.7882757961174333e-08, "logits/chosen": -2.7337646484375, "logits/rejected": -2.599547863006592, "logps/chosen": -600.0368041992188, "logps/rejected": -433.51239013671875, "loss": 0.5437, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2790963649749756, "rewards/margins": 1.0247502326965332, "rewards/rejected": -3.303846836090088, "step": 22490 }, { "epoch": 2.9, "learning_rate": 1.7643683656880556e-08, "logits/chosen": -2.8647847175598145, "logits/rejected": -2.6720900535583496, "logps/chosen": -564.0286865234375, "logps/rejected": -357.01947021484375, "loss": 0.5623, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.98349928855896, "rewards/margins": 0.8323315382003784, "rewards/rejected": -2.815831422805786, "step": 22500 }, { "epoch": 2.91, "learning_rate": 1.7404609352586782e-08, "logits/chosen": -2.7021987438201904, "logits/rejected": -2.6360559463500977, "logps/chosen": -548.8167724609375, "logps/rejected": -452.0616149902344, "loss": 0.4679, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.160313129425049, "rewards/margins": 1.2284306287765503, "rewards/rejected": -3.3887436389923096, "step": 22510 }, { "epoch": 2.91, "learning_rate": 1.716553504829301e-08, "logits/chosen": -2.7608394622802734, "logits/rejected": -2.734973907470703, "logps/chosen": -511.54290771484375, "logps/rejected": -492.9344177246094, "loss": 0.4326, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -2.076108694076538, "rewards/margins": 1.1214830875396729, "rewards/rejected": -3.197591781616211, "step": 22520 }, { "epoch": 2.91, "learning_rate": 1.6926460743999234e-08, "logits/chosen": -2.6068129539489746, "logits/rejected": -2.4725518226623535, "logps/chosen": -579.6769409179688, "logps/rejected": -440.8780822753906, "loss": 0.6738, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5113863945007324, "rewards/margins": 0.5633095502853394, "rewards/rejected": -3.0746963024139404, "step": 22530 }, { "epoch": 2.91, "learning_rate": 1.668738643970546e-08, "logits/chosen": -2.928449869155884, "logits/rejected": -2.7784128189086914, "logps/chosen": -495.7769470214844, "logps/rejected": -391.39959716796875, "loss": 0.6039, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2902817726135254, "rewards/margins": 0.6759740114212036, "rewards/rejected": -2.9662556648254395, "step": 22540 }, { "epoch": 2.91, "learning_rate": 1.6448312135411683e-08, "logits/chosen": -2.8377084732055664, "logits/rejected": -2.7713704109191895, "logps/chosen": -584.2440185546875, "logps/rejected": -475.7557678222656, "loss": 0.4529, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9368503093719482, "rewards/margins": 1.057294487953186, "rewards/rejected": -2.994144916534424, "step": 22550 }, { "epoch": 2.91, "learning_rate": 1.6209237831117912e-08, "logits/chosen": -2.7209951877593994, "logits/rejected": -2.562037944793701, "logps/chosen": -547.1627197265625, "logps/rejected": -400.38006591796875, "loss": 0.4797, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.1181676387786865, "rewards/margins": 0.9987042546272278, "rewards/rejected": -3.1168718338012695, "step": 22560 }, { "epoch": 2.91, "learning_rate": 1.5970163526824135e-08, "logits/chosen": -2.67118501663208, "logits/rejected": -2.621845245361328, "logps/chosen": -536.706298828125, "logps/rejected": -466.141845703125, "loss": 0.5075, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0367391109466553, "rewards/margins": 1.178012490272522, "rewards/rejected": -3.214751720428467, "step": 22570 }, { "epoch": 2.92, "learning_rate": 1.573108922253036e-08, "logits/chosen": -2.8265247344970703, "logits/rejected": -2.685765027999878, "logps/chosen": -520.4766235351562, "logps/rejected": -412.1700134277344, "loss": 0.5637, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.335444211959839, "rewards/margins": 0.7696742415428162, "rewards/rejected": -3.1051182746887207, "step": 22580 }, { "epoch": 2.92, "learning_rate": 1.5492014918236587e-08, "logits/chosen": -2.721282482147217, "logits/rejected": -2.620255947113037, "logps/chosen": -506.68798828125, "logps/rejected": -447.103759765625, "loss": 0.5612, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1185457706451416, "rewards/margins": 0.9495170712471008, "rewards/rejected": -3.0680625438690186, "step": 22590 }, { "epoch": 2.92, "learning_rate": 1.5252940613942813e-08, "logits/chosen": -2.6764426231384277, "logits/rejected": -2.5720226764678955, "logps/chosen": -623.1202392578125, "logps/rejected": -434.5083923339844, "loss": 0.6498, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.100013256072998, "rewards/margins": 0.7756364941596985, "rewards/rejected": -2.8756494522094727, "step": 22600 }, { "epoch": 2.92, "learning_rate": 1.501386630964904e-08, "logits/chosen": -2.851170063018799, "logits/rejected": -2.672642230987549, "logps/chosen": -543.6373291015625, "logps/rejected": -403.44464111328125, "loss": 0.5015, "rewards/accuracies": 0.75, "rewards/chosen": -2.198871374130249, "rewards/margins": 1.0578765869140625, "rewards/rejected": -3.2567481994628906, "step": 22610 }, { "epoch": 2.92, "learning_rate": 1.4774792005355265e-08, "logits/chosen": -2.86369252204895, "logits/rejected": -2.715981960296631, "logps/chosen": -607.1310424804688, "logps/rejected": -432.74786376953125, "loss": 0.4018, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0106496810913086, "rewards/margins": 1.267293930053711, "rewards/rejected": -3.2779438495635986, "step": 22620 }, { "epoch": 2.92, "learning_rate": 1.4535717701061489e-08, "logits/chosen": -2.720944404602051, "logits/rejected": -2.6287286281585693, "logps/chosen": -422.1285095214844, "logps/rejected": -352.2798156738281, "loss": 0.6179, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.305453062057495, "rewards/margins": 0.6621637344360352, "rewards/rejected": -2.967616558074951, "step": 22630 }, { "epoch": 2.92, "learning_rate": 1.4296643396767715e-08, "logits/chosen": -2.8002820014953613, "logits/rejected": -2.7057571411132812, "logps/chosen": -563.4481811523438, "logps/rejected": -424.04656982421875, "loss": 0.4693, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.0459418296813965, "rewards/margins": 1.3037598133087158, "rewards/rejected": -3.3497016429901123, "step": 22640 }, { "epoch": 2.92, "learning_rate": 1.405756909247394e-08, "logits/chosen": -2.7546775341033936, "logits/rejected": -2.7543578147888184, "logps/chosen": -496.30731201171875, "logps/rejected": -483.034423828125, "loss": 0.6315, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8962695598602295, "rewards/margins": 0.7584190964698792, "rewards/rejected": -2.654688835144043, "step": 22650 }, { "epoch": 2.93, "learning_rate": 1.3818494788180165e-08, "logits/chosen": -2.7456936836242676, "logits/rejected": -2.7081737518310547, "logps/chosen": -646.9383544921875, "logps/rejected": -535.2176513671875, "loss": 0.6766, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.1723647117614746, "rewards/margins": 0.6003416180610657, "rewards/rejected": -2.7727062702178955, "step": 22660 }, { "epoch": 2.93, "learning_rate": 1.357942048388639e-08, "logits/chosen": -2.8299784660339355, "logits/rejected": -2.68184494972229, "logps/chosen": -680.9962158203125, "logps/rejected": -517.2330932617188, "loss": 0.4703, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.090038537979126, "rewards/margins": 1.350293755531311, "rewards/rejected": -3.4403319358825684, "step": 22670 }, { "epoch": 2.93, "learning_rate": 1.3340346179592617e-08, "logits/chosen": -2.797614336013794, "logits/rejected": -2.713466167449951, "logps/chosen": -578.7914428710938, "logps/rejected": -440.970947265625, "loss": 0.537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1769859790802, "rewards/margins": 1.02457857131958, "rewards/rejected": -3.2015647888183594, "step": 22680 }, { "epoch": 2.93, "learning_rate": 1.3101271875298843e-08, "logits/chosen": -2.7208259105682373, "logits/rejected": -2.6030802726745605, "logps/chosen": -557.1212768554688, "logps/rejected": -436.6141662597656, "loss": 0.4725, "rewards/accuracies": 0.75, "rewards/chosen": -2.0116732120513916, "rewards/margins": 1.158496618270874, "rewards/rejected": -3.1701698303222656, "step": 22690 }, { "epoch": 2.93, "learning_rate": 1.2862197571005068e-08, "logits/chosen": -2.8191404342651367, "logits/rejected": -2.6038928031921387, "logps/chosen": -596.4337158203125, "logps/rejected": -420.050048828125, "loss": 0.5547, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.983551263809204, "rewards/margins": 1.1121456623077393, "rewards/rejected": -3.0956969261169434, "step": 22700 }, { "epoch": 2.93, "learning_rate": 1.2623123266711294e-08, "logits/chosen": -2.607670783996582, "logits/rejected": -2.505690813064575, "logps/chosen": -607.1160888671875, "logps/rejected": -447.31951904296875, "loss": 0.491, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9837089776992798, "rewards/margins": 1.0722272396087646, "rewards/rejected": -3.055936336517334, "step": 22710 }, { "epoch": 2.93, "learning_rate": 1.2384048962417518e-08, "logits/chosen": -2.707414388656616, "logits/rejected": -2.682340621948242, "logps/chosen": -527.0724487304688, "logps/rejected": -512.9738159179688, "loss": 0.5479, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.303358554840088, "rewards/margins": 0.9735006093978882, "rewards/rejected": -3.2768592834472656, "step": 22720 }, { "epoch": 2.93, "learning_rate": 1.2144974658123744e-08, "logits/chosen": -2.7377641201019287, "logits/rejected": -2.632944107055664, "logps/chosen": -497.95556640625, "logps/rejected": -400.8097229003906, "loss": 0.6765, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0524961948394775, "rewards/margins": 0.47852763533592224, "rewards/rejected": -2.5310237407684326, "step": 22730 }, { "epoch": 2.94, "learning_rate": 1.190590035382997e-08, "logits/chosen": -2.618680715560913, "logits/rejected": -2.5050578117370605, "logps/chosen": -590.33056640625, "logps/rejected": -433.9381408691406, "loss": 0.539, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1389615535736084, "rewards/margins": 0.983077883720398, "rewards/rejected": -3.122039318084717, "step": 22740 }, { "epoch": 2.94, "learning_rate": 1.1666826049536196e-08, "logits/chosen": -2.6942925453186035, "logits/rejected": -2.6037421226501465, "logps/chosen": -663.122314453125, "logps/rejected": -424.10675048828125, "loss": 0.499, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.944214105606079, "rewards/margins": 0.9679269790649414, "rewards/rejected": -2.9121410846710205, "step": 22750 }, { "epoch": 2.94, "learning_rate": 1.142775174524242e-08, "logits/chosen": -2.718055009841919, "logits/rejected": -2.6144933700561523, "logps/chosen": -623.7457885742188, "logps/rejected": -470.91583251953125, "loss": 0.5576, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.237856388092041, "rewards/margins": 0.8797027468681335, "rewards/rejected": -3.1175594329833984, "step": 22760 }, { "epoch": 2.94, "learning_rate": 1.1188677440948646e-08, "logits/chosen": -2.740041732788086, "logits/rejected": -2.644688844680786, "logps/chosen": -483.3519592285156, "logps/rejected": -401.1373291015625, "loss": 0.5848, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2177786827087402, "rewards/margins": 0.6964766383171082, "rewards/rejected": -2.914255380630493, "step": 22770 }, { "epoch": 2.94, "learning_rate": 1.094960313665487e-08, "logits/chosen": -2.7162322998046875, "logits/rejected": -2.7391111850738525, "logps/chosen": -540.3347778320312, "logps/rejected": -527.4300537109375, "loss": 0.617, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.459496021270752, "rewards/margins": 0.7648278474807739, "rewards/rejected": -3.2243239879608154, "step": 22780 }, { "epoch": 2.94, "learning_rate": 1.0710528832361097e-08, "logits/chosen": -2.6532082557678223, "logits/rejected": -2.6126599311828613, "logps/chosen": -515.232421875, "logps/rejected": -462.94036865234375, "loss": 0.5065, "rewards/accuracies": 0.75, "rewards/chosen": -1.9663476943969727, "rewards/margins": 0.8912264704704285, "rewards/rejected": -2.857574224472046, "step": 22790 }, { "epoch": 2.94, "learning_rate": 1.0471454528067324e-08, "logits/chosen": -2.8266243934631348, "logits/rejected": -2.6205532550811768, "logps/chosen": -602.0136108398438, "logps/rejected": -394.17059326171875, "loss": 0.5548, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.057105302810669, "rewards/margins": 0.9528596997261047, "rewards/rejected": -3.009965419769287, "step": 22800 }, { "epoch": 2.94, "learning_rate": 1.0232380223773549e-08, "logits/chosen": -2.949758529663086, "logits/rejected": -2.8311729431152344, "logps/chosen": -563.0667724609375, "logps/rejected": -404.1985778808594, "loss": 0.5285, "rewards/accuracies": 0.75, "rewards/chosen": -1.9690145254135132, "rewards/margins": 0.8698192834854126, "rewards/rejected": -2.838833808898926, "step": 22810 }, { "epoch": 2.95, "learning_rate": 9.993305919479775e-09, "logits/chosen": -2.888486385345459, "logits/rejected": -2.6392011642456055, "logps/chosen": -643.3026733398438, "logps/rejected": -397.3679504394531, "loss": 0.6148, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.249143123626709, "rewards/margins": 0.7331309914588928, "rewards/rejected": -2.982274293899536, "step": 22820 }, { "epoch": 2.95, "learning_rate": 9.754231615185999e-09, "logits/chosen": -2.682034730911255, "logits/rejected": -2.578467607498169, "logps/chosen": -586.1964721679688, "logps/rejected": -481.8528747558594, "loss": 0.758, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.239478349685669, "rewards/margins": 0.5506809949874878, "rewards/rejected": -2.790159225463867, "step": 22830 }, { "epoch": 2.95, "learning_rate": 9.515157310892225e-09, "logits/chosen": -2.7518696784973145, "logits/rejected": -2.607414722442627, "logps/chosen": -498.73199462890625, "logps/rejected": -398.6495666503906, "loss": 0.5853, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.2070775032043457, "rewards/margins": 0.9183019399642944, "rewards/rejected": -3.1253795623779297, "step": 22840 }, { "epoch": 2.95, "learning_rate": 9.27608300659845e-09, "logits/chosen": -2.712972402572632, "logits/rejected": -2.6135787963867188, "logps/chosen": -529.8480224609375, "logps/rejected": -398.996826171875, "loss": 0.6897, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.2657485008239746, "rewards/margins": 0.5624204277992249, "rewards/rejected": -2.8281686305999756, "step": 22850 }, { "epoch": 2.95, "learning_rate": 9.037008702304677e-09, "logits/chosen": -2.7767539024353027, "logits/rejected": -2.6916353702545166, "logps/chosen": -503.56439208984375, "logps/rejected": -424.10577392578125, "loss": 0.4543, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.253103733062744, "rewards/margins": 1.1118395328521729, "rewards/rejected": -3.364943265914917, "step": 22860 }, { "epoch": 2.95, "learning_rate": 8.797934398010901e-09, "logits/chosen": -2.70878267288208, "logits/rejected": -2.572002410888672, "logps/chosen": -674.6787719726562, "logps/rejected": -479.97039794921875, "loss": 0.5757, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.41225004196167, "rewards/margins": 0.8375943303108215, "rewards/rejected": -3.2498443126678467, "step": 22870 }, { "epoch": 2.95, "learning_rate": 8.558860093717127e-09, "logits/chosen": -2.7043471336364746, "logits/rejected": -2.542947292327881, "logps/chosen": -597.2625122070312, "logps/rejected": -428.1678161621094, "loss": 0.5608, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.306255340576172, "rewards/margins": 0.8219898343086243, "rewards/rejected": -3.1282451152801514, "step": 22880 }, { "epoch": 2.96, "learning_rate": 8.319785789423352e-09, "logits/chosen": -2.7717878818511963, "logits/rejected": -2.667196750640869, "logps/chosen": -517.8555908203125, "logps/rejected": -409.821044921875, "loss": 0.4571, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9679005146026611, "rewards/margins": 1.1960086822509766, "rewards/rejected": -3.1639089584350586, "step": 22890 }, { "epoch": 2.96, "learning_rate": 8.080711485129578e-09, "logits/chosen": -2.778775453567505, "logits/rejected": -2.663665771484375, "logps/chosen": -589.9783935546875, "logps/rejected": -459.052734375, "loss": 0.4745, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.940015435218811, "rewards/margins": 1.1492918729782104, "rewards/rejected": -3.0893073081970215, "step": 22900 }, { "epoch": 2.96, "learning_rate": 7.841637180835804e-09, "logits/chosen": -2.706758499145508, "logits/rejected": -2.58133864402771, "logps/chosen": -540.3888549804688, "logps/rejected": -428.15765380859375, "loss": 0.5186, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.076418161392212, "rewards/margins": 0.8632799386978149, "rewards/rejected": -2.9396982192993164, "step": 22910 }, { "epoch": 2.96, "learning_rate": 7.60256287654203e-09, "logits/chosen": -2.7350199222564697, "logits/rejected": -2.660015106201172, "logps/chosen": -522.4580078125, "logps/rejected": -465.273193359375, "loss": 0.5214, "rewards/accuracies": 0.75, "rewards/chosen": -2.0742712020874023, "rewards/margins": 0.7861019372940063, "rewards/rejected": -2.860373020172119, "step": 22920 }, { "epoch": 2.96, "learning_rate": 7.363488572248254e-09, "logits/chosen": -2.751126766204834, "logits/rejected": -2.691131114959717, "logps/chosen": -531.3177490234375, "logps/rejected": -383.5625915527344, "loss": 0.5915, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3090362548828125, "rewards/margins": 0.7280908823013306, "rewards/rejected": -3.0371270179748535, "step": 22930 }, { "epoch": 2.96, "learning_rate": 7.12441426795448e-09, "logits/chosen": -2.776099920272827, "logits/rejected": -2.638808250427246, "logps/chosen": -530.8773803710938, "logps/rejected": -426.103515625, "loss": 0.5665, "rewards/accuracies": 0.75, "rewards/chosen": -2.1284234523773193, "rewards/margins": 0.9932724237442017, "rewards/rejected": -3.1216959953308105, "step": 22940 }, { "epoch": 2.96, "learning_rate": 6.885339963660705e-09, "logits/chosen": -2.7674007415771484, "logits/rejected": -2.6392717361450195, "logps/chosen": -530.5498046875, "logps/rejected": -452.0743103027344, "loss": 0.5788, "rewards/accuracies": 0.75, "rewards/chosen": -2.042587995529175, "rewards/margins": 0.9199220538139343, "rewards/rejected": -2.962510347366333, "step": 22950 }, { "epoch": 2.96, "learning_rate": 6.646265659366931e-09, "logits/chosen": -2.7176272869110107, "logits/rejected": -2.681621551513672, "logps/chosen": -492.8802185058594, "logps/rejected": -456.64508056640625, "loss": 0.5089, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.2264304161071777, "rewards/margins": 0.8701683282852173, "rewards/rejected": -3.0965986251831055, "step": 22960 }, { "epoch": 2.97, "learning_rate": 6.407191355073157e-09, "logits/chosen": -2.76493763923645, "logits/rejected": -2.622626781463623, "logps/chosen": -645.611572265625, "logps/rejected": -466.94927978515625, "loss": 0.4974, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.98160719871521, "rewards/margins": 1.229923963546753, "rewards/rejected": -3.211531162261963, "step": 22970 }, { "epoch": 2.97, "learning_rate": 6.168117050779382e-09, "logits/chosen": -2.7199835777282715, "logits/rejected": -2.664344549179077, "logps/chosen": -545.619140625, "logps/rejected": -475.5179748535156, "loss": 0.6096, "rewards/accuracies": 0.625, "rewards/chosen": -2.076247453689575, "rewards/margins": 0.8493876457214355, "rewards/rejected": -2.92563533782959, "step": 22980 }, { "epoch": 2.97, "learning_rate": 5.9290427464856074e-09, "logits/chosen": -2.81787109375, "logits/rejected": -2.6867504119873047, "logps/chosen": -591.536376953125, "logps/rejected": -445.3056640625, "loss": 0.519, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.8255894184112549, "rewards/margins": 0.9404461979866028, "rewards/rejected": -2.766035556793213, "step": 22990 }, { "epoch": 2.97, "learning_rate": 5.6899684421918334e-09, "logits/chosen": -2.745645761489868, "logits/rejected": -2.7462425231933594, "logps/chosen": -526.7835693359375, "logps/rejected": -521.2772216796875, "loss": 0.4905, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7523155212402344, "rewards/margins": 1.125349521636963, "rewards/rejected": -2.8776652812957764, "step": 23000 }, { "epoch": 2.97, "eval_logits/chosen": -3.1056323051452637, "eval_logits/rejected": -3.058304786682129, "eval_logps/chosen": -541.3571166992188, "eval_logps/rejected": -421.7638244628906, "eval_loss": 0.616146981716156, "eval_rewards/accuracies": 0.6769999861717224, "eval_rewards/chosen": -0.953740119934082, "eval_rewards/margins": 1.1297767162322998, "eval_rewards/rejected": -2.083516836166382, "eval_runtime": 320.8301, "eval_samples_per_second": 6.234, "eval_steps_per_second": 3.117, "step": 23000 }, { "epoch": 2.97, "learning_rate": 5.450894137898059e-09, "logits/chosen": -2.596318006515503, "logits/rejected": -2.594238758087158, "logps/chosen": -480.3946838378906, "logps/rejected": -471.13397216796875, "loss": 0.5753, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -2.1465048789978027, "rewards/margins": 1.0003836154937744, "rewards/rejected": -3.1468887329101562, "step": 23010 }, { "epoch": 2.97, "learning_rate": 5.211819833604284e-09, "logits/chosen": -2.772277593612671, "logits/rejected": -2.6491236686706543, "logps/chosen": -581.0428466796875, "logps/rejected": -501.441162109375, "loss": 0.5327, "rewards/accuracies": 0.75, "rewards/chosen": -2.021146059036255, "rewards/margins": 0.9681133031845093, "rewards/rejected": -2.9892592430114746, "step": 23020 }, { "epoch": 2.97, "learning_rate": 4.97274552931051e-09, "logits/chosen": -2.845492362976074, "logits/rejected": -2.713261365890503, "logps/chosen": -620.7496948242188, "logps/rejected": -448.9158630371094, "loss": 0.5167, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.2423152923583984, "rewards/margins": 1.0006440877914429, "rewards/rejected": -3.2429592609405518, "step": 23030 }, { "epoch": 2.97, "learning_rate": 4.733671225016735e-09, "logits/chosen": -2.8024325370788574, "logits/rejected": -2.610027551651001, "logps/chosen": -576.94677734375, "logps/rejected": -413.9851989746094, "loss": 0.5813, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.142634868621826, "rewards/margins": 0.8390877842903137, "rewards/rejected": -2.981722354888916, "step": 23040 }, { "epoch": 2.98, "learning_rate": 4.49459692072296e-09, "logits/chosen": -2.7225940227508545, "logits/rejected": -2.5873095989227295, "logps/chosen": -582.6495361328125, "logps/rejected": -458.443359375, "loss": 0.487, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -2.106889247894287, "rewards/margins": 1.1824688911437988, "rewards/rejected": -3.2893576622009277, "step": 23050 }, { "epoch": 2.98, "learning_rate": 4.255522616429185e-09, "logits/chosen": -2.787397623062134, "logits/rejected": -2.663234233856201, "logps/chosen": -492.38031005859375, "logps/rejected": -410.67303466796875, "loss": 0.4633, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9321101903915405, "rewards/margins": 1.0328289270401, "rewards/rejected": -2.9649393558502197, "step": 23060 }, { "epoch": 2.98, "learning_rate": 4.016448312135412e-09, "logits/chosen": -2.683946132659912, "logits/rejected": -2.579413652420044, "logps/chosen": -560.9088745117188, "logps/rejected": -466.9481506347656, "loss": 0.5305, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1582815647125244, "rewards/margins": 0.9530784487724304, "rewards/rejected": -3.1113598346710205, "step": 23070 }, { "epoch": 2.98, "learning_rate": 3.777374007841637e-09, "logits/chosen": -2.700809955596924, "logits/rejected": -2.5667426586151123, "logps/chosen": -538.1483764648438, "logps/rejected": -421.3648376464844, "loss": 0.5105, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.9216289520263672, "rewards/margins": 1.0657562017440796, "rewards/rejected": -2.987384796142578, "step": 23080 }, { "epoch": 2.98, "learning_rate": 3.5382997035478624e-09, "logits/chosen": -2.8141047954559326, "logits/rejected": -2.6493477821350098, "logps/chosen": -504.57598876953125, "logps/rejected": -393.3653564453125, "loss": 0.5655, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.346402883529663, "rewards/margins": 0.7466580867767334, "rewards/rejected": -3.0930612087249756, "step": 23090 }, { "epoch": 2.98, "learning_rate": 3.2992253992540884e-09, "logits/chosen": -2.771340847015381, "logits/rejected": -2.6586368083953857, "logps/chosen": -572.9796752929688, "logps/rejected": -484.9627380371094, "loss": 0.6267, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.4118354320526123, "rewards/margins": 0.6482806205749512, "rewards/rejected": -3.0601162910461426, "step": 23100 }, { "epoch": 2.98, "learning_rate": 3.0601510949603136e-09, "logits/chosen": -2.68626070022583, "logits/rejected": -2.610502243041992, "logps/chosen": -491.44500732421875, "logps/rejected": -393.2298889160156, "loss": 0.677, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.186386823654175, "rewards/margins": 0.7751551270484924, "rewards/rejected": -2.9615418910980225, "step": 23110 }, { "epoch": 2.98, "learning_rate": 2.821076790666539e-09, "logits/chosen": -2.793257713317871, "logits/rejected": -2.7560641765594482, "logps/chosen": -534.6019287109375, "logps/rejected": -438.4395446777344, "loss": 0.6069, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -2.030890464782715, "rewards/margins": 0.6935739517211914, "rewards/rejected": -2.7244644165039062, "step": 23120 }, { "epoch": 2.99, "learning_rate": 2.5820024863727643e-09, "logits/chosen": -2.7898945808410645, "logits/rejected": -2.7212352752685547, "logps/chosen": -564.8001708984375, "logps/rejected": -501.6312561035156, "loss": 0.5884, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -2.1551194190979004, "rewards/margins": 0.899905800819397, "rewards/rejected": -3.0550248622894287, "step": 23130 }, { "epoch": 2.99, "learning_rate": 2.34292818207899e-09, "logits/chosen": -2.68971586227417, "logits/rejected": -2.593066453933716, "logps/chosen": -537.8116455078125, "logps/rejected": -436.90631103515625, "loss": 0.5715, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.380410671234131, "rewards/margins": 0.6883638501167297, "rewards/rejected": -3.068774700164795, "step": 23140 }, { "epoch": 2.99, "learning_rate": 2.1038538777852155e-09, "logits/chosen": -2.778963565826416, "logits/rejected": -2.6368682384490967, "logps/chosen": -517.849609375, "logps/rejected": -406.77301025390625, "loss": 0.5821, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -2.290025234222412, "rewards/margins": 0.8121423721313477, "rewards/rejected": -3.1021676063537598, "step": 23150 }, { "epoch": 2.99, "learning_rate": 1.864779573491441e-09, "logits/chosen": -2.8071348667144775, "logits/rejected": -2.619807720184326, "logps/chosen": -518.8523559570312, "logps/rejected": -381.4641418457031, "loss": 0.5257, "rewards/accuracies": 0.75, "rewards/chosen": -1.9476779699325562, "rewards/margins": 0.8116229176521301, "rewards/rejected": -2.75930118560791, "step": 23160 }, { "epoch": 2.99, "learning_rate": 1.6257052691976666e-09, "logits/chosen": -2.8507823944091797, "logits/rejected": -2.659789562225342, "logps/chosen": -540.0602416992188, "logps/rejected": -370.6182556152344, "loss": 0.5179, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.9780023097991943, "rewards/margins": 1.0719361305236816, "rewards/rejected": -3.049938201904297, "step": 23170 }, { "epoch": 2.99, "learning_rate": 1.386630964903892e-09, "logits/chosen": -2.7686853408813477, "logits/rejected": -2.6160740852355957, "logps/chosen": -689.8055419921875, "logps/rejected": -543.5774536132812, "loss": 0.4673, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1838810443878174, "rewards/margins": 1.160839557647705, "rewards/rejected": -3.3447203636169434, "step": 23180 }, { "epoch": 2.99, "learning_rate": 1.1475566606101176e-09, "logits/chosen": -2.7567012310028076, "logits/rejected": -2.687746524810791, "logps/chosen": -608.5720825195312, "logps/rejected": -453.330078125, "loss": 0.5896, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -2.0456948280334473, "rewards/margins": 0.8663339614868164, "rewards/rejected": -2.9120290279388428, "step": 23190 }, { "epoch": 3.0, "learning_rate": 9.084823563163431e-10, "logits/chosen": -2.767395257949829, "logits/rejected": -2.696479320526123, "logps/chosen": -514.7835693359375, "logps/rejected": -421.87249755859375, "loss": 0.6245, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -2.0952484607696533, "rewards/margins": 0.7411233186721802, "rewards/rejected": -2.836371898651123, "step": 23200 }, { "epoch": 3.0, "learning_rate": 6.694080520225685e-10, "logits/chosen": -2.6648337841033936, "logits/rejected": -2.5971901416778564, "logps/chosen": -571.4296875, "logps/rejected": -435.806396484375, "loss": 0.6238, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2471508979797363, "rewards/margins": 0.8862024545669556, "rewards/rejected": -3.1333532333374023, "step": 23210 }, { "epoch": 3.0, "learning_rate": 4.3033374772879407e-10, "logits/chosen": -2.775848865509033, "logits/rejected": -2.7261452674865723, "logps/chosen": -530.3493041992188, "logps/rejected": -434.66461181640625, "loss": 0.5311, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2953238487243652, "rewards/margins": 0.7722536325454712, "rewards/rejected": -3.067577600479126, "step": 23220 }, { "epoch": 3.0, "learning_rate": 1.912594434350196e-10, "logits/chosen": -2.733804941177368, "logits/rejected": -2.6173176765441895, "logps/chosen": -467.5093688964844, "logps/rejected": -354.1914367675781, "loss": 0.583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.096590518951416, "rewards/margins": 0.7746478319168091, "rewards/rejected": -2.8712384700775146, "step": 23230 }, { "epoch": 3.0, "step": 23238, "total_flos": 0.0, "train_loss": 0.6616154855680051, "train_runtime": 49213.1151, "train_samples_per_second": 3.777, "train_steps_per_second": 0.472 } ], "logging_steps": 10, "max_steps": 23238, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }