{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.666666666666667e-09, "logits/chosen": -2.4099323749542236, "logits/rejected": -1.6240229606628418, "logps/chosen": -448.31744384765625, "logps/rejected": -191.33251953125, "loss": 0.4075, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.666666666666668e-08, "logits/chosen": -1.989020824432373, "logits/rejected": -1.5890716314315796, "logps/chosen": -236.1018829345703, "logps/rejected": -226.4254913330078, "loss": 0.3306, "rewards/accuracies": 0.4166666567325592, "rewards/chosen": -7.88598208600888e-06, "rewards/margins": 3.9829301385907456e-05, "rewards/rejected": -4.771529711433686e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3333333333333336e-07, "logits/chosen": -1.8541256189346313, "logits/rejected": -1.5651360750198364, "logps/chosen": -270.60076904296875, "logps/rejected": -270.3322448730469, "loss": 0.3308, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -5.58321007702034e-05, "rewards/margins": 6.728438893333077e-05, "rewards/rejected": -0.00012311647878959775, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "logits/chosen": -2.009821653366089, "logits/rejected": -1.5267605781555176, "logps/chosen": -289.7004699707031, "logps/rejected": -240.9877166748047, "loss": 0.3801, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 6.77972930134274e-05, "rewards/margins": 0.00017511224723421037, "rewards/rejected": -0.00010731497604865581, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-07, "logits/chosen": -2.0199921131134033, "logits/rejected": -1.3087493181228638, "logps/chosen": -318.3033752441406, "logps/rejected": -240.2353057861328, "loss": 0.3182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0001012681022984907, "rewards/margins": 0.00041467478149570525, "rewards/rejected": -0.0003134066646452993, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.3333333333333335e-07, "logits/chosen": -1.5997120141983032, "logits/rejected": -1.3897509574890137, "logps/chosen": -229.5198211669922, "logps/rejected": -240.71969604492188, "loss": 0.3759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0002661711478140205, "rewards/margins": 0.0016337096458300948, "rewards/rejected": -0.0013675385853275657, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -2.0922536849975586, "logits/rejected": -1.6669349670410156, "logps/chosen": -247.25009155273438, "logps/rejected": -203.55384826660156, "loss": 0.3801, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0004775059060193598, "rewards/margins": 0.0029451469890773296, "rewards/rejected": -0.0034226528368890285, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.666666666666667e-07, "logits/chosen": -1.8292672634124756, "logits/rejected": -1.3837965726852417, "logps/chosen": -267.9219970703125, "logps/rejected": -246.97720336914062, "loss": 0.3532, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.00016045381198637187, "rewards/margins": 0.007861686870455742, "rewards/rejected": -0.007701232098042965, "step": 70 }, { "epoch": 0.01, "learning_rate": 5.333333333333335e-07, "logits/chosen": -1.9224233627319336, "logits/rejected": -1.5631624460220337, "logps/chosen": -271.2846374511719, "logps/rejected": -298.92950439453125, "loss": 0.2886, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.011401079595088959, "rewards/margins": 0.021195482462644577, "rewards/rejected": -0.032596562057733536, "step": 80 }, { "epoch": 0.01, "learning_rate": 6.000000000000001e-07, "logits/chosen": -1.8209606409072876, "logits/rejected": -1.4652128219604492, "logps/chosen": -242.1433868408203, "logps/rejected": -284.43731689453125, "loss": 0.3295, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.030289877206087112, "rewards/margins": 0.016816267743706703, "rewards/rejected": -0.047106143087148666, "step": 90 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -2.0971648693084717, "logits/rejected": -1.5691124200820923, "logps/chosen": -373.49371337890625, "logps/rejected": -374.1169128417969, "loss": 0.2423, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04245720058679581, "rewards/margins": 0.04919002950191498, "rewards/rejected": -0.09164722263813019, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.333333333333334e-07, "logits/chosen": -2.0052523612976074, "logits/rejected": -1.4158036708831787, "logps/chosen": -284.71112060546875, "logps/rejected": -388.4095458984375, "loss": 0.1985, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03486671671271324, "rewards/margins": 0.10158306360244751, "rewards/rejected": -0.13644976913928986, "step": 110 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.7910232543945312, "logits/rejected": -1.195896863937378, "logps/chosen": -349.8062438964844, "logps/rejected": -492.1412048339844, "loss": 0.2283, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07517056912183762, "rewards/margins": 0.15838217735290527, "rewards/rejected": -0.2335527241230011, "step": 120 }, { "epoch": 0.02, "learning_rate": 8.666666666666668e-07, "logits/chosen": -1.8132175207138062, "logits/rejected": -1.30657958984375, "logps/chosen": -379.88336181640625, "logps/rejected": -567.0684814453125, "loss": 0.1626, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11514802277088165, "rewards/margins": 0.23259301483631134, "rewards/rejected": -0.347741037607193, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-07, "logits/chosen": -1.6623605489730835, "logits/rejected": -1.2550121545791626, "logps/chosen": -436.0331115722656, "logps/rejected": -693.096923828125, "loss": 0.1626, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14200234413146973, "rewards/margins": 0.25166502594947815, "rewards/rejected": -0.3936673700809479, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -1.7733418941497803, "logits/rejected": -1.16778564453125, "logps/chosen": -511.66925048828125, "logps/rejected": -699.0992431640625, "loss": 0.1233, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21348969638347626, "rewards/margins": 0.27515870332717896, "rewards/rejected": -0.48864835500717163, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.066666666666667e-06, "logits/chosen": -1.879869818687439, "logits/rejected": -1.2550278902053833, "logps/chosen": -482.166259765625, "logps/rejected": -760.3292236328125, "loss": 0.124, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2403174340724945, "rewards/margins": 0.2997317612171173, "rewards/rejected": -0.5400491952896118, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.1333333333333334e-06, "logits/chosen": -1.6282508373260498, "logits/rejected": -0.9627411961555481, "logps/chosen": -414.8555603027344, "logps/rejected": -705.8746337890625, "loss": 0.1528, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14894428849220276, "rewards/margins": 0.37094196677207947, "rewards/rejected": -0.5198861956596375, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.5305386781692505, "logits/rejected": -1.1183087825775146, "logps/chosen": -443.141845703125, "logps/rejected": -795.3884887695312, "loss": 0.1274, "rewards/accuracies": 0.875, "rewards/chosen": -0.16620799899101257, "rewards/margins": 0.3535141348838806, "rewards/rejected": -0.5197221040725708, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.2666666666666669e-06, "logits/chosen": -1.6709671020507812, "logits/rejected": -1.0611721277236938, "logps/chosen": -524.5181274414062, "logps/rejected": -810.8128662109375, "loss": 0.1407, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2099526822566986, "rewards/margins": 0.3740180432796478, "rewards/rejected": -0.583970844745636, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.864699363708496, "logits/rejected": -1.0785900354385376, "logps/chosen": -482.0804748535156, "logps/rejected": -730.0054931640625, "loss": 0.1423, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18910348415374756, "rewards/margins": 0.29500436782836914, "rewards/rejected": -0.4841078817844391, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.4000000000000001e-06, "logits/chosen": -1.492980718612671, "logits/rejected": -0.9422389268875122, "logps/chosen": -467.66241455078125, "logps/rejected": -867.6456298828125, "loss": 0.131, "rewards/accuracies": 0.875, "rewards/chosen": -0.2241428643465042, "rewards/margins": 0.3977489471435547, "rewards/rejected": -0.6218917965888977, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.4666666666666669e-06, "logits/chosen": -1.6000795364379883, "logits/rejected": -0.9295433163642883, "logps/chosen": -429.8138732910156, "logps/rejected": -812.4957885742188, "loss": 0.0933, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15654727816581726, "rewards/margins": 0.4246586263179779, "rewards/rejected": -0.5812059640884399, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.5333333333333334e-06, "logits/chosen": -1.561963438987732, "logits/rejected": -1.016340970993042, "logps/chosen": -496.5228576660156, "logps/rejected": -820.2783203125, "loss": 0.1253, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23068983852863312, "rewards/margins": 0.3521668314933777, "rewards/rejected": -0.5828567743301392, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.543303370475769, "logits/rejected": -1.0427916049957275, "logps/chosen": -444.46014404296875, "logps/rejected": -828.2269287109375, "loss": 0.0941, "rewards/accuracies": 0.875, "rewards/chosen": -0.1384342759847641, "rewards/margins": 0.4045810103416443, "rewards/rejected": -0.5430153012275696, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-06, "logits/chosen": -1.546133279800415, "logits/rejected": -1.1522371768951416, "logps/chosen": -533.6563110351562, "logps/rejected": -807.0501098632812, "loss": 0.1711, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2938057780265808, "rewards/margins": 0.25221148133277893, "rewards/rejected": -0.5460172295570374, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-06, "logits/chosen": -1.8788810968399048, "logits/rejected": -1.0959275960922241, "logps/chosen": -480.61639404296875, "logps/rejected": -740.069091796875, "loss": 0.1395, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1702469289302826, "rewards/margins": 0.3451237082481384, "rewards/rejected": -0.5153706073760986, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.8000000000000001e-06, "logits/chosen": -1.5190702676773071, "logits/rejected": -0.980027973651886, "logps/chosen": -489.94207763671875, "logps/rejected": -871.6253662109375, "loss": 0.0945, "rewards/accuracies": 0.875, "rewards/chosen": -0.2337626963853836, "rewards/margins": 0.3760890066623688, "rewards/rejected": -0.6098517179489136, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.8666666666666669e-06, "logits/chosen": -1.468842625617981, "logits/rejected": -1.388031005859375, "logps/chosen": -381.8927307128906, "logps/rejected": -781.1408081054688, "loss": 0.176, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19682732224464417, "rewards/margins": 0.3542131185531616, "rewards/rejected": -0.5510404109954834, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.9333333333333336e-06, "logits/chosen": -1.6719022989273071, "logits/rejected": -0.883672833442688, "logps/chosen": -480.5074768066406, "logps/rejected": -683.6339111328125, "loss": 0.1495, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15120992064476013, "rewards/margins": 0.3173143267631531, "rewards/rejected": -0.4685242176055908, "step": 290 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.5708786249160767, "logits/rejected": -0.9583484530448914, "logps/chosen": -463.66143798828125, "logps/rejected": -833.3902587890625, "loss": 0.1112, "rewards/accuracies": 0.875, "rewards/chosen": -0.1888360232114792, "rewards/margins": 0.4084080159664154, "rewards/rejected": -0.5972440838813782, "step": 300 }, { "epoch": 0.04, "learning_rate": 2.0666666666666666e-06, "logits/chosen": -1.4761362075805664, "logits/rejected": -0.9715279340744019, "logps/chosen": -498.23876953125, "logps/rejected": -934.1094970703125, "loss": 0.1048, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23782452940940857, "rewards/margins": 0.4102650582790375, "rewards/rejected": -0.648089587688446, "step": 310 }, { "epoch": 0.04, "learning_rate": 2.133333333333334e-06, "logits/chosen": -1.631696343421936, "logits/rejected": -0.9176268577575684, "logps/chosen": -534.7830810546875, "logps/rejected": -852.8056640625, "loss": 0.1289, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23324842751026154, "rewards/margins": 0.3649081587791443, "rewards/rejected": -0.5981565713882446, "step": 320 }, { "epoch": 0.04, "learning_rate": 2.2e-06, "logits/chosen": -1.6708920001983643, "logits/rejected": -1.3186357021331787, "logps/chosen": -372.9091796875, "logps/rejected": -703.5186767578125, "loss": 0.1738, "rewards/accuracies": 0.75, "rewards/chosen": -0.13428106904029846, "rewards/margins": 0.3291914463043213, "rewards/rejected": -0.463472455739975, "step": 330 }, { "epoch": 0.05, "learning_rate": 2.266666666666667e-06, "logits/chosen": -1.4466753005981445, "logits/rejected": -1.0592036247253418, "logps/chosen": -432.8397521972656, "logps/rejected": -774.4104614257812, "loss": 0.1597, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19159728288650513, "rewards/margins": 0.31294533610343933, "rewards/rejected": -0.5045426487922668, "step": 340 }, { "epoch": 0.05, "learning_rate": 2.3333333333333336e-06, "logits/chosen": -1.584106206893921, "logits/rejected": -1.0651090145111084, "logps/chosen": -403.37353515625, "logps/rejected": -676.6839599609375, "loss": 0.1674, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1616145223379135, "rewards/margins": 0.2902247905731201, "rewards/rejected": -0.4518393576145172, "step": 350 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.2375500202178955, "logits/rejected": -0.8195334672927856, "logps/chosen": -413.3699645996094, "logps/rejected": -646.4727172851562, "loss": 0.1935, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17713095247745514, "rewards/margins": 0.27770182490348816, "rewards/rejected": -0.4548327922821045, "step": 360 }, { "epoch": 0.05, "learning_rate": 2.466666666666667e-06, "logits/chosen": -1.9084575176239014, "logits/rejected": -1.1361953020095825, "logps/chosen": -406.1421813964844, "logps/rejected": -712.7696533203125, "loss": 0.1213, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10945934057235718, "rewards/margins": 0.37037864327430725, "rewards/rejected": -0.47983798384666443, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.5333333333333338e-06, "logits/chosen": -1.6088730096817017, "logits/rejected": -0.9428955316543579, "logps/chosen": -603.7337646484375, "logps/rejected": -854.6677856445312, "loss": 0.1224, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32599180936813354, "rewards/margins": 0.3368051052093506, "rewards/rejected": -0.6627969145774841, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.6e-06, "logits/chosen": -1.5955326557159424, "logits/rejected": -1.1725094318389893, "logps/chosen": -557.5750732421875, "logps/rejected": -774.2070922851562, "loss": 0.1827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3110761344432831, "rewards/margins": 0.2598307728767395, "rewards/rejected": -0.570906937122345, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.635376214981079, "logits/rejected": -0.9140356779098511, "logps/chosen": -605.5277709960938, "logps/rejected": -973.1331787109375, "loss": 0.0749, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.30397385358810425, "rewards/margins": 0.42590442299842834, "rewards/rejected": -0.729878306388855, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.7333333333333336e-06, "logits/chosen": -1.6633880138397217, "logits/rejected": -1.0514023303985596, "logps/chosen": -480.266845703125, "logps/rejected": -835.7138671875, "loss": 0.1532, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21217575669288635, "rewards/margins": 0.3710072338581085, "rewards/rejected": -0.5831829309463501, "step": 410 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -1.7376649379730225, "logits/rejected": -1.1299479007720947, "logps/chosen": -487.1092834472656, "logps/rejected": -886.4588012695312, "loss": 0.1114, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20204193890094757, "rewards/margins": 0.41378122568130493, "rewards/rejected": -0.6158231496810913, "step": 420 }, { "epoch": 0.06, "learning_rate": 2.866666666666667e-06, "logits/chosen": -1.5146900415420532, "logits/rejected": -1.1276448965072632, "logps/chosen": -339.6864929199219, "logps/rejected": -572.2979125976562, "loss": 0.1681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10513390600681305, "rewards/margins": 0.25415563583374023, "rewards/rejected": -0.3592894971370697, "step": 430 }, { "epoch": 0.06, "learning_rate": 2.9333333333333338e-06, "logits/chosen": -1.6274223327636719, "logits/rejected": -1.0441734790802002, "logps/chosen": -395.390869140625, "logps/rejected": -655.0814208984375, "loss": 0.1743, "rewards/accuracies": 0.75, "rewards/chosen": -0.18642660975456238, "rewards/margins": 0.28394168615341187, "rewards/rejected": -0.47036832571029663, "step": 440 }, { "epoch": 0.06, "learning_rate": 3e-06, "logits/chosen": -1.8228683471679688, "logits/rejected": -1.2179882526397705, "logps/chosen": -464.56201171875, "logps/rejected": -823.7962646484375, "loss": 0.1176, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21438221633434296, "rewards/margins": 0.38342320919036865, "rewards/rejected": -0.597805380821228, "step": 450 }, { "epoch": 0.06, "learning_rate": 3.066666666666667e-06, "logits/chosen": -1.4820787906646729, "logits/rejected": -0.9871037602424622, "logps/chosen": -505.381103515625, "logps/rejected": -717.4215087890625, "loss": 0.1718, "rewards/accuracies": 0.75, "rewards/chosen": -0.28233593702316284, "rewards/margins": 0.2751788794994354, "rewards/rejected": -0.5575148463249207, "step": 460 }, { "epoch": 0.06, "learning_rate": 3.133333333333334e-06, "logits/chosen": -1.724001169204712, "logits/rejected": -0.8987816572189331, "logps/chosen": -523.7393798828125, "logps/rejected": -955.359375, "loss": 0.0896, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23277001082897186, "rewards/margins": 0.503928542137146, "rewards/rejected": -0.7366985082626343, "step": 470 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.648781180381775, "logits/rejected": -1.0982370376586914, "logps/chosen": -436.5008239746094, "logps/rejected": -818.0286865234375, "loss": 0.1631, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21771135926246643, "rewards/margins": 0.38935723900794983, "rewards/rejected": -0.607068657875061, "step": 480 }, { "epoch": 0.07, "learning_rate": 3.266666666666667e-06, "logits/chosen": -1.3881748914718628, "logits/rejected": -0.9845792651176453, "logps/chosen": -424.2598571777344, "logps/rejected": -879.8922119140625, "loss": 0.1189, "rewards/accuracies": 0.875, "rewards/chosen": -0.16997823119163513, "rewards/margins": 0.43233782052993774, "rewards/rejected": -0.6023160219192505, "step": 490 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.9221470355987549, "logits/rejected": -1.1122715473175049, "logps/chosen": -424.117919921875, "logps/rejected": -606.3838500976562, "loss": 0.1658, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12460688501596451, "rewards/margins": 0.3144245743751526, "rewards/rejected": -0.4390315115451813, "step": 500 }, { "epoch": 0.07, "learning_rate": 3.4000000000000005e-06, "logits/chosen": -1.4596699476242065, "logits/rejected": -1.0453931093215942, "logps/chosen": -382.4856262207031, "logps/rejected": -710.4122314453125, "loss": 0.1403, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1713198721408844, "rewards/margins": 0.3306172490119934, "rewards/rejected": -0.5019370913505554, "step": 510 }, { "epoch": 0.07, "learning_rate": 3.4666666666666672e-06, "logits/chosen": -1.5380425453186035, "logits/rejected": -1.0979361534118652, "logps/chosen": -400.77777099609375, "logps/rejected": -663.2037353515625, "loss": 0.16, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12989595532417297, "rewards/margins": 0.27567845582962036, "rewards/rejected": -0.40557441115379333, "step": 520 }, { "epoch": 0.07, "learning_rate": 3.5333333333333335e-06, "logits/chosen": -1.3848693370819092, "logits/rejected": -1.0763051509857178, "logps/chosen": -434.2381286621094, "logps/rejected": -745.5271606445312, "loss": 0.154, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20348243415355682, "rewards/margins": 0.31607288122177124, "rewards/rejected": -0.519555389881134, "step": 530 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.5418269634246826, "logits/rejected": -0.9006346464157104, "logps/chosen": -563.9154052734375, "logps/rejected": -912.6988525390625, "loss": 0.1521, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31098824739456177, "rewards/margins": 0.3480641841888428, "rewards/rejected": -0.6590523719787598, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.6666666666666666e-06, "logits/chosen": -1.5205752849578857, "logits/rejected": -1.076106071472168, "logps/chosen": -494.74298095703125, "logps/rejected": -874.4945068359375, "loss": 0.1266, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23468537628650665, "rewards/margins": 0.3964490294456482, "rewards/rejected": -0.6311343908309937, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.7333333333333337e-06, "logits/chosen": -1.3866068124771118, "logits/rejected": -0.9945683479309082, "logps/chosen": -443.53277587890625, "logps/rejected": -780.4207763671875, "loss": 0.1463, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21203942596912384, "rewards/margins": 0.3310268521308899, "rewards/rejected": -0.5430662631988525, "step": 560 }, { "epoch": 0.08, "learning_rate": 3.8000000000000005e-06, "logits/chosen": -1.5126006603240967, "logits/rejected": -0.922463059425354, "logps/chosen": -485.6847229003906, "logps/rejected": -759.709716796875, "loss": 0.1369, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2078860104084015, "rewards/margins": 0.3498801589012146, "rewards/rejected": -0.5577661395072937, "step": 570 }, { "epoch": 0.08, "learning_rate": 3.866666666666667e-06, "logits/chosen": -1.4890224933624268, "logits/rejected": -1.0690945386886597, "logps/chosen": -372.1873474121094, "logps/rejected": -693.4901123046875, "loss": 0.1531, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1699579507112503, "rewards/margins": 0.333346426486969, "rewards/rejected": -0.5033043622970581, "step": 580 }, { "epoch": 0.08, "learning_rate": 3.9333333333333335e-06, "logits/chosen": -1.496760606765747, "logits/rejected": -1.0878002643585205, "logps/chosen": -333.7672424316406, "logps/rejected": -653.5516357421875, "loss": 0.1251, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11113238334655762, "rewards/margins": 0.3392719626426697, "rewards/rejected": -0.4504043459892273, "step": 590 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.385391354560852, "logits/rejected": -0.9585914611816406, "logps/chosen": -380.42291259765625, "logps/rejected": -723.8530883789062, "loss": 0.1679, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15061721205711365, "rewards/margins": 0.342263400554657, "rewards/rejected": -0.492880642414093, "step": 600 }, { "epoch": 0.08, "learning_rate": 4.066666666666667e-06, "logits/chosen": -1.3731095790863037, "logits/rejected": -1.0338678359985352, "logps/chosen": -412.2789001464844, "logps/rejected": -753.2153930664062, "loss": 0.1668, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19600458443164825, "rewards/margins": 0.33788540959358215, "rewards/rejected": -0.5338899493217468, "step": 610 }, { "epoch": 0.08, "learning_rate": 4.133333333333333e-06, "logits/chosen": -1.396970510482788, "logits/rejected": -0.7782880067825317, "logps/chosen": -517.6131591796875, "logps/rejected": -849.923828125, "loss": 0.1735, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22774609923362732, "rewards/margins": 0.35508590936660767, "rewards/rejected": -0.5828320384025574, "step": 620 }, { "epoch": 0.08, "learning_rate": 4.2000000000000004e-06, "logits/chosen": -1.1911531686782837, "logits/rejected": -0.7422036528587341, "logps/chosen": -549.0075073242188, "logps/rejected": -880.8527221679688, "loss": 0.1548, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3094066083431244, "rewards/margins": 0.3175147473812103, "rewards/rejected": -0.6269214153289795, "step": 630 }, { "epoch": 0.09, "learning_rate": 4.266666666666668e-06, "logits/chosen": -1.3914610147476196, "logits/rejected": -0.7553210854530334, "logps/chosen": -550.94580078125, "logps/rejected": -705.796875, "loss": 0.1998, "rewards/accuracies": 0.75, "rewards/chosen": -0.282638818025589, "rewards/margins": 0.24997854232788086, "rewards/rejected": -0.5326173901557922, "step": 640 }, { "epoch": 0.09, "learning_rate": 4.333333333333334e-06, "logits/chosen": -1.5466039180755615, "logits/rejected": -0.8693670034408569, "logps/chosen": -531.4341430664062, "logps/rejected": -816.57958984375, "loss": 0.1839, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2846863865852356, "rewards/margins": 0.29854658246040344, "rewards/rejected": -0.5832329392433167, "step": 650 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.7242590188980103, "logits/rejected": -1.2059741020202637, "logps/chosen": -398.84722900390625, "logps/rejected": -687.1949462890625, "loss": 0.1864, "rewards/accuracies": 0.875, "rewards/chosen": -0.15967541933059692, "rewards/margins": 0.31971973180770874, "rewards/rejected": -0.4793950915336609, "step": 660 }, { "epoch": 0.09, "learning_rate": 4.4666666666666665e-06, "logits/chosen": -1.4636131525039673, "logits/rejected": -0.9360952377319336, "logps/chosen": -465.987060546875, "logps/rejected": -890.33544921875, "loss": 0.1079, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.211115762591362, "rewards/margins": 0.4524083137512207, "rewards/rejected": -0.6635240316390991, "step": 670 }, { "epoch": 0.09, "learning_rate": 4.533333333333334e-06, "logits/chosen": -1.55280339717865, "logits/rejected": -0.9204519391059875, "logps/chosen": -589.2633666992188, "logps/rejected": -837.0107421875, "loss": 0.1781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2600969672203064, "rewards/margins": 0.3369695544242859, "rewards/rejected": -0.5970665216445923, "step": 680 }, { "epoch": 0.09, "learning_rate": 4.600000000000001e-06, "logits/chosen": -1.4817430973052979, "logits/rejected": -0.7725855708122253, "logps/chosen": -692.3911743164062, "logps/rejected": -1051.6116943359375, "loss": 0.1705, "rewards/accuracies": 0.875, "rewards/chosen": -0.3202987313270569, "rewards/margins": 0.4163621962070465, "rewards/rejected": -0.7366609573364258, "step": 690 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.4252475500106812, "logits/rejected": -0.8196362257003784, "logps/chosen": -674.8631591796875, "logps/rejected": -991.6558837890625, "loss": 0.1071, "rewards/accuracies": 0.875, "rewards/chosen": -0.37729111313819885, "rewards/margins": 0.34081095457077026, "rewards/rejected": -0.718101978302002, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.7333333333333335e-06, "logits/chosen": -1.3881539106369019, "logits/rejected": -0.8435044288635254, "logps/chosen": -610.1460571289062, "logps/rejected": -940.4801635742188, "loss": 0.1247, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3053412437438965, "rewards/margins": 0.35758185386657715, "rewards/rejected": -0.6629230976104736, "step": 710 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.4027553796768188, "logits/rejected": -0.835735023021698, "logps/chosen": -514.1181030273438, "logps/rejected": -825.6218872070312, "loss": 0.1403, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24213871359825134, "rewards/margins": 0.3390628397464752, "rewards/rejected": -0.581201434135437, "step": 720 }, { "epoch": 0.1, "learning_rate": 4.866666666666667e-06, "logits/chosen": -1.5231796503067017, "logits/rejected": -0.9179169535636902, "logps/chosen": -494.64447021484375, "logps/rejected": -796.8231201171875, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/chosen": -0.21210741996765137, "rewards/margins": 0.3426913917064667, "rewards/rejected": -0.5547988414764404, "step": 730 }, { "epoch": 0.1, "learning_rate": 4.933333333333334e-06, "logits/chosen": -1.555768609046936, "logits/rejected": -0.8261078000068665, "logps/chosen": -448.23956298828125, "logps/rejected": -778.2855834960938, "loss": 0.0803, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17634207010269165, "rewards/margins": 0.4066527485847473, "rewards/rejected": -0.582994818687439, "step": 740 }, { "epoch": 0.1, "learning_rate": 5e-06, "logits/chosen": -1.5711801052093506, "logits/rejected": -1.0917423963546753, "logps/chosen": -538.0059814453125, "logps/rejected": -896.7288818359375, "loss": 0.1498, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23624293506145477, "rewards/margins": 0.36364543437957764, "rewards/rejected": -0.5998883843421936, "step": 750 }, { "epoch": 0.1, "learning_rate": 4.999972922944898e-06, "logits/chosen": -1.443225383758545, "logits/rejected": -0.8622598648071289, "logps/chosen": -394.50592041015625, "logps/rejected": -769.7340087890625, "loss": 0.0854, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.13901114463806152, "rewards/margins": 0.40866619348526, "rewards/rejected": -0.5476772785186768, "step": 760 }, { "epoch": 0.1, "learning_rate": 4.999891692366121e-06, "logits/chosen": -1.4831535816192627, "logits/rejected": -1.0045164823532104, "logps/chosen": -486.68145751953125, "logps/rejected": -841.0699462890625, "loss": 0.1678, "rewards/accuracies": 0.875, "rewards/chosen": -0.22009320557117462, "rewards/margins": 0.36181217432022095, "rewards/rejected": -0.5819053649902344, "step": 770 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -1.7487876415252686, "logits/rejected": -1.058393955230713, "logps/chosen": -549.0352783203125, "logps/rejected": -802.88671875, "loss": 0.1662, "rewards/accuracies": 0.75, "rewards/chosen": -0.24724209308624268, "rewards/margins": 0.32394474744796753, "rewards/rejected": -0.5711868405342102, "step": 780 }, { "epoch": 0.11, "learning_rate": 4.99956677884892e-06, "logits/chosen": -1.243823766708374, "logits/rejected": -1.0226433277130127, "logps/chosen": -490.24554443359375, "logps/rejected": -882.51611328125, "loss": 0.132, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2924804091453552, "rewards/margins": 0.3418129086494446, "rewards/rejected": -0.6342933773994446, "step": 790 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.3914577960968018, "logits/rejected": -0.9622076153755188, "logps/chosen": -560.9547119140625, "logps/rejected": -822.1790161132812, "loss": 0.2206, "rewards/accuracies": 0.75, "rewards/chosen": -0.3030504584312439, "rewards/margins": 0.3117767572402954, "rewards/rejected": -0.6148272752761841, "step": 800 }, { "epoch": 0.11, "learning_rate": 4.999025287600886e-06, "logits/chosen": -1.7494767904281616, "logits/rejected": -1.1633926630020142, "logps/chosen": -481.3182678222656, "logps/rejected": -820.7761840820312, "loss": 0.1194, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14612558484077454, "rewards/margins": 0.40427789092063904, "rewards/rejected": -0.5504035353660583, "step": 810 }, { "epoch": 0.11, "learning_rate": 4.998673339256785e-06, "logits/chosen": -1.7987353801727295, "logits/rejected": -1.194678544998169, "logps/chosen": -485.6026916503906, "logps/rejected": -903.0382690429688, "loss": 0.106, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20885272324085236, "rewards/margins": 0.3845108449459076, "rewards/rejected": -0.5933635830879211, "step": 820 }, { "epoch": 0.11, "learning_rate": 4.99826726554013e-06, "logits/chosen": -1.6719648838043213, "logits/rejected": -1.136561393737793, "logps/chosen": -405.9174499511719, "logps/rejected": -852.544921875, "loss": 0.097, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15679454803466797, "rewards/margins": 0.4471038281917572, "rewards/rejected": -0.6038983464241028, "step": 830 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.4109543561935425, "logits/rejected": -0.9229291081428528, "logps/chosen": -410.027587890625, "logps/rejected": -742.806884765625, "loss": 0.1399, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13022413849830627, "rewards/margins": 0.38399913907051086, "rewards/rejected": -0.5142232179641724, "step": 840 }, { "epoch": 0.11, "learning_rate": 4.997292778346312e-06, "logits/chosen": -1.8231170177459717, "logits/rejected": -1.1286346912384033, "logps/chosen": -409.29254150390625, "logps/rejected": -768.8543701171875, "loss": 0.1378, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13088057935237885, "rewards/margins": 0.37201347947120667, "rewards/rejected": -0.5028941035270691, "step": 850 }, { "epoch": 0.11, "learning_rate": 4.996724385978142e-06, "logits/chosen": -1.8869422674179077, "logits/rejected": -1.3146274089813232, "logps/chosen": -461.17913818359375, "logps/rejected": -739.4716186523438, "loss": 0.1808, "rewards/accuracies": 0.875, "rewards/chosen": -0.15798479318618774, "rewards/margins": 0.3153396248817444, "rewards/rejected": -0.4733244478702545, "step": 860 }, { "epoch": 0.12, "learning_rate": 4.996101910454953e-06, "logits/chosen": -1.6006109714508057, "logits/rejected": -0.9469090700149536, "logps/chosen": -449.0116271972656, "logps/rejected": -766.9967651367188, "loss": 0.1518, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14297400414943695, "rewards/margins": 0.36727023124694824, "rewards/rejected": -0.5102442502975464, "step": 870 }, { "epoch": 0.12, "learning_rate": 4.995425365260585e-06, "logits/chosen": -1.4800479412078857, "logits/rejected": -0.9261878728866577, "logps/chosen": -470.6463317871094, "logps/rejected": -783.7489624023438, "loss": 0.1492, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18247491121292114, "rewards/margins": 0.3636634945869446, "rewards/rejected": -0.546138346195221, "step": 880 }, { "epoch": 0.12, "learning_rate": 4.994694765050121e-06, "logits/chosen": -1.275754451751709, "logits/rejected": -0.8973702192306519, "logps/chosen": -363.8408508300781, "logps/rejected": -709.4940185546875, "loss": 0.1327, "rewards/accuracies": 0.875, "rewards/chosen": -0.1413012444972992, "rewards/margins": 0.36497193574905396, "rewards/rejected": -0.5062731504440308, "step": 890 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.463733434677124, "logits/rejected": -1.1680874824523926, "logps/chosen": -325.5857849121094, "logps/rejected": -539.0804443359375, "loss": 0.2243, "rewards/accuracies": 0.625, "rewards/chosen": -0.13075824081897736, "rewards/margins": 0.24207767844200134, "rewards/rejected": -0.3728359341621399, "step": 900 }, { "epoch": 0.12, "learning_rate": 4.993071464055486e-06, "logits/chosen": -1.7845207452774048, "logits/rejected": -1.0263665914535522, "logps/chosen": -360.773193359375, "logps/rejected": -632.8513793945312, "loss": 0.2036, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11185532808303833, "rewards/margins": 0.32573041319847107, "rewards/rejected": -0.4375857412815094, "step": 910 }, { "epoch": 0.12, "learning_rate": 4.992178798434684e-06, "logits/chosen": -1.693821668624878, "logits/rejected": -1.128700852394104, "logps/chosen": -323.9156494140625, "logps/rejected": -669.1837158203125, "loss": 0.155, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07970709353685379, "rewards/margins": 0.3461817800998688, "rewards/rejected": -0.42588886618614197, "step": 920 }, { "epoch": 0.12, "learning_rate": 4.9912321481237616e-06, "logits/chosen": -1.5555380582809448, "logits/rejected": -0.9651791453361511, "logps/chosen": -371.4592590332031, "logps/rejected": -629.7036743164062, "loss": 0.1393, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11751563847064972, "rewards/margins": 0.30828914046287537, "rewards/rejected": -0.4258047640323639, "step": 930 }, { "epoch": 0.13, "learning_rate": 4.990231533628719e-06, "logits/chosen": -1.428524136543274, "logits/rejected": -0.9469925165176392, "logps/chosen": -374.8126220703125, "logps/rejected": -788.1290283203125, "loss": 0.1347, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12096371501684189, "rewards/margins": 0.4053443372249603, "rewards/rejected": -0.5263080596923828, "step": 940 }, { "epoch": 0.13, "learning_rate": 4.989176976624511e-06, "logits/chosen": -1.436598539352417, "logits/rejected": -0.9486812353134155, "logps/chosen": -430.09332275390625, "logps/rejected": -847.3265380859375, "loss": 0.0798, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12556599080562592, "rewards/margins": 0.45097970962524414, "rewards/rejected": -0.5765457153320312, "step": 950 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -1.656904935836792, "logits/rejected": -0.7772586941719055, "logps/chosen": -555.0128784179688, "logps/rejected": -860.6650390625, "loss": 0.1112, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20253200829029083, "rewards/margins": 0.4072926938533783, "rewards/rejected": -0.6098247170448303, "step": 960 }, { "epoch": 0.13, "learning_rate": 4.986906127630346e-06, "logits/chosen": -1.301117181777954, "logits/rejected": -0.9449092149734497, "logps/chosen": -494.9105529785156, "logps/rejected": -889.8714599609375, "loss": 0.1113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22420816123485565, "rewards/margins": 0.3903779685497284, "rewards/rejected": -0.6145861148834229, "step": 970 }, { "epoch": 0.13, "learning_rate": 4.985689884830711e-06, "logits/chosen": -1.3350300788879395, "logits/rejected": -0.9063273668289185, "logps/chosen": -417.68878173828125, "logps/rejected": -705.7192993164062, "loss": 0.1779, "rewards/accuracies": 0.75, "rewards/chosen": -0.22058455646038055, "rewards/margins": 0.28054580092430115, "rewards/rejected": -0.5011304020881653, "step": 980 }, { "epoch": 0.13, "learning_rate": 4.984419797901491e-06, "logits/chosen": -1.6169216632843018, "logits/rejected": -0.7380484938621521, "logps/chosen": -452.4661560058594, "logps/rejected": -739.5595703125, "loss": 0.1124, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1508241593837738, "rewards/margins": 0.3955535888671875, "rewards/rejected": -0.5463777780532837, "step": 990 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.2634952068328857, "logits/rejected": -1.00887930393219, "logps/chosen": -380.5688781738281, "logps/rejected": -815.0224609375, "loss": 0.1, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13798674941062927, "rewards/margins": 0.43448323011398315, "rewards/rejected": -0.5724700093269348, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.981718202868738e-06, "logits/chosen": -1.3051574230194092, "logits/rejected": -0.9665368795394897, "logps/chosen": -449.41912841796875, "logps/rejected": -758.8711547851562, "loss": 0.1449, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17802168428897858, "rewards/margins": 0.330491304397583, "rewards/rejected": -0.5085129737854004, "step": 1010 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.7392864227294922, "logits/rejected": -0.7400856614112854, "logps/chosen": -542.4926147460938, "logps/rejected": -784.757568359375, "loss": 0.1234, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1896502673625946, "rewards/margins": 0.3813712000846863, "rewards/rejected": -0.5710214376449585, "step": 1020 }, { "epoch": 0.14, "learning_rate": 4.978801576614779e-06, "logits/chosen": -1.7218875885009766, "logits/rejected": -1.2126208543777466, "logps/chosen": -534.4451293945312, "logps/rejected": -855.2717895507812, "loss": 0.1815, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22779861092567444, "rewards/margins": 0.35777026414871216, "rewards/rejected": -0.5855687856674194, "step": 1030 }, { "epoch": 0.14, "learning_rate": 4.97726270502586e-06, "logits/chosen": -1.4519014358520508, "logits/rejected": -1.0071544647216797, "logps/chosen": -378.66204833984375, "logps/rejected": -685.9154663085938, "loss": 0.1499, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1705239862203598, "rewards/margins": 0.3545297682285309, "rewards/rejected": -0.5250537991523743, "step": 1040 }, { "epoch": 0.14, "learning_rate": 4.975670171853926e-06, "logits/chosen": -1.462651252746582, "logits/rejected": -1.1279051303863525, "logps/chosen": -422.732666015625, "logps/rejected": -821.091796875, "loss": 0.1274, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20711009204387665, "rewards/margins": 0.3589819073677063, "rewards/rejected": -0.5660920143127441, "step": 1050 }, { "epoch": 0.14, "learning_rate": 4.974024011595864e-06, "logits/chosen": -1.5038468837738037, "logits/rejected": -0.8036036491394043, "logps/chosen": -486.71124267578125, "logps/rejected": -806.163330078125, "loss": 0.1787, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2330636978149414, "rewards/margins": 0.34660226106643677, "rewards/rejected": -0.5796659588813782, "step": 1060 }, { "epoch": 0.14, "learning_rate": 4.97232425991021e-06, "logits/chosen": -1.6467853784561157, "logits/rejected": -0.9985973238945007, "logps/chosen": -512.62548828125, "logps/rejected": -888.6848754882812, "loss": 0.0914, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21965539455413818, "rewards/margins": 0.3955782353878021, "rewards/rejected": -0.6152336597442627, "step": 1070 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.4452916383743286, "logits/rejected": -0.927303671836853, "logps/chosen": -471.254638671875, "logps/rejected": -731.012939453125, "loss": 0.1654, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22571448981761932, "rewards/margins": 0.2892727255821228, "rewards/rejected": -0.5149871706962585, "step": 1080 }, { "epoch": 0.15, "learning_rate": 4.9687641306938766e-06, "logits/chosen": -1.5185739994049072, "logits/rejected": -1.169559359550476, "logps/chosen": -446.5347595214844, "logps/rejected": -802.6253051757812, "loss": 0.1374, "rewards/accuracies": 0.875, "rewards/chosen": -0.22027269005775452, "rewards/margins": 0.3299613296985626, "rewards/rejected": -0.5502340197563171, "step": 1090 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.5317347049713135, "logits/rejected": -0.922308087348938, "logps/chosen": -376.71490478515625, "logps/rejected": -660.5279541015625, "loss": 0.1419, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14628192782402039, "rewards/margins": 0.3379586637020111, "rewards/rejected": -0.4842405915260315, "step": 1100 }, { "epoch": 0.15, "learning_rate": 4.964990092676263e-06, "logits/chosen": -1.753617525100708, "logits/rejected": -1.197157621383667, "logps/chosen": -461.98699951171875, "logps/rejected": -775.8490600585938, "loss": 0.1432, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17882071435451508, "rewards/margins": 0.3258904814720154, "rewards/rejected": -0.5047112703323364, "step": 1110 }, { "epoch": 0.15, "learning_rate": 4.9630229593330226e-06, "logits/chosen": -1.414660930633545, "logits/rejected": -1.0446290969848633, "logps/chosen": -500.90802001953125, "logps/rejected": -849.2281494140625, "loss": 0.1148, "rewards/accuracies": 0.875, "rewards/chosen": -0.23089566826820374, "rewards/margins": 0.3633985221385956, "rewards/rejected": -0.5942941904067993, "step": 1120 }, { "epoch": 0.15, "learning_rate": 4.96100247286307e-06, "logits/chosen": -1.594772219657898, "logits/rejected": -0.9971159100532532, "logps/chosen": -482.41131591796875, "logps/rejected": -744.8736572265625, "loss": 0.1841, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19598089158535004, "rewards/margins": 0.3241138756275177, "rewards/rejected": -0.5200947523117065, "step": 1130 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.541329264640808, "logits/rejected": -1.0629053115844727, "logps/chosen": -504.20794677734375, "logps/rejected": -874.9547729492188, "loss": 0.1078, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2284460961818695, "rewards/margins": 0.3831626772880554, "rewards/rejected": -0.6116088032722473, "step": 1140 }, { "epoch": 0.15, "learning_rate": 4.956801616766033e-06, "logits/chosen": -1.495168924331665, "logits/rejected": -0.9355745315551758, "logps/chosen": -517.480224609375, "logps/rejected": -883.2626953125, "loss": 0.1319, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2589607536792755, "rewards/margins": 0.37442725896835327, "rewards/rejected": -0.6333879828453064, "step": 1150 }, { "epoch": 0.15, "learning_rate": 4.954621338136399e-06, "logits/chosen": -1.3089743852615356, "logits/rejected": -0.8916622996330261, "logps/chosen": -547.32666015625, "logps/rejected": -806.1666870117188, "loss": 0.167, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2952585518360138, "rewards/margins": 0.29442209005355835, "rewards/rejected": -0.5896806120872498, "step": 1160 }, { "epoch": 0.16, "learning_rate": 4.9523878883729794e-06, "logits/chosen": -1.5897184610366821, "logits/rejected": -1.0826232433319092, "logps/chosen": -606.2791748046875, "logps/rejected": -918.5662231445312, "loss": 0.1267, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32889145612716675, "rewards/margins": 0.3194637894630432, "rewards/rejected": -0.64835524559021, "step": 1170 }, { "epoch": 0.16, "learning_rate": 4.95010131585597e-06, "logits/chosen": -1.5232877731323242, "logits/rejected": -0.9521480798721313, "logps/chosen": -433.63641357421875, "logps/rejected": -817.1611938476562, "loss": 0.1077, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2032690793275833, "rewards/margins": 0.39957109093666077, "rewards/rejected": -0.6028401851654053, "step": 1180 }, { "epoch": 0.16, "learning_rate": 4.94776167011629e-06, "logits/chosen": -1.4146318435668945, "logits/rejected": -1.2307662963867188, "logps/chosen": -374.317626953125, "logps/rejected": -775.4901123046875, "loss": 0.1527, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20076608657836914, "rewards/margins": 0.33227279782295227, "rewards/rejected": -0.5330389738082886, "step": 1190 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.8757169246673584, "logits/rejected": -1.1736624240875244, "logps/chosen": -530.3184814453125, "logps/rejected": -783.8491821289062, "loss": 0.1378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17494268715381622, "rewards/margins": 0.35382869839668274, "rewards/rejected": -0.5287714004516602, "step": 1200 }, { "epoch": 0.16, "learning_rate": 4.94292336283977e-06, "logits/chosen": -1.5193207263946533, "logits/rejected": -0.9281194806098938, "logps/chosen": -563.0369873046875, "logps/rejected": -860.80419921875, "loss": 0.1374, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25554361939430237, "rewards/margins": 0.3764990270137787, "rewards/rejected": -0.6320425868034363, "step": 1210 }, { "epoch": 0.16, "learning_rate": 4.940424806108619e-06, "logits/chosen": -1.1913942098617554, "logits/rejected": -1.1959201097488403, "logps/chosen": -644.5512084960938, "logps/rejected": -949.6307373046875, "loss": 0.1714, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4327170252799988, "rewards/margins": 0.29346293210983276, "rewards/rejected": -0.7261799573898315, "step": 1220 }, { "epoch": 0.16, "learning_rate": 4.937873385763909e-06, "logits/chosen": -1.3897337913513184, "logits/rejected": -0.5832411050796509, "logps/chosen": -696.0330200195312, "logps/rejected": -1003.8894653320312, "loss": 0.1041, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.33212608098983765, "rewards/margins": 0.4552842080593109, "rewards/rejected": -0.7874102592468262, "step": 1230 }, { "epoch": 0.17, "learning_rate": 4.935269157073597e-06, "logits/chosen": -1.3500540256500244, "logits/rejected": -0.8013589978218079, "logps/chosen": -672.7722778320312, "logps/rejected": -932.7517700195312, "loss": 0.1232, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4343793988227844, "rewards/margins": 0.2973952293395996, "rewards/rejected": -0.731774628162384, "step": 1240 }, { "epoch": 0.17, "learning_rate": 4.93261217644956e-06, "logits/chosen": -1.6776320934295654, "logits/rejected": -1.179521918296814, "logps/chosen": -562.4392700195312, "logps/rejected": -992.1153564453125, "loss": 0.1318, "rewards/accuracies": 0.875, "rewards/chosen": -0.29206374287605286, "rewards/margins": 0.37819522619247437, "rewards/rejected": -0.6702588796615601, "step": 1250 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.5510917901992798, "logits/rejected": -1.1057939529418945, "logps/chosen": -603.5531005859375, "logps/rejected": -955.1824951171875, "loss": 0.1113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3010428547859192, "rewards/margins": 0.3675137460231781, "rewards/rejected": -0.6685565710067749, "step": 1260 }, { "epoch": 0.17, "learning_rate": 4.92714019076003e-06, "logits/chosen": -1.5722512006759644, "logits/rejected": -0.7237231135368347, "logps/chosen": -585.212890625, "logps/rejected": -873.8582153320312, "loss": 0.1326, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30967992544174194, "rewards/margins": 0.38483959436416626, "rewards/rejected": -0.6945194602012634, "step": 1270 }, { "epoch": 0.17, "learning_rate": 4.924325304226745e-06, "logits/chosen": -1.391479730606079, "logits/rejected": -0.8646553754806519, "logps/chosen": -601.2059936523438, "logps/rejected": -808.3594970703125, "loss": 0.2136, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3519865870475769, "rewards/margins": 0.26446038484573364, "rewards/rejected": -0.6164470314979553, "step": 1280 }, { "epoch": 0.17, "learning_rate": 4.921457902821578e-06, "logits/chosen": -1.5498051643371582, "logits/rejected": -0.9521937370300293, "logps/chosen": -450.6568908691406, "logps/rejected": -783.283203125, "loss": 0.114, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1782601922750473, "rewards/margins": 0.3781951367855072, "rewards/rejected": -0.5564553737640381, "step": 1290 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.7239599227905273, "logits/rejected": -0.9669686555862427, "logps/chosen": -506.9222106933594, "logps/rejected": -948.07421875, "loss": 0.0635, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.22643208503723145, "rewards/margins": 0.4678976535797119, "rewards/rejected": -0.6943297386169434, "step": 1300 }, { "epoch": 0.17, "learning_rate": 4.915565804982332e-06, "logits/chosen": -1.7978935241699219, "logits/rejected": -1.182894229888916, "logps/chosen": -494.53582763671875, "logps/rejected": -797.4810791015625, "loss": 0.197, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2301880419254303, "rewards/margins": 0.3509904742240906, "rewards/rejected": -0.5811785459518433, "step": 1310 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.486143708229065, "logits/rejected": -1.1926538944244385, "logps/chosen": -456.87127685546875, "logps/rejected": -752.5570678710938, "loss": 0.1695, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24015769362449646, "rewards/margins": 0.29379063844680786, "rewards/rejected": -0.5339483618736267, "step": 1320 }, { "epoch": 0.18, "learning_rate": 4.909464407769633e-06, "logits/chosen": -1.9261629581451416, "logits/rejected": -1.1128313541412354, "logps/chosen": -486.73992919921875, "logps/rejected": -843.7420043945312, "loss": 0.0968, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22261467576026917, "rewards/margins": 0.41170936822891235, "rewards/rejected": -0.6343240737915039, "step": 1330 }, { "epoch": 0.18, "learning_rate": 4.9063353863980565e-06, "logits/chosen": -1.572691559791565, "logits/rejected": -1.1048139333724976, "logps/chosen": -556.69677734375, "logps/rejected": -925.6715698242188, "loss": 0.1952, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3395165205001831, "rewards/margins": 0.3469579815864563, "rewards/rejected": -0.6864745020866394, "step": 1340 }, { "epoch": 0.18, "learning_rate": 4.903154239845798e-06, "logits/chosen": -1.7576971054077148, "logits/rejected": -1.2093678712844849, "logps/chosen": -494.8274841308594, "logps/rejected": -986.6290283203125, "loss": 0.1029, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23781287670135498, "rewards/margins": 0.46980786323547363, "rewards/rejected": -0.7076207399368286, "step": 1350 }, { "epoch": 0.18, "learning_rate": 4.899921037021719e-06, "logits/chosen": -1.593591332435608, "logits/rejected": -1.111011266708374, "logps/chosen": -531.3776245117188, "logps/rejected": -887.5836181640625, "loss": 0.1081, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26948198676109314, "rewards/margins": 0.4102948307991028, "rewards/rejected": -0.6797767877578735, "step": 1360 }, { "epoch": 0.18, "learning_rate": 4.896635847962311e-06, "logits/chosen": -1.5916521549224854, "logits/rejected": -1.0768035650253296, "logps/chosen": -480.07305908203125, "logps/rejected": -845.6593627929688, "loss": 0.0772, "rewards/accuracies": 0.875, "rewards/chosen": -0.2070520669221878, "rewards/margins": 0.4083788990974426, "rewards/rejected": -0.6154308915138245, "step": 1370 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.479468822479248, "logits/rejected": -1.1445562839508057, "logps/chosen": -425.95770263671875, "logps/rejected": -745.1259765625, "loss": 0.168, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21790972352027893, "rewards/margins": 0.3140547573566437, "rewards/rejected": -0.5319644808769226, "step": 1380 }, { "epoch": 0.19, "learning_rate": 4.889909796912454e-06, "logits/chosen": -1.646630883216858, "logits/rejected": -0.9626755714416504, "logps/chosen": -495.00518798828125, "logps/rejected": -861.8333740234375, "loss": 0.1344, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24911876022815704, "rewards/margins": 0.4423271715641022, "rewards/rejected": -0.6914458870887756, "step": 1390 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.7141090631484985, "logits/rejected": -1.0960562229156494, "logps/chosen": -524.4849853515625, "logps/rejected": -900.6906127929688, "loss": 0.158, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23685124516487122, "rewards/margins": 0.42406564950942993, "rewards/rejected": -0.660916805267334, "step": 1400 }, { "epoch": 0.19, "learning_rate": 4.882976669482368e-06, "logits/chosen": -1.5529906749725342, "logits/rejected": -1.1421276330947876, "logps/chosen": -408.2734375, "logps/rejected": -759.225341796875, "loss": 0.1603, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1565893143415451, "rewards/margins": 0.382378488779068, "rewards/rejected": -0.5389677882194519, "step": 1410 }, { "epoch": 0.19, "learning_rate": 4.879432639152935e-06, "logits/chosen": -1.637101173400879, "logits/rejected": -1.1991854906082153, "logps/chosen": -459.44793701171875, "logps/rejected": -865.1060791015625, "loss": 0.1332, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20239762961864471, "rewards/margins": 0.38929039239883423, "rewards/rejected": -0.5916879177093506, "step": 1420 }, { "epoch": 0.19, "learning_rate": 4.875837066400553e-06, "logits/chosen": -1.6581170558929443, "logits/rejected": -1.0959937572479248, "logps/chosen": -405.64141845703125, "logps/rejected": -753.9737548828125, "loss": 0.1626, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16078518331050873, "rewards/margins": 0.3797248899936676, "rewards/rejected": -0.5405100584030151, "step": 1430 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.581157922744751, "logits/rejected": -1.0542665719985962, "logps/chosen": -461.44122314453125, "logps/rejected": -759.1900634765625, "loss": 0.1771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21266725659370422, "rewards/margins": 0.32838940620422363, "rewards/rejected": -0.5410566329956055, "step": 1440 }, { "epoch": 0.19, "learning_rate": 4.868491606285823e-06, "logits/chosen": -1.7201025485992432, "logits/rejected": -1.107848048210144, "logps/chosen": -427.12225341796875, "logps/rejected": -793.6038208007812, "loss": 0.1358, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.144453227519989, "rewards/margins": 0.3804222643375397, "rewards/rejected": -0.5248754620552063, "step": 1450 }, { "epoch": 0.19, "learning_rate": 4.864741878038218e-06, "logits/chosen": -1.6883983612060547, "logits/rejected": -1.1231297254562378, "logps/chosen": -460.866943359375, "logps/rejected": -855.5372924804688, "loss": 0.1019, "rewards/accuracies": 0.875, "rewards/chosen": -0.16945675015449524, "rewards/margins": 0.4335222840309143, "rewards/rejected": -0.6029790639877319, "step": 1460 }, { "epoch": 0.2, "learning_rate": 4.860940925593703e-06, "logits/chosen": -1.944753885269165, "logits/rejected": -1.1425764560699463, "logps/chosen": -432.50830078125, "logps/rejected": -708.9544067382812, "loss": 0.1625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14104752242565155, "rewards/margins": 0.3777088224887848, "rewards/rejected": -0.5187563896179199, "step": 1470 }, { "epoch": 0.2, "learning_rate": 4.857088831287158e-06, "logits/chosen": -1.645452857017517, "logits/rejected": -1.108695149421692, "logps/chosen": -394.0192565917969, "logps/rejected": -793.5757446289062, "loss": 0.1427, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1653439849615097, "rewards/margins": 0.4196528494358063, "rewards/rejected": -0.5849968194961548, "step": 1480 }, { "epoch": 0.2, "learning_rate": 4.85318567856128e-06, "logits/chosen": -1.715916633605957, "logits/rejected": -1.1867271661758423, "logps/chosen": -391.3207702636719, "logps/rejected": -817.5634155273438, "loss": 0.1369, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13050571084022522, "rewards/margins": 0.3892734944820404, "rewards/rejected": -0.5197792649269104, "step": 1490 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.7923057079315186, "logits/rejected": -1.1524606943130493, "logps/chosen": -476.6368103027344, "logps/rejected": -782.6160278320312, "loss": 0.1402, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21068593859672546, "rewards/margins": 0.3372471332550049, "rewards/rejected": -0.5479329824447632, "step": 1500 }, { "epoch": 0.2, "learning_rate": 4.8452265371505176e-06, "logits/chosen": -1.6856279373168945, "logits/rejected": -1.022018313407898, "logps/chosen": -529.304443359375, "logps/rejected": -827.2320556640625, "loss": 0.1235, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21702179312705994, "rewards/margins": 0.35849815607070923, "rewards/rejected": -0.5755199193954468, "step": 1510 }, { "epoch": 0.2, "learning_rate": 4.841170720873723e-06, "logits/chosen": -1.4261338710784912, "logits/rejected": -1.0991995334625244, "logps/chosen": -479.01416015625, "logps/rejected": -837.9700317382812, "loss": 0.1442, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2580581605434418, "rewards/margins": 0.31764236092567444, "rewards/rejected": -0.575700581073761, "step": 1520 }, { "epoch": 0.2, "learning_rate": 4.837064190990036e-06, "logits/chosen": -1.6842906475067139, "logits/rejected": -1.2078498601913452, "logps/chosen": -509.49359130859375, "logps/rejected": -809.8397216796875, "loss": 0.1369, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.27093303203582764, "rewards/margins": 0.3115740120410919, "rewards/rejected": -0.5825070142745972, "step": 1530 }, { "epoch": 0.21, "learning_rate": 4.832907036453647e-06, "logits/chosen": -1.6339679956436157, "logits/rejected": -1.1635912656784058, "logps/chosen": -478.7958984375, "logps/rejected": -798.2124633789062, "loss": 0.1894, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22973430156707764, "rewards/margins": 0.3353918194770813, "rewards/rejected": -0.5651262402534485, "step": 1540 }, { "epoch": 0.21, "learning_rate": 4.828699347315357e-06, "logits/chosen": -1.6856197118759155, "logits/rejected": -1.1501435041427612, "logps/chosen": -434.57763671875, "logps/rejected": -643.5542602539062, "loss": 0.1656, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16013285517692566, "rewards/margins": 0.29741623997688293, "rewards/rejected": -0.4575490951538086, "step": 1550 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.6989209651947021, "logits/rejected": -1.1924196481704712, "logps/chosen": -405.2452697753906, "logps/rejected": -653.5948486328125, "loss": 0.1908, "rewards/accuracies": 0.75, "rewards/chosen": -0.18102996051311493, "rewards/margins": 0.27397722005844116, "rewards/rejected": -0.4550072252750397, "step": 1560 }, { "epoch": 0.21, "learning_rate": 4.8201327309076176e-06, "logits/chosen": -1.582778811454773, "logits/rejected": -1.004863977432251, "logps/chosen": -476.71990966796875, "logps/rejected": -729.3921508789062, "loss": 0.1627, "rewards/accuracies": 0.75, "rewards/chosen": -0.20964249968528748, "rewards/margins": 0.2946922183036804, "rewards/rejected": -0.5043348073959351, "step": 1570 }, { "epoch": 0.21, "learning_rate": 4.815773989205165e-06, "logits/chosen": -1.5439220666885376, "logits/rejected": -1.1240843534469604, "logps/chosen": -545.4619140625, "logps/rejected": -922.7950439453125, "loss": 0.1253, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.29893285036087036, "rewards/margins": 0.38949456810951233, "rewards/rejected": -0.6884275078773499, "step": 1580 }, { "epoch": 0.21, "learning_rate": 4.811365084030784e-06, "logits/chosen": -1.7266323566436768, "logits/rejected": -0.853527843952179, "logps/chosen": -715.392578125, "logps/rejected": -1047.7681884765625, "loss": 0.109, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3249026834964752, "rewards/margins": 0.3879953920841217, "rewards/rejected": -0.7128980159759521, "step": 1590 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.355763554573059, "logits/rejected": -0.7085272669792175, "logps/chosen": -526.99658203125, "logps/rejected": -888.4259643554688, "loss": 0.1378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2837275564670563, "rewards/margins": 0.399003803730011, "rewards/rejected": -0.6827312707901001, "step": 1600 }, { "epoch": 0.21, "learning_rate": 4.8023971663673235e-06, "logits/chosen": -1.3139488697052002, "logits/rejected": -0.9653435945510864, "logps/chosen": -558.3027954101562, "logps/rejected": -852.5972900390625, "loss": 0.1833, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3123556673526764, "rewards/margins": 0.30963483452796936, "rewards/rejected": -0.6219905018806458, "step": 1610 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.6188299655914307, "logits/rejected": -0.9288069605827332, "logps/chosen": -449.55841064453125, "logps/rejected": -813.2579345703125, "loss": 0.1403, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20635005831718445, "rewards/margins": 0.38359805941581726, "rewards/rejected": -0.5899481177330017, "step": 1620 }, { "epoch": 0.22, "learning_rate": 4.793229754952393e-06, "logits/chosen": -1.500043511390686, "logits/rejected": -0.805435299873352, "logps/chosen": -551.26171875, "logps/rejected": -906.537109375, "loss": 0.0924, "rewards/accuracies": 0.875, "rewards/chosen": -0.2886804938316345, "rewards/margins": 0.3959423899650574, "rewards/rejected": -0.6846228837966919, "step": 1630 }, { "epoch": 0.22, "learning_rate": 4.788571486639948e-06, "logits/chosen": -1.3259161710739136, "logits/rejected": -0.6448807716369629, "logps/chosen": -577.3439331054688, "logps/rejected": -947.9227294921875, "loss": 0.113, "rewards/accuracies": 0.875, "rewards/chosen": -0.32035812735557556, "rewards/margins": 0.4091481566429138, "rewards/rejected": -0.7295061945915222, "step": 1640 }, { "epoch": 0.22, "learning_rate": 4.783863644106502e-06, "logits/chosen": -1.44963538646698, "logits/rejected": -0.7897195219993591, "logps/chosen": -593.3853759765625, "logps/rejected": -874.451171875, "loss": 0.172, "rewards/accuracies": 0.875, "rewards/chosen": -0.32261866331100464, "rewards/margins": 0.33006590604782104, "rewards/rejected": -0.6526845693588257, "step": 1650 }, { "epoch": 0.22, "learning_rate": 4.779106329331665e-06, "logits/chosen": -1.610004186630249, "logits/rejected": -0.855453610420227, "logps/chosen": -560.9100952148438, "logps/rejected": -858.4417114257812, "loss": 0.1716, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2593235373497009, "rewards/margins": 0.3451550602912903, "rewards/rejected": -0.6044785380363464, "step": 1660 }, { "epoch": 0.22, "learning_rate": 4.774299645366696e-06, "logits/chosen": -1.5046002864837646, "logits/rejected": -0.9667149782180786, "logps/chosen": -527.2569580078125, "logps/rejected": -902.4674072265625, "loss": 0.1106, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.27475467324256897, "rewards/margins": 0.3705506920814514, "rewards/rejected": -0.645305335521698, "step": 1670 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.348170518875122, "logits/rejected": -0.6280630230903625, "logps/chosen": -596.0364990234375, "logps/rejected": -971.1904296875, "loss": 0.0834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2669234573841095, "rewards/margins": 0.4294804036617279, "rewards/rejected": -0.6964038610458374, "step": 1680 }, { "epoch": 0.23, "learning_rate": 4.764538587416233e-06, "logits/chosen": -1.2824132442474365, "logits/rejected": -0.9618009328842163, "logps/chosen": -526.2830810546875, "logps/rejected": -815.99951171875, "loss": 0.1573, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30147814750671387, "rewards/margins": 0.2752738893032074, "rewards/rejected": -0.5767520666122437, "step": 1690 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.0407376289367676, "logits/rejected": -0.5895905494689941, "logps/chosen": -549.6045532226562, "logps/rejected": -836.8787231445312, "loss": 0.1874, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3209471106529236, "rewards/margins": 0.32041606307029724, "rewards/rejected": -0.6413631439208984, "step": 1700 }, { "epoch": 0.23, "learning_rate": 4.754581316012785e-06, "logits/chosen": -1.4987887144088745, "logits/rejected": -0.8465301394462585, "logps/chosen": -541.6981201171875, "logps/rejected": -859.7433471679688, "loss": 0.1707, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2668864130973816, "rewards/margins": 0.3836662769317627, "rewards/rejected": -0.6505526900291443, "step": 1710 }, { "epoch": 0.23, "learning_rate": 4.749529369216246e-06, "logits/chosen": -1.3862401247024536, "logits/rejected": -0.8952510952949524, "logps/chosen": -439.4287109375, "logps/rejected": -859.2282104492188, "loss": 0.0808, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18136066198349, "rewards/margins": 0.40953293442726135, "rewards/rejected": -0.5908936262130737, "step": 1720 }, { "epoch": 0.23, "learning_rate": 4.744428693915158e-06, "logits/chosen": -1.365352988243103, "logits/rejected": -0.8398834466934204, "logps/chosen": -441.2855529785156, "logps/rejected": -844.7384643554688, "loss": 0.1138, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21025002002716064, "rewards/margins": 0.3863261342048645, "rewards/rejected": -0.5965762138366699, "step": 1730 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.448047399520874, "logits/rejected": -0.8834174275398254, "logps/chosen": -370.3255615234375, "logps/rejected": -726.6725463867188, "loss": 0.1543, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16698488593101501, "rewards/margins": 0.37776023149490356, "rewards/rejected": -0.5447450876235962, "step": 1740 }, { "epoch": 0.23, "learning_rate": 4.734081600808531e-06, "logits/chosen": -1.7213389873504639, "logits/rejected": -0.924578845500946, "logps/chosen": -447.802490234375, "logps/rejected": -833.771484375, "loss": 0.1591, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15919806063175201, "rewards/margins": 0.4283002018928528, "rewards/rejected": -0.5874982476234436, "step": 1750 }, { "epoch": 0.23, "learning_rate": 4.7288354071380415e-06, "logits/chosen": -1.3697175979614258, "logits/rejected": -0.7719752788543701, "logps/chosen": -551.510009765625, "logps/rejected": -905.880859375, "loss": 0.1401, "rewards/accuracies": 0.875, "rewards/chosen": -0.2881290018558502, "rewards/margins": 0.4137204587459564, "rewards/rejected": -0.7018495202064514, "step": 1760 }, { "epoch": 0.24, "learning_rate": 4.723540933228245e-06, "logits/chosen": -1.226289987564087, "logits/rejected": -0.81830233335495, "logps/chosen": -660.1695556640625, "logps/rejected": -888.3117065429688, "loss": 0.1612, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38691434264183044, "rewards/margins": 0.27954649925231934, "rewards/rejected": -0.6664608120918274, "step": 1770 }, { "epoch": 0.24, "learning_rate": 4.7181982937661485e-06, "logits/chosen": -1.4922593832015991, "logits/rejected": -0.7460156679153442, "logps/chosen": -632.2621459960938, "logps/rejected": -992.2732543945312, "loss": 0.0865, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.31351277232170105, "rewards/margins": 0.4019736647605896, "rewards/rejected": -0.7154864072799683, "step": 1780 }, { "epoch": 0.24, "learning_rate": 4.712807604482108e-06, "logits/chosen": -1.1576956510543823, "logits/rejected": -0.8577936291694641, "logps/chosen": -587.1032104492188, "logps/rejected": -970.9803466796875, "loss": 0.1084, "rewards/accuracies": 0.875, "rewards/chosen": -0.3230358064174652, "rewards/margins": 0.38744255900382996, "rewards/rejected": -0.7104784250259399, "step": 1790 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.1999406814575195, "logits/rejected": -0.9700831174850464, "logps/chosen": -542.734130859375, "logps/rejected": -997.7930908203125, "loss": 0.1223, "rewards/accuracies": 0.875, "rewards/chosen": -0.28139543533325195, "rewards/margins": 0.39619675278663635, "rewards/rejected": -0.6775921583175659, "step": 1800 }, { "epoch": 0.24, "learning_rate": 4.701882544571277e-06, "logits/chosen": -1.3693358898162842, "logits/rejected": -0.8610717058181763, "logps/chosen": -425.99969482421875, "logps/rejected": -809.958740234375, "loss": 0.1192, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2034183293581009, "rewards/margins": 0.40131768584251404, "rewards/rejected": -0.6047360301017761, "step": 1810 }, { "epoch": 0.24, "learning_rate": 4.696348410599244e-06, "logits/chosen": -1.654017686843872, "logits/rejected": -0.8782070279121399, "logps/chosen": -553.987060546875, "logps/rejected": -852.6536254882812, "loss": 0.1298, "rewards/accuracies": 0.875, "rewards/chosen": -0.18976715207099915, "rewards/margins": 0.4038692116737366, "rewards/rejected": -0.5936363935470581, "step": 1820 }, { "epoch": 0.24, "learning_rate": 4.690766700109659e-06, "logits/chosen": -1.4825284481048584, "logits/rejected": -0.9183340072631836, "logps/chosen": -488.19488525390625, "logps/rejected": -913.3912963867188, "loss": 0.1293, "rewards/accuracies": 0.875, "rewards/chosen": -0.2699379622936249, "rewards/margins": 0.39716458320617676, "rewards/rejected": -0.6671024560928345, "step": 1830 }, { "epoch": 0.25, "learning_rate": 4.685137534011549e-06, "logits/chosen": -1.2411205768585205, "logits/rejected": -0.8965455889701843, "logps/chosen": -512.8843994140625, "logps/rejected": -809.3594970703125, "loss": 0.1386, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24077479541301727, "rewards/margins": 0.34711533784866333, "rewards/rejected": -0.5878901481628418, "step": 1840 }, { "epoch": 0.25, "learning_rate": 4.679461034241906e-06, "logits/chosen": -1.602836012840271, "logits/rejected": -0.8313294649124146, "logps/chosen": -574.6096801757812, "logps/rejected": -949.3818359375, "loss": 0.1248, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22495639324188232, "rewards/margins": 0.43183308839797974, "rewards/rejected": -0.6567894220352173, "step": 1850 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.4966367483139038, "logits/rejected": -1.098854422569275, "logps/chosen": -454.702880859375, "logps/rejected": -771.0753784179688, "loss": 0.1334, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15163853764533997, "rewards/margins": 0.3771851360797882, "rewards/rejected": -0.5288236737251282, "step": 1860 }, { "epoch": 0.25, "learning_rate": 4.667966526559953e-06, "logits/chosen": -1.396028995513916, "logits/rejected": -0.7422189712524414, "logps/chosen": -537.6190185546875, "logps/rejected": -792.0523071289062, "loss": 0.1821, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18353763222694397, "rewards/margins": 0.3589433431625366, "rewards/rejected": -0.5424809455871582, "step": 1870 }, { "epoch": 0.25, "learning_rate": 4.662148767637578e-06, "logits/chosen": -1.3878698348999023, "logits/rejected": -0.8569218516349792, "logps/chosen": -450.87225341796875, "logps/rejected": -836.5355224609375, "loss": 0.1115, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17101620137691498, "rewards/margins": 0.4047500491142273, "rewards/rejected": -0.5757663249969482, "step": 1880 }, { "epoch": 0.25, "learning_rate": 4.656284173018144e-06, "logits/chosen": -1.3037761449813843, "logits/rejected": -1.0120489597320557, "logps/chosen": -332.4677429199219, "logps/rejected": -664.6961669921875, "loss": 0.2034, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1323479413986206, "rewards/margins": 0.31846508383750916, "rewards/rejected": -0.45081305503845215, "step": 1890 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.6151546239852905, "logits/rejected": -0.8685706257820129, "logps/chosen": -459.19580078125, "logps/rejected": -817.8629760742188, "loss": 0.1344, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14569053053855896, "rewards/margins": 0.4179355204105377, "rewards/rejected": -0.5636261105537415, "step": 1900 }, { "epoch": 0.25, "learning_rate": 4.644414985846934e-06, "logits/chosen": -1.3685284852981567, "logits/rejected": -0.6978548765182495, "logps/chosen": -474.23370361328125, "logps/rejected": -859.4371948242188, "loss": 0.1026, "rewards/accuracies": 0.875, "rewards/chosen": -0.19393648207187653, "rewards/margins": 0.4319811761379242, "rewards/rejected": -0.6259176135063171, "step": 1910 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.1842671632766724, "logits/rejected": -0.7414307594299316, "logps/chosen": -456.84503173828125, "logps/rejected": -706.1348266601562, "loss": 0.2247, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24000349640846252, "rewards/margins": 0.2802012264728546, "rewards/rejected": -0.5202046632766724, "step": 1920 }, { "epoch": 0.26, "learning_rate": 4.632359993465188e-06, "logits/chosen": -1.2892394065856934, "logits/rejected": -0.9212859869003296, "logps/chosen": -491.70135498046875, "logps/rejected": -833.9127197265625, "loss": 0.1346, "rewards/accuracies": 0.875, "rewards/chosen": -0.2496131956577301, "rewards/margins": 0.34575051069259644, "rewards/rejected": -0.5953637361526489, "step": 1930 }, { "epoch": 0.26, "learning_rate": 4.626263146105875e-06, "logits/chosen": -1.3242267370224, "logits/rejected": -0.7134020328521729, "logps/chosen": -457.72332763671875, "logps/rejected": -758.073486328125, "loss": 0.1219, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1769895851612091, "rewards/margins": 0.3537253141403198, "rewards/rejected": -0.5307148694992065, "step": 1940 }, { "epoch": 0.26, "learning_rate": 4.620120240391065e-06, "logits/chosen": -1.515119194984436, "logits/rejected": -0.9801965951919556, "logps/chosen": -454.9673767089844, "logps/rejected": -697.6735229492188, "loss": 0.1353, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1867002248764038, "rewards/margins": 0.3209150433540344, "rewards/rejected": -0.5076152682304382, "step": 1950 }, { "epoch": 0.26, "learning_rate": 4.613931409386196e-06, "logits/chosen": -1.4317365884780884, "logits/rejected": -1.0157676935195923, "logps/chosen": -565.1531982421875, "logps/rejected": -884.1183471679688, "loss": 0.1517, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26452139019966125, "rewards/margins": 0.3587113320827484, "rewards/rejected": -0.6232327222824097, "step": 1960 }, { "epoch": 0.26, "learning_rate": 4.607696787151522e-06, "logits/chosen": -1.3605797290802002, "logits/rejected": -0.8848182559013367, "logps/chosen": -434.24066162109375, "logps/rejected": -841.8572387695312, "loss": 0.107, "rewards/accuracies": 0.875, "rewards/chosen": -0.2228735238313675, "rewards/margins": 0.3770410418510437, "rewards/rejected": -0.5999146103858948, "step": 1970 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.5537292957305908, "logits/rejected": -0.7724756002426147, "logps/chosen": -470.87945556640625, "logps/rejected": -794.3897705078125, "loss": 0.1274, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18849892914295197, "rewards/margins": 0.3892095685005188, "rewards/rejected": -0.5777084827423096, "step": 1980 }, { "epoch": 0.27, "learning_rate": 4.595090710190419e-06, "logits/chosen": -1.3826183080673218, "logits/rejected": -0.9825338125228882, "logps/chosen": -487.48529052734375, "logps/rejected": -996.5350341796875, "loss": 0.1142, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23563413321971893, "rewards/margins": 0.4019049108028412, "rewards/rejected": -0.6375390291213989, "step": 1990 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.4032509326934814, "logits/rejected": -0.8891399502754211, "logps/chosen": -607.7633056640625, "logps/rejected": -910.2550048828125, "loss": 0.1593, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2815513014793396, "rewards/margins": 0.3732183575630188, "rewards/rejected": -0.6547696590423584, "step": 2000 }, { "epoch": 0.27, "learning_rate": 4.582303101775249e-06, "logits/chosen": -1.3333938121795654, "logits/rejected": -0.7552350759506226, "logps/chosen": -540.1254272460938, "logps/rejected": -880.9000854492188, "loss": 0.1156, "rewards/accuracies": 0.875, "rewards/chosen": -0.27906858921051025, "rewards/margins": 0.4015675485134125, "rewards/rejected": -0.6806361675262451, "step": 2010 }, { "epoch": 0.27, "learning_rate": 4.575841568909494e-06, "logits/chosen": -1.3045923709869385, "logits/rejected": -0.6709948778152466, "logps/chosen": -446.7435607910156, "logps/rejected": -876.2479248046875, "loss": 0.1014, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16647282242774963, "rewards/margins": 0.4480370879173279, "rewards/rejected": -0.6145098805427551, "step": 2020 }, { "epoch": 0.27, "learning_rate": 4.569335069902502e-06, "logits/chosen": -1.4322322607040405, "logits/rejected": -0.5658160448074341, "logps/chosen": -449.83154296875, "logps/rejected": -833.4611206054688, "loss": 0.1121, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17377746105194092, "rewards/margins": 0.4472419321537018, "rewards/rejected": -0.6210194230079651, "step": 2030 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.2313501834869385, "logits/rejected": -0.840812087059021, "logps/chosen": -346.85137939453125, "logps/rejected": -779.2326049804688, "loss": 0.1071, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16269828379154205, "rewards/margins": 0.4053496718406677, "rewards/rejected": -0.5680479407310486, "step": 2040 }, { "epoch": 0.27, "learning_rate": 4.556187738201656e-06, "logits/chosen": -1.3862967491149902, "logits/rejected": -0.760252833366394, "logps/chosen": -532.8507690429688, "logps/rejected": -826.9495239257812, "loss": 0.1067, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2203497439622879, "rewards/margins": 0.38376516103744507, "rewards/rejected": -0.604114830493927, "step": 2050 }, { "epoch": 0.27, "learning_rate": 4.549547190300622e-06, "logits/chosen": -1.2185680866241455, "logits/rejected": -0.8623320460319519, "logps/chosen": -493.34564208984375, "logps/rejected": -827.0408935546875, "loss": 0.1204, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2509461045265198, "rewards/margins": 0.3551548421382904, "rewards/rejected": -0.6061009168624878, "step": 2060 }, { "epoch": 0.28, "learning_rate": 4.542862245837821e-06, "logits/chosen": -1.5642473697662354, "logits/rejected": -0.926191508769989, "logps/chosen": -411.05242919921875, "logps/rejected": -723.1922607421875, "loss": 0.1455, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18251535296440125, "rewards/margins": 0.3536899983882904, "rewards/rejected": -0.5362053513526917, "step": 2070 }, { "epoch": 0.28, "learning_rate": 4.536133049620143e-06, "logits/chosen": -1.5234981775283813, "logits/rejected": -0.7424275279045105, "logps/chosen": -530.3670043945312, "logps/rejected": -836.0511474609375, "loss": 0.1389, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16530658304691315, "rewards/margins": 0.42894306778907776, "rewards/rejected": -0.5942496061325073, "step": 2080 }, { "epoch": 0.28, "learning_rate": 4.529359747413038e-06, "logits/chosen": -1.338952660560608, "logits/rejected": -0.7675120234489441, "logps/chosen": -538.5235595703125, "logps/rejected": -848.5220947265625, "loss": 0.1503, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2846592962741852, "rewards/margins": 0.3030379116535187, "rewards/rejected": -0.5876971483230591, "step": 2090 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.3365002870559692, "logits/rejected": -0.7856238484382629, "logps/chosen": -400.2157287597656, "logps/rejected": -783.48681640625, "loss": 0.0966, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16450873017311096, "rewards/margins": 0.3819323182106018, "rewards/rejected": -0.5464409589767456, "step": 2100 }, { "epoch": 0.28, "learning_rate": 4.515681412866228e-06, "logits/chosen": -1.3786782026290894, "logits/rejected": -0.7613228559494019, "logps/chosen": -449.18212890625, "logps/rejected": -853.5227661132812, "loss": 0.1186, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19289013743400574, "rewards/margins": 0.3786999583244324, "rewards/rejected": -0.5715900659561157, "step": 2110 }, { "epoch": 0.28, "learning_rate": 4.508776676821739e-06, "logits/chosen": -1.6430130004882812, "logits/rejected": -0.9073891639709473, "logps/chosen": -417.95391845703125, "logps/rejected": -733.4771728515625, "loss": 0.1272, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10977999866008759, "rewards/margins": 0.3991335332393646, "rewards/rejected": -0.5089135766029358, "step": 2120 }, { "epoch": 0.28, "learning_rate": 4.501828427371834e-06, "logits/chosen": -1.4118962287902832, "logits/rejected": -0.835736095905304, "logps/chosen": -472.13134765625, "logps/rejected": -701.7862548828125, "loss": 0.1591, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17745926976203918, "rewards/margins": 0.3164125084877014, "rewards/rejected": -0.4938717782497406, "step": 2130 }, { "epoch": 0.29, "learning_rate": 4.494836815027022e-06, "logits/chosen": -1.6626451015472412, "logits/rejected": -0.9878584742546082, "logps/chosen": -423.9879455566406, "logps/rejected": -770.7366333007812, "loss": 0.1042, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13481102883815765, "rewards/margins": 0.4097488522529602, "rewards/rejected": -0.5445598363876343, "step": 2140 }, { "epoch": 0.29, "learning_rate": 4.48780199123712e-06, "logits/chosen": -1.0662163496017456, "logits/rejected": -0.7782390713691711, "logps/chosen": -492.75469970703125, "logps/rejected": -793.8905639648438, "loss": 0.1767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25908443331718445, "rewards/margins": 0.32769250869750977, "rewards/rejected": -0.5867769122123718, "step": 2150 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.431119680404663, "logits/rejected": -1.0311700105667114, "logps/chosen": -495.00115966796875, "logps/rejected": -914.7777099609375, "loss": 0.1157, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22871288657188416, "rewards/margins": 0.43868550658226013, "rewards/rejected": -0.6673983931541443, "step": 2160 }, { "epoch": 0.29, "learning_rate": 4.473603319798173e-06, "logits/chosen": -1.7834135293960571, "logits/rejected": -1.212897539138794, "logps/chosen": -476.74267578125, "logps/rejected": -959.1222534179688, "loss": 0.1058, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19958916306495667, "rewards/margins": 0.4401358962059021, "rewards/rejected": -0.6397250294685364, "step": 2170 }, { "epoch": 0.29, "learning_rate": 4.466439779715696e-06, "logits/chosen": -1.570049524307251, "logits/rejected": -1.1138522624969482, "logps/chosen": -457.2566833496094, "logps/rejected": -823.2316284179688, "loss": 0.1634, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2262900322675705, "rewards/margins": 0.37429413199424744, "rewards/rejected": -0.6005841493606567, "step": 2180 }, { "epoch": 0.29, "learning_rate": 4.4592336433146e-06, "logits/chosen": -1.434529423713684, "logits/rejected": -0.9157952070236206, "logps/chosen": -503.70184326171875, "logps/rejected": -816.2783203125, "loss": 0.1488, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17298392951488495, "rewards/margins": 0.39118996262550354, "rewards/rejected": -0.5641738772392273, "step": 2190 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.9089066982269287, "logits/rejected": -1.133928656578064, "logps/chosen": -472.3794860839844, "logps/rejected": -772.7537841796875, "loss": 0.1542, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15726697444915771, "rewards/margins": 0.359056293964386, "rewards/rejected": -0.5163232088088989, "step": 2200 }, { "epoch": 0.29, "learning_rate": 4.444694206862929e-06, "logits/chosen": -1.5789159536361694, "logits/rejected": -1.0540728569030762, "logps/chosen": -390.03387451171875, "logps/rejected": -772.8970947265625, "loss": 0.1576, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14799469709396362, "rewards/margins": 0.37714943289756775, "rewards/rejected": -0.5251442193984985, "step": 2210 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.6248416900634766, "logits/rejected": -1.0704140663146973, "logps/chosen": -492.60638427734375, "logps/rejected": -778.5485229492188, "loss": 0.1399, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2461838722229004, "rewards/margins": 0.3204324543476105, "rewards/rejected": -0.5666162967681885, "step": 2220 }, { "epoch": 0.3, "learning_rate": 4.4299862702287255e-06, "logits/chosen": -1.6715940237045288, "logits/rejected": -0.9992098808288574, "logps/chosen": -545.5618896484375, "logps/rejected": -752.6533813476562, "loss": 0.1674, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2320774346590042, "rewards/margins": 0.3299207091331482, "rewards/rejected": -0.5619980692863464, "step": 2230 }, { "epoch": 0.3, "learning_rate": 4.422569512021332e-06, "logits/chosen": -1.3779773712158203, "logits/rejected": -0.849908173084259, "logps/chosen": -454.3763122558594, "logps/rejected": -809.4392700195312, "loss": 0.1371, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20665785670280457, "rewards/margins": 0.37678617238998413, "rewards/rejected": -0.5834440588951111, "step": 2240 }, { "epoch": 0.3, "learning_rate": 4.415111107797445e-06, "logits/chosen": -1.6013469696044922, "logits/rejected": -0.9178832769393921, "logps/chosen": -450.80322265625, "logps/rejected": -827.7667846679688, "loss": 0.0834, "rewards/accuracies": 0.875, "rewards/chosen": -0.17798402905464172, "rewards/margins": 0.4255714416503906, "rewards/rejected": -0.60355544090271, "step": 2250 }, { "epoch": 0.3, "learning_rate": 4.407611219118363e-06, "logits/chosen": -1.5318782329559326, "logits/rejected": -1.0508818626403809, "logps/chosen": -577.3057861328125, "logps/rejected": -969.0447387695312, "loss": 0.1826, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2945424020290375, "rewards/margins": 0.3278708755970001, "rewards/rejected": -0.6224132776260376, "step": 2260 }, { "epoch": 0.3, "learning_rate": 4.4000700084440046e-06, "logits/chosen": -1.736802339553833, "logits/rejected": -1.0055776834487915, "logps/chosen": -491.3179626464844, "logps/rejected": -798.9041137695312, "loss": 0.0974, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.24061992764472961, "rewards/margins": 0.3673352599143982, "rewards/rejected": -0.6079551577568054, "step": 2270 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.4131683111190796, "logits/rejected": -0.8145586848258972, "logps/chosen": -452.830078125, "logps/rejected": -853.2399291992188, "loss": 0.1319, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19131293892860413, "rewards/margins": 0.4480679929256439, "rewards/rejected": -0.639380931854248, "step": 2280 }, { "epoch": 0.31, "learning_rate": 4.384864275421109e-06, "logits/chosen": -1.515782117843628, "logits/rejected": -0.9318229556083679, "logps/chosen": -366.37353515625, "logps/rejected": -722.2740478515625, "loss": 0.137, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1316198855638504, "rewards/margins": 0.4034051299095154, "rewards/rejected": -0.5350250005722046, "step": 2290 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.5804684162139893, "logits/rejected": -1.0182714462280273, "logps/chosen": -350.4043273925781, "logps/rejected": -682.8316040039062, "loss": 0.1649, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14684131741523743, "rewards/margins": 0.34305766224861145, "rewards/rejected": -0.4898989200592041, "step": 2300 }, { "epoch": 0.31, "learning_rate": 4.36949522624633e-06, "logits/chosen": -1.346221685409546, "logits/rejected": -1.045738697052002, "logps/chosen": -370.6365661621094, "logps/rejected": -878.205078125, "loss": 0.0781, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1513518989086151, "rewards/margins": 0.43637341260910034, "rewards/rejected": -0.5877252817153931, "step": 2310 }, { "epoch": 0.31, "learning_rate": 4.361749873698707e-06, "logits/chosen": -1.887648344039917, "logits/rejected": -1.1761292219161987, "logps/chosen": -420.795166015625, "logps/rejected": -686.5606689453125, "loss": 0.1384, "rewards/accuracies": 0.75, "rewards/chosen": -0.1394852101802826, "rewards/margins": 0.3560732901096344, "rewards/rejected": -0.495558500289917, "step": 2320 }, { "epoch": 0.31, "learning_rate": 4.353964192587949e-06, "logits/chosen": -1.4805281162261963, "logits/rejected": -0.7832016348838806, "logps/chosen": -443.5951232910156, "logps/rejected": -762.2675170898438, "loss": 0.1246, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21405291557312012, "rewards/margins": 0.3591863512992859, "rewards/rejected": -0.573239266872406, "step": 2330 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.7262485027313232, "logits/rejected": -0.9207326769828796, "logps/chosen": -474.5480041503906, "logps/rejected": -702.486572265625, "loss": 0.1708, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2004108726978302, "rewards/margins": 0.30196017026901245, "rewards/rejected": -0.5023710131645203, "step": 2340 }, { "epoch": 0.31, "learning_rate": 4.338272520149572e-06, "logits/chosen": -1.532149314880371, "logits/rejected": -0.9204761385917664, "logps/chosen": -534.96728515625, "logps/rejected": -887.0525512695312, "loss": 0.1276, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19786028563976288, "rewards/margins": 0.4368208944797516, "rewards/rejected": -0.6346812844276428, "step": 2350 }, { "epoch": 0.31, "learning_rate": 4.330366868729376e-06, "logits/chosen": -1.6435654163360596, "logits/rejected": -1.03916335105896, "logps/chosen": -494.278564453125, "logps/rejected": -798.6319580078125, "loss": 0.1529, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2153468132019043, "rewards/margins": 0.342166543006897, "rewards/rejected": -0.5575133562088013, "step": 2360 }, { "epoch": 0.32, "learning_rate": 4.322421568553529e-06, "logits/chosen": -1.6959123611450195, "logits/rejected": -1.2254985570907593, "logps/chosen": -438.72869873046875, "logps/rejected": -706.9481811523438, "loss": 0.1368, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.180532768368721, "rewards/margins": 0.3147241473197937, "rewards/rejected": -0.4952569007873535, "step": 2370 }, { "epoch": 0.32, "learning_rate": 4.3144367917302964e-06, "logits/chosen": -1.3080450296401978, "logits/rejected": -0.740618884563446, "logps/chosen": -504.75067138671875, "logps/rejected": -789.192138671875, "loss": 0.1527, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20803742110729218, "rewards/margins": 0.37267953157424927, "rewards/rejected": -0.5807169675827026, "step": 2380 }, { "epoch": 0.32, "learning_rate": 4.30641271122307e-06, "logits/chosen": -1.7245194911956787, "logits/rejected": -1.1383044719696045, "logps/chosen": -500.14764404296875, "logps/rejected": -779.827880859375, "loss": 0.1317, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24187901616096497, "rewards/margins": 0.34209081530570984, "rewards/rejected": -0.5839698314666748, "step": 2390 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.6434742212295532, "logits/rejected": -1.0891433954238892, "logps/chosen": -534.0972290039062, "logps/rejected": -861.4666137695312, "loss": 0.128, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22732210159301758, "rewards/margins": 0.38719356060028076, "rewards/rejected": -0.6145156621932983, "step": 2400 }, { "epoch": 0.32, "learning_rate": 4.290247335263362e-06, "logits/chosen": -1.530216932296753, "logits/rejected": -0.9373283386230469, "logps/chosen": -495.4457092285156, "logps/rejected": -882.18359375, "loss": 0.1107, "rewards/accuracies": 0.875, "rewards/chosen": -0.19321858882904053, "rewards/margins": 0.4353697896003723, "rewards/rejected": -0.6285883188247681, "step": 2410 }, { "epoch": 0.32, "learning_rate": 4.2821063899795015e-06, "logits/chosen": -1.4912149906158447, "logits/rejected": -0.9739956855773926, "logps/chosen": -488.48431396484375, "logps/rejected": -824.3746948242188, "loss": 0.1278, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21902771294116974, "rewards/margins": 0.3857320547103882, "rewards/rejected": -0.6047598123550415, "step": 2420 }, { "epoch": 0.32, "learning_rate": 4.273926841341303e-06, "logits/chosen": -1.606318712234497, "logits/rejected": -0.9916576147079468, "logps/chosen": -363.5210876464844, "logps/rejected": -840.6154174804688, "loss": 0.0952, "rewards/accuracies": 0.875, "rewards/chosen": -0.15150922536849976, "rewards/margins": 0.46653613448143005, "rewards/rejected": -0.6180453300476074, "step": 2430 }, { "epoch": 0.33, "learning_rate": 4.265708866531238e-06, "logits/chosen": -1.608624815940857, "logits/rejected": -0.9720064401626587, "logps/chosen": -510.4712829589844, "logps/rejected": -808.0747680664062, "loss": 0.1947, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24383282661437988, "rewards/margins": 0.3697541356086731, "rewards/rejected": -0.613586962223053, "step": 2440 }, { "epoch": 0.33, "learning_rate": 4.257452643564155e-06, "logits/chosen": -1.499190092086792, "logits/rejected": -0.8431995511054993, "logps/chosen": -606.6427001953125, "logps/rejected": -963.7305908203125, "loss": 0.1502, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30497798323631287, "rewards/margins": 0.37246131896972656, "rewards/rejected": -0.677439272403717, "step": 2450 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.7652698755264282, "logits/rejected": -1.0435216426849365, "logps/chosen": -533.5185546875, "logps/rejected": -752.1669921875, "loss": 0.1436, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24484989047050476, "rewards/margins": 0.30627909302711487, "rewards/rejected": -0.5511289834976196, "step": 2460 }, { "epoch": 0.33, "learning_rate": 4.240826169357024e-06, "logits/chosen": -1.1542727947235107, "logits/rejected": -0.8217121958732605, "logps/chosen": -569.7604370117188, "logps/rejected": -882.7029418945312, "loss": 0.1906, "rewards/accuracies": 0.875, "rewards/chosen": -0.34456318616867065, "rewards/margins": 0.310018926858902, "rewards/rejected": -0.654582142829895, "step": 2470 }, { "epoch": 0.33, "learning_rate": 4.232456278273743e-06, "logits/chosen": -1.4869928359985352, "logits/rejected": -0.7625919580459595, "logps/chosen": -582.61083984375, "logps/rejected": -920.8674926757812, "loss": 0.0979, "rewards/accuracies": 0.875, "rewards/chosen": -0.300833523273468, "rewards/margins": 0.3972635865211487, "rewards/rejected": -0.6980971097946167, "step": 2480 }, { "epoch": 0.33, "learning_rate": 4.224048859339175e-06, "logits/chosen": -1.5701301097869873, "logits/rejected": -0.9477971792221069, "logps/chosen": -546.9784545898438, "logps/rejected": -904.2532348632812, "loss": 0.1018, "rewards/accuracies": 0.875, "rewards/chosen": -0.2308419942855835, "rewards/margins": 0.4231252074241638, "rewards/rejected": -0.6539672613143921, "step": 2490 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.3893605470657349, "logits/rejected": -0.9853037595748901, "logps/chosen": -458.06365966796875, "logps/rejected": -872.8928833007812, "loss": 0.0986, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.226444274187088, "rewards/margins": 0.43268585205078125, "rewards/rejected": -0.6591302156448364, "step": 2500 }, { "epoch": 0.33, "learning_rate": 4.207122167199209e-06, "logits/chosen": -1.5319633483886719, "logits/rejected": -1.021113634109497, "logps/chosen": -471.55902099609375, "logps/rejected": -743.827392578125, "loss": 0.1398, "rewards/accuracies": 0.875, "rewards/chosen": -0.1666310578584671, "rewards/margins": 0.346027672290802, "rewards/rejected": -0.5126587152481079, "step": 2510 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.6109545230865479, "logits/rejected": -0.9327214360237122, "logps/chosen": -447.84344482421875, "logps/rejected": -748.9298095703125, "loss": 0.1313, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13613386452198029, "rewards/margins": 0.39785856008529663, "rewards/rejected": -0.5339924097061157, "step": 2520 }, { "epoch": 0.34, "learning_rate": 4.1900475595691044e-06, "logits/chosen": -1.4914381504058838, "logits/rejected": -0.8920121192932129, "logps/chosen": -384.0286865234375, "logps/rejected": -717.90478515625, "loss": 0.1452, "rewards/accuracies": 0.75, "rewards/chosen": -0.13352106511592865, "rewards/margins": 0.37515324354171753, "rewards/rejected": -0.5086743235588074, "step": 2530 }, { "epoch": 0.34, "learning_rate": 4.181455249275701e-06, "logits/chosen": -1.5109646320343018, "logits/rejected": -1.1194543838500977, "logps/chosen": -518.5322265625, "logps/rejected": -869.0886840820312, "loss": 0.1341, "rewards/accuracies": 0.875, "rewards/chosen": -0.21598049998283386, "rewards/margins": 0.38135606050491333, "rewards/rejected": -0.5973365902900696, "step": 2540 }, { "epoch": 0.34, "learning_rate": 4.172826515897146e-06, "logits/chosen": -1.7286665439605713, "logits/rejected": -1.1569503545761108, "logps/chosen": -389.3125915527344, "logps/rejected": -790.7874755859375, "loss": 0.1067, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17809465527534485, "rewards/margins": 0.4077723026275635, "rewards/rejected": -0.5858669877052307, "step": 2550 }, { "epoch": 0.34, "learning_rate": 4.1641615463459926e-06, "logits/chosen": -1.7263851165771484, "logits/rejected": -1.102418065071106, "logps/chosen": -431.767822265625, "logps/rejected": -833.9580078125, "loss": 0.0802, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13353870809078217, "rewards/margins": 0.4436104893684387, "rewards/rejected": -0.5771491527557373, "step": 2560 }, { "epoch": 0.34, "learning_rate": 4.1554605283197255e-06, "logits/chosen": -1.7348525524139404, "logits/rejected": -1.1465692520141602, "logps/chosen": -418.4853515625, "logps/rejected": -732.1386108398438, "loss": 0.1604, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14343003928661346, "rewards/margins": 0.34780198335647583, "rewards/rejected": -0.4912320077419281, "step": 2570 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.5681244134902954, "logits/rejected": -0.9826623797416687, "logps/chosen": -454.61669921875, "logps/rejected": -774.7626342773438, "loss": 0.1508, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15054744482040405, "rewards/margins": 0.34130164980888367, "rewards/rejected": -0.4918491244316101, "step": 2580 }, { "epoch": 0.35, "learning_rate": 4.1379511015320625e-06, "logits/chosen": -1.6330792903900146, "logits/rejected": -1.0874810218811035, "logps/chosen": -515.1699829101562, "logps/rejected": -785.629638671875, "loss": 0.1298, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21226203441619873, "rewards/margins": 0.3242810070514679, "rewards/rejected": -0.5365430116653442, "step": 2590 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.873373031616211, "logits/rejected": -1.1326647996902466, "logps/chosen": -452.2235412597656, "logps/rejected": -815.4119873046875, "loss": 0.1293, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.178654745221138, "rewards/margins": 0.3714352250099182, "rewards/rejected": -0.550089955329895, "step": 2600 }, { "epoch": 0.35, "learning_rate": 4.120299752657828e-06, "logits/chosen": -1.548407793045044, "logits/rejected": -1.000870943069458, "logps/chosen": -445.26361083984375, "logps/rejected": -791.4591064453125, "loss": 0.1351, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2141580581665039, "rewards/margins": 0.3528524339199066, "rewards/rejected": -0.5670104622840881, "step": 2610 }, { "epoch": 0.35, "learning_rate": 4.111421334905468e-06, "logits/chosen": -1.5858871936798096, "logits/rejected": -1.104501485824585, "logps/chosen": -441.08575439453125, "logps/rejected": -841.6727294921875, "loss": 0.1148, "rewards/accuracies": 0.875, "rewards/chosen": -0.17988021671772003, "rewards/margins": 0.41606348752975464, "rewards/rejected": -0.5959437489509583, "step": 2620 }, { "epoch": 0.35, "learning_rate": 4.102508011117684e-06, "logits/chosen": -1.5839554071426392, "logits/rejected": -0.9679198265075684, "logps/chosen": -413.9541931152344, "logps/rejected": -723.2825927734375, "loss": 0.1373, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14662650227546692, "rewards/margins": 0.3743670582771301, "rewards/rejected": -0.5209935307502747, "step": 2630 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.7011744976043701, "logits/rejected": -1.1305286884307861, "logps/chosen": -472.72796630859375, "logps/rejected": -883.6267700195312, "loss": 0.1277, "rewards/accuracies": 0.875, "rewards/chosen": -0.16943880915641785, "rewards/margins": 0.43496638536453247, "rewards/rejected": -0.6044051647186279, "step": 2640 }, { "epoch": 0.35, "learning_rate": 4.084577418496775e-06, "logits/chosen": -1.632817029953003, "logits/rejected": -1.0543255805969238, "logps/chosen": -532.606201171875, "logps/rejected": -842.3258056640625, "loss": 0.1031, "rewards/accuracies": 0.875, "rewards/chosen": -0.21153993904590607, "rewards/margins": 0.35972151160240173, "rewards/rejected": -0.5712614059448242, "step": 2650 }, { "epoch": 0.35, "learning_rate": 4.075560538069767e-06, "logits/chosen": -1.6261268854141235, "logits/rejected": -1.127497911453247, "logps/chosen": -509.52850341796875, "logps/rejected": -826.4234619140625, "loss": 0.1434, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22948768734931946, "rewards/margins": 0.346964955329895, "rewards/rejected": -0.5764526128768921, "step": 2660 }, { "epoch": 0.36, "learning_rate": 4.066509528411151e-06, "logits/chosen": -1.5209250450134277, "logits/rejected": -1.0693763494491577, "logps/chosen": -490.5738220214844, "logps/rejected": -792.5563354492188, "loss": 0.1522, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23105911910533905, "rewards/margins": 0.33465856313705444, "rewards/rejected": -0.5657176375389099, "step": 2670 }, { "epoch": 0.36, "learning_rate": 4.05742458558068e-06, "logits/chosen": -1.447706937789917, "logits/rejected": -1.0158064365386963, "logps/chosen": -509.220947265625, "logps/rejected": -778.8126220703125, "loss": 0.1688, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22236888110637665, "rewards/margins": 0.2937421202659607, "rewards/rejected": -0.5161110162734985, "step": 2680 }, { "epoch": 0.36, "learning_rate": 4.048305906373151e-06, "logits/chosen": -1.3585022687911987, "logits/rejected": -1.0554983615875244, "logps/chosen": -467.7109375, "logps/rejected": -855.8693237304688, "loss": 0.0999, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.24606266617774963, "rewards/margins": 0.36643314361572266, "rewards/rejected": -0.6124957799911499, "step": 2690 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.6710975170135498, "logits/rejected": -1.0430233478546143, "logps/chosen": -529.962890625, "logps/rejected": -842.4357299804688, "loss": 0.1731, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2704309821128845, "rewards/margins": 0.31774449348449707, "rewards/rejected": -0.5881755352020264, "step": 2700 }, { "epoch": 0.36, "learning_rate": 4.029968129655757e-06, "logits/chosen": -1.5244982242584229, "logits/rejected": -0.917557418346405, "logps/chosen": -492.8162536621094, "logps/rejected": -695.8026733398438, "loss": 0.1951, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24431486427783966, "rewards/margins": 0.2869857847690582, "rewards/rejected": -0.5313006639480591, "step": 2710 }, { "epoch": 0.36, "learning_rate": 4.020749429372286e-06, "logits/chosen": -1.7436425685882568, "logits/rejected": -1.2350785732269287, "logps/chosen": -436.5732421875, "logps/rejected": -712.8914794921875, "loss": 0.1449, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21191930770874023, "rewards/margins": 0.32732638716697693, "rewards/rejected": -0.5392457246780396, "step": 2720 }, { "epoch": 0.36, "learning_rate": 4.011497787155938e-06, "logits/chosen": -1.5413177013397217, "logits/rejected": -0.9734029769897461, "logps/chosen": -423.8431091308594, "logps/rejected": -793.4420166015625, "loss": 0.1218, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16747328639030457, "rewards/margins": 0.3857666850090027, "rewards/rejected": -0.5532399415969849, "step": 2730 }, { "epoch": 0.37, "learning_rate": 4.002213403412492e-06, "logits/chosen": -1.5266112089157104, "logits/rejected": -0.9985775947570801, "logps/chosen": -449.9827575683594, "logps/rejected": -859.7175903320312, "loss": 0.1585, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19104166328907013, "rewards/margins": 0.42445772886276245, "rewards/rejected": -0.6154993772506714, "step": 2740 }, { "epoch": 0.37, "learning_rate": 3.992896479256966e-06, "logits/chosen": -1.478150725364685, "logits/rejected": -1.0377126932144165, "logps/chosen": -505.3447265625, "logps/rejected": -800.2145385742188, "loss": 0.173, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21070396900177002, "rewards/margins": 0.3541814088821411, "rewards/rejected": -0.5648853182792664, "step": 2750 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.5522105693817139, "logits/rejected": -1.0478585958480835, "logps/chosen": -468.27099609375, "logps/rejected": -687.7916259765625, "loss": 0.1426, "rewards/accuracies": 0.875, "rewards/chosen": -0.19997310638427734, "rewards/margins": 0.26816970109939575, "rewards/rejected": -0.4681428074836731, "step": 2760 }, { "epoch": 0.37, "learning_rate": 3.974165817689758e-06, "logits/chosen": -1.373271107673645, "logits/rejected": -0.8424400091171265, "logps/chosen": -415.9774475097656, "logps/rejected": -862.8703002929688, "loss": 0.0752, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17877209186553955, "rewards/margins": 0.41697627305984497, "rewards/rejected": -0.5957483649253845, "step": 2770 }, { "epoch": 0.37, "learning_rate": 3.964752486015001e-06, "logits/chosen": -1.390533208847046, "logits/rejected": -0.8571497201919556, "logps/chosen": -443.14984130859375, "logps/rejected": -853.4337768554688, "loss": 0.1228, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.154323548078537, "rewards/margins": 0.4272845685482025, "rewards/rejected": -0.5816081166267395, "step": 2780 }, { "epoch": 0.37, "learning_rate": 3.955307425393224e-06, "logits/chosen": -1.6358531713485718, "logits/rejected": -0.7802098989486694, "logps/chosen": -558.2034912109375, "logps/rejected": -880.1002197265625, "loss": 0.1227, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2329464852809906, "rewards/margins": 0.41606202721595764, "rewards/rejected": -0.6490085124969482, "step": 2790 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.4261865615844727, "logits/rejected": -0.8663290739059448, "logps/chosen": -381.68060302734375, "logps/rejected": -708.1229248046875, "loss": 0.154, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1556844562292099, "rewards/margins": 0.35576796531677246, "rewards/rejected": -0.5114524364471436, "step": 2800 }, { "epoch": 0.37, "learning_rate": 3.936322936373641e-06, "logits/chosen": -1.616782546043396, "logits/rejected": -0.6785197854042053, "logps/chosen": -467.4331970214844, "logps/rejected": -773.0416259765625, "loss": 0.1211, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17097237706184387, "rewards/margins": 0.3937646746635437, "rewards/rejected": -0.56473708152771, "step": 2810 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.3506128787994385, "logits/rejected": -0.9120258092880249, "logps/chosen": -515.1641235351562, "logps/rejected": -906.06689453125, "loss": 0.0919, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2047976702451706, "rewards/margins": 0.4362905025482178, "rewards/rejected": -0.6410881876945496, "step": 2820 }, { "epoch": 0.38, "learning_rate": 3.9172139955630774e-06, "logits/chosen": -1.5046745538711548, "logits/rejected": -0.8224016427993774, "logps/chosen": -412.1595764160156, "logps/rejected": -697.3492431640625, "loss": 0.1774, "rewards/accuracies": 0.75, "rewards/chosen": -0.18449755012989044, "rewards/margins": 0.3409607708454132, "rewards/rejected": -0.5254582166671753, "step": 2830 }, { "epoch": 0.38, "learning_rate": 3.907613372729916e-06, "logits/chosen": -1.5527722835540771, "logits/rejected": -0.9975868463516235, "logps/chosen": -405.668212890625, "logps/rejected": -790.2307739257812, "loss": 0.1042, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16405755281448364, "rewards/margins": 0.3927202522754669, "rewards/rejected": -0.556777834892273, "step": 2840 }, { "epoch": 0.38, "learning_rate": 3.897982258676867e-06, "logits/chosen": -1.488878607749939, "logits/rejected": -1.200114130973816, "logps/chosen": -445.1380920410156, "logps/rejected": -784.933349609375, "loss": 0.1423, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1881919950246811, "rewards/margins": 0.3287205696105957, "rewards/rejected": -0.5169125199317932, "step": 2850 }, { "epoch": 0.38, "learning_rate": 3.888320862029699e-06, "logits/chosen": -1.3110463619232178, "logits/rejected": -1.0240827798843384, "logps/chosen": -434.48358154296875, "logps/rejected": -900.2415161132812, "loss": 0.1204, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1918242871761322, "rewards/margins": 0.4229181408882141, "rewards/rejected": -0.6147423982620239, "step": 2860 }, { "epoch": 0.38, "learning_rate": 3.878629392070143e-06, "logits/chosen": -1.4728009700775146, "logits/rejected": -0.9224138259887695, "logps/chosen": -559.6575927734375, "logps/rejected": -926.2965087890625, "loss": 0.0955, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2580861747264862, "rewards/margins": 0.3979208469390869, "rewards/rejected": -0.6560070514678955, "step": 2870 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.3200956583023071, "logits/rejected": -0.7975896596908569, "logps/chosen": -405.00048828125, "logps/rejected": -890.6468505859375, "loss": 0.0923, "rewards/accuracies": 0.875, "rewards/chosen": -0.16780821979045868, "rewards/margins": 0.4599390923976898, "rewards/rejected": -0.6277473568916321, "step": 2880 }, { "epoch": 0.39, "learning_rate": 3.859157072593459e-06, "logits/chosen": -1.4886184930801392, "logits/rejected": -0.9175424575805664, "logps/chosen": -541.4322509765625, "logps/rejected": -944.6419677734375, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -0.23973998427391052, "rewards/margins": 0.4599900245666504, "rewards/rejected": -0.6997300386428833, "step": 2890 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.635000467300415, "logits/rejected": -0.8878408670425415, "logps/chosen": -491.761474609375, "logps/rejected": -803.8742065429688, "loss": 0.1206, "rewards/accuracies": 0.875, "rewards/chosen": -0.19202962517738342, "rewards/margins": 0.38747844099998474, "rewards/rejected": -0.5795080661773682, "step": 2900 }, { "epoch": 0.39, "learning_rate": 3.839566987447492e-06, "logits/chosen": -1.3475700616836548, "logits/rejected": -0.7748931050300598, "logps/chosen": -468.58056640625, "logps/rejected": -784.2288208007812, "loss": 0.1174, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18681365251541138, "rewards/margins": 0.4012163281440735, "rewards/rejected": -0.5880299806594849, "step": 2910 }, { "epoch": 0.39, "learning_rate": 3.829728312792895e-06, "logits/chosen": -1.4943805932998657, "logits/rejected": -0.8177087903022766, "logps/chosen": -441.33258056640625, "logps/rejected": -884.5404052734375, "loss": 0.1021, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.18188966810703278, "rewards/margins": 0.4683989882469177, "rewards/rejected": -0.6502886414527893, "step": 2920 }, { "epoch": 0.39, "learning_rate": 3.819860834036859e-06, "logits/chosen": -1.371438980102539, "logits/rejected": -0.903844952583313, "logps/chosen": -394.12615966796875, "logps/rejected": -682.78955078125, "loss": 0.1467, "rewards/accuracies": 0.75, "rewards/chosen": -0.16386035084724426, "rewards/margins": 0.3356771469116211, "rewards/rejected": -0.49953746795654297, "step": 2930 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.5508685111999512, "logits/rejected": -1.134049654006958, "logps/chosen": -474.1361389160156, "logps/rejected": -845.4661254882812, "loss": 0.1215, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21748395264148712, "rewards/margins": 0.36039185523986816, "rewards/rejected": -0.5778758525848389, "step": 2940 }, { "epoch": 0.39, "learning_rate": 3.8000403198230385e-06, "logits/chosen": -1.3952841758728027, "logits/rejected": -0.7636578679084778, "logps/chosen": -570.806396484375, "logps/rejected": -870.5598754882812, "loss": 0.1219, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22723431885242462, "rewards/margins": 0.38328367471694946, "rewards/rejected": -0.6105180382728577, "step": 2950 }, { "epoch": 0.39, "learning_rate": 3.790087713710179e-06, "logits/chosen": -1.2440921068191528, "logits/rejected": -0.8564810752868652, "logps/chosen": -419.251220703125, "logps/rejected": -766.0396118164062, "loss": 0.1609, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2219938039779663, "rewards/margins": 0.33859145641326904, "rewards/rejected": -0.5605852603912354, "step": 2960 }, { "epoch": 0.4, "learning_rate": 3.780107162176429e-06, "logits/chosen": -1.5238640308380127, "logits/rejected": -0.9895439147949219, "logps/chosen": -494.74609375, "logps/rejected": -814.00537109375, "loss": 0.1314, "rewards/accuracies": 0.875, "rewards/chosen": -0.2533033490180969, "rewards/margins": 0.3240547180175781, "rewards/rejected": -0.5773580074310303, "step": 2970 }, { "epoch": 0.4, "learning_rate": 3.770098881416945e-06, "logits/chosen": -1.3618767261505127, "logits/rejected": -0.6415785551071167, "logps/chosen": -565.89501953125, "logps/rejected": -892.7698364257812, "loss": 0.1137, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22178450226783752, "rewards/margins": 0.44626274704933167, "rewards/rejected": -0.6680471897125244, "step": 2980 }, { "epoch": 0.4, "learning_rate": 3.760063088227542e-06, "logits/chosen": -1.2707054615020752, "logits/rejected": -0.9608383178710938, "logps/chosen": -476.09246826171875, "logps/rejected": -822.6774291992188, "loss": 0.108, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23713941872119904, "rewards/margins": 0.37951499223709106, "rewards/rejected": -0.6166543960571289, "step": 2990 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.34781813621521, "logits/rejected": -0.8298260569572449, "logps/chosen": -511.88116455078125, "logps/rejected": -827.2025146484375, "loss": 0.1354, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21970734000205994, "rewards/margins": 0.35646185278892517, "rewards/rejected": -0.5761691927909851, "step": 3000 }, { "epoch": 0.4, "learning_rate": 3.739909834717356e-06, "logits/chosen": -1.3226549625396729, "logits/rejected": -0.7820941209793091, "logps/chosen": -473.028564453125, "logps/rejected": -815.1127319335938, "loss": 0.1202, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16715632379055023, "rewards/margins": 0.38655251264572144, "rewards/rejected": -0.5537087917327881, "step": 3010 }, { "epoch": 0.4, "learning_rate": 3.7297928109491765e-06, "logits/chosen": -1.3209315538406372, "logits/rejected": -0.8163756132125854, "logps/chosen": -460.5916442871094, "logps/rejected": -885.9065551757812, "loss": 0.1215, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1943354308605194, "rewards/margins": 0.4527612626552582, "rewards/rejected": -0.6470966339111328, "step": 3020 }, { "epoch": 0.4, "learning_rate": 3.7196491478468322e-06, "logits/chosen": -1.1771209239959717, "logits/rejected": -0.7845407128334045, "logps/chosen": -478.44390869140625, "logps/rejected": -837.09521484375, "loss": 0.1583, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2497968226671219, "rewards/margins": 0.34961241483688354, "rewards/rejected": -0.5994092226028442, "step": 3030 }, { "epoch": 0.41, "learning_rate": 3.7094790651387414e-06, "logits/chosen": -1.425419807434082, "logits/rejected": -0.9499618411064148, "logps/chosen": -563.8905029296875, "logps/rejected": -868.3553466796875, "loss": 0.153, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2709125578403473, "rewards/margins": 0.30820900201797485, "rewards/rejected": -0.5791215300559998, "step": 3040 }, { "epoch": 0.41, "learning_rate": 3.699282783125616e-06, "logits/chosen": -1.3669326305389404, "logits/rejected": -0.837415874004364, "logps/chosen": -629.0724487304688, "logps/rejected": -925.4384765625, "loss": 0.1291, "rewards/accuracies": 0.875, "rewards/chosen": -0.2975293695926666, "rewards/margins": 0.33974334597587585, "rewards/rejected": -0.6372727155685425, "step": 3050 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.4641399383544922, "logits/rejected": -0.7488492727279663, "logps/chosen": -615.8585205078125, "logps/rejected": -912.4896240234375, "loss": 0.1308, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2836320400238037, "rewards/margins": 0.3700219392776489, "rewards/rejected": -0.6536539793014526, "step": 3060 }, { "epoch": 0.41, "learning_rate": 3.6788125052199264e-06, "logits/chosen": -1.3359787464141846, "logits/rejected": -0.7459200024604797, "logps/chosen": -444.7274475097656, "logps/rejected": -821.0389404296875, "loss": 0.1039, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18269316852092743, "rewards/margins": 0.4336867928504944, "rewards/rejected": -0.6163799166679382, "step": 3070 }, { "epoch": 0.41, "learning_rate": 3.668538952747236e-06, "logits/chosen": -1.4183568954467773, "logits/rejected": -0.9894925355911255, "logps/chosen": -457.7557678222656, "logps/rejected": -861.2824096679688, "loss": 0.1165, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21521393954753876, "rewards/margins": 0.3878498077392578, "rewards/rejected": -0.603063702583313, "step": 3080 }, { "epoch": 0.41, "learning_rate": 3.658240087799655e-06, "logits/chosen": -1.5412737131118774, "logits/rejected": -0.9888244867324829, "logps/chosen": -496.530517578125, "logps/rejected": -874.2278442382812, "loss": 0.1376, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2602888345718384, "rewards/margins": 0.362166166305542, "rewards/rejected": -0.6224549412727356, "step": 3090 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.5275121927261353, "logits/rejected": -0.8869367837905884, "logps/chosen": -536.9756469726562, "logps/rejected": -823.1558837890625, "loss": 0.1431, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2103598564863205, "rewards/margins": 0.3759499192237854, "rewards/rejected": -0.5863097906112671, "step": 3100 }, { "epoch": 0.41, "learning_rate": 3.6375673133846847e-06, "logits/chosen": -1.3907606601715088, "logits/rejected": -0.9182275533676147, "logps/chosen": -517.0655517578125, "logps/rejected": -853.0217895507812, "loss": 0.1194, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22291526198387146, "rewards/margins": 0.3772997558116913, "rewards/rejected": -0.6002150774002075, "step": 3110 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.3474422693252563, "logits/rejected": -0.7908347845077515, "logps/chosen": -425.14105224609375, "logps/rejected": -707.5562744140625, "loss": 0.2323, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1527886688709259, "rewards/margins": 0.3132486939430237, "rewards/rejected": -0.4660373330116272, "step": 3120 }, { "epoch": 0.42, "learning_rate": 3.616795973190442e-06, "logits/chosen": -1.3398116827011108, "logits/rejected": -0.9347062110900879, "logps/chosen": -406.9312438964844, "logps/rejected": -708.2562255859375, "loss": 0.1497, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18078385293483734, "rewards/margins": 0.30927419662475586, "rewards/rejected": -0.490058034658432, "step": 3130 }, { "epoch": 0.42, "learning_rate": 3.6063739030204226e-06, "logits/chosen": -1.6142152547836304, "logits/rejected": -0.9039406776428223, "logps/chosen": -384.0916748046875, "logps/rejected": -773.7716064453125, "loss": 0.1156, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11609621345996857, "rewards/margins": 0.4180026054382324, "rewards/rejected": -0.534098744392395, "step": 3140 }, { "epoch": 0.42, "learning_rate": 3.595927866972694e-06, "logits/chosen": -1.4731504917144775, "logits/rejected": -0.8567525148391724, "logps/chosen": -488.36602783203125, "logps/rejected": -888.5875854492188, "loss": 0.1043, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16218402981758118, "rewards/margins": 0.41839224100112915, "rewards/rejected": -0.5805762410163879, "step": 3150 }, { "epoch": 0.42, "learning_rate": 3.5854580913255706e-06, "logits/chosen": -1.484362244606018, "logits/rejected": -0.9274988174438477, "logps/chosen": -472.91107177734375, "logps/rejected": -783.4113159179688, "loss": 0.1497, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14970546960830688, "rewards/margins": 0.37639063596725464, "rewards/rejected": -0.5260960459709167, "step": 3160 }, { "epoch": 0.42, "learning_rate": 3.574964802871607e-06, "logits/chosen": -1.385100245475769, "logits/rejected": -0.8206901550292969, "logps/chosen": -449.2620544433594, "logps/rejected": -824.8053588867188, "loss": 0.1342, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19664952158927917, "rewards/margins": 0.3465280532836914, "rewards/rejected": -0.543177604675293, "step": 3170 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.433672308921814, "logits/rejected": -0.7420425415039062, "logps/chosen": -449.03680419921875, "logps/rejected": -821.3928833007812, "loss": 0.1277, "rewards/accuracies": 0.875, "rewards/chosen": -0.17572662234306335, "rewards/margins": 0.4020700454711914, "rewards/rejected": -0.5777965784072876, "step": 3180 }, { "epoch": 0.43, "learning_rate": 3.5539085972550786e-06, "logits/chosen": -1.5129320621490479, "logits/rejected": -0.6774355173110962, "logps/chosen": -465.81524658203125, "logps/rejected": -860.1716918945312, "loss": 0.0563, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15053944289684296, "rewards/margins": 0.453474760055542, "rewards/rejected": -0.6040140986442566, "step": 3190 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.5318454504013062, "logits/rejected": -1.0314596891403198, "logps/chosen": -443.3661193847656, "logps/rejected": -740.8358764648438, "loss": 0.1382, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18502503633499146, "rewards/margins": 0.36333030462265015, "rewards/rejected": -0.5483554005622864, "step": 3200 }, { "epoch": 0.43, "learning_rate": 3.532761074561355e-06, "logits/chosen": -1.1741279363632202, "logits/rejected": -0.5288771986961365, "logps/chosen": -518.4847412109375, "logps/rejected": -838.5154418945312, "loss": 0.1252, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20451363921165466, "rewards/margins": 0.39652013778686523, "rewards/rejected": -0.6010338068008423, "step": 3210 }, { "epoch": 0.43, "learning_rate": 3.522153641615345e-06, "logits/chosen": -1.1148918867111206, "logits/rejected": -0.7859822511672974, "logps/chosen": -379.1599426269531, "logps/rejected": -732.1954345703125, "loss": 0.1231, "rewards/accuracies": 0.75, "rewards/chosen": -0.19221094250679016, "rewards/margins": 0.33260732889175415, "rewards/rejected": -0.5248182415962219, "step": 3220 }, { "epoch": 0.43, "learning_rate": 3.5115240671409534e-06, "logits/chosen": -1.4787633419036865, "logits/rejected": -0.8908971548080444, "logps/chosen": -522.7233276367188, "logps/rejected": -811.7718505859375, "loss": 0.136, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2209792137145996, "rewards/margins": 0.36088380217552185, "rewards/rejected": -0.5818630456924438, "step": 3230 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.3831818103790283, "logits/rejected": -0.928032398223877, "logps/chosen": -450.38995361328125, "logps/rejected": -843.4984130859375, "loss": 0.0915, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2006407231092453, "rewards/margins": 0.36500436067581177, "rewards/rejected": -0.5656450390815735, "step": 3240 }, { "epoch": 0.43, "learning_rate": 3.4901994150978926e-06, "logits/chosen": -1.2624738216400146, "logits/rejected": -1.0254709720611572, "logps/chosen": -492.21185302734375, "logps/rejected": -830.1780395507812, "loss": 0.1727, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2421186864376068, "rewards/margins": 0.3397548794746399, "rewards/rejected": -0.5818734765052795, "step": 3250 }, { "epoch": 0.43, "learning_rate": 3.4795047994562463e-06, "logits/chosen": -1.327124834060669, "logits/rejected": -0.8216224908828735, "logps/chosen": -557.8870849609375, "logps/rejected": -921.6619262695312, "loss": 0.1273, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26846641302108765, "rewards/margins": 0.3692953884601593, "rewards/rejected": -0.6377617716789246, "step": 3260 }, { "epoch": 0.44, "learning_rate": 3.4687889661302577e-06, "logits/chosen": -1.5927109718322754, "logits/rejected": -1.05434250831604, "logps/chosen": -528.978271484375, "logps/rejected": -948.4773559570312, "loss": 0.1141, "rewards/accuracies": 0.875, "rewards/chosen": -0.2590687572956085, "rewards/margins": 0.3986712098121643, "rewards/rejected": -0.6577399969100952, "step": 3270 }, { "epoch": 0.44, "learning_rate": 3.458052147242494e-06, "logits/chosen": -1.6400234699249268, "logits/rejected": -0.8933243751525879, "logps/chosen": -522.9338989257812, "logps/rejected": -923.7149658203125, "loss": 0.0918, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19308575987815857, "rewards/margins": 0.486158549785614, "rewards/rejected": -0.679244339466095, "step": 3280 }, { "epoch": 0.44, "learning_rate": 3.4472945753701038e-06, "logits/chosen": -1.5157047510147095, "logits/rejected": -0.8758195042610168, "logps/chosen": -530.8990478515625, "logps/rejected": -964.5487060546875, "loss": 0.0872, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.21965882182121277, "rewards/margins": 0.4682764410972595, "rewards/rejected": -0.6879353523254395, "step": 3290 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.5231119394302368, "logits/rejected": -1.2421231269836426, "logps/chosen": -448.62506103515625, "logps/rejected": -1045.0579833984375, "loss": 0.0683, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21021994948387146, "rewards/margins": 0.49710339307785034, "rewards/rejected": -0.707323431968689, "step": 3300 }, { "epoch": 0.44, "learning_rate": 3.4257181052227133e-06, "logits/chosen": -1.4369999170303345, "logits/rejected": -0.8599227666854858, "logps/chosen": -442.81646728515625, "logps/rejected": -837.7467041015625, "loss": 0.1153, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20834989845752716, "rewards/margins": 0.42062148451805115, "rewards/rejected": -0.6289713382720947, "step": 3310 }, { "epoch": 0.44, "learning_rate": 3.4148996743295305e-06, "logits/chosen": -1.1891943216323853, "logits/rejected": -0.7156860828399658, "logps/chosen": -483.78857421875, "logps/rejected": -823.6920166015625, "loss": 0.1533, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22720369696617126, "rewards/margins": 0.39558321237564087, "rewards/rejected": -0.6227868795394897, "step": 3320 }, { "epoch": 0.44, "learning_rate": 3.4040614252052305e-06, "logits/chosen": -1.3941460847854614, "logits/rejected": -0.7187548875808716, "logps/chosen": -565.5982666015625, "logps/rejected": -822.6068115234375, "loss": 0.1826, "rewards/accuracies": 0.75, "rewards/chosen": -0.263577938079834, "rewards/margins": 0.32034698128700256, "rewards/rejected": -0.5839249491691589, "step": 3330 }, { "epoch": 0.45, "learning_rate": 3.3932035926241103e-06, "logits/chosen": -1.2055962085723877, "logits/rejected": -0.8065207600593567, "logps/chosen": -595.4679565429688, "logps/rejected": -894.6494140625, "loss": 0.2069, "rewards/accuracies": 0.75, "rewards/chosen": -0.33704596757888794, "rewards/margins": 0.32209089398384094, "rewards/rejected": -0.6591368317604065, "step": 3340 }, { "epoch": 0.45, "learning_rate": 3.3823264117846722e-06, "logits/chosen": -1.1408950090408325, "logits/rejected": -0.7288360595703125, "logps/chosen": -504.2162170410156, "logps/rejected": -865.4747314453125, "loss": 0.1382, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2324347048997879, "rewards/margins": 0.37575414776802063, "rewards/rejected": -0.608188807964325, "step": 3350 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.360996127128601, "logits/rejected": -0.8717953562736511, "logps/chosen": -476.2850036621094, "logps/rejected": -867.3087768554688, "loss": 0.0808, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2431853711605072, "rewards/margins": 0.39439740777015686, "rewards/rejected": -0.6375828385353088, "step": 3360 }, { "epoch": 0.45, "learning_rate": 3.360514948215339e-06, "logits/chosen": -1.2623645067214966, "logits/rejected": -1.0878899097442627, "logps/chosen": -432.14813232421875, "logps/rejected": -887.6476440429688, "loss": 0.083, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20338860154151917, "rewards/margins": 0.41923999786376953, "rewards/rejected": -0.6226286292076111, "step": 3370 }, { "epoch": 0.45, "learning_rate": 3.349581137957604e-06, "logits/chosen": -1.3514466285705566, "logits/rejected": -0.8106681704521179, "logps/chosen": -488.593994140625, "logps/rejected": -883.98095703125, "loss": 0.1353, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23182758688926697, "rewards/margins": 0.3952936828136444, "rewards/rejected": -0.6271212697029114, "step": 3380 }, { "epoch": 0.45, "learning_rate": 3.338628924375638e-06, "logits/chosen": -1.448120355606079, "logits/rejected": -0.6248105764389038, "logps/chosen": -492.08331298828125, "logps/rejected": -817.7333374023438, "loss": 0.119, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20928683876991272, "rewards/margins": 0.4339142441749573, "rewards/rejected": -0.6432010531425476, "step": 3390 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.4691401720046997, "logits/rejected": -0.9449941515922546, "logps/chosen": -499.2208557128906, "logps/rejected": -860.2800903320312, "loss": 0.1704, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22295789420604706, "rewards/margins": 0.38977617025375366, "rewards/rejected": -0.6127340197563171, "step": 3400 }, { "epoch": 0.45, "learning_rate": 3.3166702366043364e-06, "logits/chosen": -1.6984446048736572, "logits/rejected": -1.022908329963684, "logps/chosen": -398.6534729003906, "logps/rejected": -742.465576171875, "loss": 0.1171, "rewards/accuracies": 0.875, "rewards/chosen": -0.13367922604084015, "rewards/margins": 0.40723562240600586, "rewards/rejected": -0.54091477394104, "step": 3410 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.5041329860687256, "logits/rejected": -0.7527793645858765, "logps/chosen": -520.987060546875, "logps/rejected": -816.6175537109375, "loss": 0.1476, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22410738468170166, "rewards/margins": 0.37477830052375793, "rewards/rejected": -0.5988856554031372, "step": 3420 }, { "epoch": 0.46, "learning_rate": 3.294640787536245e-06, "logits/chosen": -1.3957608938217163, "logits/rejected": -0.8155180811882019, "logps/chosen": -481.0667419433594, "logps/rejected": -809.5614013671875, "loss": 0.1274, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18456891179084778, "rewards/margins": 0.3828362822532654, "rewards/rejected": -0.5674052238464355, "step": 3430 }, { "epoch": 0.46, "learning_rate": 3.2836001237702993e-06, "logits/chosen": -1.3207615613937378, "logits/rejected": -0.7140559554100037, "logps/chosen": -464.8232421875, "logps/rejected": -858.9168090820312, "loss": 0.1253, "rewards/accuracies": 0.875, "rewards/chosen": -0.21229639649391174, "rewards/margins": 0.41832393407821655, "rewards/rejected": -0.6306203603744507, "step": 3440 }, { "epoch": 0.46, "learning_rate": 3.272542485937369e-06, "logits/chosen": -1.4193308353424072, "logits/rejected": -0.9011315107345581, "logps/chosen": -470.04986572265625, "logps/rejected": -834.3380126953125, "loss": 0.1232, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2021806240081787, "rewards/margins": 0.38284215331077576, "rewards/rejected": -0.5850228071212769, "step": 3450 }, { "epoch": 0.46, "learning_rate": 3.2614681135640696e-06, "logits/chosen": -1.4956685304641724, "logits/rejected": -1.192681074142456, "logps/chosen": -351.14007568359375, "logps/rejected": -777.5610961914062, "loss": 0.1223, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1393372118473053, "rewards/margins": 0.39933890104293823, "rewards/rejected": -0.5386761426925659, "step": 3460 }, { "epoch": 0.46, "learning_rate": 3.2503772465395143e-06, "logits/chosen": -1.298201084136963, "logits/rejected": -1.021277666091919, "logps/chosen": -355.12640380859375, "logps/rejected": -815.8689575195312, "loss": 0.1204, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17074286937713623, "rewards/margins": 0.39700907468795776, "rewards/rejected": -0.5677520036697388, "step": 3470 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.5369417667388916, "logits/rejected": -0.932148277759552, "logps/chosen": -457.8705139160156, "logps/rejected": -800.6802368164062, "loss": 0.1309, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17033250629901886, "rewards/margins": 0.3913556933403015, "rewards/rejected": -0.5616881847381592, "step": 3480 }, { "epoch": 0.47, "learning_rate": 3.228146989874389e-06, "logits/chosen": -1.6076122522354126, "logits/rejected": -1.0611704587936401, "logps/chosen": -472.25360107421875, "logps/rejected": -867.3209228515625, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/chosen": -0.22368569672107697, "rewards/margins": 0.3945960998535156, "rewards/rejected": -0.6182817816734314, "step": 3490 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.478785514831543, "logits/rejected": -0.9631083607673645, "logps/chosen": -506.24517822265625, "logps/rejected": -775.35107421875, "loss": 0.1945, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24012240767478943, "rewards/margins": 0.32278794050216675, "rewards/rejected": -0.5629103183746338, "step": 3500 }, { "epoch": 0.47, "learning_rate": 3.205853642107192e-06, "logits/chosen": -1.480486512184143, "logits/rejected": -1.117382287979126, "logps/chosen": -494.24310302734375, "logps/rejected": -832.8094482421875, "loss": 0.1905, "rewards/accuracies": 0.75, "rewards/chosen": -0.27982884645462036, "rewards/margins": 0.32004794478416443, "rewards/rejected": -0.5998767614364624, "step": 3510 }, { "epoch": 0.47, "learning_rate": 3.1946839124862873e-06, "logits/chosen": -1.1592427492141724, "logits/rejected": -0.8009225726127625, "logps/chosen": -427.3486328125, "logps/rejected": -778.5949096679688, "loss": 0.192, "rewards/accuracies": 0.75, "rewards/chosen": -0.22615352272987366, "rewards/margins": 0.3284720182418823, "rewards/rejected": -0.5546255111694336, "step": 3520 }, { "epoch": 0.47, "learning_rate": 3.183499134869721e-06, "logits/chosen": -1.5271915197372437, "logits/rejected": -0.893467128276825, "logps/chosen": -505.61572265625, "logps/rejected": -891.1272583007812, "loss": 0.0979, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21243330836296082, "rewards/margins": 0.42251452803611755, "rewards/rejected": -0.6349478960037231, "step": 3530 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.4152326583862305, "logits/rejected": -0.8314677476882935, "logps/chosen": -408.21337890625, "logps/rejected": -841.8865966796875, "loss": 0.1201, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16699185967445374, "rewards/margins": 0.4409754276275635, "rewards/rejected": -0.6079672574996948, "step": 3540 }, { "epoch": 0.47, "learning_rate": 3.1610854050930063e-06, "logits/chosen": -1.2182379961013794, "logits/rejected": -0.9724875688552856, "logps/chosen": -522.8060302734375, "logps/rejected": -934.7429809570312, "loss": 0.1299, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24052861332893372, "rewards/margins": 0.3977331519126892, "rewards/rejected": -0.6382617354393005, "step": 3550 }, { "epoch": 0.47, "learning_rate": 3.149856938451094e-06, "logits/chosen": -1.5642400979995728, "logits/rejected": -1.1071844100952148, "logps/chosen": -546.7676391601562, "logps/rejected": -837.0343017578125, "loss": 0.1513, "rewards/accuracies": 0.875, "rewards/chosen": -0.2630346417427063, "rewards/margins": 0.30036547780036926, "rewards/rejected": -0.5634000897407532, "step": 3560 }, { "epoch": 0.48, "learning_rate": 3.1386143948394764e-06, "logits/chosen": -1.6900438070297241, "logits/rejected": -1.0532042980194092, "logps/chosen": -463.26873779296875, "logps/rejected": -905.4603271484375, "loss": 0.0941, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1867888867855072, "rewards/margins": 0.4671444892883301, "rewards/rejected": -0.6539333462715149, "step": 3570 }, { "epoch": 0.48, "learning_rate": 3.127358017790132e-06, "logits/chosen": -1.4791187047958374, "logits/rejected": -0.8488872647285461, "logps/chosen": -542.0985717773438, "logps/rejected": -987.3577270507812, "loss": 0.1222, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.24738340079784393, "rewards/margins": 0.47162240743637085, "rewards/rejected": -0.719005823135376, "step": 3580 }, { "epoch": 0.48, "learning_rate": 3.116088051134695e-06, "logits/chosen": -1.496320128440857, "logits/rejected": -0.7940270304679871, "logps/chosen": -513.0885009765625, "logps/rejected": -827.7974853515625, "loss": 0.1197, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2183947116136551, "rewards/margins": 0.3765661120414734, "rewards/rejected": -0.5949608087539673, "step": 3590 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.5090911388397217, "logits/rejected": -0.9907795190811157, "logps/chosen": -439.2784118652344, "logps/rejected": -896.08203125, "loss": 0.0892, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19088810682296753, "rewards/margins": 0.43076205253601074, "rewards/rejected": -0.6216501593589783, "step": 3600 }, { "epoch": 0.48, "learning_rate": 3.0935083257986493e-06, "logits/chosen": -1.8225009441375732, "logits/rejected": -1.1103214025497437, "logps/chosen": -483.91375732421875, "logps/rejected": -790.5393676757812, "loss": 0.18, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2046966850757599, "rewards/margins": 0.34245455265045166, "rewards/rejected": -0.5471512079238892, "step": 3610 }, { "epoch": 0.48, "learning_rate": 3.082199056232015e-06, "logits/chosen": -1.4404807090759277, "logits/rejected": -0.9755549430847168, "logps/chosen": -555.2987670898438, "logps/rejected": -895.1951293945312, "loss": 0.1219, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.27383953332901, "rewards/margins": 0.34614747762680054, "rewards/rejected": -0.6199870109558105, "step": 3620 }, { "epoch": 0.48, "learning_rate": 3.0708771752766397e-06, "logits/chosen": -1.5266478061676025, "logits/rejected": -0.9134511947631836, "logps/chosen": -450.92852783203125, "logps/rejected": -807.119384765625, "loss": 0.1357, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17292854189872742, "rewards/margins": 0.3673229515552521, "rewards/rejected": -0.5402514338493347, "step": 3630 }, { "epoch": 0.49, "learning_rate": 3.059542928183079e-06, "logits/chosen": -1.5561821460723877, "logits/rejected": -1.0397491455078125, "logps/chosen": -460.45941162109375, "logps/rejected": -793.2342529296875, "loss": 0.1322, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1769510954618454, "rewards/margins": 0.3535122573375702, "rewards/rejected": -0.5304633378982544, "step": 3640 }, { "epoch": 0.49, "learning_rate": 3.0481965604697582e-06, "logits/chosen": -1.623671531677246, "logits/rejected": -0.8607115745544434, "logps/chosen": -446.0001525878906, "logps/rejected": -781.194091796875, "loss": 0.1402, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17367401719093323, "rewards/margins": 0.3763401508331299, "rewards/rejected": -0.5500141382217407, "step": 3650 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.7052555084228516, "logits/rejected": -1.061753273010254, "logps/chosen": -525.1064453125, "logps/rejected": -799.73486328125, "loss": 0.1289, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17319978773593903, "rewards/margins": 0.35249972343444824, "rewards/rejected": -0.5256994962692261, "step": 3660 }, { "epoch": 0.49, "learning_rate": 3.025468446564985e-06, "logits/chosen": -1.3997642993927002, "logits/rejected": -1.0487319231033325, "logps/chosen": -529.3223876953125, "logps/rejected": -924.0462646484375, "loss": 0.0795, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23997943103313446, "rewards/margins": 0.42228931188583374, "rewards/rejected": -0.6622687578201294, "step": 3670 }, { "epoch": 0.49, "learning_rate": 3.0140871927018466e-06, "logits/chosen": -1.5465452671051025, "logits/rejected": -0.8040558695793152, "logps/chosen": -551.8374633789062, "logps/rejected": -901.1784057617188, "loss": 0.1064, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21528461575508118, "rewards/margins": 0.4289511740207672, "rewards/rejected": -0.6442357301712036, "step": 3680 }, { "epoch": 0.49, "learning_rate": 3.002694802864912e-06, "logits/chosen": -1.336032509803772, "logits/rejected": -0.9531657099723816, "logps/chosen": -409.7906188964844, "logps/rejected": -796.7199096679688, "loss": 0.1285, "rewards/accuracies": 0.875, "rewards/chosen": -0.19792523980140686, "rewards/margins": 0.40255865454673767, "rewards/rejected": -0.6004839539527893, "step": 3690 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.6613883972167969, "logits/rejected": -0.9797622561454773, "logps/chosen": -476.36474609375, "logps/rejected": -673.6729736328125, "loss": 0.1785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1722622811794281, "rewards/margins": 0.3274669051170349, "rewards/rejected": -0.4997292160987854, "step": 3700 }, { "epoch": 0.49, "learning_rate": 2.9798776026171087e-06, "logits/chosen": -1.4321849346160889, "logits/rejected": -0.8425230979919434, "logps/chosen": -474.7699279785156, "logps/rejected": -821.5621337890625, "loss": 0.1192, "rewards/accuracies": 0.875, "rewards/chosen": -0.1794072985649109, "rewards/margins": 0.40213608741760254, "rewards/rejected": -0.5815433859825134, "step": 3710 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.423341989517212, "logits/rejected": -0.8701097369194031, "logps/chosen": -482.8060607910156, "logps/rejected": -920.9959106445312, "loss": 0.1074, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.191407710313797, "rewards/margins": 0.44904837012290955, "rewards/rejected": -0.6404560804367065, "step": 3720 }, { "epoch": 0.5, "learning_rate": 2.957018822843154e-06, "logits/chosen": -1.5194377899169922, "logits/rejected": -0.7663905620574951, "logps/chosen": -465.7847595214844, "logps/rejected": -725.7530517578125, "loss": 0.1096, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1776033490896225, "rewards/margins": 0.35331064462661743, "rewards/rejected": -0.5309139490127563, "step": 3730 }, { "epoch": 0.5, "learning_rate": 2.945574459442917e-06, "logits/chosen": -1.799757957458496, "logits/rejected": -0.9618920087814331, "logps/chosen": -517.2938842773438, "logps/rejected": -848.6304931640625, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/chosen": -0.17708583176136017, "rewards/margins": 0.3980599045753479, "rewards/rejected": -0.5751457214355469, "step": 3740 }, { "epoch": 0.5, "learning_rate": 2.9341204441673267e-06, "logits/chosen": -1.3543765544891357, "logits/rejected": -0.9924432635307312, "logps/chosen": -404.3013610839844, "logps/rejected": -843.3447265625, "loss": 0.079, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1664157211780548, "rewards/margins": 0.4492465555667877, "rewards/rejected": -0.6156622171401978, "step": 3750 }, { "epoch": 0.5, "learning_rate": 2.922657025129185e-06, "logits/chosen": -1.5894160270690918, "logits/rejected": -0.9104646444320679, "logps/chosen": -456.28131103515625, "logps/rejected": -912.9832763671875, "loss": 0.0968, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19846588373184204, "rewards/margins": 0.45922431349754333, "rewards/rejected": -0.6576902270317078, "step": 3760 }, { "epoch": 0.5, "learning_rate": 2.9111844506449973e-06, "logits/chosen": -1.4002254009246826, "logits/rejected": -1.0213944911956787, "logps/chosen": -473.43035888671875, "logps/rejected": -841.4640502929688, "loss": 0.1599, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23086082935333252, "rewards/margins": 0.3589969277381897, "rewards/rejected": -0.5898576974868774, "step": 3770 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.4831597805023193, "logits/rejected": -0.8599148988723755, "logps/chosen": -390.19244384765625, "logps/rejected": -815.3306884765625, "loss": 0.1172, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16593720018863678, "rewards/margins": 0.4113589823246002, "rewards/rejected": -0.5772961378097534, "step": 3780 }, { "epoch": 0.51, "learning_rate": 2.888212829590719e-06, "logits/chosen": -1.4598033428192139, "logits/rejected": -0.924828052520752, "logps/chosen": -414.89056396484375, "logps/rejected": -792.7503662109375, "loss": 0.1386, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1876627504825592, "rewards/margins": 0.38501837849617004, "rewards/rejected": -0.5726811289787292, "step": 3790 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.418028473854065, "logits/rejected": -0.7986099123954773, "logps/chosen": -550.6697998046875, "logps/rejected": -891.5902099609375, "loss": 0.153, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2176857888698578, "rewards/margins": 0.4177919328212738, "rewards/rejected": -0.6354777812957764, "step": 3800 }, { "epoch": 0.51, "learning_rate": 2.8652075714060296e-06, "logits/chosen": -1.2953003644943237, "logits/rejected": -0.7636483907699585, "logps/chosen": -543.0831909179688, "logps/rejected": -802.9742431640625, "loss": 0.1486, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3118182122707367, "rewards/margins": 0.3155859410762787, "rewards/rejected": -0.6274041533470154, "step": 3810 }, { "epoch": 0.51, "learning_rate": 2.8536929511919227e-06, "logits/chosen": -1.270843505859375, "logits/rejected": -0.6340192556381226, "logps/chosen": -525.5787963867188, "logps/rejected": -820.7928466796875, "loss": 0.1023, "rewards/accuracies": 0.875, "rewards/chosen": -0.26634496450424194, "rewards/margins": 0.3647981584072113, "rewards/rejected": -0.6311432123184204, "step": 3820 }, { "epoch": 0.51, "learning_rate": 2.842170669406993e-06, "logits/chosen": -1.2666388750076294, "logits/rejected": -0.5316375494003296, "logps/chosen": -518.71044921875, "logps/rejected": -888.7023315429688, "loss": 0.0905, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21681174635887146, "rewards/margins": 0.4333477020263672, "rewards/rejected": -0.6501595377922058, "step": 3830 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.432254433631897, "logits/rejected": -0.826429545879364, "logps/chosen": -476.503662109375, "logps/rejected": -847.1607666015625, "loss": 0.0783, "rewards/accuracies": 0.875, "rewards/chosen": -0.2242472618818283, "rewards/margins": 0.39870744943618774, "rewards/rejected": -0.6229546666145325, "step": 3840 }, { "epoch": 0.51, "learning_rate": 2.8191041196514874e-06, "logits/chosen": -1.3423511981964111, "logits/rejected": -0.8700442314147949, "logps/chosen": -497.36370849609375, "logps/rejected": -851.2994995117188, "loss": 0.1482, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26128625869750977, "rewards/margins": 0.33478981256484985, "rewards/rejected": -0.5960760712623596, "step": 3850 }, { "epoch": 0.51, "learning_rate": 2.807560351340302e-06, "logits/chosen": -1.5188156366348267, "logits/rejected": -0.9726465940475464, "logps/chosen": -368.9046325683594, "logps/rejected": -713.8798828125, "loss": 0.1383, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15176090598106384, "rewards/margins": 0.3572022020816803, "rewards/rejected": -0.5089630484580994, "step": 3860 }, { "epoch": 0.52, "learning_rate": 2.7960099207662535e-06, "logits/chosen": -1.2664258480072021, "logits/rejected": -0.7739121317863464, "logps/chosen": -359.26123046875, "logps/rejected": -743.7399291992188, "loss": 0.1523, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17123065888881683, "rewards/margins": 0.3718946576118469, "rewards/rejected": -0.5431252717971802, "step": 3870 }, { "epoch": 0.52, "learning_rate": 2.7844530781306544e-06, "logits/chosen": -1.3709384202957153, "logits/rejected": -0.8558367490768433, "logps/chosen": -428.074951171875, "logps/rejected": -773.4097290039062, "loss": 0.1323, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1847999542951584, "rewards/margins": 0.34443315863609314, "rewards/rejected": -0.5292330980300903, "step": 3880 }, { "epoch": 0.52, "learning_rate": 2.77289007377372e-06, "logits/chosen": -1.3629367351531982, "logits/rejected": -0.6934945583343506, "logps/chosen": -459.5267639160156, "logps/rejected": -874.0427856445312, "loss": 0.1017, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18202826380729675, "rewards/margins": 0.4448707103729248, "rewards/rejected": -0.626899003982544, "step": 3890 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.4932496547698975, "logits/rejected": -0.7716339826583862, "logps/chosen": -508.470703125, "logps/rejected": -864.6212158203125, "loss": 0.1102, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2065424621105194, "rewards/margins": 0.4489240050315857, "rewards/rejected": -0.6554665565490723, "step": 3900 }, { "epoch": 0.52, "learning_rate": 2.749746581918629e-06, "logits/chosen": -1.4111101627349854, "logits/rejected": -0.871496856212616, "logps/chosen": -451.28240966796875, "logps/rejected": -913.0838012695312, "loss": 0.0692, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16033974289894104, "rewards/margins": 0.4733968675136566, "rewards/rejected": -0.6337365508079529, "step": 3910 }, { "epoch": 0.52, "learning_rate": 2.738166595746554e-06, "logits/chosen": -1.5585377216339111, "logits/rejected": -0.8070343136787415, "logps/chosen": -532.346435546875, "logps/rejected": -916.8145751953125, "loss": 0.1155, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16915038228034973, "rewards/margins": 0.45431309938430786, "rewards/rejected": -0.62346351146698, "step": 3920 }, { "epoch": 0.52, "learning_rate": 2.726581450494451e-06, "logits/chosen": -1.497727632522583, "logits/rejected": -0.9493281245231628, "logps/chosen": -453.8994140625, "logps/rejected": -849.1500244140625, "loss": 0.1153, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1854790449142456, "rewards/margins": 0.4318001866340637, "rewards/rejected": -0.6172792315483093, "step": 3930 }, { "epoch": 0.53, "learning_rate": 2.7149913971156105e-06, "logits/chosen": -1.0641905069351196, "logits/rejected": -0.7125247716903687, "logps/chosen": -440.34698486328125, "logps/rejected": -811.2091674804688, "loss": 0.118, "rewards/accuracies": 0.875, "rewards/chosen": -0.19736629724502563, "rewards/margins": 0.39964836835861206, "rewards/rejected": -0.5970146059989929, "step": 3940 }, { "epoch": 0.53, "learning_rate": 2.703396686669646e-06, "logits/chosen": -1.396917700767517, "logits/rejected": -1.0328669548034668, "logps/chosen": -398.5068359375, "logps/rejected": -766.6187744140625, "loss": 0.1472, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1600363403558731, "rewards/margins": 0.3699353039264679, "rewards/rejected": -0.5299716591835022, "step": 3950 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.546156406402588, "logits/rejected": -0.8876574635505676, "logps/chosen": -481.607666015625, "logps/rejected": -790.7122192382812, "loss": 0.1302, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20209841430187225, "rewards/margins": 0.36280348896980286, "rewards/rejected": -0.5649019479751587, "step": 3960 }, { "epoch": 0.53, "learning_rate": 2.6801942993137435e-06, "logits/chosen": -1.3815232515335083, "logits/rejected": -0.9472028613090515, "logps/chosen": -370.59197998046875, "logps/rejected": -752.8310546875, "loss": 0.1158, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1438537836074829, "rewards/margins": 0.39828285574913025, "rewards/rejected": -0.542136549949646, "step": 3970 }, { "epoch": 0.53, "learning_rate": 2.668587125005663e-06, "logits/chosen": -1.4644068479537964, "logits/rejected": -0.8621865510940552, "logps/chosen": -443.9444885253906, "logps/rejected": -771.3038330078125, "loss": 0.138, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16472499072551727, "rewards/margins": 0.38489940762519836, "rewards/rejected": -0.5496243834495544, "step": 3980 }, { "epoch": 0.53, "learning_rate": 2.6569762988232838e-06, "logits/chosen": -1.2870821952819824, "logits/rejected": -0.9376013875007629, "logps/chosen": -398.32415771484375, "logps/rejected": -729.791748046875, "loss": 0.1378, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18426913022994995, "rewards/margins": 0.3460938334465027, "rewards/rejected": -0.5303629040718079, "step": 3990 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.534701943397522, "logits/rejected": -0.8690187335014343, "logps/chosen": -446.68365478515625, "logps/rejected": -857.0474853515625, "loss": 0.1392, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20550382137298584, "rewards/margins": 0.41372913122177124, "rewards/rejected": -0.6192329525947571, "step": 4000 }, { "epoch": 0.53, "learning_rate": 2.6337446969476234e-06, "logits/chosen": -1.697040319442749, "logits/rejected": -1.0818397998809814, "logps/chosen": -499.67626953125, "logps/rejected": -910.4700927734375, "loss": 0.1482, "rewards/accuracies": 0.875, "rewards/chosen": -0.2038416862487793, "rewards/margins": 0.41365185379981995, "rewards/rejected": -0.6174935698509216, "step": 4010 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.2337590456008911, "logits/rejected": -0.9159961938858032, "logps/chosen": -573.4525146484375, "logps/rejected": -832.6463012695312, "loss": 0.1838, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2931188642978668, "rewards/margins": 0.27465683221817017, "rewards/rejected": -0.5677756071090698, "step": 4020 }, { "epoch": 0.54, "learning_rate": 2.6105015066146266e-06, "logits/chosen": -1.4252393245697021, "logits/rejected": -0.9289156198501587, "logps/chosen": -476.4798889160156, "logps/rejected": -786.7811889648438, "loss": 0.1752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19843436777591705, "rewards/margins": 0.3141877055168152, "rewards/rejected": -0.512622058391571, "step": 4030 }, { "epoch": 0.54, "learning_rate": 2.5988761950959133e-06, "logits/chosen": -1.4496116638183594, "logits/rejected": -1.0391935110092163, "logps/chosen": -441.1336975097656, "logps/rejected": -721.164794921875, "loss": 0.179, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22740332782268524, "rewards/margins": 0.2868295907974243, "rewards/rejected": -0.5142329931259155, "step": 4040 }, { "epoch": 0.54, "learning_rate": 2.587248741756253e-06, "logits/chosen": -1.5186278820037842, "logits/rejected": -0.9992215037345886, "logps/chosen": -456.5079040527344, "logps/rejected": -799.9073486328125, "loss": 0.1293, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18669745326042175, "rewards/margins": 0.3633093535900116, "rewards/rejected": -0.5500068664550781, "step": 4050 }, { "epoch": 0.54, "learning_rate": 2.575619398465402e-06, "logits/chosen": -1.3673820495605469, "logits/rejected": -0.8725525140762329, "logps/chosen": -481.6690979003906, "logps/rejected": -868.7838134765625, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -0.17358894646167755, "rewards/margins": 0.4075016975402832, "rewards/rejected": -0.5810906291007996, "step": 4060 }, { "epoch": 0.54, "learning_rate": 2.563988417134056e-06, "logits/chosen": -1.3905086517333984, "logits/rejected": -0.7997684478759766, "logps/chosen": -513.9821166992188, "logps/rejected": -743.789794921875, "loss": 0.1757, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1909521520137787, "rewards/margins": 0.2950867712497711, "rewards/rejected": -0.4860389828681946, "step": 4070 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.5381324291229248, "logits/rejected": -1.2439024448394775, "logps/chosen": -407.6680603027344, "logps/rejected": -733.21337890625, "loss": 0.158, "rewards/accuracies": 0.875, "rewards/chosen": -0.16457518935203552, "rewards/margins": 0.3094017505645752, "rewards/rejected": -0.4739769399166107, "step": 4080 }, { "epoch": 0.55, "learning_rate": 2.5407225481646146e-06, "logits/chosen": -1.261339545249939, "logits/rejected": -0.8529649972915649, "logps/chosen": -363.42462158203125, "logps/rejected": -758.0584716796875, "loss": 0.1897, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16312359273433685, "rewards/margins": 0.36931270360946655, "rewards/rejected": -0.5324362516403198, "step": 4090 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.5052446126937866, "logits/rejected": -1.0409244298934937, "logps/chosen": -478.3084411621094, "logps/rejected": -739.9015502929688, "loss": 0.1518, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20306257903575897, "rewards/margins": 0.3137751519680023, "rewards/rejected": -0.5168377161026001, "step": 4100 }, { "epoch": 0.55, "learning_rate": 2.517453150744904e-06, "logits/chosen": -1.4215971231460571, "logits/rejected": -0.9293550252914429, "logps/chosen": -375.1071472167969, "logps/rejected": -751.5859375, "loss": 0.1263, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1632181853055954, "rewards/margins": 0.36968666315078735, "rewards/rejected": -0.5329049229621887, "step": 4110 }, { "epoch": 0.55, "learning_rate": 2.5058177589223766e-06, "logits/chosen": -1.3039724826812744, "logits/rejected": -0.7687618732452393, "logps/chosen": -522.455810546875, "logps/rejected": -841.9293212890625, "loss": 0.1155, "rewards/accuracies": 0.875, "rewards/chosen": -0.2402358055114746, "rewards/margins": 0.35761570930480957, "rewards/rejected": -0.597851574420929, "step": 4120 }, { "epoch": 0.55, "learning_rate": 2.4941822410776247e-06, "logits/chosen": -1.4730857610702515, "logits/rejected": -0.8496094942092896, "logps/chosen": -429.6819763183594, "logps/rejected": -800.4998779296875, "loss": 0.1311, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16971971094608307, "rewards/margins": 0.39771103858947754, "rewards/rejected": -0.5674307346343994, "step": 4130 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.4145101308822632, "logits/rejected": -0.8954922556877136, "logps/chosen": -546.86962890625, "logps/rejected": -891.2218627929688, "loss": 0.1549, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.229902982711792, "rewards/margins": 0.3890032172203064, "rewards/rejected": -0.6189061403274536, "step": 4140 }, { "epoch": 0.55, "learning_rate": 2.470911835496508e-06, "logits/chosen": -1.2700661420822144, "logits/rejected": -0.9738311767578125, "logps/chosen": -461.74658203125, "logps/rejected": -822.8218994140625, "loss": 0.1273, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1996997594833374, "rewards/margins": 0.3569287657737732, "rewards/rejected": -0.5566284656524658, "step": 4150 }, { "epoch": 0.55, "learning_rate": 2.4592774518353858e-06, "logits/chosen": -1.367222547531128, "logits/rejected": -0.9692174196243286, "logps/chosen": -436.0655822753906, "logps/rejected": -840.5070190429688, "loss": 0.1205, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21375672519207, "rewards/margins": 0.4220011234283447, "rewards/rejected": -0.6357578039169312, "step": 4160 }, { "epoch": 0.56, "learning_rate": 2.447643950291608e-06, "logits/chosen": -1.2921645641326904, "logits/rejected": -0.7501717805862427, "logps/chosen": -484.4534606933594, "logps/rejected": -823.6820068359375, "loss": 0.1049, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1951848715543747, "rewards/margins": 0.39403340220451355, "rewards/rejected": -0.589218258857727, "step": 4170 }, { "epoch": 0.56, "learning_rate": 2.436011582865945e-06, "logits/chosen": -1.4127392768859863, "logits/rejected": -0.9878486394882202, "logps/chosen": -420.36163330078125, "logps/rejected": -847.9583740234375, "loss": 0.1344, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1866758018732071, "rewards/margins": 0.38426584005355835, "rewards/rejected": -0.5709416270256042, "step": 4180 }, { "epoch": 0.56, "learning_rate": 2.4243806015345988e-06, "logits/chosen": -1.2964539527893066, "logits/rejected": -0.8136736750602722, "logps/chosen": -385.2383117675781, "logps/rejected": -769.1781005859375, "loss": 0.0946, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16683003306388855, "rewards/margins": 0.4348595142364502, "rewards/rejected": -0.6016895174980164, "step": 4190 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.497622013092041, "logits/rejected": -0.8333326578140259, "logps/chosen": -465.08026123046875, "logps/rejected": -788.7073974609375, "loss": 0.0977, "rewards/accuracies": 0.875, "rewards/chosen": -0.14578023552894592, "rewards/margins": 0.41077151894569397, "rewards/rejected": -0.5565518140792847, "step": 4200 }, { "epoch": 0.56, "learning_rate": 2.4011238049040875e-06, "logits/chosen": -1.2194325923919678, "logits/rejected": -0.9886236190795898, "logps/chosen": -443.2428283691406, "logps/rejected": -905.9352416992188, "loss": 0.1673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22071197628974915, "rewards/margins": 0.39461037516593933, "rewards/rejected": -0.6153223514556885, "step": 4210 }, { "epoch": 0.56, "learning_rate": 2.3894984933853734e-06, "logits/chosen": -1.3924311399459839, "logits/rejected": -0.9971855878829956, "logps/chosen": -450.23785400390625, "logps/rejected": -876.2208251953125, "loss": 0.0855, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19274960458278656, "rewards/margins": 0.4028767943382263, "rewards/rejected": -0.5956264138221741, "step": 4220 }, { "epoch": 0.56, "learning_rate": 2.377875575510967e-06, "logits/chosen": -1.389924168586731, "logits/rejected": -0.8687151074409485, "logps/chosen": -508.93048095703125, "logps/rejected": -852.8839721679688, "loss": 0.1456, "rewards/accuracies": 0.875, "rewards/chosen": -0.2281205952167511, "rewards/margins": 0.3864901065826416, "rewards/rejected": -0.6146107316017151, "step": 4230 }, { "epoch": 0.57, "learning_rate": 2.366255303052377e-06, "logits/chosen": -1.2372336387634277, "logits/rejected": -0.7575851678848267, "logps/chosen": -546.502197265625, "logps/rejected": -838.81640625, "loss": 0.191, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2502215504646301, "rewards/margins": 0.33994418382644653, "rewards/rejected": -0.5901657342910767, "step": 4240 }, { "epoch": 0.57, "learning_rate": 2.3546379277238107e-06, "logits/chosen": -1.2474725246429443, "logits/rejected": -0.8925794363021851, "logps/chosen": -535.6268310546875, "logps/rejected": -912.724609375, "loss": 0.1311, "rewards/accuracies": 0.875, "rewards/chosen": -0.2528509497642517, "rewards/margins": 0.38632652163505554, "rewards/rejected": -0.6391774415969849, "step": 4250 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.0119497776031494, "logits/rejected": -0.8262157440185547, "logps/chosen": -419.077392578125, "logps/rejected": -869.7803955078125, "loss": 0.1629, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2351589947938919, "rewards/margins": 0.36828702688217163, "rewards/rejected": -0.6034461259841919, "step": 4260 }, { "epoch": 0.57, "learning_rate": 2.3314128749943376e-06, "logits/chosen": -1.3086574077606201, "logits/rejected": -0.7333884239196777, "logps/chosen": -440.7137145996094, "logps/rejected": -882.5616455078125, "loss": 0.1, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16451126337051392, "rewards/margins": 0.46316853165626526, "rewards/rejected": -0.627679705619812, "step": 4270 }, { "epoch": 0.57, "learning_rate": 2.319805700686257e-06, "logits/chosen": -1.4417178630828857, "logits/rejected": -0.9919298887252808, "logps/chosen": -450.36883544921875, "logps/rejected": -760.4411010742188, "loss": 0.1301, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20442244410514832, "rewards/margins": 0.3279123604297638, "rewards/rejected": -0.5323348045349121, "step": 4280 }, { "epoch": 0.57, "learning_rate": 2.3082024296829538e-06, "logits/chosen": -1.46039879322052, "logits/rejected": -0.9283072352409363, "logps/chosen": -419.25714111328125, "logps/rejected": -750.0972290039062, "loss": 0.1237, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15519830584526062, "rewards/margins": 0.3568214178085327, "rewards/rejected": -0.5120197534561157, "step": 4290 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.3774569034576416, "logits/rejected": -0.7986065149307251, "logps/chosen": -502.2596740722656, "logps/rejected": -776.649658203125, "loss": 0.1709, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24051257967948914, "rewards/margins": 0.33478790521621704, "rewards/rejected": -0.5753005146980286, "step": 4300 }, { "epoch": 0.57, "learning_rate": 2.2850086028843894e-06, "logits/chosen": -1.1812444925308228, "logits/rejected": -0.9695785641670227, "logps/chosen": -589.576416015625, "logps/rejected": -1001.6077880859375, "loss": 0.1276, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2969650626182556, "rewards/margins": 0.3733198344707489, "rewards/rejected": -0.6702848672866821, "step": 4310 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.3204107284545898, "logits/rejected": -0.750912606716156, "logps/chosen": -478.04534912109375, "logps/rejected": -769.1867065429688, "loss": 0.1379, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18711444735527039, "rewards/margins": 0.33850574493408203, "rewards/rejected": -0.52562016248703, "step": 4320 }, { "epoch": 0.58, "learning_rate": 2.2618334042534464e-06, "logits/chosen": -1.3586585521697998, "logits/rejected": -0.8808444142341614, "logps/chosen": -494.4283142089844, "logps/rejected": -857.0852661132812, "loss": 0.117, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20293231308460236, "rewards/margins": 0.40129345655441284, "rewards/rejected": -0.6042258143424988, "step": 4330 }, { "epoch": 0.58, "learning_rate": 2.250253418081373e-06, "logits/chosen": -1.2095056772232056, "logits/rejected": -0.7229364514350891, "logps/chosen": -462.7508850097656, "logps/rejected": -860.0739135742188, "loss": 0.1189, "rewards/accuracies": 0.875, "rewards/chosen": -0.1977497637271881, "rewards/margins": 0.42359238862991333, "rewards/rejected": -0.6213420629501343, "step": 4340 }, { "epoch": 0.58, "learning_rate": 2.238678841830867e-06, "logits/chosen": -1.38365638256073, "logits/rejected": -0.6831182837486267, "logps/chosen": -512.0778198242188, "logps/rejected": -776.4503173828125, "loss": 0.1178, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20228242874145508, "rewards/margins": 0.35464924573898315, "rewards/rejected": -0.5569316744804382, "step": 4350 }, { "epoch": 0.58, "learning_rate": 2.22710992622628e-06, "logits/chosen": -1.2855836153030396, "logits/rejected": -0.954262912273407, "logps/chosen": -506.0787048339844, "logps/rejected": -827.5216064453125, "loss": 0.1135, "rewards/accuracies": 0.875, "rewards/chosen": -0.22598835825920105, "rewards/margins": 0.3631274998188019, "rewards/rejected": -0.5891157984733582, "step": 4360 }, { "epoch": 0.58, "learning_rate": 2.2155469218693464e-06, "logits/chosen": -1.3684406280517578, "logits/rejected": -0.729507327079773, "logps/chosen": -547.8463134765625, "logps/rejected": -814.87939453125, "loss": 0.1016, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20620174705982208, "rewards/margins": 0.38247913122177124, "rewards/rejected": -0.5886809229850769, "step": 4370 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.4247252941131592, "logits/rejected": -0.8577558398246765, "logps/chosen": -427.704833984375, "logps/rejected": -700.4971923828125, "loss": 0.1349, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20636749267578125, "rewards/margins": 0.31686440110206604, "rewards/rejected": -0.5232318639755249, "step": 4380 }, { "epoch": 0.59, "learning_rate": 2.192439648659699e-06, "logits/chosen": -1.223595380783081, "logits/rejected": -0.8186568021774292, "logps/chosen": -466.29913330078125, "logps/rejected": -889.1475830078125, "loss": 0.0745, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19520318508148193, "rewards/margins": 0.4740225374698639, "rewards/rejected": -0.6692256331443787, "step": 4390 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.2180200815200806, "logits/rejected": -0.7487155199050903, "logps/chosen": -400.9490966796875, "logps/rejected": -729.8300170898438, "loss": 0.1399, "rewards/accuracies": 0.75, "rewards/chosen": -0.17819716036319733, "rewards/margins": 0.3473852276802063, "rewards/rejected": -0.5255824327468872, "step": 4400 }, { "epoch": 0.59, "learning_rate": 2.1693590243571937e-06, "logits/chosen": -1.048194169998169, "logits/rejected": -0.563649594783783, "logps/chosen": -494.73480224609375, "logps/rejected": -786.9961547851562, "loss": 0.23, "rewards/accuracies": 0.75, "rewards/chosen": -0.2585517466068268, "rewards/margins": 0.3485848009586334, "rewards/rejected": -0.6071365475654602, "step": 4410 }, { "epoch": 0.59, "learning_rate": 2.157829330593008e-06, "logits/chosen": -1.3244564533233643, "logits/rejected": -0.7698614597320557, "logps/chosen": -516.279541015625, "logps/rejected": -893.1917114257812, "loss": 0.1343, "rewards/accuracies": 0.875, "rewards/chosen": -0.2608487904071808, "rewards/margins": 0.39661940932273865, "rewards/rejected": -0.6574681997299194, "step": 4420 }, { "epoch": 0.59, "learning_rate": 2.1463070488080777e-06, "logits/chosen": -1.1845453977584839, "logits/rejected": -0.6892284154891968, "logps/chosen": -483.90234375, "logps/rejected": -813.65380859375, "loss": 0.1597, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22065512835979462, "rewards/margins": 0.3821337819099426, "rewards/rejected": -0.6027888655662537, "step": 4430 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.0602244138717651, "logits/rejected": -0.5278804898262024, "logps/chosen": -568.511474609375, "logps/rejected": -895.26171875, "loss": 0.1174, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.28543621301651, "rewards/margins": 0.3837021291255951, "rewards/rejected": -0.6691383719444275, "step": 4440 }, { "epoch": 0.59, "learning_rate": 2.1232857193762923e-06, "logits/chosen": -1.5215568542480469, "logits/rejected": -0.8021551966667175, "logps/chosen": -510.27978515625, "logps/rejected": -886.8944091796875, "loss": 0.127, "rewards/accuracies": 0.875, "rewards/chosen": -0.2042963057756424, "rewards/margins": 0.4145194888114929, "rewards/rejected": -0.6188157796859741, "step": 4450 }, { "epoch": 0.59, "learning_rate": 2.1117871704092818e-06, "logits/chosen": -1.3588248491287231, "logits/rejected": -0.8943771123886108, "logps/chosen": -516.7095947265625, "logps/rejected": -807.9676513671875, "loss": 0.1561, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2409473955631256, "rewards/margins": 0.33728843927383423, "rewards/rejected": -0.5782358050346375, "step": 4460 }, { "epoch": 0.6, "learning_rate": 2.1002970307704134e-06, "logits/chosen": -1.5449442863464355, "logits/rejected": -0.8462098240852356, "logps/chosen": -463.8545837402344, "logps/rejected": -761.9031982421875, "loss": 0.1171, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17875051498413086, "rewards/margins": 0.39860549569129944, "rewards/rejected": -0.5773560404777527, "step": 4470 }, { "epoch": 0.6, "learning_rate": 2.0888155493550027e-06, "logits/chosen": -1.2901697158813477, "logits/rejected": -0.7369778156280518, "logps/chosen": -500.56597900390625, "logps/rejected": -905.2454223632812, "loss": 0.1076, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2221985161304474, "rewards/margins": 0.390224426984787, "rewards/rejected": -0.6124228835105896, "step": 4480 }, { "epoch": 0.6, "learning_rate": 2.0773429748708153e-06, "logits/chosen": -1.1163994073867798, "logits/rejected": -0.7667258977890015, "logps/chosen": -470.02362060546875, "logps/rejected": -905.4534912109375, "loss": 0.1087, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21903569996356964, "rewards/margins": 0.4150877892971039, "rewards/rejected": -0.6341235637664795, "step": 4490 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.0330345630645752, "logits/rejected": -0.8973774909973145, "logps/chosen": -397.1127624511719, "logps/rejected": -838.93994140625, "loss": 0.1465, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24607495963573456, "rewards/margins": 0.3894239664077759, "rewards/rejected": -0.6354988813400269, "step": 4500 }, { "epoch": 0.6, "learning_rate": 2.0544255405570843e-06, "logits/chosen": -1.3431646823883057, "logits/rejected": -0.8504700660705566, "logps/chosen": -544.9601440429688, "logps/rejected": -852.1673583984375, "loss": 0.1146, "rewards/accuracies": 0.875, "rewards/chosen": -0.2619132399559021, "rewards/margins": 0.3727183938026428, "rewards/rejected": -0.6346316337585449, "step": 4510 }, { "epoch": 0.6, "learning_rate": 2.0429811771568468e-06, "logits/chosen": -1.120411992073059, "logits/rejected": -0.5610482096672058, "logps/chosen": -440.0519104003906, "logps/rejected": -734.9476318359375, "loss": 0.1452, "rewards/accuracies": 0.875, "rewards/chosen": -0.2120869904756546, "rewards/margins": 0.338589608669281, "rewards/rejected": -0.5506765246391296, "step": 4520 }, { "epoch": 0.6, "learning_rate": 2.031546713535688e-06, "logits/chosen": -1.2549145221710205, "logits/rejected": -0.6215404272079468, "logps/chosen": -570.1345825195312, "logps/rejected": -889.5558471679688, "loss": 0.1513, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2643107771873474, "rewards/margins": 0.3893239498138428, "rewards/rejected": -0.6536347270011902, "step": 4530 }, { "epoch": 0.61, "learning_rate": 2.0201223973828917e-06, "logits/chosen": -1.3017076253890991, "logits/rejected": -0.9707794189453125, "logps/chosen": -416.0333557128906, "logps/rejected": -801.4716796875, "loss": 0.1628, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20128902792930603, "rewards/margins": 0.36991968750953674, "rewards/rejected": -0.571208655834198, "step": 4540 }, { "epoch": 0.61, "learning_rate": 2.0087084761679245e-06, "logits/chosen": -1.2727489471435547, "logits/rejected": -0.5715342164039612, "logps/chosen": -480.99822998046875, "logps/rejected": -861.1604614257812, "loss": 0.1005, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19247964024543762, "rewards/margins": 0.43824878334999084, "rewards/rejected": -0.6307284235954285, "step": 4550 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.3220107555389404, "logits/rejected": -0.6351078748703003, "logps/chosen": -524.84716796875, "logps/rejected": -857.89404296875, "loss": 0.1023, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23643498122692108, "rewards/margins": 0.38632455468177795, "rewards/rejected": -0.6227595210075378, "step": 4560 }, { "epoch": 0.61, "learning_rate": 1.985912807298154e-06, "logits/chosen": -1.4880512952804565, "logits/rejected": -0.8141192197799683, "logps/chosen": -608.7986450195312, "logps/rejected": -985.9041748046875, "loss": 0.1401, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2824017107486725, "rewards/margins": 0.3986584544181824, "rewards/rejected": -0.6810601353645325, "step": 4570 }, { "epoch": 0.61, "learning_rate": 1.9745315534350157e-06, "logits/chosen": -1.3150464296340942, "logits/rejected": -0.8679197430610657, "logps/chosen": -486.95196533203125, "logps/rejected": -788.3603515625, "loss": 0.1861, "rewards/accuracies": 0.75, "rewards/chosen": -0.2451094686985016, "rewards/margins": 0.332103431224823, "rewards/rejected": -0.577212929725647, "step": 4580 }, { "epoch": 0.61, "learning_rate": 1.963161682082342e-06, "logits/chosen": -1.1732512712478638, "logits/rejected": -0.8104772567749023, "logps/chosen": -465.8495178222656, "logps/rejected": -956.5872192382812, "loss": 0.1354, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2538664937019348, "rewards/margins": 0.42716091871261597, "rewards/rejected": -0.6810274720191956, "step": 4590 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.0655146837234497, "logits/rejected": -0.7662220597267151, "logps/chosen": -423.4452209472656, "logps/rejected": -801.560546875, "loss": 0.1413, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19744530320167542, "rewards/margins": 0.366693377494812, "rewards/rejected": -0.5641387104988098, "step": 4600 }, { "epoch": 0.61, "learning_rate": 1.940457071816922e-06, "logits/chosen": -1.2536901235580444, "logits/rejected": -0.6289219856262207, "logps/chosen": -515.1942138671875, "logps/rejected": -915.4596557617188, "loss": 0.064, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19910120964050293, "rewards/margins": 0.49133628606796265, "rewards/rejected": -0.6904375553131104, "step": 4610 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.217125654220581, "logits/rejected": -0.5908175110816956, "logps/chosen": -514.2653198242188, "logps/rejected": -868.1808471679688, "loss": 0.1088, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20080474019050598, "rewards/margins": 0.41219455003738403, "rewards/rejected": -0.6129993200302124, "step": 4620 }, { "epoch": 0.62, "learning_rate": 1.9178009437679855e-06, "logits/chosen": -1.2478214502334595, "logits/rejected": -0.9087751507759094, "logps/chosen": -518.7520141601562, "logps/rejected": -933.6763916015625, "loss": 0.135, "rewards/accuracies": 0.875, "rewards/chosen": -0.22716772556304932, "rewards/margins": 0.3981955647468567, "rewards/rejected": -0.6253632307052612, "step": 4630 }, { "epoch": 0.62, "learning_rate": 1.9064916742013515e-06, "logits/chosen": -1.1873037815093994, "logits/rejected": -0.5726872086524963, "logps/chosen": -490.1280212402344, "logps/rejected": -839.7019653320312, "loss": 0.123, "rewards/accuracies": 0.875, "rewards/chosen": -0.21053406596183777, "rewards/margins": 0.3976525366306305, "rewards/rejected": -0.6081866025924683, "step": 4640 }, { "epoch": 0.62, "learning_rate": 1.895195261000831e-06, "logits/chosen": -1.1191716194152832, "logits/rejected": -1.1251859664916992, "logps/chosen": -393.98504638671875, "logps/rejected": -694.9435424804688, "loss": 0.1647, "rewards/accuracies": 0.75, "rewards/chosen": -0.23374716937541962, "rewards/margins": 0.2719922959804535, "rewards/rejected": -0.5057394504547119, "step": 4650 }, { "epoch": 0.62, "learning_rate": 1.883911948865306e-06, "logits/chosen": -1.1896827220916748, "logits/rejected": -0.735011100769043, "logps/chosen": -502.1512756347656, "logps/rejected": -754.8998413085938, "loss": 0.1682, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21102912724018097, "rewards/margins": 0.3427314758300781, "rewards/rejected": -0.5537605881690979, "step": 4660 }, { "epoch": 0.62, "learning_rate": 1.872641982209868e-06, "logits/chosen": -1.2366998195648193, "logits/rejected": -0.6492923498153687, "logps/chosen": -550.7481689453125, "logps/rejected": -848.9842529296875, "loss": 0.1283, "rewards/accuracies": 0.875, "rewards/chosen": -0.24898889660835266, "rewards/margins": 0.3708731234073639, "rewards/rejected": -0.6198620200157166, "step": 4670 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.2035579681396484, "logits/rejected": -0.6728922128677368, "logps/chosen": -477.9755859375, "logps/rejected": -786.3551025390625, "loss": 0.1105, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19650466740131378, "rewards/margins": 0.3582767844200134, "rewards/rejected": -0.554781436920166, "step": 4680 }, { "epoch": 0.63, "learning_rate": 1.850143061548907e-06, "logits/chosen": -1.2262418270111084, "logits/rejected": -0.9310606122016907, "logps/chosen": -471.4136657714844, "logps/rejected": -890.5929565429688, "loss": 0.1287, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2113111913204193, "rewards/margins": 0.4012996554374695, "rewards/rejected": -0.612610936164856, "step": 4690 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.2006455659866333, "logits/rejected": -0.9293171167373657, "logps/chosen": -377.11248779296875, "logps/rejected": -778.6986083984375, "loss": 0.1215, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16506408154964447, "rewards/margins": 0.37131091952323914, "rewards/rejected": -0.5363749861717224, "step": 4700 }, { "epoch": 0.63, "learning_rate": 1.827700448461836e-06, "logits/chosen": -1.4228752851486206, "logits/rejected": -1.103596568107605, "logps/chosen": -459.54412841796875, "logps/rejected": -877.4918823242188, "loss": 0.1699, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2065921276807785, "rewards/margins": 0.36977314949035645, "rewards/rejected": -0.5763653516769409, "step": 4710 }, { "epoch": 0.63, "learning_rate": 1.816500865130279e-06, "logits/chosen": -1.1741989850997925, "logits/rejected": -0.665132999420166, "logps/chosen": -459.66094970703125, "logps/rejected": -793.422607421875, "loss": 0.125, "rewards/accuracies": 0.875, "rewards/chosen": -0.19809575378894806, "rewards/margins": 0.3923488259315491, "rewards/rejected": -0.5904445648193359, "step": 4720 }, { "epoch": 0.63, "learning_rate": 1.8053160875137137e-06, "logits/chosen": -1.3960039615631104, "logits/rejected": -0.9243966341018677, "logps/chosen": -489.2864685058594, "logps/rejected": -757.47998046875, "loss": 0.206, "rewards/accuracies": 0.75, "rewards/chosen": -0.23237042129039764, "rewards/margins": 0.3222199082374573, "rewards/rejected": -0.5545903444290161, "step": 4730 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.2899222373962402, "logits/rejected": -0.9475260972976685, "logps/chosen": -479.715087890625, "logps/rejected": -835.7548828125, "loss": 0.1082, "rewards/accuracies": 0.875, "rewards/chosen": -0.22872710227966309, "rewards/margins": 0.37602663040161133, "rewards/rejected": -0.6047536730766296, "step": 4740 }, { "epoch": 0.63, "learning_rate": 1.7829919182222752e-06, "logits/chosen": -1.4998255968093872, "logits/rejected": -1.0836795568466187, "logps/chosen": -425.07470703125, "logps/rejected": -870.9599609375, "loss": 0.0733, "rewards/accuracies": 0.875, "rewards/chosen": -0.16609852015972137, "rewards/margins": 0.43407559394836426, "rewards/rejected": -0.6001741290092468, "step": 4750 }, { "epoch": 0.63, "learning_rate": 1.7718530101256115e-06, "logits/chosen": -1.3878366947174072, "logits/rejected": -0.692621111869812, "logps/chosen": -577.058349609375, "logps/rejected": -932.189453125, "loss": 0.126, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23522964119911194, "rewards/margins": 0.3958871364593506, "rewards/rejected": -0.6311167478561401, "step": 4760 }, { "epoch": 0.64, "learning_rate": 1.7607298748898844e-06, "logits/chosen": -1.5390503406524658, "logits/rejected": -0.9261069297790527, "logps/chosen": -451.36297607421875, "logps/rejected": -787.5709228515625, "loss": 0.1081, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16683024168014526, "rewards/margins": 0.412998765707016, "rewards/rejected": -0.5798289775848389, "step": 4770 }, { "epoch": 0.64, "learning_rate": 1.7496227534604859e-06, "logits/chosen": -1.3134208917617798, "logits/rejected": -0.7826110124588013, "logps/chosen": -433.45196533203125, "logps/rejected": -854.2193603515625, "loss": 0.1041, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1616908758878708, "rewards/margins": 0.4370441436767578, "rewards/rejected": -0.5987350344657898, "step": 4780 }, { "epoch": 0.64, "learning_rate": 1.7385318864359304e-06, "logits/chosen": -1.5158641338348389, "logits/rejected": -0.883456826210022, "logps/chosen": -431.32720947265625, "logps/rejected": -782.7167358398438, "loss": 0.1139, "rewards/accuracies": 0.875, "rewards/chosen": -0.14380015432834625, "rewards/margins": 0.3827371895313263, "rewards/rejected": -0.526537299156189, "step": 4790 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.2721059322357178, "logits/rejected": -1.0025657415390015, "logps/chosen": -487.8551330566406, "logps/rejected": -818.991943359375, "loss": 0.1359, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19590890407562256, "rewards/margins": 0.3537582457065582, "rewards/rejected": -0.5496671795845032, "step": 4800 }, { "epoch": 0.64, "learning_rate": 1.7163998762297013e-06, "logits/chosen": -1.4880658388137817, "logits/rejected": -1.024325966835022, "logps/chosen": -494.30218505859375, "logps/rejected": -823.3046875, "loss": 0.111, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24541957676410675, "rewards/margins": 0.3485862910747528, "rewards/rejected": -0.5940058827400208, "step": 4810 }, { "epoch": 0.64, "learning_rate": 1.7053592124637557e-06, "logits/chosen": -1.3536102771759033, "logits/rejected": -0.9674237370491028, "logps/chosen": -489.28485107421875, "logps/rejected": -835.2489013671875, "loss": 0.1225, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20638641715049744, "rewards/margins": 0.3501652777194977, "rewards/rejected": -0.5565517544746399, "step": 4820 }, { "epoch": 0.64, "learning_rate": 1.6943357619237227e-06, "logits/chosen": -1.513027310371399, "logits/rejected": -0.980889618396759, "logps/chosen": -482.6014709472656, "logps/rejected": -868.0419921875, "loss": 0.1163, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1919030249118805, "rewards/margins": 0.41016140580177307, "rewards/rejected": -0.6020644307136536, "step": 4830 }, { "epoch": 0.65, "learning_rate": 1.6833297633956647e-06, "logits/chosen": -1.4574648141860962, "logits/rejected": -0.8266223073005676, "logps/chosen": -510.8272399902344, "logps/rejected": -938.8359375, "loss": 0.1105, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22904086112976074, "rewards/margins": 0.41577833890914917, "rewards/rejected": -0.6448192596435547, "step": 4840 }, { "epoch": 0.65, "learning_rate": 1.6723414552876052e-06, "logits/chosen": -1.4703316688537598, "logits/rejected": -1.021376371383667, "logps/chosen": -444.19305419921875, "logps/rejected": -775.9967041015625, "loss": 0.1324, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18805596232414246, "rewards/margins": 0.34675851464271545, "rewards/rejected": -0.5348144769668579, "step": 4850 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.4893542528152466, "logits/rejected": -0.9865673780441284, "logps/chosen": -449.78662109375, "logps/rejected": -830.7879638671875, "loss": 0.1196, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19270475208759308, "rewards/margins": 0.38258641958236694, "rewards/rejected": -0.575291097164154, "step": 4860 }, { "epoch": 0.65, "learning_rate": 1.6504188620423977e-06, "logits/chosen": -1.3864202499389648, "logits/rejected": -0.7918799519538879, "logps/chosen": -403.9468078613281, "logps/rejected": -640.9031982421875, "loss": 0.1656, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14833372831344604, "rewards/margins": 0.30061405897140503, "rewards/rejected": -0.44894781708717346, "step": 4870 }, { "epoch": 0.65, "learning_rate": 1.6394850517846621e-06, "logits/chosen": -1.5351966619491577, "logits/rejected": -0.7196094393730164, "logps/chosen": -473.9374084472656, "logps/rejected": -769.9342041015625, "loss": 0.0773, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18467064201831818, "rewards/margins": 0.4058653712272644, "rewards/rejected": -0.5905359983444214, "step": 4880 }, { "epoch": 0.65, "learning_rate": 1.6285698816954626e-06, "logits/chosen": -1.4006057977676392, "logits/rejected": -0.8048819303512573, "logps/chosen": -499.81365966796875, "logps/rejected": -856.2640380859375, "loss": 0.0872, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20508036017417908, "rewards/margins": 0.41568273305892944, "rewards/rejected": -0.6207630038261414, "step": 4890 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.6421102285385132, "logits/rejected": -1.0298190116882324, "logps/chosen": -520.8504028320312, "logps/rejected": -850.0789184570312, "loss": 0.1412, "rewards/accuracies": 0.875, "rewards/chosen": -0.2191186249256134, "rewards/margins": 0.3911200165748596, "rewards/rejected": -0.6102386713027954, "step": 4900 }, { "epoch": 0.65, "learning_rate": 1.6067964073758901e-06, "logits/chosen": -1.4560716152191162, "logits/rejected": -0.935789942741394, "logps/chosen": -456.36334228515625, "logps/rejected": -800.3477783203125, "loss": 0.1096, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21652796864509583, "rewards/margins": 0.3556264340877533, "rewards/rejected": -0.5721544027328491, "step": 4910 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.4257352352142334, "logits/rejected": -0.9093335270881653, "logps/chosen": -542.5935668945312, "logps/rejected": -819.0216674804688, "loss": 0.1967, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2606956362724304, "rewards/margins": 0.33398741483688354, "rewards/rejected": -0.594683051109314, "step": 4920 }, { "epoch": 0.66, "learning_rate": 1.5851003256704697e-06, "logits/chosen": -1.327136754989624, "logits/rejected": -1.1365773677825928, "logps/chosen": -542.2097778320312, "logps/rejected": -977.5718994140625, "loss": 0.0953, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2794331908226013, "rewards/margins": 0.3725283145904541, "rewards/rejected": -0.6519615054130554, "step": 4930 }, { "epoch": 0.66, "learning_rate": 1.5742818947772875e-06, "logits/chosen": -1.4624805450439453, "logits/rejected": -0.7566857933998108, "logps/chosen": -510.5843200683594, "logps/rejected": -880.57861328125, "loss": 0.1229, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20490141212940216, "rewards/margins": 0.42348456382751465, "rewards/rejected": -0.6283859610557556, "step": 4940 }, { "epoch": 0.66, "learning_rate": 1.56348351646022e-06, "logits/chosen": -1.6112909317016602, "logits/rejected": -0.8507854342460632, "logps/chosen": -470.6692810058594, "logps/rejected": -842.4249267578125, "loss": 0.1195, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19470658898353577, "rewards/margins": 0.410107284784317, "rewards/rejected": -0.6048139333724976, "step": 4950 }, { "epoch": 0.66, "learning_rate": 1.552705424629898e-06, "logits/chosen": -1.4331148862838745, "logits/rejected": -0.9108486175537109, "logps/chosen": -457.1399841308594, "logps/rejected": -811.8322143554688, "loss": 0.119, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20481880009174347, "rewards/margins": 0.3736223578453064, "rewards/rejected": -0.5784412026405334, "step": 4960 }, { "epoch": 0.66, "learning_rate": 1.5419478527575068e-06, "logits/chosen": -1.3583507537841797, "logits/rejected": -0.9901837110519409, "logps/chosen": -456.38592529296875, "logps/rejected": -837.0637817382812, "loss": 0.0921, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23552127182483673, "rewards/margins": 0.38225287199020386, "rewards/rejected": -0.6177741289138794, "step": 4970 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.4094290733337402, "logits/rejected": -0.9213676452636719, "logps/chosen": -499.94049072265625, "logps/rejected": -883.0730590820312, "loss": 0.0928, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20382937788963318, "rewards/margins": 0.43424925208091736, "rewards/rejected": -0.6380786299705505, "step": 4980 }, { "epoch": 0.67, "learning_rate": 1.520495200543754e-06, "logits/chosen": -1.2632607221603394, "logits/rejected": -1.0204585790634155, "logps/chosen": -477.03753662109375, "logps/rejected": -916.0748291015625, "loss": 0.1045, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21876640617847443, "rewards/margins": 0.3910064995288849, "rewards/rejected": -0.6097728610038757, "step": 4990 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.5550910234451294, "logits/rejected": -0.9333616495132446, "logps/chosen": -517.1362915039062, "logps/rejected": -892.5686645507812, "loss": 0.126, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22209139168262482, "rewards/margins": 0.4143398404121399, "rewards/rejected": -0.6364312171936035, "step": 5000 }, { "epoch": 0.67, "learning_rate": 1.4991274186077632e-06, "logits/chosen": -1.3997657299041748, "logits/rejected": -0.8548883199691772, "logps/chosen": -460.355224609375, "logps/rejected": -829.3341674804688, "loss": 0.0926, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19663289189338684, "rewards/margins": 0.3977494239807129, "rewards/rejected": -0.5943823456764221, "step": 5010 }, { "epoch": 0.67, "learning_rate": 1.4884759328590476e-06, "logits/chosen": -1.4501681327819824, "logits/rejected": -1.0469526052474976, "logps/chosen": -440.510986328125, "logps/rejected": -780.1736450195312, "loss": 0.2179, "rewards/accuracies": 0.75, "rewards/chosen": -0.19426096975803375, "rewards/margins": 0.3323975205421448, "rewards/rejected": -0.5266584753990173, "step": 5020 }, { "epoch": 0.67, "learning_rate": 1.4778463583846553e-06, "logits/chosen": -1.473638892173767, "logits/rejected": -0.9495469331741333, "logps/chosen": -526.7481689453125, "logps/rejected": -855.7936401367188, "loss": 0.1462, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2372969686985016, "rewards/margins": 0.3921995759010315, "rewards/rejected": -0.6294964551925659, "step": 5030 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.5571715831756592, "logits/rejected": -0.7043315172195435, "logps/chosen": -527.9861450195312, "logps/rejected": -847.37353515625, "loss": 0.1089, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22108444571495056, "rewards/margins": 0.369198203086853, "rewards/rejected": -0.5902826189994812, "step": 5040 }, { "epoch": 0.67, "learning_rate": 1.4566538637954556e-06, "logits/chosen": -1.6278702020645142, "logits/rejected": -1.074779748916626, "logps/chosen": -430.89447021484375, "logps/rejected": -711.3259887695312, "loss": 0.1342, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16959848999977112, "rewards/margins": 0.35060760378837585, "rewards/rejected": -0.520206093788147, "step": 5050 }, { "epoch": 0.67, "learning_rate": 1.446091402744923e-06, "logits/chosen": -1.4020555019378662, "logits/rejected": -0.918738067150116, "logps/chosen": -445.22186279296875, "logps/rejected": -847.0481567382812, "loss": 0.1695, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20678460597991943, "rewards/margins": 0.39482367038726807, "rewards/rejected": -0.6016082167625427, "step": 5060 }, { "epoch": 0.68, "learning_rate": 1.4355517710873184e-06, "logits/chosen": -1.3787460327148438, "logits/rejected": -1.0521621704101562, "logps/chosen": -427.92388916015625, "logps/rejected": -887.6427612304688, "loss": 0.05, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2269415408372879, "rewards/margins": 0.4265185296535492, "rewards/rejected": -0.6534601449966431, "step": 5070 }, { "epoch": 0.68, "learning_rate": 1.4250351971283937e-06, "logits/chosen": -1.522359848022461, "logits/rejected": -1.0820255279541016, "logps/chosen": -524.0896606445312, "logps/rejected": -844.29052734375, "loss": 0.1148, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.27214130759239197, "rewards/margins": 0.37310561537742615, "rewards/rejected": -0.6452468633651733, "step": 5080 }, { "epoch": 0.68, "learning_rate": 1.41454190867443e-06, "logits/chosen": -1.4404693841934204, "logits/rejected": -1.0126572847366333, "logps/chosen": -495.394775390625, "logps/rejected": -934.8951416015625, "loss": 0.0975, "rewards/accuracies": 0.875, "rewards/chosen": -0.21723541617393494, "rewards/margins": 0.41770267486572266, "rewards/rejected": -0.63493812084198, "step": 5090 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.4913583993911743, "logits/rejected": -0.9799544215202332, "logps/chosen": -535.7487182617188, "logps/rejected": -877.2818603515625, "loss": 0.0793, "rewards/accuracies": 0.875, "rewards/chosen": -0.24695873260498047, "rewards/margins": 0.3976723849773407, "rewards/rejected": -0.6446312069892883, "step": 5100 }, { "epoch": 0.68, "learning_rate": 1.3936260969795778e-06, "logits/chosen": -1.2997585535049438, "logits/rejected": -0.9756921529769897, "logps/chosen": -443.191162109375, "logps/rejected": -821.5947265625, "loss": 0.1377, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21109957993030548, "rewards/margins": 0.3828147053718567, "rewards/rejected": -0.593914270401001, "step": 5110 }, { "epoch": 0.68, "learning_rate": 1.3832040268095589e-06, "logits/chosen": -1.3984973430633545, "logits/rejected": -0.8584139943122864, "logps/chosen": -571.9281005859375, "logps/rejected": -915.3826904296875, "loss": 0.0942, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2879997491836548, "rewards/margins": 0.3574121594429016, "rewards/rejected": -0.6454118490219116, "step": 5120 }, { "epoch": 0.68, "learning_rate": 1.3728061482764238e-06, "logits/chosen": -1.660528540611267, "logits/rejected": -1.1849465370178223, "logps/chosen": -456.5572814941406, "logps/rejected": -729.951904296875, "loss": 0.1009, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19059523940086365, "rewards/margins": 0.32504329085350037, "rewards/rejected": -0.515638530254364, "step": 5130 }, { "epoch": 0.69, "learning_rate": 1.362432686615316e-06, "logits/chosen": -1.5981024503707886, "logits/rejected": -1.0829527378082275, "logps/chosen": -470.1607360839844, "logps/rejected": -828.2596435546875, "loss": 0.1321, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21122093498706818, "rewards/margins": 0.40773335099220276, "rewards/rejected": -0.6189543008804321, "step": 5140 }, { "epoch": 0.69, "learning_rate": 1.3520838665324704e-06, "logits/chosen": -1.6276485919952393, "logits/rejected": -1.209346055984497, "logps/chosen": -420.7687072753906, "logps/rejected": -750.0629272460938, "loss": 0.1565, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1963231861591339, "rewards/margins": 0.3372722864151001, "rewards/rejected": -0.5335954427719116, "step": 5150 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.6005207300186157, "logits/rejected": -1.080200433731079, "logps/chosen": -449.393310546875, "logps/rejected": -822.0933837890625, "loss": 0.1065, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1929277628660202, "rewards/margins": 0.4021962583065033, "rewards/rejected": -0.5951240658760071, "step": 5160 }, { "epoch": 0.69, "learning_rate": 1.3314610472527645e-06, "logits/chosen": -1.6019260883331299, "logits/rejected": -0.8302914500236511, "logps/chosen": -595.9198608398438, "logps/rejected": -919.8703002929688, "loss": 0.138, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.25709301233291626, "rewards/margins": 0.39729732275009155, "rewards/rejected": -0.654390275478363, "step": 5170 }, { "epoch": 0.69, "learning_rate": 1.3211874947800747e-06, "logits/chosen": -1.7015514373779297, "logits/rejected": -1.154908537864685, "logps/chosen": -480.40728759765625, "logps/rejected": -860.6213989257812, "loss": 0.1151, "rewards/accuracies": 0.875, "rewards/chosen": -0.18247373402118683, "rewards/margins": 0.40979132056236267, "rewards/rejected": -0.5922650694847107, "step": 5180 }, { "epoch": 0.69, "learning_rate": 1.3109394773243117e-06, "logits/chosen": -1.5750248432159424, "logits/rejected": -0.8017935752868652, "logps/chosen": -551.1357421875, "logps/rejected": -933.3966674804688, "loss": 0.1147, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23447374999523163, "rewards/margins": 0.4425485134124756, "rewards/rejected": -0.6770222187042236, "step": 5190 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.486428141593933, "logits/rejected": -1.04713773727417, "logps/chosen": -513.3190307617188, "logps/rejected": -883.0814208984375, "loss": 0.104, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2615116238594055, "rewards/margins": 0.36889469623565674, "rewards/rejected": -0.6304062604904175, "step": 5200 }, { "epoch": 0.69, "learning_rate": 1.2905209348612596e-06, "logits/chosen": -1.4805870056152344, "logits/rejected": -0.8896188735961914, "logps/chosen": -512.0989990234375, "logps/rejected": -872.7048950195312, "loss": 0.1018, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17740336060523987, "rewards/margins": 0.42626601457595825, "rewards/rejected": -0.6036693453788757, "step": 5210 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.410609245300293, "logits/rejected": -1.0304107666015625, "logps/chosen": -523.1416625976562, "logps/rejected": -867.2039184570312, "loss": 0.1127, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1862618625164032, "rewards/margins": 0.40114229917526245, "rewards/rejected": -0.587404191493988, "step": 5220 }, { "epoch": 0.7, "learning_rate": 1.2702071890508235e-06, "logits/chosen": -1.3426989316940308, "logits/rejected": -1.0067704916000366, "logps/chosen": -518.0101318359375, "logps/rejected": -770.6543579101562, "loss": 0.1914, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23667840659618378, "rewards/margins": 0.2956293523311615, "rewards/rejected": -0.5323077440261841, "step": 5230 }, { "epoch": 0.7, "learning_rate": 1.260090165282645e-06, "logits/chosen": -1.3418034315109253, "logits/rejected": -0.6315348148345947, "logps/chosen": -537.9312744140625, "logps/rejected": -858.8912353515625, "loss": 0.0913, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26209238171577454, "rewards/margins": 0.3787650465965271, "rewards/rejected": -0.6408575177192688, "step": 5240 }, { "epoch": 0.7, "learning_rate": 1.2500000000000007e-06, "logits/chosen": -1.4278695583343506, "logits/rejected": -0.9040371775627136, "logps/chosen": -548.1932373046875, "logps/rejected": -871.0941162109375, "loss": 0.1098, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26213568449020386, "rewards/margins": 0.40073925256729126, "rewards/rejected": -0.6628749370574951, "step": 5250 }, { "epoch": 0.7, "learning_rate": 1.2399369117724582e-06, "logits/chosen": -1.5840961933135986, "logits/rejected": -0.7718688249588013, "logps/chosen": -483.53863525390625, "logps/rejected": -894.3240356445312, "loss": 0.0678, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18216930329799652, "rewards/margins": 0.45045241713523865, "rewards/rejected": -0.632621705532074, "step": 5260 }, { "epoch": 0.7, "learning_rate": 1.2299011185830557e-06, "logits/chosen": -1.483337640762329, "logits/rejected": -1.0077427625656128, "logps/chosen": -470.9730529785156, "logps/rejected": -837.7032470703125, "loss": 0.116, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22441604733467102, "rewards/margins": 0.3804159164428711, "rewards/rejected": -0.6048319935798645, "step": 5270 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.6518104076385498, "logits/rejected": -0.8416474461555481, "logps/chosen": -490.2295837402344, "logps/rejected": -727.6588134765625, "loss": 0.16, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21518249809741974, "rewards/margins": 0.32688936591148376, "rewards/rejected": -0.5420718789100647, "step": 5280 }, { "epoch": 0.71, "learning_rate": 1.2099122862898214e-06, "logits/chosen": -1.5851049423217773, "logits/rejected": -0.9384450912475586, "logps/chosen": -555.9940185546875, "logps/rejected": -929.8065185546875, "loss": 0.0947, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1925552785396576, "rewards/margins": 0.4194509983062744, "rewards/rejected": -0.6120061874389648, "step": 5290 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.3924330472946167, "logits/rejected": -0.8665302991867065, "logps/chosen": -516.6383666992188, "logps/rejected": -852.7140502929688, "loss": 0.1274, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.214178204536438, "rewards/margins": 0.37730246782302856, "rewards/rejected": -0.5914807319641113, "step": 5300 }, { "epoch": 0.71, "learning_rate": 1.1900352350748026e-06, "logits/chosen": -1.4407775402069092, "logits/rejected": -0.857082724571228, "logps/chosen": -558.5343017578125, "logps/rejected": -859.2302856445312, "loss": 0.1271, "rewards/accuracies": 0.875, "rewards/chosen": -0.18870045244693756, "rewards/margins": 0.4341113567352295, "rewards/rejected": -0.6228117942810059, "step": 5310 }, { "epoch": 0.71, "learning_rate": 1.1801391659631423e-06, "logits/chosen": -1.574696660041809, "logits/rejected": -1.0441734790802002, "logps/chosen": -433.48211669921875, "logps/rejected": -792.7039794921875, "loss": 0.1499, "rewards/accuracies": 0.875, "rewards/chosen": -0.2097112238407135, "rewards/margins": 0.3791065812110901, "rewards/rejected": -0.5888177752494812, "step": 5320 }, { "epoch": 0.71, "learning_rate": 1.170271687207106e-06, "logits/chosen": -1.5611820220947266, "logits/rejected": -0.946784496307373, "logps/chosen": -499.36419677734375, "logps/rejected": -817.3825073242188, "loss": 0.1409, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20473620295524597, "rewards/margins": 0.3804923892021179, "rewards/rejected": -0.5852286219596863, "step": 5330 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.5461738109588623, "logits/rejected": -0.9032590985298157, "logps/chosen": -444.84552001953125, "logps/rejected": -742.2452392578125, "loss": 0.1085, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18285486102104187, "rewards/margins": 0.3640900254249573, "rewards/rejected": -0.5469449162483215, "step": 5340 }, { "epoch": 0.71, "learning_rate": 1.1506233551212186e-06, "logits/chosen": -1.4107246398925781, "logits/rejected": -0.6989453434944153, "logps/chosen": -573.1099853515625, "logps/rejected": -791.4630737304688, "loss": 0.1497, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23020067811012268, "rewards/margins": 0.3681562840938568, "rewards/rejected": -0.5983569025993347, "step": 5350 }, { "epoch": 0.71, "learning_rate": 1.1408429274065418e-06, "logits/chosen": -1.3513542413711548, "logits/rejected": -0.8181467056274414, "logps/chosen": -462.7870178222656, "logps/rejected": -814.8450927734375, "loss": 0.1171, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1746109426021576, "rewards/margins": 0.4220661520957947, "rewards/rejected": -0.5966770648956299, "step": 5360 }, { "epoch": 0.72, "learning_rate": 1.1310919412686248e-06, "logits/chosen": -1.6254085302352905, "logits/rejected": -0.8846661448478699, "logps/chosen": -421.10516357421875, "logps/rejected": -821.2198486328125, "loss": 0.098, "rewards/accuracies": 0.875, "rewards/chosen": -0.19411344826221466, "rewards/margins": 0.4121677875518799, "rewards/rejected": -0.6062811613082886, "step": 5370 }, { "epoch": 0.72, "learning_rate": 1.1213706079298566e-06, "logits/chosen": -1.5590204000473022, "logits/rejected": -0.9682413935661316, "logps/chosen": -486.61163330078125, "logps/rejected": -739.8365478515625, "loss": 0.178, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19468192756175995, "rewards/margins": 0.33544445037841797, "rewards/rejected": -0.5301263928413391, "step": 5380 }, { "epoch": 0.72, "learning_rate": 1.1116791379703032e-06, "logits/chosen": -1.5661708116531372, "logits/rejected": -0.9602205157279968, "logps/chosen": -535.9977416992188, "logps/rejected": -872.9022216796875, "loss": 0.1211, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19395512342453003, "rewards/margins": 0.4040953516960144, "rewards/rejected": -0.5980504751205444, "step": 5390 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.4590649604797363, "logits/rejected": -0.8140028119087219, "logps/chosen": -472.7320861816406, "logps/rejected": -786.4627685546875, "loss": 0.0836, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18391984701156616, "rewards/margins": 0.4102323651313782, "rewards/rejected": -0.5941521525382996, "step": 5400 }, { "epoch": 0.72, "learning_rate": 1.0923866272700845e-06, "logits/chosen": -1.2792116403579712, "logits/rejected": -1.075791597366333, "logps/chosen": -443.6817932128906, "logps/rejected": -858.3514404296875, "loss": 0.1238, "rewards/accuracies": 0.875, "rewards/chosen": -0.24240417778491974, "rewards/margins": 0.37138718366622925, "rewards/rejected": -0.6137913465499878, "step": 5410 }, { "epoch": 0.72, "learning_rate": 1.0827860044369226e-06, "logits/chosen": -1.5652382373809814, "logits/rejected": -1.2125917673110962, "logps/chosen": -389.00341796875, "logps/rejected": -668.3732299804688, "loss": 0.1302, "rewards/accuracies": 0.75, "rewards/chosen": -0.18405522406101227, "rewards/margins": 0.2935299277305603, "rewards/rejected": -0.47758516669273376, "step": 5420 }, { "epoch": 0.72, "learning_rate": 1.073216080788921e-06, "logits/chosen": -1.6024821996688843, "logits/rejected": -0.9295442700386047, "logps/chosen": -467.8318786621094, "logps/rejected": -864.23974609375, "loss": 0.0997, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14879059791564941, "rewards/margins": 0.4352756142616272, "rewards/rejected": -0.5840662717819214, "step": 5430 }, { "epoch": 0.73, "learning_rate": 1.06367706362636e-06, "logits/chosen": -1.5812941789627075, "logits/rejected": -1.0601966381072998, "logps/chosen": -469.12841796875, "logps/rejected": -844.4415893554688, "loss": 0.1249, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.173747256398201, "rewards/margins": 0.39489811658859253, "rewards/rejected": -0.5686453580856323, "step": 5440 }, { "epoch": 0.73, "learning_rate": 1.0541691595800338e-06, "logits/chosen": -1.6072155237197876, "logits/rejected": -1.0490585565567017, "logps/chosen": -494.46875, "logps/rejected": -798.1419677734375, "loss": 0.1041, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2061028927564621, "rewards/margins": 0.3626430332660675, "rewards/rejected": -0.5687459111213684, "step": 5450 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.5910913944244385, "logits/rejected": -0.9691600799560547, "logps/chosen": -541.7664184570312, "logps/rejected": -948.0553588867188, "loss": 0.0943, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19300617277622223, "rewards/margins": 0.4319871962070465, "rewards/rejected": -0.6249933838844299, "step": 5460 }, { "epoch": 0.73, "learning_rate": 1.0352475139849993e-06, "logits/chosen": -1.5177781581878662, "logits/rejected": -1.020815134048462, "logps/chosen": -509.0782775878906, "logps/rejected": -837.248046875, "loss": 0.1226, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20897535979747772, "rewards/margins": 0.39487597346305847, "rewards/rejected": -0.6038513779640198, "step": 5470 }, { "epoch": 0.73, "learning_rate": 1.0258341823102418e-06, "logits/chosen": -1.4822075366973877, "logits/rejected": -0.8729708790779114, "logps/chosen": -532.9627685546875, "logps/rejected": -851.3942260742188, "loss": 0.1571, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23945197463035583, "rewards/margins": 0.39208975434303284, "rewards/rejected": -0.6315417885780334, "step": 5480 }, { "epoch": 0.73, "learning_rate": 1.0164527834907468e-06, "logits/chosen": -1.3401305675506592, "logits/rejected": -1.0187923908233643, "logps/chosen": -385.40570068359375, "logps/rejected": -800.2244262695312, "loss": 0.1085, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20979323983192444, "rewards/margins": 0.3925233781337738, "rewards/rejected": -0.602316677570343, "step": 5490 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.360961675643921, "logits/rejected": -0.7782384157180786, "logps/chosen": -457.4180603027344, "logps/rejected": -876.7483520507812, "loss": 0.1563, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23960646986961365, "rewards/margins": 0.390997052192688, "rewards/rejected": -0.630603551864624, "step": 5500 }, { "epoch": 0.73, "learning_rate": 9.977865965875091e-07, "logits/chosen": -1.8332687616348267, "logits/rejected": -1.1459457874298096, "logps/chosen": -512.2830810546875, "logps/rejected": -801.5711669921875, "loss": 0.1433, "rewards/accuracies": 0.75, "rewards/chosen": -0.22823330760002136, "rewards/margins": 0.3319735825061798, "rewards/rejected": -0.5602068901062012, "step": 5510 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.341430902481079, "logits/rejected": -0.9758291244506836, "logps/chosen": -451.395751953125, "logps/rejected": -721.2365112304688, "loss": 0.1819, "rewards/accuracies": 0.75, "rewards/chosen": -0.24484524130821228, "rewards/margins": 0.29828450083732605, "rewards/rejected": -0.5431298017501831, "step": 5520 }, { "epoch": 0.74, "learning_rate": 9.792505706277136e-07, "logits/chosen": -1.4987294673919678, "logits/rejected": -0.7679599523544312, "logps/chosen": -491.7013244628906, "logps/rejected": -751.9393310546875, "loss": 0.1781, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2140645980834961, "rewards/margins": 0.3486694395542145, "rewards/rejected": -0.5627340078353882, "step": 5530 }, { "epoch": 0.74, "learning_rate": 9.700318703442437e-07, "logits/chosen": -1.4482524394989014, "logits/rejected": -0.9931025505065918, "logps/chosen": -497.1576232910156, "logps/rejected": -871.8629150390625, "loss": 0.0972, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2180767059326172, "rewards/margins": 0.40633684396743774, "rewards/rejected": -0.6244135499000549, "step": 5540 }, { "epoch": 0.74, "learning_rate": 9.608463116858544e-07, "logits/chosen": -1.70029616355896, "logits/rejected": -1.0156257152557373, "logps/chosen": -471.2583923339844, "logps/rejected": -860.1076049804688, "loss": 0.0949, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18707410991191864, "rewards/margins": 0.43171149492263794, "rewards/rejected": -0.618785560131073, "step": 5550 }, { "epoch": 0.74, "learning_rate": 9.516940936268504e-07, "logits/chosen": -1.4975395202636719, "logits/rejected": -1.0617475509643555, "logps/chosen": -485.7090759277344, "logps/rejected": -802.9081420898438, "loss": 0.1614, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21952207386493683, "rewards/margins": 0.34664005041122437, "rewards/rejected": -0.566162109375, "step": 5560 }, { "epoch": 0.74, "learning_rate": 9.4257541441932e-07, "logits/chosen": -1.3621257543563843, "logits/rejected": -0.8228029012680054, "logps/chosen": -453.7808532714844, "logps/rejected": -819.4066162109375, "loss": 0.1126, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16346648335456848, "rewards/margins": 0.42321619391441345, "rewards/rejected": -0.5866826772689819, "step": 5570 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.4563817977905273, "logits/rejected": -1.0051524639129639, "logps/chosen": -491.6202697753906, "logps/rejected": -865.3167724609375, "loss": 0.1398, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24924297630786896, "rewards/margins": 0.35569971799850464, "rewards/rejected": -0.6049426794052124, "step": 5580 }, { "epoch": 0.75, "learning_rate": 9.244394619302338e-07, "logits/chosen": -1.4319621324539185, "logits/rejected": -0.9194442629814148, "logps/chosen": -470.22174072265625, "logps/rejected": -769.84326171875, "loss": 0.184, "rewards/accuracies": 0.75, "rewards/chosen": -0.1991579532623291, "rewards/margins": 0.3460138738155365, "rewards/rejected": -0.545171856880188, "step": 5590 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.3754334449768066, "logits/rejected": -0.8846775889396667, "logps/chosen": -550.1680908203125, "logps/rejected": -831.501953125, "loss": 0.1243, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26152288913726807, "rewards/margins": 0.35007160902023315, "rewards/rejected": -0.6115944981575012, "step": 5600 }, { "epoch": 0.75, "learning_rate": 9.064400256282757e-07, "logits/chosen": -1.5064082145690918, "logits/rejected": -0.9161268472671509, "logps/chosen": -398.8093566894531, "logps/rejected": -654.99072265625, "loss": 0.1648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1780550479888916, "rewards/margins": 0.298201322555542, "rewards/rejected": -0.4762563109397888, "step": 5610 }, { "epoch": 0.75, "learning_rate": 8.974919888823164e-07, "logits/chosen": -1.4121335744857788, "logits/rejected": -0.794890820980072, "logps/chosen": -460.91998291015625, "logps/rejected": -876.2742309570312, "loss": 0.1196, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1762869507074356, "rewards/margins": 0.45224347710609436, "rewards/rejected": -0.6285303831100464, "step": 5620 }, { "epoch": 0.75, "learning_rate": 8.885786650945333e-07, "logits/chosen": -1.3726732730865479, "logits/rejected": -0.8681763410568237, "logps/chosen": -510.92529296875, "logps/rejected": -892.0538940429688, "loss": 0.1079, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23853686451911926, "rewards/margins": 0.363789826631546, "rewards/rejected": -0.6023266911506653, "step": 5630 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.521254301071167, "logits/rejected": -1.0100994110107422, "logps/chosen": -459.5888671875, "logps/rejected": -865.0677490234375, "loss": 0.1113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22328796982765198, "rewards/margins": 0.39054790139198303, "rewards/rejected": -0.6138359308242798, "step": 5640 }, { "epoch": 0.75, "learning_rate": 8.708569279463622e-07, "logits/chosen": -1.2005363702774048, "logits/rejected": -0.7217308282852173, "logps/chosen": -518.8699340820312, "logps/rejected": -975.6614990234375, "loss": 0.1145, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.27335843443870544, "rewards/margins": 0.45107731223106384, "rewards/rejected": -0.7244357466697693, "step": 5650 }, { "epoch": 0.75, "learning_rate": 8.620488984679378e-07, "logits/chosen": -1.4894965887069702, "logits/rejected": -0.9523374438285828, "logps/chosen": -542.3692626953125, "logps/rejected": -755.6817626953125, "loss": 0.1605, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2720409035682678, "rewards/margins": 0.3039925694465637, "rewards/rejected": -0.5760334730148315, "step": 5660 }, { "epoch": 0.76, "learning_rate": 8.532763497032987e-07, "logits/chosen": -1.4424383640289307, "logits/rejected": -0.8664541244506836, "logps/chosen": -482.08428955078125, "logps/rejected": -854.2926025390625, "loss": 0.0746, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2148595154285431, "rewards/margins": 0.4492289125919342, "rewards/rejected": -0.6640883684158325, "step": 5670 }, { "epoch": 0.76, "learning_rate": 8.445394716802754e-07, "logits/chosen": -1.4953354597091675, "logits/rejected": -1.0513901710510254, "logps/chosen": -495.39727783203125, "logps/rejected": -813.083984375, "loss": 0.0884, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23665395379066467, "rewards/margins": 0.3620506525039673, "rewards/rejected": -0.5987046957015991, "step": 5680 }, { "epoch": 0.76, "learning_rate": 8.35838453654009e-07, "logits/chosen": -1.5572940111160278, "logits/rejected": -0.9097954034805298, "logps/chosen": -520.28125, "logps/rejected": -940.6891479492188, "loss": 0.1427, "rewards/accuracies": 0.875, "rewards/chosen": -0.19597239792346954, "rewards/margins": 0.4340503215789795, "rewards/rejected": -0.6300228238105774, "step": 5690 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.45353364944458, "logits/rejected": -1.0082950592041016, "logps/chosen": -492.7423400878906, "logps/rejected": -816.0848999023438, "loss": 0.163, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22472481429576874, "rewards/margins": 0.35925525426864624, "rewards/rejected": -0.583980143070221, "step": 5700 }, { "epoch": 0.76, "learning_rate": 8.185447507243e-07, "logits/chosen": -1.373942255973816, "logits/rejected": -0.5300694704055786, "logps/chosen": -515.666748046875, "logps/rejected": -820.1891479492188, "loss": 0.1535, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21491403877735138, "rewards/margins": 0.4033784866333008, "rewards/rejected": -0.6182926297187805, "step": 5710 }, { "epoch": 0.76, "learning_rate": 8.099524404308948e-07, "logits/chosen": -1.2152715921401978, "logits/rejected": -0.8097234964370728, "logps/chosen": -469.317138671875, "logps/rejected": -758.361328125, "loss": 0.1487, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25387001037597656, "rewards/margins": 0.318764865398407, "rewards/rejected": -0.5726348757743835, "step": 5720 }, { "epoch": 0.76, "learning_rate": 8.013967393462094e-07, "logits/chosen": -1.3372713327407837, "logits/rejected": -0.9042348861694336, "logps/chosen": -486.88983154296875, "logps/rejected": -775.0665893554688, "loss": 0.133, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24530085921287537, "rewards/margins": 0.3258090615272522, "rewards/rejected": -0.5711098909378052, "step": 5730 }, { "epoch": 0.77, "learning_rate": 7.928778328007918e-07, "logits/chosen": -1.3288884162902832, "logits/rejected": -0.7175564765930176, "logps/chosen": -550.4727172851562, "logps/rejected": -796.298583984375, "loss": 0.1368, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23931817710399628, "rewards/margins": 0.3275078237056732, "rewards/rejected": -0.5668259859085083, "step": 5740 }, { "epoch": 0.77, "learning_rate": 7.843959053281663e-07, "logits/chosen": -1.5332810878753662, "logits/rejected": -0.8374568223953247, "logps/chosen": -508.918212890625, "logps/rejected": -825.8583984375, "loss": 0.1465, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20773963630199432, "rewards/margins": 0.3811188340187073, "rewards/rejected": -0.5888584852218628, "step": 5750 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.5883281230926514, "logits/rejected": -0.9644662737846375, "logps/chosen": -477.9900817871094, "logps/rejected": -839.3572387695312, "loss": 0.0832, "rewards/accuracies": 0.875, "rewards/chosen": -0.2028050422668457, "rewards/margins": 0.40947359800338745, "rewards/rejected": -0.6122786998748779, "step": 5760 }, { "epoch": 0.77, "learning_rate": 7.675437217262571e-07, "logits/chosen": -1.27748703956604, "logits/rejected": -0.7101573944091797, "logps/chosen": -469.8975524902344, "logps/rejected": -739.5403442382812, "loss": 0.183, "rewards/accuracies": 0.75, "rewards/chosen": -0.22256311774253845, "rewards/margins": 0.3478880524635315, "rewards/rejected": -0.5704511404037476, "step": 5770 }, { "epoch": 0.77, "learning_rate": 7.591738306429769e-07, "logits/chosen": -1.1995785236358643, "logits/rejected": -0.8539302945137024, "logps/chosen": -464.575927734375, "logps/rejected": -806.1849365234375, "loss": 0.1375, "rewards/accuracies": 0.875, "rewards/chosen": -0.21260254085063934, "rewards/margins": 0.3813617527484894, "rewards/rejected": -0.5939642786979675, "step": 5780 }, { "epoch": 0.77, "learning_rate": 7.508416487165862e-07, "logits/chosen": -1.2585513591766357, "logits/rejected": -0.7907862663269043, "logps/chosen": -524.2095947265625, "logps/rejected": -848.9905395507812, "loss": 0.1235, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3004508912563324, "rewards/margins": 0.35597458481788635, "rewards/rejected": -0.656425416469574, "step": 5790 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.5966112613677979, "logits/rejected": -0.937767505645752, "logps/chosen": -514.3389892578125, "logps/rejected": -877.3151245117188, "loss": 0.1471, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2573555111885071, "rewards/margins": 0.4029225707054138, "rewards/rejected": -0.6602780818939209, "step": 5800 }, { "epoch": 0.77, "learning_rate": 7.342911334687619e-07, "logits/chosen": -1.4570611715316772, "logits/rejected": -0.973736584186554, "logps/chosen": -409.6090393066406, "logps/rejected": -803.8706665039062, "loss": 0.1114, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1908612698316574, "rewards/margins": 0.3969055116176605, "rewards/rejected": -0.5877667665481567, "step": 5810 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.5033175945281982, "logits/rejected": -1.153529405593872, "logps/chosen": -471.53924560546875, "logps/rejected": -780.7069091796875, "loss": 0.179, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24635379016399384, "rewards/margins": 0.29246678948402405, "rewards/rejected": -0.5388205051422119, "step": 5820 }, { "epoch": 0.78, "learning_rate": 7.178936100204994e-07, "logits/chosen": -1.6408525705337524, "logits/rejected": -1.0268441438674927, "logps/chosen": -499.1380920410156, "logps/rejected": -923.5458984375, "loss": 0.0953, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.19846662878990173, "rewards/margins": 0.46603909134864807, "rewards/rejected": -0.664505660533905, "step": 5830 }, { "epoch": 0.78, "learning_rate": 7.097526647366379e-07, "logits/chosen": -1.351193904876709, "logits/rejected": -0.8164033889770508, "logps/chosen": -527.61572265625, "logps/rejected": -889.2008666992188, "loss": 0.1224, "rewards/accuracies": 0.875, "rewards/chosen": -0.257089227437973, "rewards/margins": 0.3886021077632904, "rewards/rejected": -0.6456912755966187, "step": 5840 }, { "epoch": 0.78, "learning_rate": 7.016504991533727e-07, "logits/chosen": -1.2645586729049683, "logits/rejected": -0.9257951974868774, "logps/chosen": -491.51922607421875, "logps/rejected": -855.71875, "loss": 0.1333, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25636953115463257, "rewards/margins": 0.356289803981781, "rewards/rejected": -0.6126593351364136, "step": 5850 }, { "epoch": 0.78, "learning_rate": 6.935872887769299e-07, "logits/chosen": -0.994711697101593, "logits/rejected": -0.8577106595039368, "logps/chosen": -418.3548889160156, "logps/rejected": -819.3732299804688, "loss": 0.1633, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20737271010875702, "rewards/margins": 0.3346274197101593, "rewards/rejected": -0.5420001745223999, "step": 5860 }, { "epoch": 0.78, "learning_rate": 6.855632082697045e-07, "logits/chosen": -1.4238831996917725, "logits/rejected": -0.7512753009796143, "logps/chosen": -548.7970581054688, "logps/rejected": -825.9925537109375, "loss": 0.1311, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2298368215560913, "rewards/margins": 0.3555445373058319, "rewards/rejected": -0.5853813886642456, "step": 5870 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.6078815460205078, "logits/rejected": -0.993118166923523, "logps/chosen": -545.4841918945312, "logps/rejected": -895.5103759765625, "loss": 0.1547, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2508589029312134, "rewards/margins": 0.3634670674800873, "rewards/rejected": -0.6143259406089783, "step": 5880 }, { "epoch": 0.79, "learning_rate": 6.696331312706245e-07, "logits/chosen": -1.5820848941802979, "logits/rejected": -0.9387833476066589, "logps/chosen": -587.0062255859375, "logps/rejected": -810.4898681640625, "loss": 0.1313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2731969952583313, "rewards/margins": 0.3355793356895447, "rewards/rejected": -0.608776330947876, "step": 5890 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.398246169090271, "logits/rejected": -1.0464344024658203, "logps/chosen": -441.2962341308594, "logps/rejected": -797.6106567382812, "loss": 0.1301, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2251112014055252, "rewards/margins": 0.35472145676612854, "rewards/rejected": -0.5798326134681702, "step": 5900 }, { "epoch": 0.79, "learning_rate": 6.538616484352902e-07, "logits/chosen": -1.4681236743927002, "logits/rejected": -1.1271047592163086, "logps/chosen": -479.44580078125, "logps/rejected": -835.37744140625, "loss": 0.1398, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25705060362815857, "rewards/margins": 0.36807265877723694, "rewards/rejected": -0.6251233220100403, "step": 5910 }, { "epoch": 0.79, "learning_rate": 6.460358074120518e-07, "logits/chosen": -1.3405694961547852, "logits/rejected": -0.8227552175521851, "logps/chosen": -527.5181884765625, "logps/rejected": -772.7880249023438, "loss": 0.1881, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2875741124153137, "rewards/margins": 0.3079223036766052, "rewards/rejected": -0.5954964756965637, "step": 5920 }, { "epoch": 0.79, "learning_rate": 6.382501263012936e-07, "logits/chosen": -1.526971459388733, "logits/rejected": -0.8887739181518555, "logps/chosen": -518.7437744140625, "logps/rejected": -957.9097900390625, "loss": 0.094, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.25182047486305237, "rewards/margins": 0.44587215781211853, "rewards/rejected": -0.6976926326751709, "step": 5930 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.3693434000015259, "logits/rejected": -0.8970683813095093, "logps/chosen": -503.234130859375, "logps/rejected": -836.6292724609375, "loss": 0.1333, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22358956933021545, "rewards/margins": 0.36614367365837097, "rewards/rejected": -0.5897333025932312, "step": 5940 }, { "epoch": 0.79, "learning_rate": 6.227999175462521e-07, "logits/chosen": -1.4342982769012451, "logits/rejected": -0.8154445886611938, "logps/chosen": -522.7168579101562, "logps/rejected": -844.3760986328125, "loss": 0.1606, "rewards/accuracies": 0.875, "rewards/chosen": -0.26568546891212463, "rewards/margins": 0.35932299494743347, "rewards/rejected": -0.6250084638595581, "step": 5950 }, { "epoch": 0.79, "learning_rate": 6.151357245788917e-07, "logits/chosen": -1.4272327423095703, "logits/rejected": -0.9210633039474487, "logps/chosen": -579.3690185546875, "logps/rejected": -838.6309814453125, "loss": 0.1929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2933214008808136, "rewards/margins": 0.3416084051132202, "rewards/rejected": -0.6349297761917114, "step": 5960 }, { "epoch": 0.8, "learning_rate": 6.075123608706093e-07, "logits/chosen": -1.4912382364273071, "logits/rejected": -0.6635556221008301, "logps/chosen": -514.3550415039062, "logps/rejected": -756.4337158203125, "loss": 0.1437, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2315598428249359, "rewards/margins": 0.3265839219093323, "rewards/rejected": -0.5581437349319458, "step": 5970 }, { "epoch": 0.8, "learning_rate": 5.999299915559956e-07, "logits/chosen": -1.5643749237060547, "logits/rejected": -1.040816068649292, "logps/chosen": -516.1356201171875, "logps/rejected": -852.8410034179688, "loss": 0.1228, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23064880073070526, "rewards/margins": 0.37677282094955444, "rewards/rejected": -0.6074216365814209, "step": 5980 }, { "epoch": 0.8, "learning_rate": 5.923887808816373e-07, "logits/chosen": -1.2064945697784424, "logits/rejected": -0.849543571472168, "logps/chosen": -448.8682556152344, "logps/rejected": -885.9103393554688, "loss": 0.1258, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24305430054664612, "rewards/margins": 0.4217645227909088, "rewards/rejected": -0.6648188233375549, "step": 5990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.6217724084854126, "logits/rejected": -0.8952631950378418, "logps/chosen": -597.3336181640625, "logps/rejected": -819.6091918945312, "loss": 0.1111, "rewards/accuracies": 0.875, "rewards/chosen": -0.2815231680870056, "rewards/margins": 0.3476121127605438, "rewards/rejected": -0.6291353702545166, "step": 6000 }, { "epoch": 0.8, "learning_rate": 5.774304879786688e-07, "logits/chosen": -1.5052062273025513, "logits/rejected": -1.112230658531189, "logps/chosen": -436.70819091796875, "logps/rejected": -769.6760864257812, "loss": 0.1179, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.222015380859375, "rewards/margins": 0.36521318554878235, "rewards/rejected": -0.5872284770011902, "step": 6010 }, { "epoch": 0.8, "learning_rate": 5.700137297712749e-07, "logits/chosen": -1.4654874801635742, "logits/rejected": -0.9464197158813477, "logps/chosen": -471.94549560546875, "logps/rejected": -863.0076904296875, "loss": 0.1133, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22802074253559113, "rewards/margins": 0.39466968178749084, "rewards/rejected": -0.6226904392242432, "step": 6020 }, { "epoch": 0.8, "learning_rate": 5.626387782395512e-07, "logits/chosen": -1.281158685684204, "logits/rejected": -0.968732476234436, "logps/chosen": -498.4610900878906, "logps/rejected": -894.5953369140625, "loss": 0.1437, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26076218485832214, "rewards/margins": 0.3403322994709015, "rewards/rejected": -0.6010944843292236, "step": 6030 }, { "epoch": 0.81, "learning_rate": 5.553057931370729e-07, "logits/chosen": -1.4274104833602905, "logits/rejected": -0.8784977793693542, "logps/chosen": -604.6317138671875, "logps/rejected": -912.6419067382812, "loss": 0.1183, "rewards/accuracies": 0.875, "rewards/chosen": -0.25740376114845276, "rewards/margins": 0.41545647382736206, "rewards/rejected": -0.6728602647781372, "step": 6040 }, { "epoch": 0.81, "learning_rate": 5.48014933308352e-07, "logits/chosen": -1.431522011756897, "logits/rejected": -0.7600606679916382, "logps/chosen": -506.6710510253906, "logps/rejected": -824.3439331054688, "loss": 0.1321, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22056278586387634, "rewards/margins": 0.3959851861000061, "rewards/rejected": -0.6165480017662048, "step": 6050 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.5452665090560913, "logits/rejected": -0.8012416958808899, "logps/chosen": -544.3170776367188, "logps/rejected": -885.4869384765625, "loss": 0.0834, "rewards/accuracies": 0.875, "rewards/chosen": -0.2349054366350174, "rewards/margins": 0.41422995924949646, "rewards/rejected": -0.649135410785675, "step": 6060 }, { "epoch": 0.81, "learning_rate": 5.335602202843054e-07, "logits/chosen": -1.3676344156265259, "logits/rejected": -0.7313020825386047, "logps/chosen": -552.454833984375, "logps/rejected": -818.5693359375, "loss": 0.1449, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2200872004032135, "rewards/margins": 0.38154327869415283, "rewards/rejected": -0.601630449295044, "step": 6070 }, { "epoch": 0.81, "learning_rate": 5.263966802018275e-07, "logits/chosen": -1.5200622081756592, "logits/rejected": -0.8617421388626099, "logps/chosen": -531.3231201171875, "logps/rejected": -831.9881591796875, "loss": 0.1326, "rewards/accuracies": 0.875, "rewards/chosen": -0.25307679176330566, "rewards/margins": 0.3641541004180908, "rewards/rejected": -0.6172308921813965, "step": 6080 }, { "epoch": 0.81, "learning_rate": 5.192758916120236e-07, "logits/chosen": -1.6207752227783203, "logits/rejected": -0.8784235715866089, "logps/chosen": -567.6187744140625, "logps/rejected": -901.7517700195312, "loss": 0.0983, "rewards/accuracies": 0.875, "rewards/chosen": -0.2562420964241028, "rewards/margins": 0.4305727481842041, "rewards/rejected": -0.6868148446083069, "step": 6090 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.4444869756698608, "logits/rejected": -1.1552057266235352, "logps/chosen": -447.08575439453125, "logps/rejected": -835.5751953125, "loss": 0.121, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2040017545223236, "rewards/margins": 0.3553166687488556, "rewards/rejected": -0.5593183636665344, "step": 6100 }, { "epoch": 0.81, "learning_rate": 5.051631849729785e-07, "logits/chosen": -1.4898678064346313, "logits/rejected": -0.743249237537384, "logps/chosen": -512.14697265625, "logps/rejected": -751.2730712890625, "loss": 0.1107, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2078522890806198, "rewards/margins": 0.36741724610328674, "rewards/rejected": -0.5752695798873901, "step": 6110 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.5638794898986816, "logits/rejected": -0.8530174493789673, "logps/chosen": -569.1683349609375, "logps/rejected": -859.6868286132812, "loss": 0.1614, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2818874418735504, "rewards/margins": 0.33524757623672485, "rewards/rejected": -0.6171349883079529, "step": 6120 }, { "epoch": 0.82, "learning_rate": 4.912233231782623e-07, "logits/chosen": -1.4559434652328491, "logits/rejected": -0.9756882786750793, "logps/chosen": -414.2666931152344, "logps/rejected": -723.321044921875, "loss": 0.1484, "rewards/accuracies": 0.75, "rewards/chosen": -0.17090296745300293, "rewards/margins": 0.33516639471054077, "rewards/rejected": -0.5060693621635437, "step": 6130 }, { "epoch": 0.82, "learning_rate": 4.843185871337722e-07, "logits/chosen": -1.5099248886108398, "logits/rejected": -0.9456412196159363, "logps/chosen": -560.4591064453125, "logps/rejected": -834.3416137695312, "loss": 0.172, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24070127308368683, "rewards/margins": 0.34346550703048706, "rewards/rejected": -0.5841667652130127, "step": 6140 }, { "epoch": 0.82, "learning_rate": 4.774575140626317e-07, "logits/chosen": -1.7518455982208252, "logits/rejected": -1.114401936531067, "logps/chosen": -525.9067993164062, "logps/rejected": -792.6747436523438, "loss": 0.1686, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23079843819141388, "rewards/margins": 0.34348228573799133, "rewards/rejected": -0.5742807388305664, "step": 6150 }, { "epoch": 0.82, "learning_rate": 4.706402525869633e-07, "logits/chosen": -1.432835340499878, "logits/rejected": -0.8895187377929688, "logps/chosen": -462.48699951171875, "logps/rejected": -899.4519653320312, "loss": 0.0915, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18733736872673035, "rewards/margins": 0.42375391721725464, "rewards/rejected": -0.6110912561416626, "step": 6160 }, { "epoch": 0.82, "learning_rate": 4.638669503798579e-07, "logits/chosen": -1.5865939855575562, "logits/rejected": -0.840206503868103, "logps/chosen": -533.1041870117188, "logps/rejected": -855.7658081054688, "loss": 0.0956, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21983318030834198, "rewards/margins": 0.4039926528930664, "rewards/rejected": -0.6238259077072144, "step": 6170 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.4522035121917725, "logits/rejected": -1.3330981731414795, "logps/chosen": -482.1822204589844, "logps/rejected": -931.4268798828125, "loss": 0.1176, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2349972426891327, "rewards/margins": 0.3949509561061859, "rewards/rejected": -0.6299481987953186, "step": 6180 }, { "epoch": 0.83, "learning_rate": 4.5045280969937847e-07, "logits/chosen": -1.4791220426559448, "logits/rejected": -0.9238311052322388, "logps/chosen": -418.791748046875, "logps/rejected": -879.8201904296875, "loss": 0.0777, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20514781773090363, "rewards/margins": 0.4510798454284668, "rewards/rejected": -0.6562276482582092, "step": 6190 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.5650171041488647, "logits/rejected": -0.9084704518318176, "logps/chosen": -580.3602905273438, "logps/rejected": -948.2159423828125, "loss": 0.1289, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.23897108435630798, "rewards/margins": 0.4406881332397461, "rewards/rejected": -0.6796592473983765, "step": 6200 }, { "epoch": 0.83, "learning_rate": 4.372162543042624e-07, "logits/chosen": -1.5728212594985962, "logits/rejected": -1.1380150318145752, "logps/chosen": -489.7958984375, "logps/rejected": -833.2279052734375, "loss": 0.1187, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20882126688957214, "rewards/margins": 0.3733726143836975, "rewards/rejected": -0.5821938514709473, "step": 6210 }, { "epoch": 0.83, "learning_rate": 4.3066493009749853e-07, "logits/chosen": -1.5403592586517334, "logits/rejected": -0.9451869130134583, "logps/chosen": -482.264892578125, "logps/rejected": -828.3956298828125, "loss": 0.1215, "rewards/accuracies": 0.875, "rewards/chosen": -0.2161424160003662, "rewards/margins": 0.414954274892807, "rewards/rejected": -0.6310966610908508, "step": 6220 }, { "epoch": 0.83, "learning_rate": 4.2415843109050667e-07, "logits/chosen": -1.489720344543457, "logits/rejected": -1.1852211952209473, "logps/chosen": -470.1988830566406, "logps/rejected": -860.6622924804688, "loss": 0.1426, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21867451071739197, "rewards/margins": 0.3845387101173401, "rewards/rejected": -0.6032131910324097, "step": 6230 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.3447024822235107, "logits/rejected": -0.9154160618782043, "logps/chosen": -440.44879150390625, "logps/rejected": -818.2030029296875, "loss": 0.1167, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19660237431526184, "rewards/margins": 0.3885725438594818, "rewards/rejected": -0.5851748585700989, "step": 6240 }, { "epoch": 0.83, "learning_rate": 4.1128047146765936e-07, "logits/chosen": -1.6222617626190186, "logits/rejected": -1.1826813220977783, "logps/chosen": -540.1722412109375, "logps/rejected": -831.1741333007812, "loss": 0.1666, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26988035440444946, "rewards/margins": 0.30596500635147095, "rewards/rejected": -0.5758453607559204, "step": 6250 }, { "epoch": 0.83, "learning_rate": 4.049092898095816e-07, "logits/chosen": -1.3060246706008911, "logits/rejected": -0.8426834940910339, "logps/chosen": -546.715087890625, "logps/rejected": -976.0635986328125, "loss": 0.1294, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.28321272134780884, "rewards/margins": 0.4075555205345154, "rewards/rejected": -0.690768301486969, "step": 6260 }, { "epoch": 0.84, "learning_rate": 3.9858349126078945e-07, "logits/chosen": -1.6732780933380127, "logits/rejected": -0.7740602493286133, "logps/chosen": -577.4724731445312, "logps/rejected": -861.3342895507812, "loss": 0.1205, "rewards/accuracies": 0.875, "rewards/chosen": -0.2359362542629242, "rewards/margins": 0.38236093521118164, "rewards/rejected": -0.6182972192764282, "step": 6270 }, { "epoch": 0.84, "learning_rate": 3.9230321284847856e-07, "logits/chosen": -1.7987782955169678, "logits/rejected": -1.325372338294983, "logps/chosen": -440.06610107421875, "logps/rejected": -801.0474853515625, "loss": 0.1238, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21796616911888123, "rewards/margins": 0.3503972887992859, "rewards/rejected": -0.5683634281158447, "step": 6280 }, { "epoch": 0.84, "learning_rate": 3.86068590613804e-07, "logits/chosen": -1.49826979637146, "logits/rejected": -1.148301362991333, "logps/chosen": -485.7596130371094, "logps/rejected": -813.1477661132812, "loss": 0.107, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21670837700366974, "rewards/margins": 0.36857548356056213, "rewards/rejected": -0.5852838754653931, "step": 6290 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.738193154335022, "logits/rejected": -1.0076912641525269, "logps/chosen": -429.09002685546875, "logps/rejected": -824.4563598632812, "loss": 0.0845, "rewards/accuracies": 0.875, "rewards/chosen": -0.1454305797815323, "rewards/margins": 0.4517894685268402, "rewards/rejected": -0.5972201228141785, "step": 6300 }, { "epoch": 0.84, "learning_rate": 3.737368538941255e-07, "logits/chosen": -1.3455018997192383, "logits/rejected": -0.9166312217712402, "logps/chosen": -503.8860778808594, "logps/rejected": -904.0072021484375, "loss": 0.1103, "rewards/accuracies": 0.875, "rewards/chosen": -0.2505475878715515, "rewards/margins": 0.4097828269004822, "rewards/rejected": -0.6603304147720337, "step": 6310 }, { "epoch": 0.84, "learning_rate": 3.6764000653481263e-07, "logits/chosen": -1.3718827962875366, "logits/rejected": -0.9547538757324219, "logps/chosen": -516.5181274414062, "logps/rejected": -796.8955078125, "loss": 0.1531, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22998102009296417, "rewards/margins": 0.33470654487609863, "rewards/rejected": -0.5646876096725464, "step": 6320 }, { "epoch": 0.84, "learning_rate": 3.615893495987335e-07, "logits/chosen": -1.692065954208374, "logits/rejected": -1.0852649211883545, "logps/chosen": -397.7701416015625, "logps/rejected": -803.3123779296875, "loss": 0.1231, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20279788970947266, "rewards/margins": 0.41056856513023376, "rewards/rejected": -0.6133664846420288, "step": 6330 }, { "epoch": 0.85, "learning_rate": 3.555850141530659e-07, "logits/chosen": -1.4860546588897705, "logits/rejected": -1.1353389024734497, "logps/chosen": -407.8889465332031, "logps/rejected": -835.9850463867188, "loss": 0.1187, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1942521333694458, "rewards/margins": 0.3862732946872711, "rewards/rejected": -0.5805253982543945, "step": 6340 }, { "epoch": 0.85, "learning_rate": 3.4962713026158697e-07, "logits/chosen": -1.5381252765655518, "logits/rejected": -0.8441897630691528, "logps/chosen": -568.1682739257812, "logps/rejected": -914.52099609375, "loss": 0.0725, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.26694822311401367, "rewards/margins": 0.4364437460899353, "rewards/rejected": -0.703391969203949, "step": 6350 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.5919785499572754, "logits/rejected": -1.1603013277053833, "logps/chosen": -470.9925231933594, "logps/rejected": -867.21240234375, "loss": 0.1121, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21010012924671173, "rewards/margins": 0.3647227883338928, "rewards/rejected": -0.5748229026794434, "step": 6360 }, { "epoch": 0.85, "learning_rate": 3.378512323624228e-07, "logits/chosen": -1.5560038089752197, "logits/rejected": -0.8677865862846375, "logps/chosen": -498.01947021484375, "logps/rejected": -804.9054565429688, "loss": 0.142, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21834082901477814, "rewards/margins": 0.35967257618904114, "rewards/rejected": -0.5780134201049805, "step": 6370 }, { "epoch": 0.85, "learning_rate": 3.3203347344004737e-07, "logits/chosen": -1.524595856666565, "logits/rejected": -1.0703728199005127, "logps/chosen": -485.6414489746094, "logps/rejected": -871.1435546875, "loss": 0.0843, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19698016345500946, "rewards/margins": 0.4223461151123047, "rewards/rejected": -0.6193262934684753, "step": 6380 }, { "epoch": 0.85, "learning_rate": 3.262626762369525e-07, "logits/chosen": -1.3607814311981201, "logits/rejected": -1.1445564031600952, "logps/chosen": -532.5650634765625, "logps/rejected": -907.86376953125, "loss": 0.1871, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.307457834482193, "rewards/margins": 0.30070099234580994, "rewards/rejected": -0.6081588864326477, "step": 6390 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.4279563426971436, "logits/rejected": -0.8247480392456055, "logps/chosen": -432.3768615722656, "logps/rejected": -800.62841796875, "loss": 0.1202, "rewards/accuracies": 0.875, "rewards/chosen": -0.1926819384098053, "rewards/margins": 0.4079027771949768, "rewards/rejected": -0.6005846261978149, "step": 6400 }, { "epoch": 0.85, "learning_rate": 3.148624659884508e-07, "logits/chosen": -1.352543592453003, "logits/rejected": -0.9658149480819702, "logps/chosen": -481.97955322265625, "logps/rejected": -777.39208984375, "loss": 0.1552, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2425815314054489, "rewards/margins": 0.30716800689697266, "rewards/rejected": -0.5497495532035828, "step": 6410 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.5252506732940674, "logits/rejected": -0.8489130735397339, "logps/chosen": -495.26458740234375, "logps/rejected": -972.8980712890625, "loss": 0.0648, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.20235852897167206, "rewards/margins": 0.48106271028518677, "rewards/rejected": -0.68342125415802, "step": 6420 }, { "epoch": 0.86, "learning_rate": 3.0365158940075664e-07, "logits/chosen": -1.437496542930603, "logits/rejected": -1.0414105653762817, "logps/chosen": -404.98583984375, "logps/rejected": -708.0098266601562, "loss": 0.1245, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17960545420646667, "rewards/margins": 0.3412194848060608, "rewards/rejected": -0.5208249092102051, "step": 6430 }, { "epoch": 0.86, "learning_rate": 2.981174554287239e-07, "logits/chosen": -1.4341957569122314, "logits/rejected": -1.1686787605285645, "logps/chosen": -483.11883544921875, "logps/rejected": -777.0546264648438, "loss": 0.2065, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24856820702552795, "rewards/margins": 0.3038688600063324, "rewards/rejected": -0.5524370670318604, "step": 6440 }, { "epoch": 0.86, "learning_rate": 2.9263101785268253e-07, "logits/chosen": -1.4215772151947021, "logits/rejected": -0.9602577090263367, "logps/chosen": -494.32537841796875, "logps/rejected": -817.6704711914062, "loss": 0.1312, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2329108715057373, "rewards/margins": 0.3553210198879242, "rewards/rejected": -0.5882318615913391, "step": 6450 }, { "epoch": 0.86, "learning_rate": 2.871923955178918e-07, "logits/chosen": -1.4866185188293457, "logits/rejected": -0.9591636657714844, "logps/chosen": -471.3702697753906, "logps/rejected": -813.4729614257812, "loss": 0.122, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23452866077423096, "rewards/margins": 0.375867635011673, "rewards/rejected": -0.6103963255882263, "step": 6460 }, { "epoch": 0.86, "learning_rate": 2.8180170623385213e-07, "logits/chosen": -1.8203891515731812, "logits/rejected": -0.9665926694869995, "logps/chosen": -494.5782775878906, "logps/rejected": -841.2796020507812, "loss": 0.0975, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18856938183307648, "rewards/margins": 0.42396125197410583, "rewards/rejected": -0.6125305891036987, "step": 6470 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.4959276914596558, "logits/rejected": -0.9779335260391235, "logps/chosen": -551.3182373046875, "logps/rejected": -888.9775390625, "loss": 0.1604, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2843288481235504, "rewards/margins": 0.3726845383644104, "rewards/rejected": -0.6570132970809937, "step": 6480 }, { "epoch": 0.87, "learning_rate": 2.7116459286195887e-07, "logits/chosen": -1.21626877784729, "logits/rejected": -0.9062309265136719, "logps/chosen": -490.125732421875, "logps/rejected": -901.1393432617188, "loss": 0.0989, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2371952086687088, "rewards/margins": 0.3920780420303345, "rewards/rejected": -0.6292732357978821, "step": 6490 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.7364749908447266, "logits/rejected": -1.07496178150177, "logps/chosen": -514.854736328125, "logps/rejected": -843.8689575195312, "loss": 0.1104, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18669569492340088, "rewards/margins": 0.3989318013191223, "rewards/rejected": -0.5856274962425232, "step": 6500 }, { "epoch": 0.87, "learning_rate": 2.6072059940146775e-07, "logits/chosen": -1.5681116580963135, "logits/rejected": -1.097745418548584, "logps/chosen": -456.2774353027344, "logps/rejected": -841.9134521484375, "loss": 0.1424, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21643777191638947, "rewards/margins": 0.3762553334236145, "rewards/rejected": -0.592693030834198, "step": 6510 }, { "epoch": 0.87, "learning_rate": 2.555713060848433e-07, "logits/chosen": -1.4545785188674927, "logits/rejected": -0.9097267389297485, "logps/chosen": -462.0049743652344, "logps/rejected": -810.7428588867188, "loss": 0.1178, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20860812067985535, "rewards/margins": 0.3714580833911896, "rewards/rejected": -0.5800662040710449, "step": 6520 }, { "epoch": 0.87, "learning_rate": 2.504706307837551e-07, "logits/chosen": -1.5105178356170654, "logits/rejected": -0.9397289156913757, "logps/chosen": -501.52630615234375, "logps/rejected": -918.1564331054688, "loss": 0.0959, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26715564727783203, "rewards/margins": 0.4018022119998932, "rewards/rejected": -0.6689578294754028, "step": 6530 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.3464300632476807, "logits/rejected": -0.8303709030151367, "logps/chosen": -504.12164306640625, "logps/rejected": -879.8346557617188, "loss": 0.1419, "rewards/accuracies": 0.875, "rewards/chosen": -0.23833408951759338, "rewards/margins": 0.40014615654945374, "rewards/rejected": -0.6384803056716919, "step": 6540 }, { "epoch": 0.87, "learning_rate": 2.404155751286988e-07, "logits/chosen": -1.4623782634735107, "logits/rejected": -1.1166012287139893, "logps/chosen": -529.0921630859375, "logps/rejected": -947.2532958984375, "loss": 0.1206, "rewards/accuracies": 0.875, "rewards/chosen": -0.22969821095466614, "rewards/margins": 0.4077332019805908, "rewards/rejected": -0.6374315023422241, "step": 6550 }, { "epoch": 0.87, "learning_rate": 2.3546141258376786e-07, "logits/chosen": -1.5455429553985596, "logits/rejected": -1.0416425466537476, "logps/chosen": -476.44134521484375, "logps/rejected": -872.48779296875, "loss": 0.1126, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23007233440876007, "rewards/margins": 0.4278647005558014, "rewards/rejected": -0.6579370498657227, "step": 6560 }, { "epoch": 0.88, "learning_rate": 2.3055630366772857e-07, "logits/chosen": -1.398733377456665, "logits/rejected": -0.8798559904098511, "logps/chosen": -405.2383728027344, "logps/rejected": -711.0206909179688, "loss": 0.1581, "rewards/accuracies": 0.75, "rewards/chosen": -0.16562074422836304, "rewards/margins": 0.34439751505851746, "rewards/rejected": -0.5100182890892029, "step": 6570 }, { "epoch": 0.88, "learning_rate": 2.257003546333042e-07, "logits/chosen": -1.3985016345977783, "logits/rejected": -0.7250006794929504, "logps/chosen": -490.95794677734375, "logps/rejected": -854.8848876953125, "loss": 0.0832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17378300428390503, "rewards/margins": 0.46776333451271057, "rewards/rejected": -0.6415463089942932, "step": 6580 }, { "epoch": 0.88, "learning_rate": 2.208936706683351e-07, "logits/chosen": -1.235718846321106, "logits/rejected": -0.9032597541809082, "logps/chosen": -488.0113830566406, "logps/rejected": -893.2078857421875, "loss": 0.1355, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24955153465270996, "rewards/margins": 0.38928717374801636, "rewards/rejected": -0.6388388276100159, "step": 6590 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.7433204650878906, "logits/rejected": -1.158917784690857, "logps/chosen": -535.4383544921875, "logps/rejected": -885.5426635742188, "loss": 0.0958, "rewards/accuracies": 0.875, "rewards/chosen": -0.1996287703514099, "rewards/margins": 0.43160730600357056, "rewards/rejected": -0.6312360763549805, "step": 6600 }, { "epoch": 0.88, "learning_rate": 2.1142851336005244e-07, "logits/chosen": -1.3180363178253174, "logits/rejected": -0.960915207862854, "logps/chosen": -549.1765747070312, "logps/rejected": -933.6243896484375, "loss": 0.1536, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25747647881507874, "rewards/margins": 0.38168302178382874, "rewards/rejected": -0.6391595005989075, "step": 6610 }, { "epoch": 0.88, "learning_rate": 2.0677024504760752e-07, "logits/chosen": -1.5789819955825806, "logits/rejected": -1.0533421039581299, "logps/chosen": -491.7295837402344, "logps/rejected": -767.828857421875, "loss": 0.1244, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2204131782054901, "rewards/margins": 0.3404414653778076, "rewards/rejected": -0.5608546137809753, "step": 6620 }, { "epoch": 0.88, "learning_rate": 2.0216165186191406e-07, "logits/chosen": -1.5069531202316284, "logits/rejected": -0.9445350766181946, "logps/chosen": -547.0418701171875, "logps/rejected": -885.5628051757812, "loss": 0.1331, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25327160954475403, "rewards/margins": 0.3645995259284973, "rewards/rejected": -0.617871105670929, "step": 6630 }, { "epoch": 0.89, "learning_rate": 1.9760283363267684e-07, "logits/chosen": -1.5341289043426514, "logits/rejected": -0.9979984164237976, "logps/chosen": -540.775146484375, "logps/rejected": -982.7828369140625, "loss": 0.1198, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2584487795829773, "rewards/margins": 0.4025515615940094, "rewards/rejected": -0.6610003709793091, "step": 6640 }, { "epoch": 0.89, "learning_rate": 1.9309388911139427e-07, "logits/chosen": -1.5053001642227173, "logits/rejected": -0.818169116973877, "logps/chosen": -558.2037963867188, "logps/rejected": -870.5218505859375, "loss": 0.16, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24096794426441193, "rewards/margins": 0.3718743324279785, "rewards/rejected": -0.6128423810005188, "step": 6650 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.5473806858062744, "logits/rejected": -0.8469358682632446, "logps/chosen": -522.5948486328125, "logps/rejected": -878.1281127929688, "loss": 0.1199, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1900518834590912, "rewards/margins": 0.4157015383243561, "rewards/rejected": -0.6057534217834473, "step": 6660 }, { "epoch": 0.89, "learning_rate": 1.8422601079483516e-07, "logits/chosen": -1.5866916179656982, "logits/rejected": -0.9989362955093384, "logps/chosen": -437.7984924316406, "logps/rejected": -676.7769165039062, "loss": 0.1731, "rewards/accuracies": 0.75, "rewards/chosen": -0.19794021546840668, "rewards/margins": 0.33342671394348145, "rewards/rejected": -0.5313669443130493, "step": 6670 }, { "epoch": 0.89, "learning_rate": 1.798672690923828e-07, "logits/chosen": -1.2771915197372437, "logits/rejected": -0.7800703048706055, "logps/chosen": -459.9149475097656, "logps/rejected": -917.9443359375, "loss": 0.1012, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20444636046886444, "rewards/margins": 0.46741732954978943, "rewards/rejected": -0.6718636751174927, "step": 6680 }, { "epoch": 0.89, "learning_rate": 1.7555878527937164e-07, "logits/chosen": -1.3597838878631592, "logits/rejected": -0.8564812541007996, "logps/chosen": -446.06414794921875, "logps/rejected": -770.828125, "loss": 0.1859, "rewards/accuracies": 0.75, "rewards/chosen": -0.16719356179237366, "rewards/margins": 0.36050131916999817, "rewards/rejected": -0.527694821357727, "step": 6690 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.417188048362732, "logits/rejected": -0.9561182260513306, "logps/chosen": -540.4723510742188, "logps/rejected": -972.0789794921875, "loss": 0.0978, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2296275794506073, "rewards/margins": 0.42581015825271606, "rewards/rejected": -0.655437707901001, "step": 6700 }, { "epoch": 0.89, "learning_rate": 1.6709296354635335e-07, "logits/chosen": -1.616417646408081, "logits/rejected": -0.9375585317611694, "logps/chosen": -472.4241638183594, "logps/rejected": -782.0997314453125, "loss": 0.1564, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2145848572254181, "rewards/margins": 0.3725607991218567, "rewards/rejected": -0.5871456861495972, "step": 6710 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.5024985074996948, "logits/rejected": -0.7005435824394226, "logps/chosen": -540.7516479492188, "logps/rejected": -905.38525390625, "loss": 0.0716, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21200323104858398, "rewards/margins": 0.445390522480011, "rewards/rejected": -0.6573936939239502, "step": 6720 }, { "epoch": 0.9, "learning_rate": 1.5882927912627772e-07, "logits/chosen": -1.2667381763458252, "logits/rejected": -0.9572645425796509, "logps/chosen": -498.649169921875, "logps/rejected": -937.0203247070312, "loss": 0.088, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.25593119859695435, "rewards/margins": 0.42086881399154663, "rewards/rejected": -0.676800012588501, "step": 6730 }, { "epoch": 0.9, "learning_rate": 1.5477346284948292e-07, "logits/chosen": -1.6087108850479126, "logits/rejected": -0.998091995716095, "logps/chosen": -511.945068359375, "logps/rejected": -836.44775390625, "loss": 0.1134, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18577376008033752, "rewards/margins": 0.39662545919418335, "rewards/rejected": -0.5823992490768433, "step": 6740 }, { "epoch": 0.9, "learning_rate": 1.507684480352292e-07, "logits/chosen": -1.4504854679107666, "logits/rejected": -0.9381822347640991, "logps/chosen": -489.9617614746094, "logps/rejected": -790.9918212890625, "loss": 0.1524, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2527294158935547, "rewards/margins": 0.33207181096076965, "rewards/rejected": -0.584801197052002, "step": 6750 }, { "epoch": 0.9, "learning_rate": 1.4681432143872133e-07, "logits/chosen": -1.4582185745239258, "logits/rejected": -1.1294184923171997, "logps/chosen": -485.6172790527344, "logps/rejected": -890.8049926757812, "loss": 0.1488, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25202372670173645, "rewards/margins": 0.3751962184906006, "rewards/rejected": -0.6272198557853699, "step": 6760 }, { "epoch": 0.9, "learning_rate": 1.4291116871284205e-07, "logits/chosen": -1.5899076461791992, "logits/rejected": -0.7880529761314392, "logps/chosen": -546.0626831054688, "logps/rejected": -846.9996337890625, "loss": 0.1414, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25682902336120605, "rewards/margins": 0.38425880670547485, "rewards/rejected": -0.6410877704620361, "step": 6770 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.5304118394851685, "logits/rejected": -1.0145976543426514, "logps/chosen": -461.466552734375, "logps/rejected": -848.7557373046875, "loss": 0.1288, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18898475170135498, "rewards/margins": 0.4102934002876282, "rewards/rejected": -0.5992781519889832, "step": 6780 }, { "epoch": 0.91, "learning_rate": 1.352581219617824e-07, "logits/chosen": -1.5305770635604858, "logits/rejected": -1.2398301362991333, "logps/chosen": -483.97308349609375, "logps/rejected": -866.2571411132812, "loss": 0.1351, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23582473397254944, "rewards/margins": 0.36082929372787476, "rewards/rejected": -0.5966540575027466, "step": 6790 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.4465588331222534, "logits/rejected": -0.9440323114395142, "logps/chosen": -428.13958740234375, "logps/rejected": -710.9305419921875, "loss": 0.1242, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20407938957214355, "rewards/margins": 0.34757596254348755, "rewards/rejected": -0.5516553521156311, "step": 6800 }, { "epoch": 0.91, "learning_rate": 1.278099708887587e-07, "logits/chosen": -1.4667365550994873, "logits/rejected": -1.0703423023223877, "logps/chosen": -468.34747314453125, "logps/rejected": -818.04052734375, "loss": 0.1332, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20959529280662537, "rewards/margins": 0.36165666580200195, "rewards/rejected": -0.5712519884109497, "step": 6810 }, { "epoch": 0.91, "learning_rate": 1.241629335994471e-07, "logits/chosen": -1.7341434955596924, "logits/rejected": -1.1056764125823975, "logps/chosen": -533.9725341796875, "logps/rejected": -906.0426025390625, "loss": 0.1348, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25106552243232727, "rewards/margins": 0.40419626235961914, "rewards/rejected": -0.655261754989624, "step": 6820 }, { "epoch": 0.91, "learning_rate": 1.2056736084706588e-07, "logits/chosen": -1.7266349792480469, "logits/rejected": -0.8465207815170288, "logps/chosen": -586.2349853515625, "logps/rejected": -901.1331176757812, "loss": 0.0947, "rewards/accuracies": 0.875, "rewards/chosen": -0.1905709058046341, "rewards/margins": 0.43153637647628784, "rewards/rejected": -0.6221072673797607, "step": 6830 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.5130211114883423, "logits/rejected": -0.8182242512702942, "logps/chosen": -551.0518798828125, "logps/rejected": -881.6070556640625, "loss": 0.1177, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23995597660541534, "rewards/margins": 0.41696739196777344, "rewards/rejected": -0.6569232940673828, "step": 6840 }, { "epoch": 0.91, "learning_rate": 1.1353091938067024e-07, "logits/chosen": -1.4435842037200928, "logits/rejected": -0.8062426447868347, "logps/chosen": -483.5755920410156, "logps/rejected": -886.0081176757812, "loss": 0.115, "rewards/accuracies": 0.875, "rewards/chosen": -0.21369609236717224, "rewards/margins": 0.4300483763217926, "rewards/rejected": -0.6437444090843201, "step": 6850 }, { "epoch": 0.91, "learning_rate": 1.1009020308754587e-07, "logits/chosen": -1.1936941146850586, "logits/rejected": -0.9400952458381653, "logps/chosen": -454.123046875, "logps/rejected": -839.8204345703125, "loss": 0.1241, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23472794890403748, "rewards/margins": 0.3685305714607239, "rewards/rejected": -0.603258490562439, "step": 6860 }, { "epoch": 0.92, "learning_rate": 1.067012561698319e-07, "logits/chosen": -1.328115701675415, "logits/rejected": -1.104390025138855, "logps/chosen": -499.9703063964844, "logps/rejected": -967.9503784179688, "loss": 0.1244, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2539631724357605, "rewards/margins": 0.43037325143814087, "rewards/rejected": -0.6843363642692566, "step": 6870 }, { "epoch": 0.92, "learning_rate": 1.0336415203768962e-07, "logits/chosen": -1.4963847398757935, "logits/rejected": -0.7440763711929321, "logps/chosen": -537.2203979492188, "logps/rejected": -843.69677734375, "loss": 0.095, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26004061102867126, "rewards/margins": 0.40331918001174927, "rewards/rejected": -0.6633597612380981, "step": 6880 }, { "epoch": 0.92, "learning_rate": 1.0007896297828113e-07, "logits/chosen": -1.5366075038909912, "logits/rejected": -0.9819987416267395, "logps/chosen": -506.972412109375, "logps/rejected": -927.7294921875, "loss": 0.1008, "rewards/accuracies": 0.875, "rewards/chosen": -0.19697031378746033, "rewards/margins": 0.4403607249259949, "rewards/rejected": -0.6373311281204224, "step": 6890 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.3810759782791138, "logits/rejected": -0.8703498840332031, "logps/chosen": -416.44244384765625, "logps/rejected": -860.0406494140625, "loss": 0.1008, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1907399594783783, "rewards/margins": 0.4118443429470062, "rewards/rejected": -0.6025842428207397, "step": 6900 }, { "epoch": 0.92, "learning_rate": 9.36646136019434e-08, "logits/chosen": -1.2979885339736938, "logits/rejected": -0.9057960510253906, "logps/chosen": -505.91644287109375, "logps/rejected": -838.0546875, "loss": 0.133, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.23805785179138184, "rewards/margins": 0.3697962164878845, "rewards/rejected": -0.6078540086746216, "step": 6910 }, { "epoch": 0.92, "learning_rate": 9.053559223036746e-08, "logits/chosen": -1.4613134860992432, "logits/rejected": -0.976353645324707, "logps/chosen": -489.4410095214844, "logps/rejected": -829.4075317382812, "loss": 0.1649, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24528256058692932, "rewards/margins": 0.33869272470474243, "rewards/rejected": -0.5839753746986389, "step": 6920 }, { "epoch": 0.92, "learning_rate": 8.745876381922147e-08, "logits/chosen": -1.5626704692840576, "logits/rejected": -0.9608923196792603, "logps/chosen": -494.12286376953125, "logps/rejected": -1001.3294677734375, "loss": 0.0784, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2240646630525589, "rewards/margins": 0.47084444761276245, "rewards/rejected": -0.6949091553688049, "step": 6930 }, { "epoch": 0.93, "learning_rate": 8.44341950176683e-08, "logits/chosen": -1.4550175666809082, "logits/rejected": -1.1943072080612183, "logps/chosen": -452.69049072265625, "logps/rejected": -897.54150390625, "loss": 0.1415, "rewards/accuracies": 0.875, "rewards/chosen": -0.23807552456855774, "rewards/margins": 0.3828004002571106, "rewards/rejected": -0.620875895023346, "step": 6940 }, { "epoch": 0.93, "learning_rate": 8.146195134284052e-08, "logits/chosen": -1.3164992332458496, "logits/rejected": -0.8121700286865234, "logps/chosen": -574.8497314453125, "logps/rejected": -928.1170654296875, "loss": 0.1265, "rewards/accuracies": 0.875, "rewards/chosen": -0.2704790532588959, "rewards/margins": 0.3813510537147522, "rewards/rejected": -0.6518300771713257, "step": 6950 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.461186408996582, "logits/rejected": -1.1725343465805054, "logps/chosen": -477.5694885253906, "logps/rejected": -870.6781005859375, "loss": 0.1222, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24190500378608704, "rewards/margins": 0.35992521047592163, "rewards/rejected": -0.601830244064331, "step": 6960 }, { "epoch": 0.93, "learning_rate": 7.567469577325598e-08, "logits/chosen": -1.387152910232544, "logits/rejected": -0.9511371850967407, "logps/chosen": -476.19635009765625, "logps/rejected": -967.412109375, "loss": 0.095, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20667386054992676, "rewards/margins": 0.4530416429042816, "rewards/rejected": -0.659715473651886, "step": 6970 }, { "epoch": 0.93, "learning_rate": 7.285980923996989e-08, "logits/chosen": -1.49413001537323, "logits/rejected": -0.7625577449798584, "logps/chosen": -521.863037109375, "logps/rejected": -931.6103515625, "loss": 0.0948, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22189244627952576, "rewards/margins": 0.451913982629776, "rewards/rejected": -0.6738064885139465, "step": 6980 }, { "epoch": 0.93, "learning_rate": 7.009749855363457e-08, "logits/chosen": -1.3591458797454834, "logits/rejected": -0.9288158416748047, "logps/chosen": -439.74993896484375, "logps/rejected": -842.5948486328125, "loss": 0.1019, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21595802903175354, "rewards/margins": 0.42482906579971313, "rewards/rejected": -0.6407870650291443, "step": 6990 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.5925794839859009, "logits/rejected": -0.8554970026016235, "logps/chosen": -559.6893310546875, "logps/rejected": -811.5947265625, "loss": 0.1832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2499537169933319, "rewards/margins": 0.3769868314266205, "rewards/rejected": -0.6269404888153076, "step": 7000 }, { "epoch": 0.93, "learning_rate": 6.47308429264032e-08, "logits/chosen": -1.5308606624603271, "logits/rejected": -1.0868406295776367, "logps/chosen": -365.6773681640625, "logps/rejected": -676.6890258789062, "loss": 0.1413, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.15961240231990814, "rewards/margins": 0.33673742413520813, "rewards/rejected": -0.4963498115539551, "step": 7010 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.453504204750061, "logits/rejected": -1.0217665433883667, "logps/chosen": -449.70880126953125, "logps/rejected": -719.7049560546875, "loss": 0.1692, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.19372782111167908, "rewards/margins": 0.3215334117412567, "rewards/rejected": -0.5152612924575806, "step": 7020 }, { "epoch": 0.94, "learning_rate": 5.957519389138106e-08, "logits/chosen": -1.5276187658309937, "logits/rejected": -0.8919947743415833, "logps/chosen": -510.0636291503906, "logps/rejected": -823.1393432617188, "loss": 0.1469, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2167677879333496, "rewards/margins": 0.38943716883659363, "rewards/rejected": -0.6062048673629761, "step": 7030 }, { "epoch": 0.94, "learning_rate": 5.707663716023021e-08, "logits/chosen": -1.3801076412200928, "logits/rejected": -0.7963376045227051, "logps/chosen": -459.43792724609375, "logps/rejected": -859.9181518554688, "loss": 0.0769, "rewards/accuracies": 0.875, "rewards/chosen": -0.19771870970726013, "rewards/margins": 0.44179147481918335, "rewards/rejected": -0.6395102739334106, "step": 7040 }, { "epoch": 0.94, "learning_rate": 5.463099816548578e-08, "logits/chosen": -1.586732268333435, "logits/rejected": -1.0025181770324707, "logps/chosen": -429.3204650878906, "logps/rejected": -764.2335205078125, "loss": 0.1221, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21364133059978485, "rewards/margins": 0.3726021349430084, "rewards/rejected": -0.5862435102462769, "step": 7050 }, { "epoch": 0.94, "learning_rate": 5.22383298837098e-08, "logits/chosen": -1.5779712200164795, "logits/rejected": -1.0023791790008545, "logps/chosen": -552.7449340820312, "logps/rejected": -950.6932373046875, "loss": 0.1082, "rewards/accuracies": 0.875, "rewards/chosen": -0.22625701129436493, "rewards/margins": 0.39922064542770386, "rewards/rejected": -0.62547767162323, "step": 7060 }, { "epoch": 0.94, "learning_rate": 4.989868414403048e-08, "logits/chosen": -1.1448707580566406, "logits/rejected": -0.7802811861038208, "logps/chosen": -524.0235595703125, "logps/rejected": -941.0114135742188, "loss": 0.1014, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24730829894542694, "rewards/margins": 0.39939436316490173, "rewards/rejected": -0.6467026472091675, "step": 7070 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.6764507293701172, "logits/rejected": -0.8461052179336548, "logps/chosen": -512.4414672851562, "logps/rejected": -812.2349853515625, "loss": 0.1489, "rewards/accuracies": 0.875, "rewards/chosen": -0.23166947066783905, "rewards/margins": 0.38821929693222046, "rewards/rejected": -0.6198887825012207, "step": 7080 }, { "epoch": 0.95, "learning_rate": 4.537866186360207e-08, "logits/chosen": -1.4507992267608643, "logits/rejected": -1.1204394102096558, "logps/chosen": -492.83172607421875, "logps/rejected": -920.1160278320312, "loss": 0.1301, "rewards/accuracies": 0.875, "rewards/chosen": -0.22511033713817596, "rewards/margins": 0.38674965500831604, "rewards/rejected": -0.6118600368499756, "step": 7090 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.390995740890503, "logits/rejected": -0.9562789797782898, "logps/chosen": -543.2960205078125, "logps/rejected": -980.0902099609375, "loss": 0.1237, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.29335081577301025, "rewards/margins": 0.4251405596733093, "rewards/rejected": -0.7184914350509644, "step": 7100 }, { "epoch": 0.95, "learning_rate": 4.1071322966535487e-08, "logits/chosen": -1.4655487537384033, "logits/rejected": -1.0551692247390747, "logps/chosen": -470.721923828125, "logps/rejected": -880.9104614257812, "loss": 0.1372, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20322676002979279, "rewards/margins": 0.4107723832130432, "rewards/rejected": -0.6139991283416748, "step": 7110 }, { "epoch": 0.95, "learning_rate": 3.8997527136930004e-08, "logits/chosen": -1.5086078643798828, "logits/rejected": -0.8975256085395813, "logps/chosen": -532.8182373046875, "logps/rejected": -971.3642578125, "loss": 0.0899, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2515001893043518, "rewards/margins": 0.44144564867019653, "rewards/rejected": -0.6929458379745483, "step": 7120 }, { "epoch": 0.95, "learning_rate": 3.6977040666977546e-08, "logits/chosen": -1.493403673171997, "logits/rejected": -1.049023985862732, "logps/chosen": -445.50396728515625, "logps/rejected": -776.2445068359375, "loss": 0.1427, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17905649542808533, "rewards/margins": 0.3584365248680115, "rewards/rejected": -0.5374930500984192, "step": 7130 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.2035696506500244, "logits/rejected": -0.8543485403060913, "logps/chosen": -412.0126953125, "logps/rejected": -854.7228393554688, "loss": 0.1275, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21920505166053772, "rewards/margins": 0.3983645737171173, "rewards/rejected": -0.617569625377655, "step": 7140 }, { "epoch": 0.95, "learning_rate": 3.309616971855195e-08, "logits/chosen": -1.5766270160675049, "logits/rejected": -0.9550241231918335, "logps/chosen": -462.1172790527344, "logps/rejected": -746.0142822265625, "loss": 0.1635, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21523483097553253, "rewards/margins": 0.35420528054237366, "rewards/rejected": -0.5694400668144226, "step": 7150 }, { "epoch": 0.95, "learning_rate": 3.1235869306123766e-08, "logits/chosen": -1.4890674352645874, "logits/rejected": -0.964741051197052, "logps/chosen": -499.34759521484375, "logps/rejected": -769.0845947265625, "loss": 0.1513, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20712587237358093, "rewards/margins": 0.35121747851371765, "rewards/rejected": -0.5583433508872986, "step": 7160 }, { "epoch": 0.96, "learning_rate": 2.9429046383618042e-08, "logits/chosen": -1.4300627708435059, "logits/rejected": -0.9817788004875183, "logps/chosen": -498.69268798828125, "logps/rejected": -900.4196166992188, "loss": 0.1174, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.256017804145813, "rewards/margins": 0.41507309675216675, "rewards/rejected": -0.6710909605026245, "step": 7170 }, { "epoch": 0.96, "learning_rate": 2.767574008979007e-08, "logits/chosen": -1.3318694829940796, "logits/rejected": -0.6323962211608887, "logps/chosen": -522.8182373046875, "logps/rejected": -926.5597534179688, "loss": 0.0815, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21063697338104248, "rewards/margins": 0.46514520049095154, "rewards/rejected": -0.6757822036743164, "step": 7180 }, { "epoch": 0.96, "learning_rate": 2.59759884041369e-08, "logits/chosen": -1.3710944652557373, "logits/rejected": -0.9549352526664734, "logps/chosen": -547.1004028320312, "logps/rejected": -943.3342895507812, "loss": 0.1506, "rewards/accuracies": 0.875, "rewards/chosen": -0.2388937771320343, "rewards/margins": 0.4177270531654358, "rewards/rejected": -0.6566208600997925, "step": 7190 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.5238111019134521, "logits/rejected": -0.9249058961868286, "logps/chosen": -562.8206787109375, "logps/rejected": -878.2859497070312, "loss": 0.0809, "rewards/accuracies": 0.875, "rewards/chosen": -0.25880998373031616, "rewards/margins": 0.38135746121406555, "rewards/rejected": -0.6401674151420593, "step": 7200 }, { "epoch": 0.96, "learning_rate": 2.2737294974140013e-08, "logits/chosen": -1.4199336767196655, "logits/rejected": -0.930211067199707, "logps/chosen": -514.1971435546875, "logps/rejected": -947.0245361328125, "loss": 0.0964, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24326589703559875, "rewards/margins": 0.4398914873600006, "rewards/rejected": -0.6831573843955994, "step": 7210 }, { "epoch": 0.96, "learning_rate": 2.1198423385220822e-08, "logits/chosen": -1.3293390274047852, "logits/rejected": -1.005180835723877, "logps/chosen": -457.1399841308594, "logps/rejected": -823.82470703125, "loss": 0.1442, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.26542821526527405, "rewards/margins": 0.3484458923339844, "rewards/rejected": -0.613874077796936, "step": 7220 }, { "epoch": 0.96, "learning_rate": 1.9713246713805588e-08, "logits/chosen": -1.344305396080017, "logits/rejected": -0.8286038637161255, "logps/chosen": -508.82000732421875, "logps/rejected": -935.9703979492188, "loss": 0.113, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26535990834236145, "rewards/margins": 0.40891551971435547, "rewards/rejected": -0.6742754578590393, "step": 7230 }, { "epoch": 0.97, "learning_rate": 1.82817971312621e-08, "logits/chosen": -1.4602086544036865, "logits/rejected": -1.0767791271209717, "logps/chosen": -545.56787109375, "logps/rejected": -914.0446166992188, "loss": 0.139, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23958563804626465, "rewards/margins": 0.3950881361961365, "rewards/rejected": -0.6346737146377563, "step": 7240 }, { "epoch": 0.97, "learning_rate": 1.6904105645142443e-08, "logits/chosen": -1.5792304277420044, "logits/rejected": -0.8432053327560425, "logps/chosen": -544.8038330078125, "logps/rejected": -911.0470581054688, "loss": 0.104, "rewards/accuracies": 0.875, "rewards/chosen": -0.21954479813575745, "rewards/margins": 0.4037550985813141, "rewards/rejected": -0.6232999563217163, "step": 7250 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.454495906829834, "logits/rejected": -1.0603179931640625, "logps/chosen": -511.02783203125, "logps/rejected": -814.5255126953125, "loss": 0.1394, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2312597781419754, "rewards/margins": 0.3558243215084076, "rewards/rejected": -0.587084174156189, "step": 7260 }, { "epoch": 0.97, "learning_rate": 1.4310115169289263e-08, "logits/chosen": -1.4957376718521118, "logits/rejected": -0.9885585904121399, "logps/chosen": -570.3341064453125, "logps/rejected": -943.06005859375, "loss": 0.1166, "rewards/accuracies": 0.875, "rewards/chosen": -0.24677403271198273, "rewards/margins": 0.3916296362876892, "rewards/rejected": -0.6384036540985107, "step": 7270 }, { "epoch": 0.97, "learning_rate": 1.3093872369654148e-08, "logits/chosen": -1.4875476360321045, "logits/rejected": -0.9839875102043152, "logps/chosen": -489.46600341796875, "logps/rejected": -848.6428833007812, "loss": 0.1694, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23207266628742218, "rewards/margins": 0.35699766874313354, "rewards/rejected": -0.5890703201293945, "step": 7280 }, { "epoch": 0.97, "learning_rate": 1.193150004542204e-08, "logits/chosen": -1.609070062637329, "logits/rejected": -0.6910431385040283, "logps/chosen": -544.7098999023438, "logps/rejected": -837.7176513671875, "loss": 0.1248, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18862029910087585, "rewards/margins": 0.427751362323761, "rewards/rejected": -0.6163716316223145, "step": 7290 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.5001671314239502, "logits/rejected": -1.0567773580551147, "logps/chosen": -374.1795349121094, "logps/rejected": -784.8349609375, "loss": 0.1002, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16520938277244568, "rewards/margins": 0.4134485125541687, "rewards/rejected": -0.5786579847335815, "step": 7300 }, { "epoch": 0.97, "learning_rate": 9.76846637128187e-09, "logits/chosen": -1.4122244119644165, "logits/rejected": -1.0274155139923096, "logps/chosen": -451.5098571777344, "logps/rejected": -796.8303833007812, "loss": 0.1209, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2220574915409088, "rewards/margins": 0.363582968711853, "rewards/rejected": -0.5856404304504395, "step": 7310 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.361480951309204, "logits/rejected": -1.0346735715866089, "logps/chosen": -491.0078125, "logps/rejected": -834.4171752929688, "loss": 0.1703, "rewards/accuracies": 0.75, "rewards/chosen": -0.2649536728858948, "rewards/margins": 0.34156960248947144, "rewards/rejected": -0.6065232753753662, "step": 7320 }, { "epoch": 0.98, "learning_rate": 7.821201565316184e-09, "logits/chosen": -1.4470902681350708, "logits/rejected": -0.6657289862632751, "logps/chosen": -558.2814331054688, "logps/rejected": -830.0985107421875, "loss": 0.1877, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23699286580085754, "rewards/margins": 0.38297975063323975, "rewards/rejected": -0.6199725866317749, "step": 7330 }, { "epoch": 0.98, "learning_rate": 6.9285359445145366e-09, "logits/chosen": -1.2579832077026367, "logits/rejected": -0.8175121545791626, "logps/chosen": -511.2633361816406, "logps/rejected": -880.8670654296875, "loss": 0.0764, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23451602458953857, "rewards/margins": 0.411950021982193, "rewards/rejected": -0.6464659571647644, "step": 7340 }, { "epoch": 0.98, "learning_rate": 6.089874350439507e-09, "logits/chosen": -1.3694077730178833, "logits/rejected": -0.9959748983383179, "logps/chosen": -499.08477783203125, "logps/rejected": -932.9157104492188, "loss": 0.1181, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22797970473766327, "rewards/margins": 0.44914698600769043, "rewards/rejected": -0.6771267056465149, "step": 7350 }, { "epoch": 0.98, "learning_rate": 5.305234949880001e-09, "logits/chosen": -1.6984233856201172, "logits/rejected": -1.1672899723052979, "logps/chosen": -479.700927734375, "logps/rejected": -907.1622924804688, "loss": 0.1115, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20574238896369934, "rewards/margins": 0.4537379741668701, "rewards/rejected": -0.6594803333282471, "step": 7360 }, { "epoch": 0.98, "learning_rate": 4.57463473941544e-09, "logits/chosen": -1.5077688694000244, "logits/rejected": -0.8707934617996216, "logps/chosen": -503.54412841796875, "logps/rejected": -910.8088989257812, "loss": 0.1117, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2274107038974762, "rewards/margins": 0.4351657032966614, "rewards/rejected": -0.6625763773918152, "step": 7370 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.5453494787216187, "logits/rejected": -0.8584077954292297, "logps/chosen": -512.6802368164062, "logps/rejected": -945.4680786132812, "loss": 0.1014, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.20711109042167664, "rewards/margins": 0.4724665582180023, "rewards/rejected": -0.6795775294303894, "step": 7380 }, { "epoch": 0.99, "learning_rate": 3.275614021857609e-09, "logits/chosen": -1.203957200050354, "logits/rejected": -0.789203405380249, "logps/chosen": -470.5957946777344, "logps/rejected": -899.1009521484375, "loss": 0.1503, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24380306899547577, "rewards/margins": 0.42183107137680054, "rewards/rejected": -0.6656340956687927, "step": 7390 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.386768102645874, "logits/rejected": -0.9607473611831665, "logps/chosen": -449.33380126953125, "logps/rejected": -835.1878662109375, "loss": 0.114, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1930471509695053, "rewards/margins": 0.3860008716583252, "rewards/rejected": -0.5790480375289917, "step": 7400 }, { "epoch": 0.99, "learning_rate": 2.192924752854042e-09, "logits/chosen": -1.329189658164978, "logits/rejected": -0.7284063100814819, "logps/chosen": -495.65350341796875, "logps/rejected": -949.3099365234375, "loss": 0.1467, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.2535052001476288, "rewards/margins": 0.43782129883766174, "rewards/rejected": -0.6913265585899353, "step": 7410 }, { "epoch": 0.99, "learning_rate": 1.7327344598702667e-09, "logits/chosen": -1.5910580158233643, "logits/rejected": -0.756806492805481, "logps/chosen": -580.9312744140625, "logps/rejected": -942.6422119140625, "loss": 0.0622, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.2450539618730545, "rewards/margins": 0.4502308964729309, "rewards/rejected": -0.6952848434448242, "step": 7420 }, { "epoch": 0.99, "learning_rate": 1.3266607432155243e-09, "logits/chosen": -1.3756208419799805, "logits/rejected": -0.8615263104438782, "logps/chosen": -461.8587341308594, "logps/rejected": -778.1923828125, "loss": 0.1286, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21157710254192352, "rewards/margins": 0.33319228887557983, "rewards/rejected": -0.5447694063186646, "step": 7430 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.3460959196090698, "logits/rejected": -0.6247268915176392, "logps/chosen": -542.8588256835938, "logps/rejected": -933.49609375, "loss": 0.0912, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.24301931262016296, "rewards/margins": 0.43395286798477173, "rewards/rejected": -0.6769722104072571, "step": 7440 }, { "epoch": 0.99, "learning_rate": 6.768970513457151e-10, "logits/chosen": -1.4757121801376343, "logits/rejected": -1.0189615488052368, "logps/chosen": -477.2865295410156, "logps/rejected": -878.6105346679688, "loss": 0.1054, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20012429356575012, "rewards/margins": 0.4070053696632385, "rewards/rejected": -0.607129693031311, "step": 7450 }, { "epoch": 0.99, "learning_rate": 4.332211510807427e-10, "logits/chosen": -1.508866548538208, "logits/rejected": -1.2509024143218994, "logps/chosen": -491.2704162597656, "logps/rejected": -841.8795776367188, "loss": 0.1706, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2753935754299164, "rewards/margins": 0.3065844178199768, "rewards/rejected": -0.5819779634475708, "step": 7460 }, { "epoch": 1.0, "learning_rate": 2.43689976739403e-10, "logits/chosen": -1.3344948291778564, "logits/rejected": -1.0201300382614136, "logps/chosen": -427.71990966796875, "logps/rejected": -858.7521362304688, "loss": 0.1427, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1936722844839096, "rewards/margins": 0.4351615011692047, "rewards/rejected": -0.6288337707519531, "step": 7470 }, { "epoch": 1.0, "learning_rate": 1.0830763387897902e-10, "logits/chosen": -1.5639954805374146, "logits/rejected": -1.0077507495880127, "logps/chosen": -418.7438049316406, "logps/rejected": -676.6524047851562, "loss": 0.1722, "rewards/accuracies": 0.75, "rewards/chosen": -0.1520642638206482, "rewards/margins": 0.3314986228942871, "rewards/rejected": -0.4835628569126129, "step": 7480 }, { "epoch": 1.0, "learning_rate": 2.7077055103075233e-11, "logits/chosen": -1.5404798984527588, "logits/rejected": -1.0517535209655762, "logps/chosen": -540.0816650390625, "logps/rejected": -884.8899536132812, "loss": 0.0985, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2020460069179535, "rewards/margins": 0.3753736615180969, "rewards/rejected": -0.5774196982383728, "step": 7490 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.3304396867752075, "logits/rejected": -0.7842223048210144, "logps/chosen": -452.8385314941406, "logps/rejected": -821.3699340820312, "loss": 0.1491, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18334725499153137, "rewards/margins": 0.4060141444206238, "rewards/rejected": -0.5893615484237671, "step": 7500 }, { "epoch": 1.0, "step": 7500, "total_flos": 0.0, "train_loss": 0.134382330707709, "train_runtime": 31432.8555, "train_samples_per_second": 0.954, "train_steps_per_second": 0.239 } ], "logging_steps": 10, "max_steps": 7500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }