{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9997224149895905, "eval_steps": 500, "global_step": 3602, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.174311926605505e-09, "logits/chosen": -6.206940174102783, "logits/rejected": -6.137328147888184, "logps/chosen": -267.2960205078125, "logps/rejected": -273.2126770019531, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.834862385321101e-08, "logits/chosen": -6.211999893188477, "logits/rejected": -6.160707473754883, "logps/chosen": -244.981201171875, "logps/rejected": -193.9983673095703, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.7522935779816516e-08, "logits/chosen": -6.1195549964904785, "logits/rejected": -6.217103004455566, "logps/chosen": -346.4855041503906, "logps/rejected": -225.93408203125, "loss": 0.6731, "rewards/accuracies": 0.5625, "rewards/chosen": -0.022722095251083374, "rewards/margins": 0.11413305997848511, "rewards/rejected": -0.13685515522956848, "step": 3 }, { "epoch": 0.0, "learning_rate": 3.669724770642202e-08, "logits/chosen": -6.265879154205322, "logits/rejected": -6.269151210784912, "logps/chosen": -286.19482421875, "logps/rejected": -200.93289184570312, "loss": 0.6924, "rewards/accuracies": 0.8125, "rewards/chosen": 0.05868479609489441, "rewards/margins": 0.048142969608306885, "rewards/rejected": 0.010541826486587524, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.587155963302752e-08, "logits/chosen": -6.2576093673706055, "logits/rejected": -6.232880115509033, "logps/chosen": -219.9918975830078, "logps/rejected": -215.57469177246094, "loss": 0.6951, "rewards/accuracies": 0.5, "rewards/chosen": 0.0786994993686676, "rewards/margins": 0.028072327375411987, "rewards/rejected": 0.050627171993255615, "step": 5 }, { "epoch": 0.0, "learning_rate": 5.504587155963303e-08, "logits/chosen": -6.237405300140381, "logits/rejected": -6.275243759155273, "logps/chosen": -353.8137512207031, "logps/rejected": -251.53057861328125, "loss": 0.6909, "rewards/accuracies": 0.5, "rewards/chosen": 0.08734560012817383, "rewards/margins": 0.050387024879455566, "rewards/rejected": 0.03695857524871826, "step": 6 }, { "epoch": 0.0, "learning_rate": 6.422018348623853e-08, "logits/chosen": -6.253804683685303, "logits/rejected": -6.1922760009765625, "logps/chosen": -235.0895538330078, "logps/rejected": -97.94514465332031, "loss": 0.6847, "rewards/accuracies": 0.625, "rewards/chosen": -0.007527381181716919, "rewards/margins": 0.05774387717247009, "rewards/rejected": -0.06527125835418701, "step": 7 }, { "epoch": 0.0, "learning_rate": 7.339449541284404e-08, "logits/chosen": -6.36483097076416, "logits/rejected": -6.1967387199401855, "logps/chosen": -234.02561950683594, "logps/rejected": -133.62872314453125, "loss": 0.7039, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05205333232879639, "rewards/margins": -0.018534064292907715, "rewards/rejected": -0.03351926803588867, "step": 8 }, { "epoch": 0.0, "learning_rate": 8.256880733944954e-08, "logits/chosen": -6.109651565551758, "logits/rejected": -6.173017501831055, "logps/chosen": -210.3278045654297, "logps/rejected": -174.39523315429688, "loss": 0.7046, "rewards/accuracies": 0.4375, "rewards/chosen": 0.0283709317445755, "rewards/margins": 0.08056710660457611, "rewards/rejected": -0.05219617486000061, "step": 9 }, { "epoch": 0.01, "learning_rate": 9.174311926605505e-08, "logits/chosen": -6.232292175292969, "logits/rejected": -6.234875679016113, "logps/chosen": -257.1283874511719, "logps/rejected": -344.83807373046875, "loss": 0.6875, "rewards/accuracies": 0.5, "rewards/chosen": -0.05888053774833679, "rewards/margins": 0.07941308617591858, "rewards/rejected": -0.13829362392425537, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.0091743119266055e-07, "logits/chosen": -6.13609504699707, "logits/rejected": -6.204712390899658, "logps/chosen": -317.9603576660156, "logps/rejected": -219.2523956298828, "loss": 0.7117, "rewards/accuracies": 0.5, "rewards/chosen": 0.01918765902519226, "rewards/margins": 0.042237669229507446, "rewards/rejected": -0.023050010204315186, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.1009174311926606e-07, "logits/chosen": -6.170162200927734, "logits/rejected": -6.206058502197266, "logps/chosen": -338.0101013183594, "logps/rejected": -220.89141845703125, "loss": 0.7069, "rewards/accuracies": 0.625, "rewards/chosen": -0.017822980880737305, "rewards/margins": 0.07381805777549744, "rewards/rejected": -0.09164103865623474, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.1926605504587156e-07, "logits/chosen": -6.15737771987915, "logits/rejected": -6.233816146850586, "logps/chosen": -189.65420532226562, "logps/rejected": -259.0435791015625, "loss": 0.7055, "rewards/accuracies": 0.8125, "rewards/chosen": 0.12606199085712433, "rewards/margins": 0.232444167137146, "rewards/rejected": -0.10638217628002167, "step": 13 }, { "epoch": 0.01, "learning_rate": 1.2844036697247705e-07, "logits/chosen": -6.142392158508301, "logits/rejected": -6.211019992828369, "logps/chosen": -528.602294921875, "logps/rejected": -273.03857421875, "loss": 0.695, "rewards/accuracies": 0.375, "rewards/chosen": -0.13963913917541504, "rewards/margins": -0.1437123417854309, "rewards/rejected": 0.004073202610015869, "step": 14 }, { "epoch": 0.01, "learning_rate": 1.3761467889908257e-07, "logits/chosen": -6.183204174041748, "logits/rejected": -6.2201056480407715, "logps/chosen": -312.4732666015625, "logps/rejected": -186.4169464111328, "loss": 0.7022, "rewards/accuracies": 0.5625, "rewards/chosen": 0.01651066541671753, "rewards/margins": -0.08016908168792725, "rewards/rejected": 0.09667974710464478, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.4678899082568808e-07, "logits/chosen": -6.147592544555664, "logits/rejected": -6.109018802642822, "logps/chosen": -260.3468017578125, "logps/rejected": -240.677734375, "loss": 0.7036, "rewards/accuracies": 0.375, "rewards/chosen": -0.11749359965324402, "rewards/margins": -0.02184876799583435, "rewards/rejected": -0.09564483165740967, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.5596330275229357e-07, "logits/chosen": -6.181396961212158, "logits/rejected": -6.147579669952393, "logps/chosen": -244.85028076171875, "logps/rejected": -155.66162109375, "loss": 0.7576, "rewards/accuracies": 0.3125, "rewards/chosen": -0.17468029260635376, "rewards/margins": -0.2066752016544342, "rewards/rejected": 0.03199491277337074, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.6513761467889909e-07, "logits/chosen": -6.153230667114258, "logits/rejected": -6.090022563934326, "logps/chosen": -257.3760070800781, "logps/rejected": -243.33595275878906, "loss": 0.7151, "rewards/accuracies": 0.4375, "rewards/chosen": -0.06433504819869995, "rewards/margins": -0.10241639614105225, "rewards/rejected": 0.038081347942352295, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.743119266055046e-07, "logits/chosen": -6.196568012237549, "logits/rejected": -6.190598964691162, "logps/chosen": -273.51934814453125, "logps/rejected": -170.24005126953125, "loss": 0.7157, "rewards/accuracies": 0.8125, "rewards/chosen": 0.07715100049972534, "rewards/margins": 0.13636749982833862, "rewards/rejected": -0.05921649932861328, "step": 19 }, { "epoch": 0.01, "learning_rate": 1.834862385321101e-07, "logits/chosen": -6.191539764404297, "logits/rejected": -6.217033386230469, "logps/chosen": -189.45822143554688, "logps/rejected": -254.88525390625, "loss": 0.6814, "rewards/accuracies": 0.5625, "rewards/chosen": 0.018482424318790436, "rewards/margins": 0.14268571138381958, "rewards/rejected": -0.12420329451560974, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.926605504587156e-07, "logits/chosen": -6.1568827629089355, "logits/rejected": -6.2199931144714355, "logps/chosen": -182.23355102539062, "logps/rejected": -225.51657104492188, "loss": 0.7013, "rewards/accuracies": 0.5, "rewards/chosen": -0.03528669476509094, "rewards/margins": 0.016705520451068878, "rewards/rejected": -0.05199221521615982, "step": 21 }, { "epoch": 0.01, "learning_rate": 2.018348623853211e-07, "logits/chosen": -6.080000877380371, "logits/rejected": -6.144230842590332, "logps/chosen": -188.3607635498047, "logps/rejected": -164.9675750732422, "loss": 0.7338, "rewards/accuracies": 0.4375, "rewards/chosen": -0.003188401460647583, "rewards/margins": -0.02416302263736725, "rewards/rejected": 0.020974621176719666, "step": 22 }, { "epoch": 0.01, "learning_rate": 2.110091743119266e-07, "logits/chosen": -6.1930952072143555, "logits/rejected": -6.298839569091797, "logps/chosen": -274.95892333984375, "logps/rejected": -175.87942504882812, "loss": 0.695, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0327204167842865, "rewards/margins": 0.007966246455907822, "rewards/rejected": -0.04068666324019432, "step": 23 }, { "epoch": 0.01, "learning_rate": 2.2018348623853212e-07, "logits/chosen": -6.245134353637695, "logits/rejected": -6.252564907073975, "logps/chosen": -397.4923095703125, "logps/rejected": -222.25393676757812, "loss": 0.6838, "rewards/accuracies": 0.625, "rewards/chosen": 0.11579498648643494, "rewards/margins": 0.08842052519321442, "rewards/rejected": 0.02737446129322052, "step": 24 }, { "epoch": 0.01, "learning_rate": 2.293577981651376e-07, "logits/chosen": -6.254660129547119, "logits/rejected": -6.098719120025635, "logps/chosen": -366.583984375, "logps/rejected": -209.92111206054688, "loss": 0.6853, "rewards/accuracies": 0.6875, "rewards/chosen": 0.1718335747718811, "rewards/margins": 0.21458333730697632, "rewards/rejected": -0.042749762535095215, "step": 25 }, { "epoch": 0.01, "learning_rate": 2.3853211009174313e-07, "logits/chosen": -6.096222400665283, "logits/rejected": -6.104360580444336, "logps/chosen": -473.5664978027344, "logps/rejected": -373.9259948730469, "loss": 0.6995, "rewards/accuracies": 0.4375, "rewards/chosen": 0.02579692006111145, "rewards/margins": -0.032229870557785034, "rewards/rejected": 0.058026790618896484, "step": 26 }, { "epoch": 0.01, "learning_rate": 2.477064220183486e-07, "logits/chosen": -6.08879280090332, "logits/rejected": -6.149423599243164, "logps/chosen": -312.11895751953125, "logps/rejected": -101.11184692382812, "loss": 0.6514, "rewards/accuracies": 0.4375, "rewards/chosen": 0.09139537811279297, "rewards/margins": 0.03305475413799286, "rewards/rejected": 0.05834062397480011, "step": 27 }, { "epoch": 0.02, "learning_rate": 2.568807339449541e-07, "logits/chosen": -6.204244613647461, "logits/rejected": -6.162717819213867, "logps/chosen": -228.04339599609375, "logps/rejected": -202.05142211914062, "loss": 0.6805, "rewards/accuracies": 0.8125, "rewards/chosen": 0.18981951475143433, "rewards/margins": 0.19794437289237976, "rewards/rejected": -0.008124858140945435, "step": 28 }, { "epoch": 0.02, "learning_rate": 2.6605504587155965e-07, "logits/chosen": -6.196178913116455, "logits/rejected": -6.162116050720215, "logps/chosen": -206.059814453125, "logps/rejected": -134.45864868164062, "loss": 0.673, "rewards/accuracies": 0.75, "rewards/chosen": 0.1635746955871582, "rewards/margins": 0.11731240153312683, "rewards/rejected": 0.04626229405403137, "step": 29 }, { "epoch": 0.02, "learning_rate": 2.7522935779816514e-07, "logits/chosen": -6.086681365966797, "logits/rejected": -6.165996551513672, "logps/chosen": -253.5934600830078, "logps/rejected": -184.6022186279297, "loss": 0.7061, "rewards/accuracies": 0.4375, "rewards/chosen": 0.10154229402542114, "rewards/margins": -0.012046396732330322, "rewards/rejected": 0.11358869075775146, "step": 30 }, { "epoch": 0.02, "learning_rate": 2.844036697247706e-07, "logits/chosen": -6.164673805236816, "logits/rejected": -6.270345211029053, "logps/chosen": -261.5312805175781, "logps/rejected": -263.467529296875, "loss": 0.6345, "rewards/accuracies": 0.875, "rewards/chosen": 0.16279053688049316, "rewards/margins": 0.19230884313583374, "rewards/rejected": -0.029518306255340576, "step": 31 }, { "epoch": 0.02, "learning_rate": 2.9357798165137617e-07, "logits/chosen": -6.159537315368652, "logits/rejected": -6.219645023345947, "logps/chosen": -450.3837890625, "logps/rejected": -268.144775390625, "loss": 0.6611, "rewards/accuracies": 0.4375, "rewards/chosen": 0.17729789018630981, "rewards/margins": 0.06313669681549072, "rewards/rejected": 0.11416119337081909, "step": 32 }, { "epoch": 0.02, "learning_rate": 3.0275229357798165e-07, "logits/chosen": -6.173525810241699, "logits/rejected": -6.182959079742432, "logps/chosen": -161.06472778320312, "logps/rejected": -167.816650390625, "loss": 0.6425, "rewards/accuracies": 0.75, "rewards/chosen": 0.20094305276870728, "rewards/margins": 0.09895480424165726, "rewards/rejected": 0.10198824107646942, "step": 33 }, { "epoch": 0.02, "learning_rate": 3.1192660550458714e-07, "logits/chosen": -6.200464725494385, "logits/rejected": -6.163137435913086, "logps/chosen": -237.78085327148438, "logps/rejected": -241.73062133789062, "loss": 0.6386, "rewards/accuracies": 0.8125, "rewards/chosen": 0.22058087587356567, "rewards/margins": 0.27709323167800903, "rewards/rejected": -0.05651235580444336, "step": 34 }, { "epoch": 0.02, "learning_rate": 3.211009174311927e-07, "logits/chosen": -6.164303779602051, "logits/rejected": -6.213827610015869, "logps/chosen": -239.3385772705078, "logps/rejected": -179.43345642089844, "loss": 0.6225, "rewards/accuracies": 0.625, "rewards/chosen": 0.2530614137649536, "rewards/margins": 0.06085546314716339, "rewards/rejected": 0.19220596551895142, "step": 35 }, { "epoch": 0.02, "learning_rate": 3.3027522935779817e-07, "logits/chosen": -6.170777320861816, "logits/rejected": -6.190823554992676, "logps/chosen": -399.03448486328125, "logps/rejected": -262.05633544921875, "loss": 0.5893, "rewards/accuracies": 0.6875, "rewards/chosen": 0.301869660615921, "rewards/margins": 0.230087548494339, "rewards/rejected": 0.07178211212158203, "step": 36 }, { "epoch": 0.02, "learning_rate": 3.3944954128440366e-07, "logits/chosen": -6.1732306480407715, "logits/rejected": -6.1658244132995605, "logps/chosen": -219.39491271972656, "logps/rejected": -183.135498046875, "loss": 0.616, "rewards/accuracies": 0.5625, "rewards/chosen": 0.3733223080635071, "rewards/margins": 0.16515251994132996, "rewards/rejected": 0.20816978812217712, "step": 37 }, { "epoch": 0.02, "learning_rate": 3.486238532110092e-07, "logits/chosen": -6.2049407958984375, "logits/rejected": -6.245874404907227, "logps/chosen": -323.1064453125, "logps/rejected": -208.39486694335938, "loss": 0.5983, "rewards/accuracies": 0.6875, "rewards/chosen": 0.5002657175064087, "rewards/margins": 0.26952916383743286, "rewards/rejected": 0.23073658347129822, "step": 38 }, { "epoch": 0.02, "learning_rate": 3.577981651376147e-07, "logits/chosen": -6.218743324279785, "logits/rejected": -6.266625881195068, "logps/chosen": -181.04444885253906, "logps/rejected": -189.85733032226562, "loss": 0.598, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5657440423965454, "rewards/margins": 0.4846094846725464, "rewards/rejected": 0.08113455772399902, "step": 39 }, { "epoch": 0.02, "learning_rate": 3.669724770642202e-07, "logits/chosen": -6.094534873962402, "logits/rejected": -6.101428508758545, "logps/chosen": -418.79180908203125, "logps/rejected": -323.08367919921875, "loss": 0.5914, "rewards/accuracies": 0.875, "rewards/chosen": 0.7040545344352722, "rewards/margins": 0.671910285949707, "rewards/rejected": 0.032144274562597275, "step": 40 }, { "epoch": 0.02, "learning_rate": 3.7614678899082567e-07, "logits/chosen": -6.060713768005371, "logits/rejected": -6.068734169006348, "logps/chosen": -312.0681457519531, "logps/rejected": -340.11163330078125, "loss": 0.5987, "rewards/accuracies": 0.6875, "rewards/chosen": 0.3958855867385864, "rewards/margins": 0.26791703701019287, "rewards/rejected": 0.12796854972839355, "step": 41 }, { "epoch": 0.02, "learning_rate": 3.853211009174312e-07, "logits/chosen": -6.202687740325928, "logits/rejected": -6.13270378112793, "logps/chosen": -222.56341552734375, "logps/rejected": -147.2832794189453, "loss": 0.5756, "rewards/accuracies": 0.625, "rewards/chosen": 0.34510326385498047, "rewards/margins": 0.2814144790172577, "rewards/rejected": 0.06368878483772278, "step": 42 }, { "epoch": 0.02, "learning_rate": 3.944954128440367e-07, "logits/chosen": -6.0988450050354, "logits/rejected": -6.186623573303223, "logps/chosen": -294.94842529296875, "logps/rejected": -205.5319061279297, "loss": 0.5775, "rewards/accuracies": 0.875, "rewards/chosen": 0.7018561363220215, "rewards/margins": 0.31032323837280273, "rewards/rejected": 0.39153289794921875, "step": 43 }, { "epoch": 0.02, "learning_rate": 4.036697247706422e-07, "logits/chosen": -6.127443313598633, "logits/rejected": -6.085208415985107, "logps/chosen": -190.12094116210938, "logps/rejected": -192.70819091796875, "loss": 0.5273, "rewards/accuracies": 0.8125, "rewards/chosen": 0.5166041254997253, "rewards/margins": 0.4215918779373169, "rewards/rejected": 0.09501226246356964, "step": 44 }, { "epoch": 0.02, "learning_rate": 4.1284403669724773e-07, "logits/chosen": -6.12139368057251, "logits/rejected": -6.054557800292969, "logps/chosen": -286.97186279296875, "logps/rejected": -265.6119689941406, "loss": 0.5165, "rewards/accuracies": 0.8125, "rewards/chosen": 0.6833313703536987, "rewards/margins": 0.444656640291214, "rewards/rejected": 0.23867470026016235, "step": 45 }, { "epoch": 0.03, "learning_rate": 4.220183486238532e-07, "logits/chosen": -6.1671905517578125, "logits/rejected": -6.219301223754883, "logps/chosen": -215.88829040527344, "logps/rejected": -216.59185791015625, "loss": 0.5595, "rewards/accuracies": 0.5625, "rewards/chosen": 0.5601650476455688, "rewards/margins": 0.1595948040485382, "rewards/rejected": 0.40057024359703064, "step": 46 }, { "epoch": 0.03, "learning_rate": 4.311926605504587e-07, "logits/chosen": -6.161221981048584, "logits/rejected": -6.110541820526123, "logps/chosen": -209.693359375, "logps/rejected": -106.35030364990234, "loss": 0.5003, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7651668787002563, "rewards/margins": 0.560517430305481, "rewards/rejected": 0.20464947819709778, "step": 47 }, { "epoch": 0.03, "learning_rate": 4.4036697247706425e-07, "logits/chosen": -6.174136161804199, "logits/rejected": -6.138863563537598, "logps/chosen": -270.05108642578125, "logps/rejected": -236.7761993408203, "loss": 0.5171, "rewards/accuracies": 0.875, "rewards/chosen": 0.9570471048355103, "rewards/margins": 0.7408165335655212, "rewards/rejected": 0.21623054146766663, "step": 48 }, { "epoch": 0.03, "learning_rate": 4.4954128440366974e-07, "logits/chosen": -6.130465030670166, "logits/rejected": -6.160401344299316, "logps/chosen": -267.31103515625, "logps/rejected": -273.801513671875, "loss": 0.4698, "rewards/accuracies": 0.6875, "rewards/chosen": 0.7375653386116028, "rewards/margins": 0.5008587837219238, "rewards/rejected": 0.23670655488967896, "step": 49 }, { "epoch": 0.03, "learning_rate": 4.587155963302752e-07, "logits/chosen": -6.248987197875977, "logits/rejected": -6.2000203132629395, "logps/chosen": -199.0467987060547, "logps/rejected": -209.95022583007812, "loss": 0.4653, "rewards/accuracies": 0.8125, "rewards/chosen": 0.6641163229942322, "rewards/margins": 0.44646912813186646, "rewards/rejected": 0.21764719486236572, "step": 50 }, { "epoch": 0.03, "learning_rate": 4.678899082568807e-07, "logits/chosen": -6.169628620147705, "logits/rejected": -6.212778091430664, "logps/chosen": -329.9162902832031, "logps/rejected": -193.08990478515625, "loss": 0.4978, "rewards/accuracies": 0.8125, "rewards/chosen": 1.2529165744781494, "rewards/margins": 1.1146411895751953, "rewards/rejected": 0.13827532529830933, "step": 51 }, { "epoch": 0.03, "learning_rate": 4.770642201834863e-07, "logits/chosen": -6.162743091583252, "logits/rejected": -6.213168144226074, "logps/chosen": -205.49053955078125, "logps/rejected": -217.6488037109375, "loss": 0.4758, "rewards/accuracies": 0.75, "rewards/chosen": 0.7566354274749756, "rewards/margins": 0.5356510281562805, "rewards/rejected": 0.22098439931869507, "step": 52 }, { "epoch": 0.03, "learning_rate": 4.862385321100917e-07, "logits/chosen": -6.168766021728516, "logits/rejected": -6.216578006744385, "logps/chosen": -392.4620666503906, "logps/rejected": -320.812255859375, "loss": 0.4795, "rewards/accuracies": 0.875, "rewards/chosen": 0.5142183303833008, "rewards/margins": 0.3558823764324188, "rewards/rejected": 0.15833592414855957, "step": 53 }, { "epoch": 0.03, "learning_rate": 4.954128440366972e-07, "logits/chosen": -6.139773368835449, "logits/rejected": -6.2056756019592285, "logps/chosen": -240.60455322265625, "logps/rejected": -227.57891845703125, "loss": 0.4782, "rewards/accuracies": 0.6875, "rewards/chosen": 1.1812222003936768, "rewards/margins": 0.7581262588500977, "rewards/rejected": 0.4230959117412567, "step": 54 }, { "epoch": 0.03, "learning_rate": 5.045871559633027e-07, "logits/chosen": -6.120220184326172, "logits/rejected": -6.171637535095215, "logps/chosen": -233.86090087890625, "logps/rejected": -221.7625274658203, "loss": 0.3875, "rewards/accuracies": 0.8125, "rewards/chosen": 0.9950985908508301, "rewards/margins": 0.7755612134933472, "rewards/rejected": 0.2195373773574829, "step": 55 }, { "epoch": 0.03, "learning_rate": 5.137614678899082e-07, "logits/chosen": -6.12500524520874, "logits/rejected": -6.1519293785095215, "logps/chosen": -237.5142059326172, "logps/rejected": -219.84561157226562, "loss": 0.4418, "rewards/accuracies": 0.9375, "rewards/chosen": 1.2456867694854736, "rewards/margins": 1.168938159942627, "rewards/rejected": 0.0767485499382019, "step": 56 }, { "epoch": 0.03, "learning_rate": 5.229357798165137e-07, "logits/chosen": -6.097347736358643, "logits/rejected": -6.145903587341309, "logps/chosen": -500.7098083496094, "logps/rejected": -507.9455871582031, "loss": 0.4394, "rewards/accuracies": 0.75, "rewards/chosen": 0.8705160617828369, "rewards/margins": 0.6006441116333008, "rewards/rejected": 0.26987192034721375, "step": 57 }, { "epoch": 0.03, "learning_rate": 5.321100917431193e-07, "logits/chosen": -6.202371597290039, "logits/rejected": -6.135773658752441, "logps/chosen": -162.91114807128906, "logps/rejected": -162.86024475097656, "loss": 0.3968, "rewards/accuracies": 0.875, "rewards/chosen": 0.8798503279685974, "rewards/margins": 0.9044716358184814, "rewards/rejected": -0.024621322751045227, "step": 58 }, { "epoch": 0.03, "learning_rate": 5.412844036697247e-07, "logits/chosen": -6.093079566955566, "logits/rejected": -6.062050819396973, "logps/chosen": -229.77468872070312, "logps/rejected": -178.21702575683594, "loss": 0.4729, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7370570302009583, "rewards/margins": 0.7789735794067383, "rewards/rejected": -0.041916489601135254, "step": 59 }, { "epoch": 0.03, "learning_rate": 5.504587155963303e-07, "logits/chosen": -6.149627208709717, "logits/rejected": -6.114480972290039, "logps/chosen": -361.0819091796875, "logps/rejected": -334.4779052734375, "loss": 0.4384, "rewards/accuracies": 0.6875, "rewards/chosen": 0.6658995151519775, "rewards/margins": 0.25940561294555664, "rewards/rejected": 0.4064939022064209, "step": 60 }, { "epoch": 0.03, "learning_rate": 5.596330275229358e-07, "logits/chosen": -6.21652889251709, "logits/rejected": -6.224872589111328, "logps/chosen": -280.59954833984375, "logps/rejected": -249.08843994140625, "loss": 0.411, "rewards/accuracies": 0.6875, "rewards/chosen": 1.3622164726257324, "rewards/margins": 1.1579315662384033, "rewards/rejected": 0.2042849063873291, "step": 61 }, { "epoch": 0.03, "learning_rate": 5.688073394495412e-07, "logits/chosen": -6.110018253326416, "logits/rejected": -6.126964092254639, "logps/chosen": -393.55517578125, "logps/rejected": -153.65272521972656, "loss": 0.3865, "rewards/accuracies": 0.9375, "rewards/chosen": 1.772498369216919, "rewards/margins": 1.392824649810791, "rewards/rejected": 0.37967371940612793, "step": 62 }, { "epoch": 0.03, "learning_rate": 5.779816513761467e-07, "logits/chosen": -6.186422824859619, "logits/rejected": -6.1658711433410645, "logps/chosen": -270.2244873046875, "logps/rejected": -208.0586395263672, "loss": 0.3869, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3104474544525146, "rewards/margins": 1.037336826324463, "rewards/rejected": 0.2731105387210846, "step": 63 }, { "epoch": 0.04, "learning_rate": 5.871559633027523e-07, "logits/chosen": -6.145932197570801, "logits/rejected": -6.17667293548584, "logps/chosen": -447.4602355957031, "logps/rejected": -143.1881561279297, "loss": 0.4652, "rewards/accuracies": 0.75, "rewards/chosen": 1.105393648147583, "rewards/margins": 0.8749557137489319, "rewards/rejected": 0.23043793439865112, "step": 64 }, { "epoch": 0.04, "learning_rate": 5.963302752293577e-07, "logits/chosen": -6.172579288482666, "logits/rejected": -6.142296314239502, "logps/chosen": -283.79681396484375, "logps/rejected": -434.04296875, "loss": 0.3985, "rewards/accuracies": 0.8125, "rewards/chosen": 0.9796267151832581, "rewards/margins": 1.193192958831787, "rewards/rejected": -0.21356631815433502, "step": 65 }, { "epoch": 0.04, "learning_rate": 6.055045871559633e-07, "logits/chosen": -6.199929714202881, "logits/rejected": -6.192565441131592, "logps/chosen": -266.1230163574219, "logps/rejected": -151.1613006591797, "loss": 0.3821, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5102323293685913, "rewards/margins": 1.3582210540771484, "rewards/rejected": 0.15201133489608765, "step": 66 }, { "epoch": 0.04, "learning_rate": 6.146788990825688e-07, "logits/chosen": -6.234021186828613, "logits/rejected": -6.176629543304443, "logps/chosen": -309.28326416015625, "logps/rejected": -134.57398986816406, "loss": 0.415, "rewards/accuracies": 0.875, "rewards/chosen": 1.2770280838012695, "rewards/margins": 1.1337616443634033, "rewards/rejected": 0.14326632022857666, "step": 67 }, { "epoch": 0.04, "learning_rate": 6.238532110091743e-07, "logits/chosen": -6.1860761642456055, "logits/rejected": -6.217402935028076, "logps/chosen": -232.1396942138672, "logps/rejected": -204.4400177001953, "loss": 0.3435, "rewards/accuracies": 0.875, "rewards/chosen": 0.869666337966919, "rewards/margins": 1.064968228340149, "rewards/rejected": -0.19530192017555237, "step": 68 }, { "epoch": 0.04, "learning_rate": 6.330275229357798e-07, "logits/chosen": -6.188159942626953, "logits/rejected": -6.172239303588867, "logps/chosen": -322.51153564453125, "logps/rejected": -166.94869995117188, "loss": 0.2668, "rewards/accuracies": 1.0, "rewards/chosen": 1.6830345392227173, "rewards/margins": 2.0026865005493164, "rewards/rejected": -0.3196519613265991, "step": 69 }, { "epoch": 0.04, "learning_rate": 6.422018348623854e-07, "logits/chosen": -5.995734214782715, "logits/rejected": -6.014505386352539, "logps/chosen": -271.61639404296875, "logps/rejected": -189.93734741210938, "loss": 0.3877, "rewards/accuracies": 0.8125, "rewards/chosen": 1.118377685546875, "rewards/margins": 1.008007526397705, "rewards/rejected": 0.11037012934684753, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.513761467889908e-07, "logits/chosen": -6.18956184387207, "logits/rejected": -6.167155742645264, "logps/chosen": -269.1372985839844, "logps/rejected": -198.68502807617188, "loss": 0.3814, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3815149068832397, "rewards/margins": 1.791999340057373, "rewards/rejected": -0.4104844331741333, "step": 71 }, { "epoch": 0.04, "learning_rate": 6.605504587155963e-07, "logits/chosen": -6.105093002319336, "logits/rejected": -6.300891399383545, "logps/chosen": -325.9559326171875, "logps/rejected": -324.5492858886719, "loss": 0.3156, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8985131978988647, "rewards/margins": 1.7073094844818115, "rewards/rejected": 0.19120368361473083, "step": 72 }, { "epoch": 0.04, "learning_rate": 6.697247706422018e-07, "logits/chosen": -6.12565803527832, "logits/rejected": -6.118583679199219, "logps/chosen": -177.5399627685547, "logps/rejected": -138.97665405273438, "loss": 0.3194, "rewards/accuracies": 0.8125, "rewards/chosen": 0.8197789788246155, "rewards/margins": 0.8830575942993164, "rewards/rejected": -0.06327849626541138, "step": 73 }, { "epoch": 0.04, "learning_rate": 6.788990825688073e-07, "logits/chosen": -6.183126449584961, "logits/rejected": -6.119062423706055, "logps/chosen": -301.7933349609375, "logps/rejected": -318.74334716796875, "loss": 0.3189, "rewards/accuracies": 0.875, "rewards/chosen": 1.1220371723175049, "rewards/margins": 1.3251421451568604, "rewards/rejected": -0.20310500264167786, "step": 74 }, { "epoch": 0.04, "learning_rate": 6.880733944954128e-07, "logits/chosen": -6.133808135986328, "logits/rejected": -6.118991374969482, "logps/chosen": -273.4339294433594, "logps/rejected": -126.00532531738281, "loss": 0.3018, "rewards/accuracies": 1.0, "rewards/chosen": 1.858175277709961, "rewards/margins": 1.9248178005218506, "rewards/rejected": -0.06664249300956726, "step": 75 }, { "epoch": 0.04, "learning_rate": 6.972477064220184e-07, "logits/chosen": -6.1503753662109375, "logits/rejected": -6.059566497802734, "logps/chosen": -304.72979736328125, "logps/rejected": -198.39501953125, "loss": 0.2849, "rewards/accuracies": 1.0, "rewards/chosen": 1.8134856224060059, "rewards/margins": 2.2712326049804688, "rewards/rejected": -0.4577469825744629, "step": 76 }, { "epoch": 0.04, "learning_rate": 7.064220183486238e-07, "logits/chosen": -6.034600257873535, "logits/rejected": -6.077031135559082, "logps/chosen": -243.37937927246094, "logps/rejected": -200.09884643554688, "loss": 0.375, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3812549114227295, "rewards/margins": 1.8521149158477783, "rewards/rejected": -0.4708600342273712, "step": 77 }, { "epoch": 0.04, "learning_rate": 7.155963302752294e-07, "logits/chosen": -5.9841437339782715, "logits/rejected": -6.109887599945068, "logps/chosen": -420.886474609375, "logps/rejected": -348.9469299316406, "loss": 0.2788, "rewards/accuracies": 0.9375, "rewards/chosen": 1.713912844657898, "rewards/margins": 1.4629981517791748, "rewards/rejected": 0.25091463327407837, "step": 78 }, { "epoch": 0.04, "learning_rate": 7.247706422018348e-07, "logits/chosen": -6.129507064819336, "logits/rejected": -6.12633752822876, "logps/chosen": -335.6905517578125, "logps/rejected": -235.14068603515625, "loss": 0.2837, "rewards/accuracies": 0.75, "rewards/chosen": 2.209038019180298, "rewards/margins": 1.8815948963165283, "rewards/rejected": 0.32744312286376953, "step": 79 }, { "epoch": 0.04, "learning_rate": 7.339449541284404e-07, "logits/chosen": -6.217733383178711, "logits/rejected": -6.141520977020264, "logps/chosen": -214.01271057128906, "logps/rejected": -154.58279418945312, "loss": 0.3186, "rewards/accuracies": 0.875, "rewards/chosen": 1.0089471340179443, "rewards/margins": 1.730311632156372, "rewards/rejected": -0.7213644981384277, "step": 80 }, { "epoch": 0.04, "learning_rate": 7.431192660550458e-07, "logits/chosen": -6.1389055252075195, "logits/rejected": -6.10025691986084, "logps/chosen": -344.89532470703125, "logps/rejected": -127.14324188232422, "loss": 0.2516, "rewards/accuracies": 1.0, "rewards/chosen": 1.9816224575042725, "rewards/margins": 2.16282057762146, "rewards/rejected": -0.18119823932647705, "step": 81 }, { "epoch": 0.05, "learning_rate": 7.522935779816513e-07, "logits/chosen": -6.230846405029297, "logits/rejected": -6.152212619781494, "logps/chosen": -334.69891357421875, "logps/rejected": -178.78158569335938, "loss": 0.2788, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8969759941101074, "rewards/margins": 2.1305344104766846, "rewards/rejected": -0.23355841636657715, "step": 82 }, { "epoch": 0.05, "learning_rate": 7.614678899082568e-07, "logits/chosen": -6.116316795349121, "logits/rejected": -6.194598197937012, "logps/chosen": -356.2184143066406, "logps/rejected": -281.1240539550781, "loss": 0.265, "rewards/accuracies": 0.875, "rewards/chosen": 2.3488378524780273, "rewards/margins": 2.949784994125366, "rewards/rejected": -0.6009470820426941, "step": 83 }, { "epoch": 0.05, "learning_rate": 7.706422018348624e-07, "logits/chosen": -6.104313850402832, "logits/rejected": -6.071195125579834, "logps/chosen": -182.91665649414062, "logps/rejected": -167.96682739257812, "loss": 0.3342, "rewards/accuracies": 0.875, "rewards/chosen": 1.6408827304840088, "rewards/margins": 2.013704538345337, "rewards/rejected": -0.37282171845436096, "step": 84 }, { "epoch": 0.05, "learning_rate": 7.798165137614678e-07, "logits/chosen": -6.176689147949219, "logits/rejected": -6.122247695922852, "logps/chosen": -263.59515380859375, "logps/rejected": -243.4390869140625, "loss": 0.2876, "rewards/accuracies": 0.75, "rewards/chosen": 1.5727221965789795, "rewards/margins": 1.6388447284698486, "rewards/rejected": -0.06612250208854675, "step": 85 }, { "epoch": 0.05, "learning_rate": 7.889908256880734e-07, "logits/chosen": -6.079311847686768, "logits/rejected": -6.174327373504639, "logps/chosen": -171.39794921875, "logps/rejected": -179.1609344482422, "loss": 0.2828, "rewards/accuracies": 1.0, "rewards/chosen": 1.1077358722686768, "rewards/margins": 2.343574047088623, "rewards/rejected": -1.2358382940292358, "step": 86 }, { "epoch": 0.05, "learning_rate": 7.981651376146789e-07, "logits/chosen": -6.259137153625488, "logits/rejected": -6.286831378936768, "logps/chosen": -257.65045166015625, "logps/rejected": -229.15206909179688, "loss": 0.2464, "rewards/accuracies": 0.75, "rewards/chosen": 1.459415316581726, "rewards/margins": 2.037216901779175, "rewards/rejected": -0.5778014659881592, "step": 87 }, { "epoch": 0.05, "learning_rate": 8.073394495412844e-07, "logits/chosen": -6.1216583251953125, "logits/rejected": -6.199326515197754, "logps/chosen": -247.26226806640625, "logps/rejected": -292.4710693359375, "loss": 0.2668, "rewards/accuracies": 0.875, "rewards/chosen": 1.6664111614227295, "rewards/margins": 2.8628921508789062, "rewards/rejected": -1.1964809894561768, "step": 88 }, { "epoch": 0.05, "learning_rate": 8.165137614678899e-07, "logits/chosen": -6.260870933532715, "logits/rejected": -6.224328994750977, "logps/chosen": -313.1972961425781, "logps/rejected": -189.53370666503906, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": 2.7375636100769043, "rewards/margins": 3.0100595951080322, "rewards/rejected": -0.27249592542648315, "step": 89 }, { "epoch": 0.05, "learning_rate": 8.256880733944955e-07, "logits/chosen": -6.09231424331665, "logits/rejected": -6.187870979309082, "logps/chosen": -298.049560546875, "logps/rejected": -175.69752502441406, "loss": 0.2201, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2083048820495605, "rewards/margins": 3.0511269569396973, "rewards/rejected": -0.8428218364715576, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.348623853211008e-07, "logits/chosen": -6.1971025466918945, "logits/rejected": -6.12807035446167, "logps/chosen": -250.4287109375, "logps/rejected": -159.22264099121094, "loss": 0.202, "rewards/accuracies": 1.0, "rewards/chosen": 1.2702312469482422, "rewards/margins": 1.8582947254180908, "rewards/rejected": -0.5880635976791382, "step": 91 }, { "epoch": 0.05, "learning_rate": 8.440366972477064e-07, "logits/chosen": -6.166654109954834, "logits/rejected": -6.191655158996582, "logps/chosen": -256.359375, "logps/rejected": -291.5062255859375, "loss": 0.2643, "rewards/accuracies": 0.8125, "rewards/chosen": 1.441417932510376, "rewards/margins": 1.9427014589309692, "rewards/rejected": -0.5012834072113037, "step": 92 }, { "epoch": 0.05, "learning_rate": 8.532110091743119e-07, "logits/chosen": -6.112573623657227, "logits/rejected": -6.135139465332031, "logps/chosen": -296.8247985839844, "logps/rejected": -185.64254760742188, "loss": 0.3367, "rewards/accuracies": 0.875, "rewards/chosen": 1.7902770042419434, "rewards/margins": 2.42769718170166, "rewards/rejected": -0.6374202966690063, "step": 93 }, { "epoch": 0.05, "learning_rate": 8.623853211009174e-07, "logits/chosen": -6.13280725479126, "logits/rejected": -6.127173900604248, "logps/chosen": -314.390869140625, "logps/rejected": -103.9262466430664, "loss": 0.2547, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2750089168548584, "rewards/margins": 2.3182995319366455, "rewards/rejected": -0.043290525674819946, "step": 94 }, { "epoch": 0.05, "learning_rate": 8.715596330275229e-07, "logits/chosen": -6.164212226867676, "logits/rejected": -6.157148361206055, "logps/chosen": -270.37176513671875, "logps/rejected": -203.89537048339844, "loss": 0.2321, "rewards/accuracies": 0.9375, "rewards/chosen": 1.713679552078247, "rewards/margins": 3.0097248554229736, "rewards/rejected": -1.2960454225540161, "step": 95 }, { "epoch": 0.05, "learning_rate": 8.807339449541285e-07, "logits/chosen": -6.151309013366699, "logits/rejected": -6.183154582977295, "logps/chosen": -268.44232177734375, "logps/rejected": -308.69482421875, "loss": 0.2891, "rewards/accuracies": 0.875, "rewards/chosen": 1.9726226329803467, "rewards/margins": 3.684645175933838, "rewards/rejected": -1.7120227813720703, "step": 96 }, { "epoch": 0.05, "learning_rate": 8.899082568807339e-07, "logits/chosen": -6.210077285766602, "logits/rejected": -6.105077266693115, "logps/chosen": -275.36004638671875, "logps/rejected": -132.697998046875, "loss": 0.2046, "rewards/accuracies": 1.0, "rewards/chosen": 1.8264905214309692, "rewards/margins": 3.569530963897705, "rewards/rejected": -1.7430402040481567, "step": 97 }, { "epoch": 0.05, "learning_rate": 8.990825688073395e-07, "logits/chosen": -6.102627754211426, "logits/rejected": -6.13408088684082, "logps/chosen": -287.4217529296875, "logps/rejected": -303.62005615234375, "loss": 0.2728, "rewards/accuracies": 0.875, "rewards/chosen": 0.973746120929718, "rewards/margins": 2.253154754638672, "rewards/rejected": -1.2794086933135986, "step": 98 }, { "epoch": 0.05, "learning_rate": 9.082568807339449e-07, "logits/chosen": -5.946325302124023, "logits/rejected": -5.9807024002075195, "logps/chosen": -223.17835998535156, "logps/rejected": -138.9239959716797, "loss": 0.2548, "rewards/accuracies": 0.875, "rewards/chosen": 1.416301965713501, "rewards/margins": 1.5361994504928589, "rewards/rejected": -0.11989742517471313, "step": 99 }, { "epoch": 0.06, "learning_rate": 9.174311926605505e-07, "logits/chosen": -6.274218559265137, "logits/rejected": -6.104443550109863, "logps/chosen": -260.33673095703125, "logps/rejected": -124.57670593261719, "loss": 0.2071, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3870153427124023, "rewards/margins": 3.1770739555358887, "rewards/rejected": -0.7900586128234863, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.266055045871559e-07, "logits/chosen": -6.128286838531494, "logits/rejected": -6.069456100463867, "logps/chosen": -184.06683349609375, "logps/rejected": -177.06375122070312, "loss": 0.2291, "rewards/accuracies": 1.0, "rewards/chosen": 1.3797633647918701, "rewards/margins": 2.8569958209991455, "rewards/rejected": -1.4772324562072754, "step": 101 }, { "epoch": 0.06, "learning_rate": 9.357798165137614e-07, "logits/chosen": -6.164072513580322, "logits/rejected": -6.1948137283325195, "logps/chosen": -212.3609619140625, "logps/rejected": -269.5514831542969, "loss": 0.2427, "rewards/accuracies": 0.9375, "rewards/chosen": 1.2079975605010986, "rewards/margins": 2.3255972862243652, "rewards/rejected": -1.1175994873046875, "step": 102 }, { "epoch": 0.06, "learning_rate": 9.449541284403669e-07, "logits/chosen": -6.074004650115967, "logits/rejected": -6.062209129333496, "logps/chosen": -398.5319519042969, "logps/rejected": -293.7998046875, "loss": 0.2312, "rewards/accuracies": 0.8125, "rewards/chosen": 3.1806564331054688, "rewards/margins": 3.4574077129364014, "rewards/rejected": -0.2767513394355774, "step": 103 }, { "epoch": 0.06, "learning_rate": 9.541284403669725e-07, "logits/chosen": -6.037951946258545, "logits/rejected": -6.130672931671143, "logps/chosen": -573.56640625, "logps/rejected": -388.6517028808594, "loss": 0.2218, "rewards/accuracies": 0.875, "rewards/chosen": 1.4796901941299438, "rewards/margins": 3.029566764831543, "rewards/rejected": -1.5498764514923096, "step": 104 }, { "epoch": 0.06, "learning_rate": 9.63302752293578e-07, "logits/chosen": -6.048287868499756, "logits/rejected": -6.195818901062012, "logps/chosen": -201.0973358154297, "logps/rejected": -265.7939453125, "loss": 0.2231, "rewards/accuracies": 1.0, "rewards/chosen": 1.148888111114502, "rewards/margins": 3.543009042739868, "rewards/rejected": -2.394120693206787, "step": 105 }, { "epoch": 0.06, "learning_rate": 9.724770642201835e-07, "logits/chosen": -6.22913932800293, "logits/rejected": -6.237907886505127, "logps/chosen": -385.3351135253906, "logps/rejected": -213.13690185546875, "loss": 0.2813, "rewards/accuracies": 0.875, "rewards/chosen": 2.1163322925567627, "rewards/margins": 2.693115234375, "rewards/rejected": -0.5767829418182373, "step": 106 }, { "epoch": 0.06, "learning_rate": 9.81651376146789e-07, "logits/chosen": -6.137155532836914, "logits/rejected": -6.037350654602051, "logps/chosen": -264.8150634765625, "logps/rejected": -179.02682495117188, "loss": 0.2583, "rewards/accuracies": 0.875, "rewards/chosen": 1.4391732215881348, "rewards/margins": 2.0789873600006104, "rewards/rejected": -0.6398142576217651, "step": 107 }, { "epoch": 0.06, "learning_rate": 9.908256880733945e-07, "logits/chosen": -6.171786308288574, "logits/rejected": -6.147058486938477, "logps/chosen": -268.55731201171875, "logps/rejected": -184.39479064941406, "loss": 0.2547, "rewards/accuracies": 0.875, "rewards/chosen": 1.371220588684082, "rewards/margins": 2.6072373390197754, "rewards/rejected": -1.2360167503356934, "step": 108 }, { "epoch": 0.06, "learning_rate": 1e-06, "logits/chosen": -6.225935935974121, "logits/rejected": -6.135045528411865, "logps/chosen": -305.5453186035156, "logps/rejected": -148.78330993652344, "loss": 0.1907, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6751317977905273, "rewards/margins": 3.5009636878967285, "rewards/rejected": -0.8258317112922668, "step": 109 }, { "epoch": 0.06, "learning_rate": 9.9999979777141e-07, "logits/chosen": -6.132730484008789, "logits/rejected": -6.1731367111206055, "logps/chosen": -278.70977783203125, "logps/rejected": -219.66949462890625, "loss": 0.186, "rewards/accuracies": 0.875, "rewards/chosen": 2.2620580196380615, "rewards/margins": 3.7326889038085938, "rewards/rejected": -1.4706307649612427, "step": 110 }, { "epoch": 0.06, "learning_rate": 9.99999191085804e-07, "logits/chosen": -6.152575492858887, "logits/rejected": -6.10996675491333, "logps/chosen": -254.57159423828125, "logps/rejected": -203.66664123535156, "loss": 0.2383, "rewards/accuracies": 1.0, "rewards/chosen": 1.5838760137557983, "rewards/margins": 4.1485395431518555, "rewards/rejected": -2.5646636486053467, "step": 111 }, { "epoch": 0.06, "learning_rate": 9.999981799436726e-07, "logits/chosen": -6.236265182495117, "logits/rejected": -6.126051902770996, "logps/chosen": -216.89984130859375, "logps/rejected": -169.70750427246094, "loss": 0.2359, "rewards/accuracies": 1.0, "rewards/chosen": 1.776692271232605, "rewards/margins": 3.649486780166626, "rewards/rejected": -1.8727943897247314, "step": 112 }, { "epoch": 0.06, "learning_rate": 9.99996764345834e-07, "logits/chosen": -6.089020252227783, "logits/rejected": -6.1421284675598145, "logps/chosen": -380.6576843261719, "logps/rejected": -266.2269287109375, "loss": 0.2069, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1802704334259033, "rewards/margins": 3.5947439670562744, "rewards/rejected": -1.4144734144210815, "step": 113 }, { "epoch": 0.06, "learning_rate": 9.99994944293433e-07, "logits/chosen": -6.223931312561035, "logits/rejected": -6.196537017822266, "logps/chosen": -321.066162109375, "logps/rejected": -185.55799865722656, "loss": 0.2251, "rewards/accuracies": 0.875, "rewards/chosen": 1.6625971794128418, "rewards/margins": 3.3955798149108887, "rewards/rejected": -1.7329826354980469, "step": 114 }, { "epoch": 0.06, "learning_rate": 9.99992719787942e-07, "logits/chosen": -6.132981300354004, "logits/rejected": -6.164431095123291, "logps/chosen": -206.73196411132812, "logps/rejected": -223.85403442382812, "loss": 0.1749, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4122207164764404, "rewards/margins": 3.1069748401641846, "rewards/rejected": -1.6947540044784546, "step": 115 }, { "epoch": 0.06, "learning_rate": 9.9999009083116e-07, "logits/chosen": -6.12959623336792, "logits/rejected": -6.121712684631348, "logps/chosen": -225.5880889892578, "logps/rejected": -220.6191864013672, "loss": 0.1937, "rewards/accuracies": 0.8125, "rewards/chosen": 1.987911581993103, "rewards/margins": 3.0439116954803467, "rewards/rejected": -1.0560001134872437, "step": 116 }, { "epoch": 0.06, "learning_rate": 9.999870574252143e-07, "logits/chosen": -6.058114051818848, "logits/rejected": -6.117788314819336, "logps/chosen": -526.836669921875, "logps/rejected": -392.58740234375, "loss": 0.2215, "rewards/accuracies": 1.0, "rewards/chosen": 2.7886171340942383, "rewards/margins": 4.528153419494629, "rewards/rejected": -1.7395362854003906, "step": 117 }, { "epoch": 0.07, "learning_rate": 9.999836195725582e-07, "logits/chosen": -6.191082000732422, "logits/rejected": -6.14277982711792, "logps/chosen": -291.4619140625, "logps/rejected": -179.90122985839844, "loss": 0.1922, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1934964656829834, "rewards/margins": 3.5223376750946045, "rewards/rejected": -1.328841209411621, "step": 118 }, { "epoch": 0.07, "learning_rate": 9.999797772759728e-07, "logits/chosen": -6.126948833465576, "logits/rejected": -6.135437965393066, "logps/chosen": -240.429931640625, "logps/rejected": -170.65420532226562, "loss": 0.2354, "rewards/accuracies": 0.875, "rewards/chosen": 0.9017592072486877, "rewards/margins": 1.7949473857879639, "rewards/rejected": -0.8931881189346313, "step": 119 }, { "epoch": 0.07, "learning_rate": 9.999755305385661e-07, "logits/chosen": -6.151350021362305, "logits/rejected": -6.197535037994385, "logps/chosen": -213.3367919921875, "logps/rejected": -163.26638793945312, "loss": 0.1978, "rewards/accuracies": 0.875, "rewards/chosen": 2.150217056274414, "rewards/margins": 3.464996576309204, "rewards/rejected": -1.31477952003479, "step": 120 }, { "epoch": 0.07, "learning_rate": 9.999708793637736e-07, "logits/chosen": -6.028512477874756, "logits/rejected": -6.085759162902832, "logps/chosen": -217.1544189453125, "logps/rejected": -238.0445098876953, "loss": 0.1901, "rewards/accuracies": 0.875, "rewards/chosen": 1.6344265937805176, "rewards/margins": 3.1971867084503174, "rewards/rejected": -1.5627599954605103, "step": 121 }, { "epoch": 0.07, "learning_rate": 9.999658237553573e-07, "logits/chosen": -6.111461639404297, "logits/rejected": -6.0946149826049805, "logps/chosen": -224.8594970703125, "logps/rejected": -140.26431274414062, "loss": 0.2027, "rewards/accuracies": 0.6875, "rewards/chosen": 1.3605592250823975, "rewards/margins": 1.9382851123809814, "rewards/rejected": -0.5777260661125183, "step": 122 }, { "epoch": 0.07, "learning_rate": 9.99960363717407e-07, "logits/chosen": -6.180279731750488, "logits/rejected": -6.18260383605957, "logps/chosen": -370.63238525390625, "logps/rejected": -521.7234497070312, "loss": 0.2364, "rewards/accuracies": 0.8125, "rewards/chosen": 1.7234439849853516, "rewards/margins": 3.403526782989502, "rewards/rejected": -1.6800825595855713, "step": 123 }, { "epoch": 0.07, "learning_rate": 9.999544992543394e-07, "logits/chosen": -6.25581693649292, "logits/rejected": -6.126944541931152, "logps/chosen": -453.99169921875, "logps/rejected": -407.84161376953125, "loss": 0.2012, "rewards/accuracies": 0.875, "rewards/chosen": 1.1569921970367432, "rewards/margins": 3.018618106842041, "rewards/rejected": -1.8616259098052979, "step": 124 }, { "epoch": 0.07, "learning_rate": 9.999482303708984e-07, "logits/chosen": -6.110297679901123, "logits/rejected": -6.119926929473877, "logps/chosen": -520.0106201171875, "logps/rejected": -348.03594970703125, "loss": 0.206, "rewards/accuracies": 0.8125, "rewards/chosen": 2.575652599334717, "rewards/margins": 4.03757905960083, "rewards/rejected": -1.4619264602661133, "step": 125 }, { "epoch": 0.07, "learning_rate": 9.999415570721547e-07, "logits/chosen": -6.185552597045898, "logits/rejected": -6.170355796813965, "logps/chosen": -254.82589721679688, "logps/rejected": -156.73284912109375, "loss": 0.1888, "rewards/accuracies": 0.9375, "rewards/chosen": 1.901466727256775, "rewards/margins": 3.2348170280456543, "rewards/rejected": -1.3333501815795898, "step": 126 }, { "epoch": 0.07, "learning_rate": 9.999344793635067e-07, "logits/chosen": -6.111130714416504, "logits/rejected": -6.097271919250488, "logps/chosen": -210.37246704101562, "logps/rejected": -215.6019287109375, "loss": 0.2364, "rewards/accuracies": 1.0, "rewards/chosen": 1.5690425634384155, "rewards/margins": 3.8764564990997314, "rewards/rejected": -2.3074138164520264, "step": 127 }, { "epoch": 0.07, "learning_rate": 9.999269972506796e-07, "logits/chosen": -6.193935394287109, "logits/rejected": -6.196062088012695, "logps/chosen": -234.24835205078125, "logps/rejected": -142.18356323242188, "loss": 0.1756, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0862383842468262, "rewards/margins": 2.9888315200805664, "rewards/rejected": -1.9025931358337402, "step": 128 }, { "epoch": 0.07, "learning_rate": 9.999191107397258e-07, "logits/chosen": -6.099908351898193, "logits/rejected": -6.244009494781494, "logps/chosen": -340.634521484375, "logps/rejected": -203.32505798339844, "loss": 0.2098, "rewards/accuracies": 1.0, "rewards/chosen": 3.8565237522125244, "rewards/margins": 4.998035907745361, "rewards/rejected": -1.1415122747421265, "step": 129 }, { "epoch": 0.07, "learning_rate": 9.999108198370247e-07, "logits/chosen": -6.142115592956543, "logits/rejected": -6.037004470825195, "logps/chosen": -200.65963745117188, "logps/rejected": -159.7888946533203, "loss": 0.1789, "rewards/accuracies": 0.9375, "rewards/chosen": 1.569343090057373, "rewards/margins": 3.063305139541626, "rewards/rejected": -1.493962049484253, "step": 130 }, { "epoch": 0.07, "learning_rate": 9.999021245492831e-07, "logits/chosen": -6.172358512878418, "logits/rejected": -6.188109397888184, "logps/chosen": -242.70166015625, "logps/rejected": -221.97479248046875, "loss": 0.2084, "rewards/accuracies": 0.875, "rewards/chosen": 1.0373423099517822, "rewards/margins": 4.0058369636535645, "rewards/rejected": -2.968494415283203, "step": 131 }, { "epoch": 0.07, "learning_rate": 9.998930248835346e-07, "logits/chosen": -6.170269012451172, "logits/rejected": -6.17437744140625, "logps/chosen": -308.2850341796875, "logps/rejected": -255.10043334960938, "loss": 0.2006, "rewards/accuracies": 0.9375, "rewards/chosen": 1.759937047958374, "rewards/margins": 3.7782182693481445, "rewards/rejected": -2.0182812213897705, "step": 132 }, { "epoch": 0.07, "learning_rate": 9.998835208471404e-07, "logits/chosen": -6.0804948806762695, "logits/rejected": -6.10755729675293, "logps/chosen": -217.15187072753906, "logps/rejected": -204.25514221191406, "loss": 0.2457, "rewards/accuracies": 0.875, "rewards/chosen": 1.3001518249511719, "rewards/margins": 2.8146445751190186, "rewards/rejected": -1.5144926309585571, "step": 133 }, { "epoch": 0.07, "learning_rate": 9.998736124477877e-07, "logits/chosen": -6.138456344604492, "logits/rejected": -6.233449459075928, "logps/chosen": -205.18081665039062, "logps/rejected": -281.7361755371094, "loss": 0.2025, "rewards/accuracies": 0.875, "rewards/chosen": 1.0211679935455322, "rewards/margins": 4.081866264343262, "rewards/rejected": -3.0606985092163086, "step": 134 }, { "epoch": 0.07, "learning_rate": 9.998632996934925e-07, "logits/chosen": -6.1307549476623535, "logits/rejected": -6.233660697937012, "logps/chosen": -233.17335510253906, "logps/rejected": -310.3764343261719, "loss": 0.1616, "rewards/accuracies": 1.0, "rewards/chosen": 1.6569697856903076, "rewards/margins": 5.0916337966918945, "rewards/rejected": -3.434664011001587, "step": 135 }, { "epoch": 0.08, "learning_rate": 9.99852582592596e-07, "logits/chosen": -6.232591152191162, "logits/rejected": -6.243587493896484, "logps/chosen": -265.89825439453125, "logps/rejected": -182.42059326171875, "loss": 0.239, "rewards/accuracies": 0.8125, "rewards/chosen": 2.1292295455932617, "rewards/margins": 3.1899383068084717, "rewards/rejected": -1.0607086420059204, "step": 136 }, { "epoch": 0.08, "learning_rate": 9.99841461153768e-07, "logits/chosen": -6.123791694641113, "logits/rejected": -6.197680473327637, "logps/chosen": -247.1147003173828, "logps/rejected": -138.662841796875, "loss": 0.2288, "rewards/accuracies": 0.875, "rewards/chosen": 2.105496406555176, "rewards/margins": 2.616154432296753, "rewards/rejected": -0.5106580853462219, "step": 137 }, { "epoch": 0.08, "learning_rate": 9.998299353860048e-07, "logits/chosen": -6.288413047790527, "logits/rejected": -6.139993667602539, "logps/chosen": -325.7513427734375, "logps/rejected": -238.60659790039062, "loss": 0.1855, "rewards/accuracies": 0.9375, "rewards/chosen": 1.953574299812317, "rewards/margins": 3.8179595470428467, "rewards/rejected": -1.8643853664398193, "step": 138 }, { "epoch": 0.08, "learning_rate": 9.998180052986294e-07, "logits/chosen": -6.173130035400391, "logits/rejected": -6.082803249359131, "logps/chosen": -260.377197265625, "logps/rejected": -100.99342346191406, "loss": 0.2733, "rewards/accuracies": 0.9375, "rewards/chosen": 1.713658094406128, "rewards/margins": 3.5346696376800537, "rewards/rejected": -1.8210115432739258, "step": 139 }, { "epoch": 0.08, "learning_rate": 9.998056709012926e-07, "logits/chosen": -6.13713264465332, "logits/rejected": -6.073615074157715, "logps/chosen": -314.8575744628906, "logps/rejected": -161.6866455078125, "loss": 0.1791, "rewards/accuracies": 1.0, "rewards/chosen": 3.318448305130005, "rewards/margins": 5.645552635192871, "rewards/rejected": -2.327104091644287, "step": 140 }, { "epoch": 0.08, "learning_rate": 9.997929322039716e-07, "logits/chosen": -6.109276294708252, "logits/rejected": -6.175443649291992, "logps/chosen": -267.4035339355469, "logps/rejected": -290.0419921875, "loss": 0.1413, "rewards/accuracies": 1.0, "rewards/chosen": 1.9833292961120605, "rewards/margins": 6.382046222686768, "rewards/rejected": -4.398717403411865, "step": 141 }, { "epoch": 0.08, "learning_rate": 9.997797892169709e-07, "logits/chosen": -6.17066764831543, "logits/rejected": -6.254542827606201, "logps/chosen": -279.461669921875, "logps/rejected": -337.10076904296875, "loss": 0.2381, "rewards/accuracies": 0.875, "rewards/chosen": 1.368152379989624, "rewards/margins": 3.366715908050537, "rewards/rejected": -1.9985636472702026, "step": 142 }, { "epoch": 0.08, "learning_rate": 9.997662419509224e-07, "logits/chosen": -6.150041580200195, "logits/rejected": -6.128377914428711, "logps/chosen": -331.1325988769531, "logps/rejected": -173.81910705566406, "loss": 0.238, "rewards/accuracies": 0.9375, "rewards/chosen": 2.205449104309082, "rewards/margins": 3.179309844970703, "rewards/rejected": -0.973860502243042, "step": 143 }, { "epoch": 0.08, "learning_rate": 9.997522904167843e-07, "logits/chosen": -6.201495170593262, "logits/rejected": -6.171029090881348, "logps/chosen": -263.76959228515625, "logps/rejected": -181.0001220703125, "loss": 0.2091, "rewards/accuracies": 1.0, "rewards/chosen": 1.6117058992385864, "rewards/margins": 3.5653913021087646, "rewards/rejected": -1.9536854028701782, "step": 144 }, { "epoch": 0.08, "learning_rate": 9.997379346258423e-07, "logits/chosen": -6.227288246154785, "logits/rejected": -6.14165735244751, "logps/chosen": -271.2688903808594, "logps/rejected": -121.44509887695312, "loss": 0.1128, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7622156143188477, "rewards/margins": 4.260353088378906, "rewards/rejected": -2.4981374740600586, "step": 145 }, { "epoch": 0.08, "learning_rate": 9.997231745897091e-07, "logits/chosen": -6.204087257385254, "logits/rejected": -6.241743087768555, "logps/chosen": -203.92044067382812, "logps/rejected": -220.79469299316406, "loss": 0.1875, "rewards/accuracies": 1.0, "rewards/chosen": 1.6554160118103027, "rewards/margins": 5.267730712890625, "rewards/rejected": -3.612314224243164, "step": 146 }, { "epoch": 0.08, "learning_rate": 9.997080103203243e-07, "logits/chosen": -6.1420793533325195, "logits/rejected": -6.0963664054870605, "logps/chosen": -337.8759765625, "logps/rejected": -220.93218994140625, "loss": 0.2265, "rewards/accuracies": 0.875, "rewards/chosen": 2.960634469985962, "rewards/margins": 3.5895261764526367, "rewards/rejected": -0.6288915276527405, "step": 147 }, { "epoch": 0.08, "learning_rate": 9.996924418299546e-07, "logits/chosen": -6.160341262817383, "logits/rejected": -6.093823432922363, "logps/chosen": -237.21871948242188, "logps/rejected": -186.74539184570312, "loss": 0.1799, "rewards/accuracies": 1.0, "rewards/chosen": 1.8249675035476685, "rewards/margins": 4.438294410705566, "rewards/rejected": -2.6133267879486084, "step": 148 }, { "epoch": 0.08, "learning_rate": 9.996764691311931e-07, "logits/chosen": -6.2173027992248535, "logits/rejected": -6.136970520019531, "logps/chosen": -292.61553955078125, "logps/rejected": -171.992431640625, "loss": 0.1918, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6603975296020508, "rewards/margins": 4.173758029937744, "rewards/rejected": -2.5133605003356934, "step": 149 }, { "epoch": 0.08, "learning_rate": 9.996600922369609e-07, "logits/chosen": -6.205836296081543, "logits/rejected": -6.132662296295166, "logps/chosen": -504.79522705078125, "logps/rejected": -230.64236450195312, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": 1.8584685325622559, "rewards/margins": 3.819171905517578, "rewards/rejected": -1.9607033729553223, "step": 150 }, { "epoch": 0.08, "learning_rate": 9.996433111605051e-07, "logits/chosen": -6.158787250518799, "logits/rejected": -6.024654388427734, "logps/chosen": -450.20733642578125, "logps/rejected": -218.20916748046875, "loss": 0.229, "rewards/accuracies": 1.0, "rewards/chosen": 1.2202115058898926, "rewards/margins": 3.4758830070495605, "rewards/rejected": -2.255671262741089, "step": 151 }, { "epoch": 0.08, "learning_rate": 9.996261259154005e-07, "logits/chosen": -6.100133895874023, "logits/rejected": -6.09719705581665, "logps/chosen": -231.27511596679688, "logps/rejected": -240.6998748779297, "loss": 0.205, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1727404594421387, "rewards/margins": 3.407789707183838, "rewards/rejected": -1.2350493669509888, "step": 152 }, { "epoch": 0.08, "learning_rate": 9.996085365155483e-07, "logits/chosen": -6.275554180145264, "logits/rejected": -6.249926567077637, "logps/chosen": -280.2225341796875, "logps/rejected": -179.588134765625, "loss": 0.1906, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9174602031707764, "rewards/margins": 4.662914752960205, "rewards/rejected": -2.745454788208008, "step": 153 }, { "epoch": 0.09, "learning_rate": 9.995905429751767e-07, "logits/chosen": -6.059700012207031, "logits/rejected": -6.085733890533447, "logps/chosen": -261.91302490234375, "logps/rejected": -238.15582275390625, "loss": 0.178, "rewards/accuracies": 1.0, "rewards/chosen": 2.001685619354248, "rewards/margins": 5.440531253814697, "rewards/rejected": -3.438845634460449, "step": 154 }, { "epoch": 0.09, "learning_rate": 9.995721453088411e-07, "logits/chosen": -6.159486770629883, "logits/rejected": -6.193168640136719, "logps/chosen": -279.9972839355469, "logps/rejected": -170.19313049316406, "loss": 0.1537, "rewards/accuracies": 0.8125, "rewards/chosen": 1.8149399757385254, "rewards/margins": 3.246155261993408, "rewards/rejected": -1.4312154054641724, "step": 155 }, { "epoch": 0.09, "learning_rate": 9.995533435314236e-07, "logits/chosen": -6.063732624053955, "logits/rejected": -6.139912128448486, "logps/chosen": -251.72325134277344, "logps/rejected": -203.2379150390625, "loss": 0.1947, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7926995754241943, "rewards/margins": 5.6888580322265625, "rewards/rejected": -3.896158456802368, "step": 156 }, { "epoch": 0.09, "learning_rate": 9.995341376581334e-07, "logits/chosen": -6.172029495239258, "logits/rejected": -6.117672920227051, "logps/chosen": -467.1440734863281, "logps/rejected": -330.78875732421875, "loss": 0.2014, "rewards/accuracies": 0.9375, "rewards/chosen": 1.363053321838379, "rewards/margins": 3.7071664333343506, "rewards/rejected": -2.3441131114959717, "step": 157 }, { "epoch": 0.09, "learning_rate": 9.99514527704506e-07, "logits/chosen": -6.143178462982178, "logits/rejected": -6.114846229553223, "logps/chosen": -252.84725952148438, "logps/rejected": -212.02972412109375, "loss": 0.1999, "rewards/accuracies": 1.0, "rewards/chosen": 2.1901206970214844, "rewards/margins": 4.769767761230469, "rewards/rejected": -2.5796470642089844, "step": 158 }, { "epoch": 0.09, "learning_rate": 9.994945136864046e-07, "logits/chosen": -6.095925331115723, "logits/rejected": -6.136963367462158, "logps/chosen": -310.8692626953125, "logps/rejected": -207.09454345703125, "loss": 0.1489, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8637118339538574, "rewards/margins": 4.660451889038086, "rewards/rejected": -1.7967400550842285, "step": 159 }, { "epoch": 0.09, "learning_rate": 9.994740956200187e-07, "logits/chosen": -6.177314758300781, "logits/rejected": -6.159539222717285, "logps/chosen": -258.052734375, "logps/rejected": -178.92153930664062, "loss": 0.157, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7684215307235718, "rewards/margins": 3.9963762760162354, "rewards/rejected": -2.227954864501953, "step": 160 }, { "epoch": 0.09, "learning_rate": 9.994532735218645e-07, "logits/chosen": -6.240503311157227, "logits/rejected": -6.179758071899414, "logps/chosen": -282.1408386230469, "logps/rejected": -200.59686279296875, "loss": 0.1467, "rewards/accuracies": 1.0, "rewards/chosen": 2.251980781555176, "rewards/margins": 4.893492698669434, "rewards/rejected": -2.641511917114258, "step": 161 }, { "epoch": 0.09, "learning_rate": 9.994320474087855e-07, "logits/chosen": -6.062313556671143, "logits/rejected": -6.012970924377441, "logps/chosen": -508.03826904296875, "logps/rejected": -269.24755859375, "loss": 0.1983, "rewards/accuracies": 0.6875, "rewards/chosen": 0.9904719591140747, "rewards/margins": 1.7489852905273438, "rewards/rejected": -0.758513331413269, "step": 162 }, { "epoch": 0.09, "learning_rate": 9.99410417297952e-07, "logits/chosen": -6.156746864318848, "logits/rejected": -6.227184772491455, "logps/chosen": -210.1279296875, "logps/rejected": -171.7838134765625, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": 1.3829212188720703, "rewards/margins": 3.2528204917907715, "rewards/rejected": -1.8698992729187012, "step": 163 }, { "epoch": 0.09, "learning_rate": 9.993883832068605e-07, "logits/chosen": -6.306753158569336, "logits/rejected": -6.174138069152832, "logps/chosen": -292.5240783691406, "logps/rejected": -131.47659301757812, "loss": 0.1446, "rewards/accuracies": 1.0, "rewards/chosen": 3.9608495235443115, "rewards/margins": 4.997085094451904, "rewards/rejected": -1.0362355709075928, "step": 164 }, { "epoch": 0.09, "learning_rate": 9.993659451533351e-07, "logits/chosen": -6.107589244842529, "logits/rejected": -6.174666404724121, "logps/chosen": -147.41761779785156, "logps/rejected": -198.94635009765625, "loss": 0.1773, "rewards/accuracies": 0.875, "rewards/chosen": 1.036531925201416, "rewards/margins": 2.6519181728363037, "rewards/rejected": -1.6153862476348877, "step": 165 }, { "epoch": 0.09, "learning_rate": 9.99343103155526e-07, "logits/chosen": -6.152732849121094, "logits/rejected": -6.082740783691406, "logps/chosen": -296.1016540527344, "logps/rejected": -143.6741180419922, "loss": 0.1532, "rewards/accuracies": 1.0, "rewards/chosen": 2.5221714973449707, "rewards/margins": 4.913609504699707, "rewards/rejected": -2.3914380073547363, "step": 166 }, { "epoch": 0.09, "learning_rate": 9.993198572319105e-07, "logits/chosen": -6.08421516418457, "logits/rejected": -6.222238540649414, "logps/chosen": -297.15771484375, "logps/rejected": -266.9120788574219, "loss": 0.1973, "rewards/accuracies": 0.75, "rewards/chosen": 1.836761236190796, "rewards/margins": 3.890272617340088, "rewards/rejected": -2.053511142730713, "step": 167 }, { "epoch": 0.09, "learning_rate": 9.992962074012925e-07, "logits/chosen": -6.136037349700928, "logits/rejected": -6.19166898727417, "logps/chosen": -208.5140838623047, "logps/rejected": -205.2286376953125, "loss": 0.1873, "rewards/accuracies": 0.875, "rewards/chosen": 2.428420305252075, "rewards/margins": 5.451738357543945, "rewards/rejected": -3.02331805229187, "step": 168 }, { "epoch": 0.09, "learning_rate": 9.992721536828028e-07, "logits/chosen": -6.028768539428711, "logits/rejected": -6.059806823730469, "logps/chosen": -443.9870300292969, "logps/rejected": -490.6073913574219, "loss": 0.2149, "rewards/accuracies": 0.875, "rewards/chosen": 2.0449562072753906, "rewards/margins": 4.8099260330200195, "rewards/rejected": -2.76496958732605, "step": 169 }, { "epoch": 0.09, "learning_rate": 9.992476960958986e-07, "logits/chosen": -6.0608954429626465, "logits/rejected": -6.116751670837402, "logps/chosen": -182.04461669921875, "logps/rejected": -278.5975341796875, "loss": 0.1686, "rewards/accuracies": 0.875, "rewards/chosen": 1.5537970066070557, "rewards/margins": 3.391159772872925, "rewards/rejected": -1.8373627662658691, "step": 170 }, { "epoch": 0.09, "learning_rate": 9.992228346603644e-07, "logits/chosen": -6.177562713623047, "logits/rejected": -6.173373222351074, "logps/chosen": -245.48095703125, "logps/rejected": -230.12039184570312, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": 1.950847864151001, "rewards/margins": 5.58507776260376, "rewards/rejected": -3.634229898452759, "step": 171 }, { "epoch": 0.1, "learning_rate": 9.991975693963107e-07, "logits/chosen": -6.203705310821533, "logits/rejected": -6.162189960479736, "logps/chosen": -292.58843994140625, "logps/rejected": -233.964111328125, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": 2.7446346282958984, "rewards/margins": 4.842648983001709, "rewards/rejected": -2.0980143547058105, "step": 172 }, { "epoch": 0.1, "learning_rate": 9.991719003241746e-07, "logits/chosen": -6.109380722045898, "logits/rejected": -6.1698198318481445, "logps/chosen": -240.15940856933594, "logps/rejected": -151.09762573242188, "loss": 0.2193, "rewards/accuracies": 0.875, "rewards/chosen": 2.149357318878174, "rewards/margins": 3.9247703552246094, "rewards/rejected": -1.7754132747650146, "step": 173 }, { "epoch": 0.1, "learning_rate": 9.991458274647208e-07, "logits/chosen": -6.164599895477295, "logits/rejected": -6.068042755126953, "logps/chosen": -314.2458190917969, "logps/rejected": -99.11494445800781, "loss": 0.1789, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5350767374038696, "rewards/margins": 3.4799630641937256, "rewards/rejected": -1.9448864459991455, "step": 174 }, { "epoch": 0.1, "learning_rate": 9.991193508390397e-07, "logits/chosen": -6.168668746948242, "logits/rejected": -6.2546186447143555, "logps/chosen": -261.7895202636719, "logps/rejected": -277.517822265625, "loss": 0.191, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5351372957229614, "rewards/margins": 3.667663097381592, "rewards/rejected": -2.132525682449341, "step": 175 }, { "epoch": 0.1, "learning_rate": 9.990924704685484e-07, "logits/chosen": -6.0961151123046875, "logits/rejected": -6.1699748039245605, "logps/chosen": -247.12356567382812, "logps/rejected": -232.46890258789062, "loss": 0.2203, "rewards/accuracies": 0.8125, "rewards/chosen": 1.9891009330749512, "rewards/margins": 3.8822994232177734, "rewards/rejected": -1.8931987285614014, "step": 176 }, { "epoch": 0.1, "learning_rate": 9.990651863749915e-07, "logits/chosen": -6.084652423858643, "logits/rejected": -6.154176235198975, "logps/chosen": -288.05401611328125, "logps/rejected": -191.03135681152344, "loss": 0.1586, "rewards/accuracies": 0.9375, "rewards/chosen": 3.181574821472168, "rewards/margins": 4.90686559677124, "rewards/rejected": -1.7252908945083618, "step": 177 }, { "epoch": 0.1, "learning_rate": 9.990374985804388e-07, "logits/chosen": -6.170328140258789, "logits/rejected": -6.186709403991699, "logps/chosen": -181.7666015625, "logps/rejected": -188.55734252929688, "loss": 0.153, "rewards/accuracies": 1.0, "rewards/chosen": 1.6107354164123535, "rewards/margins": 4.4995927810668945, "rewards/rejected": -2.888857364654541, "step": 178 }, { "epoch": 0.1, "learning_rate": 9.990094071072878e-07, "logits/chosen": -6.108193874359131, "logits/rejected": -6.193990707397461, "logps/chosen": -260.77093505859375, "logps/rejected": -186.57345581054688, "loss": 0.1537, "rewards/accuracies": 0.875, "rewards/chosen": 2.075122594833374, "rewards/margins": 4.464332103729248, "rewards/rejected": -2.389209270477295, "step": 179 }, { "epoch": 0.1, "learning_rate": 9.989809119782616e-07, "logits/chosen": -6.133490085601807, "logits/rejected": -6.124725341796875, "logps/chosen": -272.0555419921875, "logps/rejected": -228.7551727294922, "loss": 0.192, "rewards/accuracies": 0.875, "rewards/chosen": 0.9340229034423828, "rewards/margins": 2.7771947383880615, "rewards/rejected": -1.8431718349456787, "step": 180 }, { "epoch": 0.1, "learning_rate": 9.98952013216411e-07, "logits/chosen": -6.238469123840332, "logits/rejected": -6.149538993835449, "logps/chosen": -517.91845703125, "logps/rejected": -196.77005004882812, "loss": 0.126, "rewards/accuracies": 1.0, "rewards/chosen": 3.22592830657959, "rewards/margins": 5.928641319274902, "rewards/rejected": -2.7027125358581543, "step": 181 }, { "epoch": 0.1, "learning_rate": 9.98922710845112e-07, "logits/chosen": -6.074148178100586, "logits/rejected": -6.106535911560059, "logps/chosen": -233.1022186279297, "logps/rejected": -231.4138641357422, "loss": 0.1514, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4802167415618896, "rewards/margins": 5.289525032043457, "rewards/rejected": -3.8093085289001465, "step": 182 }, { "epoch": 0.1, "learning_rate": 9.98893004888068e-07, "logits/chosen": -6.133334636688232, "logits/rejected": -6.2249908447265625, "logps/chosen": -257.96929931640625, "logps/rejected": -206.4293212890625, "loss": 0.1666, "rewards/accuracies": 0.875, "rewards/chosen": 1.5807050466537476, "rewards/margins": 3.58884334564209, "rewards/rejected": -2.008138418197632, "step": 183 }, { "epoch": 0.1, "learning_rate": 9.988628953693085e-07, "logits/chosen": -6.231034278869629, "logits/rejected": -6.2005109786987305, "logps/chosen": -233.84463500976562, "logps/rejected": -272.1944885253906, "loss": 0.2071, "rewards/accuracies": 0.875, "rewards/chosen": 1.7302522659301758, "rewards/margins": 5.594113826751709, "rewards/rejected": -3.863861560821533, "step": 184 }, { "epoch": 0.1, "learning_rate": 9.988323823131896e-07, "logits/chosen": -6.100637912750244, "logits/rejected": -6.247350692749023, "logps/chosen": -235.26910400390625, "logps/rejected": -308.8831481933594, "loss": 0.1649, "rewards/accuracies": 0.875, "rewards/chosen": 2.0043082237243652, "rewards/margins": 6.677364349365234, "rewards/rejected": -4.673056125640869, "step": 185 }, { "epoch": 0.1, "learning_rate": 9.98801465744394e-07, "logits/chosen": -6.138222694396973, "logits/rejected": -6.1960906982421875, "logps/chosen": -200.17044067382812, "logps/rejected": -122.6049575805664, "loss": 0.2244, "rewards/accuracies": 0.8125, "rewards/chosen": 1.6911547183990479, "rewards/margins": 3.7560882568359375, "rewards/rejected": -2.0649333000183105, "step": 186 }, { "epoch": 0.1, "learning_rate": 9.9877014568793e-07, "logits/chosen": -6.163601875305176, "logits/rejected": -6.192246437072754, "logps/chosen": -172.96514892578125, "logps/rejected": -174.00732421875, "loss": 0.2183, "rewards/accuracies": 1.0, "rewards/chosen": 2.1447510719299316, "rewards/margins": 5.309485912322998, "rewards/rejected": -3.1647348403930664, "step": 187 }, { "epoch": 0.1, "learning_rate": 9.98738422169133e-07, "logits/chosen": -6.195466041564941, "logits/rejected": -6.134463310241699, "logps/chosen": -394.6925354003906, "logps/rejected": -192.25233459472656, "loss": 0.1865, "rewards/accuracies": 0.875, "rewards/chosen": 2.473728656768799, "rewards/margins": 4.092730522155762, "rewards/rejected": -1.6190019845962524, "step": 188 }, { "epoch": 0.1, "learning_rate": 9.98706295213665e-07, "logits/chosen": -6.119232654571533, "logits/rejected": -6.134125709533691, "logps/chosen": -137.0325927734375, "logps/rejected": -199.8701171875, "loss": 0.2316, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8458901643753052, "rewards/margins": 4.39250373840332, "rewards/rejected": -3.5466134548187256, "step": 189 }, { "epoch": 0.11, "learning_rate": 9.986737648475136e-07, "logits/chosen": -6.114107131958008, "logits/rejected": -6.117621421813965, "logps/chosen": -207.59718322753906, "logps/rejected": -156.5538330078125, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": 1.6025092601776123, "rewards/margins": 4.050037384033203, "rewards/rejected": -2.44752836227417, "step": 190 }, { "epoch": 0.11, "learning_rate": 9.986408310969932e-07, "logits/chosen": -6.062973499298096, "logits/rejected": -6.162273406982422, "logps/chosen": -372.07763671875, "logps/rejected": -245.80059814453125, "loss": 0.1803, "rewards/accuracies": 1.0, "rewards/chosen": 3.212214469909668, "rewards/margins": 4.922523498535156, "rewards/rejected": -1.7103089094161987, "step": 191 }, { "epoch": 0.11, "learning_rate": 9.986074939887441e-07, "logits/chosen": -6.24260950088501, "logits/rejected": -6.203961372375488, "logps/chosen": -193.63333129882812, "logps/rejected": -225.0701446533203, "loss": 0.1637, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7793278098106384, "rewards/margins": 5.134250164031982, "rewards/rejected": -4.354922294616699, "step": 192 }, { "epoch": 0.11, "learning_rate": 9.985737535497335e-07, "logits/chosen": -6.212823867797852, "logits/rejected": -6.2100067138671875, "logps/chosen": -358.58447265625, "logps/rejected": -293.93890380859375, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": 2.462350368499756, "rewards/margins": 7.373188018798828, "rewards/rejected": -4.910837173461914, "step": 193 }, { "epoch": 0.11, "learning_rate": 9.985396098072546e-07, "logits/chosen": -6.243161678314209, "logits/rejected": -6.182557582855225, "logps/chosen": -399.0211181640625, "logps/rejected": -133.4063262939453, "loss": 0.1844, "rewards/accuracies": 1.0, "rewards/chosen": 3.627692699432373, "rewards/margins": 4.884624481201172, "rewards/rejected": -1.2569316625595093, "step": 194 }, { "epoch": 0.11, "learning_rate": 9.985050627889267e-07, "logits/chosen": -6.05493688583374, "logits/rejected": -6.113831520080566, "logps/chosen": -404.84014892578125, "logps/rejected": -275.7078552246094, "loss": 0.1592, "rewards/accuracies": 0.9375, "rewards/chosen": 3.183377265930176, "rewards/margins": 6.312142372131348, "rewards/rejected": -3.128765344619751, "step": 195 }, { "epoch": 0.11, "learning_rate": 9.98470112522695e-07, "logits/chosen": -6.152552604675293, "logits/rejected": -6.121710300445557, "logps/chosen": -222.77394104003906, "logps/rejected": -161.99549865722656, "loss": 0.1325, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8388112783432007, "rewards/margins": 4.4259209632873535, "rewards/rejected": -2.5871095657348633, "step": 196 }, { "epoch": 0.11, "learning_rate": 9.984347590368316e-07, "logits/chosen": -6.043149948120117, "logits/rejected": -6.035826683044434, "logps/chosen": -320.97528076171875, "logps/rejected": -358.29351806640625, "loss": 0.1426, "rewards/accuracies": 0.8125, "rewards/chosen": 1.1304610967636108, "rewards/margins": 2.5629801750183105, "rewards/rejected": -1.4325190782546997, "step": 197 }, { "epoch": 0.11, "learning_rate": 9.983990023599344e-07, "logits/chosen": -6.135063648223877, "logits/rejected": -6.183027267456055, "logps/chosen": -287.2575988769531, "logps/rejected": -269.26654052734375, "loss": 0.1638, "rewards/accuracies": 1.0, "rewards/chosen": 2.9646501541137695, "rewards/margins": 6.883374214172363, "rewards/rejected": -3.918724298477173, "step": 198 }, { "epoch": 0.11, "learning_rate": 9.983628425209275e-07, "logits/chosen": -6.238388538360596, "logits/rejected": -6.257356643676758, "logps/chosen": -199.8618927001953, "logps/rejected": -133.96621704101562, "loss": 0.1988, "rewards/accuracies": 0.8125, "rewards/chosen": 1.8912222385406494, "rewards/margins": 3.589694023132324, "rewards/rejected": -1.6984719038009644, "step": 199 }, { "epoch": 0.11, "learning_rate": 9.983262795490613e-07, "logits/chosen": -6.167751312255859, "logits/rejected": -6.2639851570129395, "logps/chosen": -256.63299560546875, "logps/rejected": -224.52984619140625, "loss": 0.1351, "rewards/accuracies": 0.9375, "rewards/chosen": 2.0390586853027344, "rewards/margins": 4.37666654586792, "rewards/rejected": -2.3376078605651855, "step": 200 }, { "epoch": 0.11, "learning_rate": 9.982893134739117e-07, "logits/chosen": -6.166282653808594, "logits/rejected": -6.186671733856201, "logps/chosen": -220.19137573242188, "logps/rejected": -220.0702362060547, "loss": 0.1979, "rewards/accuracies": 1.0, "rewards/chosen": 2.708742618560791, "rewards/margins": 4.76578950881958, "rewards/rejected": -2.057046890258789, "step": 201 }, { "epoch": 0.11, "learning_rate": 9.982519443253813e-07, "logits/chosen": -6.221887111663818, "logits/rejected": -6.208301544189453, "logps/chosen": -324.39141845703125, "logps/rejected": -157.0420379638672, "loss": 0.1991, "rewards/accuracies": 0.875, "rewards/chosen": 3.491703987121582, "rewards/margins": 5.067813873291016, "rewards/rejected": -1.5761100053787231, "step": 202 }, { "epoch": 0.11, "learning_rate": 9.982141721336984e-07, "logits/chosen": -6.200504302978516, "logits/rejected": -6.167238235473633, "logps/chosen": -219.26815795898438, "logps/rejected": -215.60897827148438, "loss": 0.2176, "rewards/accuracies": 0.8125, "rewards/chosen": 0.8409526348114014, "rewards/margins": 3.201326847076416, "rewards/rejected": -2.3603742122650146, "step": 203 }, { "epoch": 0.11, "learning_rate": 9.98175996929418e-07, "logits/chosen": -6.0262041091918945, "logits/rejected": -6.144102096557617, "logps/chosen": -282.4689025878906, "logps/rejected": -203.83164978027344, "loss": 0.1433, "rewards/accuracies": 1.0, "rewards/chosen": 4.115827560424805, "rewards/margins": 5.641611576080322, "rewards/rejected": -1.525783658027649, "step": 204 }, { "epoch": 0.11, "learning_rate": 9.981374187434199e-07, "logits/chosen": -6.14973258972168, "logits/rejected": -6.190040111541748, "logps/chosen": -213.53021240234375, "logps/rejected": -182.68228149414062, "loss": 0.2426, "rewards/accuracies": 0.9375, "rewards/chosen": 2.225274085998535, "rewards/margins": 4.901466369628906, "rewards/rejected": -2.676192283630371, "step": 205 }, { "epoch": 0.11, "learning_rate": 9.980984376069107e-07, "logits/chosen": -6.17765998840332, "logits/rejected": -6.167413711547852, "logps/chosen": -274.76910400390625, "logps/rejected": -133.0980224609375, "loss": 0.1121, "rewards/accuracies": 0.8125, "rewards/chosen": 2.3994925022125244, "rewards/margins": 4.489744663238525, "rewards/rejected": -2.09025239944458, "step": 206 }, { "epoch": 0.11, "learning_rate": 9.980590535514234e-07, "logits/chosen": -6.154487133026123, "logits/rejected": -6.213505744934082, "logps/chosen": -235.22569274902344, "logps/rejected": -184.30714416503906, "loss": 0.1485, "rewards/accuracies": 1.0, "rewards/chosen": 2.3019165992736816, "rewards/margins": 5.6310906410217285, "rewards/rejected": -3.3291738033294678, "step": 207 }, { "epoch": 0.12, "learning_rate": 9.980192666088153e-07, "logits/chosen": -6.057201385498047, "logits/rejected": -6.072698593139648, "logps/chosen": -279.99609375, "logps/rejected": -236.9033660888672, "loss": 0.2642, "rewards/accuracies": 0.8125, "rewards/chosen": 1.9345040321350098, "rewards/margins": 3.078066110610962, "rewards/rejected": -1.1435620784759521, "step": 208 }, { "epoch": 0.12, "learning_rate": 9.979790768112718e-07, "logits/chosen": -6.0521392822265625, "logits/rejected": -6.171464920043945, "logps/chosen": -383.7384033203125, "logps/rejected": -238.41542053222656, "loss": 0.0949, "rewards/accuracies": 1.0, "rewards/chosen": 2.9613587856292725, "rewards/margins": 5.1202497482299805, "rewards/rejected": -2.158890962600708, "step": 209 }, { "epoch": 0.12, "learning_rate": 9.979384841913018e-07, "logits/chosen": -6.155935287475586, "logits/rejected": -6.167370319366455, "logps/chosen": -373.02484130859375, "logps/rejected": -226.3342742919922, "loss": 0.1526, "rewards/accuracies": 0.9375, "rewards/chosen": 2.593935966491699, "rewards/margins": 4.32281494140625, "rewards/rejected": -1.7288786172866821, "step": 210 }, { "epoch": 0.12, "learning_rate": 9.978974887817422e-07, "logits/chosen": -6.150284767150879, "logits/rejected": -6.172290325164795, "logps/chosen": -147.9048614501953, "logps/rejected": -134.49374389648438, "loss": 0.1641, "rewards/accuracies": 0.875, "rewards/chosen": 1.233677625656128, "rewards/margins": 3.40617036819458, "rewards/rejected": -2.172492742538452, "step": 211 }, { "epoch": 0.12, "learning_rate": 9.978560906157542e-07, "logits/chosen": -6.104578495025635, "logits/rejected": -6.153537750244141, "logps/chosen": -237.6129150390625, "logps/rejected": -263.56707763671875, "loss": 0.1652, "rewards/accuracies": 1.0, "rewards/chosen": 1.7097797393798828, "rewards/margins": 4.8965349197387695, "rewards/rejected": -3.1867551803588867, "step": 212 }, { "epoch": 0.12, "learning_rate": 9.978142897268257e-07, "logits/chosen": -6.154561996459961, "logits/rejected": -6.122575759887695, "logps/chosen": -240.7945098876953, "logps/rejected": -158.45501708984375, "loss": 0.2252, "rewards/accuracies": 0.875, "rewards/chosen": 1.7646102905273438, "rewards/margins": 4.718732833862305, "rewards/rejected": -2.95412278175354, "step": 213 }, { "epoch": 0.12, "learning_rate": 9.977720861487699e-07, "logits/chosen": -6.136336803436279, "logits/rejected": -6.14532995223999, "logps/chosen": -211.83929443359375, "logps/rejected": -166.369140625, "loss": 0.2374, "rewards/accuracies": 0.75, "rewards/chosen": 0.30200302600860596, "rewards/margins": 2.794645071029663, "rewards/rejected": -2.4926421642303467, "step": 214 }, { "epoch": 0.12, "learning_rate": 9.97729479915726e-07, "logits/chosen": -6.208266258239746, "logits/rejected": -6.139787673950195, "logps/chosen": -292.866943359375, "logps/rejected": -153.95556640625, "loss": 0.1354, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7371816635131836, "rewards/margins": 4.471616744995117, "rewards/rejected": -1.7344346046447754, "step": 215 }, { "epoch": 0.12, "learning_rate": 9.976864710621586e-07, "logits/chosen": -6.183183670043945, "logits/rejected": -6.198195457458496, "logps/chosen": -205.04220581054688, "logps/rejected": -125.80296325683594, "loss": 0.1604, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6600713729858398, "rewards/margins": 4.439384460449219, "rewards/rejected": -2.779313325881958, "step": 216 }, { "epoch": 0.12, "learning_rate": 9.976430596228583e-07, "logits/chosen": -6.136728286743164, "logits/rejected": -6.150753974914551, "logps/chosen": -189.12445068359375, "logps/rejected": -203.89918518066406, "loss": 0.178, "rewards/accuracies": 1.0, "rewards/chosen": 2.1438498497009277, "rewards/margins": 5.4640793800354, "rewards/rejected": -3.3202295303344727, "step": 217 }, { "epoch": 0.12, "learning_rate": 9.975992456329412e-07, "logits/chosen": -6.138241767883301, "logits/rejected": -6.083906173706055, "logps/chosen": -183.17877197265625, "logps/rejected": -189.0666046142578, "loss": 0.1022, "rewards/accuracies": 1.0, "rewards/chosen": 1.93941330909729, "rewards/margins": 5.817348957061768, "rewards/rejected": -3.8779358863830566, "step": 218 }, { "epoch": 0.12, "learning_rate": 9.975550291278493e-07, "logits/chosen": -6.092685699462891, "logits/rejected": -6.1347455978393555, "logps/chosen": -249.64646911621094, "logps/rejected": -259.8849182128906, "loss": 0.1405, "rewards/accuracies": 0.875, "rewards/chosen": 1.7009724378585815, "rewards/margins": 5.322075843811035, "rewards/rejected": -3.621103525161743, "step": 219 }, { "epoch": 0.12, "learning_rate": 9.975104101433494e-07, "logits/chosen": -6.154489517211914, "logits/rejected": -6.188085556030273, "logps/chosen": -344.76123046875, "logps/rejected": -271.52276611328125, "loss": 0.1302, "rewards/accuracies": 1.0, "rewards/chosen": 3.6282849311828613, "rewards/margins": 6.803807735443115, "rewards/rejected": -3.175522565841675, "step": 220 }, { "epoch": 0.12, "learning_rate": 9.974653887155349e-07, "logits/chosen": -6.197551250457764, "logits/rejected": -6.1686811447143555, "logps/chosen": -232.95358276367188, "logps/rejected": -205.75592041015625, "loss": 0.1666, "rewards/accuracies": 0.875, "rewards/chosen": 1.5852534770965576, "rewards/margins": 4.073505401611328, "rewards/rejected": -2.4882516860961914, "step": 221 }, { "epoch": 0.12, "learning_rate": 9.974199648808242e-07, "logits/chosen": -6.169668197631836, "logits/rejected": -6.148119926452637, "logps/chosen": -200.91677856445312, "logps/rejected": -161.98362731933594, "loss": 0.2308, "rewards/accuracies": 0.8125, "rewards/chosen": 1.2104146480560303, "rewards/margins": 3.786769151687622, "rewards/rejected": -2.576354503631592, "step": 222 }, { "epoch": 0.12, "learning_rate": 9.973741386759613e-07, "logits/chosen": -6.155689716339111, "logits/rejected": -6.195010662078857, "logps/chosen": -207.25723266601562, "logps/rejected": -256.5521240234375, "loss": 0.1604, "rewards/accuracies": 0.9375, "rewards/chosen": 1.49904203414917, "rewards/margins": 4.583011150360107, "rewards/rejected": -3.0839691162109375, "step": 223 }, { "epoch": 0.12, "learning_rate": 9.973279101380157e-07, "logits/chosen": -6.120276927947998, "logits/rejected": -6.239943027496338, "logps/chosen": -347.8678894042969, "logps/rejected": -499.648681640625, "loss": 0.1344, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6912122964859009, "rewards/margins": 6.332095146179199, "rewards/rejected": -4.64088249206543, "step": 224 }, { "epoch": 0.12, "learning_rate": 9.97281279304382e-07, "logits/chosen": -6.126797676086426, "logits/rejected": -6.192370414733887, "logps/chosen": -263.1372375488281, "logps/rejected": -246.24649047851562, "loss": 0.1672, "rewards/accuracies": 0.9375, "rewards/chosen": 2.040922164916992, "rewards/margins": 4.67547607421875, "rewards/rejected": -2.6345536708831787, "step": 225 }, { "epoch": 0.13, "learning_rate": 9.97234246212781e-07, "logits/chosen": -6.132216930389404, "logits/rejected": -6.16166877746582, "logps/chosen": -213.13958740234375, "logps/rejected": -309.08026123046875, "loss": 0.1302, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1091256141662598, "rewards/margins": 6.126342296600342, "rewards/rejected": -5.017216682434082, "step": 226 }, { "epoch": 0.13, "learning_rate": 9.971868109012583e-07, "logits/chosen": -6.230940341949463, "logits/rejected": -6.210010051727295, "logps/chosen": -224.12738037109375, "logps/rejected": -130.29364013671875, "loss": 0.1334, "rewards/accuracies": 0.9375, "rewards/chosen": 2.197854518890381, "rewards/margins": 4.656001091003418, "rewards/rejected": -2.458146572113037, "step": 227 }, { "epoch": 0.13, "learning_rate": 9.971389734081847e-07, "logits/chosen": -6.220359802246094, "logits/rejected": -6.1139020919799805, "logps/chosen": -232.14356994628906, "logps/rejected": -161.3067626953125, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": 1.642122507095337, "rewards/margins": 4.3158278465271, "rewards/rejected": -2.6737051010131836, "step": 228 }, { "epoch": 0.13, "learning_rate": 9.970907337722572e-07, "logits/chosen": -6.191458702087402, "logits/rejected": -6.193089962005615, "logps/chosen": -284.3473205566406, "logps/rejected": -192.82826232910156, "loss": 0.16, "rewards/accuracies": 0.9375, "rewards/chosen": 2.49491548538208, "rewards/margins": 5.7516679763793945, "rewards/rejected": -3.2567522525787354, "step": 229 }, { "epoch": 0.13, "learning_rate": 9.970420920324972e-07, "logits/chosen": -6.125573635101318, "logits/rejected": -6.175869464874268, "logps/chosen": -296.0160217285156, "logps/rejected": -166.5511474609375, "loss": 0.1082, "rewards/accuracies": 1.0, "rewards/chosen": 3.657979965209961, "rewards/margins": 6.632909774780273, "rewards/rejected": -2.9749298095703125, "step": 230 }, { "epoch": 0.13, "learning_rate": 9.969930482282516e-07, "logits/chosen": -6.154365062713623, "logits/rejected": -6.2073163986206055, "logps/chosen": -305.8282775878906, "logps/rejected": -219.24420166015625, "loss": 0.0981, "rewards/accuracies": 0.875, "rewards/chosen": 2.086075782775879, "rewards/margins": 4.2703399658203125, "rewards/rejected": -2.1842639446258545, "step": 231 }, { "epoch": 0.13, "learning_rate": 9.969436023991928e-07, "logits/chosen": -6.1390461921691895, "logits/rejected": -6.13059139251709, "logps/chosen": -387.5664978027344, "logps/rejected": -163.02996826171875, "loss": 0.106, "rewards/accuracies": 1.0, "rewards/chosen": 2.5315287113189697, "rewards/margins": 4.797287940979004, "rewards/rejected": -2.2657594680786133, "step": 232 }, { "epoch": 0.13, "learning_rate": 9.968937545853181e-07, "logits/chosen": -6.11931037902832, "logits/rejected": -6.192165374755859, "logps/chosen": -262.4884948730469, "logps/rejected": -196.27032470703125, "loss": 0.2141, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3634519577026367, "rewards/margins": 5.0774922370910645, "rewards/rejected": -2.7140400409698486, "step": 233 }, { "epoch": 0.13, "learning_rate": 9.968435048269504e-07, "logits/chosen": -6.210986137390137, "logits/rejected": -6.218911647796631, "logps/chosen": -373.125244140625, "logps/rejected": -146.1776580810547, "loss": 0.179, "rewards/accuracies": 1.0, "rewards/chosen": 1.836595058441162, "rewards/margins": 5.207396984100342, "rewards/rejected": -3.3708019256591797, "step": 234 }, { "epoch": 0.13, "learning_rate": 9.967928531647373e-07, "logits/chosen": -6.229574680328369, "logits/rejected": -6.169474124908447, "logps/chosen": -208.24008178710938, "logps/rejected": -179.02577209472656, "loss": 0.1193, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2324211597442627, "rewards/margins": 4.462672233581543, "rewards/rejected": -2.2302513122558594, "step": 235 }, { "epoch": 0.13, "learning_rate": 9.967417996396514e-07, "logits/chosen": -6.156024932861328, "logits/rejected": -6.1213459968566895, "logps/chosen": -224.36807250976562, "logps/rejected": -223.61233520507812, "loss": 0.187, "rewards/accuracies": 0.875, "rewards/chosen": 1.6430795192718506, "rewards/margins": 3.7850799560546875, "rewards/rejected": -2.142000198364258, "step": 236 }, { "epoch": 0.13, "learning_rate": 9.96690344292991e-07, "logits/chosen": -6.166228771209717, "logits/rejected": -6.193170070648193, "logps/chosen": -247.52024841308594, "logps/rejected": -175.10037231445312, "loss": 0.18, "rewards/accuracies": 0.9375, "rewards/chosen": 2.560096025466919, "rewards/margins": 4.8300371170043945, "rewards/rejected": -2.2699408531188965, "step": 237 }, { "epoch": 0.13, "learning_rate": 9.966384871663792e-07, "logits/chosen": -6.238102912902832, "logits/rejected": -6.2605156898498535, "logps/chosen": -317.9850769042969, "logps/rejected": -265.1817626953125, "loss": 0.1009, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1322367191314697, "rewards/margins": 6.347092628479004, "rewards/rejected": -3.2148561477661133, "step": 238 }, { "epoch": 0.13, "learning_rate": 9.965862283017633e-07, "logits/chosen": -6.189223289489746, "logits/rejected": -6.120240211486816, "logps/chosen": -254.01580810546875, "logps/rejected": -218.8646240234375, "loss": 0.1083, "rewards/accuracies": 0.875, "rewards/chosen": 2.3359780311584473, "rewards/margins": 5.767636299133301, "rewards/rejected": -3.4316587448120117, "step": 239 }, { "epoch": 0.13, "learning_rate": 9.965335677414168e-07, "logits/chosen": -6.0146164894104, "logits/rejected": -6.125892639160156, "logps/chosen": -235.87347412109375, "logps/rejected": -231.96661376953125, "loss": 0.2532, "rewards/accuracies": 0.9375, "rewards/chosen": 1.632489562034607, "rewards/margins": 4.947948932647705, "rewards/rejected": -3.3154592514038086, "step": 240 }, { "epoch": 0.13, "learning_rate": 9.964805055279374e-07, "logits/chosen": -6.079483985900879, "logits/rejected": -6.101933002471924, "logps/chosen": -387.113525390625, "logps/rejected": -259.1803894042969, "loss": 0.1543, "rewards/accuracies": 0.875, "rewards/chosen": 1.5777146816253662, "rewards/margins": 4.794155597686768, "rewards/rejected": -3.2164409160614014, "step": 241 }, { "epoch": 0.13, "learning_rate": 9.96427041704248e-07, "logits/chosen": -6.083850860595703, "logits/rejected": -6.154448509216309, "logps/chosen": -198.31964111328125, "logps/rejected": -180.9499969482422, "loss": 0.2336, "rewards/accuracies": 0.875, "rewards/chosen": 2.224802017211914, "rewards/margins": 4.715157985687256, "rewards/rejected": -2.4903557300567627, "step": 242 }, { "epoch": 0.13, "learning_rate": 9.963731763135962e-07, "logits/chosen": -6.092083930969238, "logits/rejected": -6.198792457580566, "logps/chosen": -250.81741333007812, "logps/rejected": -217.7373504638672, "loss": 0.1522, "rewards/accuracies": 0.875, "rewards/chosen": 1.782495141029358, "rewards/margins": 4.149192810058594, "rewards/rejected": -2.3666975498199463, "step": 243 }, { "epoch": 0.14, "learning_rate": 9.963189093995543e-07, "logits/chosen": -6.011901378631592, "logits/rejected": -6.1038737297058105, "logps/chosen": -228.60128784179688, "logps/rejected": -310.6833801269531, "loss": 0.1561, "rewards/accuracies": 0.875, "rewards/chosen": 1.3725844621658325, "rewards/margins": 4.291471004486084, "rewards/rejected": -2.918886661529541, "step": 244 }, { "epoch": 0.14, "learning_rate": 9.962642410060197e-07, "logits/chosen": -6.139901638031006, "logits/rejected": -6.135146141052246, "logps/chosen": -356.7838439941406, "logps/rejected": -313.6571044921875, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": 2.0694918632507324, "rewards/margins": 5.421291828155518, "rewards/rejected": -3.351799964904785, "step": 245 }, { "epoch": 0.14, "learning_rate": 9.962091711772148e-07, "logits/chosen": -6.187196731567383, "logits/rejected": -6.162841320037842, "logps/chosen": -235.70013427734375, "logps/rejected": -190.4325714111328, "loss": 0.2038, "rewards/accuracies": 0.875, "rewards/chosen": 1.902061939239502, "rewards/margins": 6.077968120574951, "rewards/rejected": -4.175906181335449, "step": 246 }, { "epoch": 0.14, "learning_rate": 9.961536999576857e-07, "logits/chosen": -6.120741367340088, "logits/rejected": -6.199330806732178, "logps/chosen": -246.11233520507812, "logps/rejected": -219.70310974121094, "loss": 0.1311, "rewards/accuracies": 1.0, "rewards/chosen": 2.889162063598633, "rewards/margins": 6.690234661102295, "rewards/rejected": -3.801072597503662, "step": 247 }, { "epoch": 0.14, "learning_rate": 9.960978273923045e-07, "logits/chosen": -6.147345542907715, "logits/rejected": -6.1177592277526855, "logps/chosen": -331.0669250488281, "logps/rejected": -101.60523223876953, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": 4.844243049621582, "rewards/margins": 5.47091007232666, "rewards/rejected": -0.626667320728302, "step": 248 }, { "epoch": 0.14, "learning_rate": 9.96041553526267e-07, "logits/chosen": -6.13089656829834, "logits/rejected": -6.135618209838867, "logps/chosen": -214.07611083984375, "logps/rejected": -203.08883666992188, "loss": 0.1016, "rewards/accuracies": 0.875, "rewards/chosen": 1.1822879314422607, "rewards/margins": 4.1169281005859375, "rewards/rejected": -2.934640407562256, "step": 249 }, { "epoch": 0.14, "learning_rate": 9.95984878405094e-07, "logits/chosen": -6.157674789428711, "logits/rejected": -6.206605911254883, "logps/chosen": -300.6916198730469, "logps/rejected": -215.01010131835938, "loss": 0.2319, "rewards/accuracies": 0.9375, "rewards/chosen": 2.112333059310913, "rewards/margins": 4.654182434082031, "rewards/rejected": -2.5418498516082764, "step": 250 }, { "epoch": 0.14, "learning_rate": 9.959278020746307e-07, "logits/chosen": -6.076920509338379, "logits/rejected": -6.089045524597168, "logps/chosen": -219.50653076171875, "logps/rejected": -94.47952270507812, "loss": 0.1452, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3272621631622314, "rewards/margins": 4.976851940155029, "rewards/rejected": -2.6495895385742188, "step": 251 }, { "epoch": 0.14, "learning_rate": 9.958703245810472e-07, "logits/chosen": -6.169229507446289, "logits/rejected": -6.143495559692383, "logps/chosen": -340.42047119140625, "logps/rejected": -295.281982421875, "loss": 0.1139, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9187779426574707, "rewards/margins": 6.05657958984375, "rewards/rejected": -3.1378018856048584, "step": 252 }, { "epoch": 0.14, "learning_rate": 9.958124459708376e-07, "logits/chosen": -6.145963668823242, "logits/rejected": -6.190906524658203, "logps/chosen": -353.74017333984375, "logps/rejected": -281.7767333984375, "loss": 0.1419, "rewards/accuracies": 0.9375, "rewards/chosen": 2.222024440765381, "rewards/margins": 5.344831466674805, "rewards/rejected": -3.122807502746582, "step": 253 }, { "epoch": 0.14, "learning_rate": 9.957541662908212e-07, "logits/chosen": -6.216705322265625, "logits/rejected": -6.146181583404541, "logps/chosen": -206.95513916015625, "logps/rejected": -164.50003051757812, "loss": 0.143, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4876298904418945, "rewards/margins": 5.627899169921875, "rewards/rejected": -4.1402692794799805, "step": 254 }, { "epoch": 0.14, "learning_rate": 9.956954855881408e-07, "logits/chosen": -6.079500198364258, "logits/rejected": -6.059078216552734, "logps/chosen": -340.0668029785156, "logps/rejected": -192.9694061279297, "loss": 0.0977, "rewards/accuracies": 1.0, "rewards/chosen": 1.9233356714248657, "rewards/margins": 4.0927958488464355, "rewards/rejected": -2.1694602966308594, "step": 255 }, { "epoch": 0.14, "learning_rate": 9.95636403910264e-07, "logits/chosen": -6.065738677978516, "logits/rejected": -6.134993076324463, "logps/chosen": -244.07888793945312, "logps/rejected": -200.0607147216797, "loss": 0.1351, "rewards/accuracies": 0.9375, "rewards/chosen": 2.582949638366699, "rewards/margins": 6.4258246421813965, "rewards/rejected": -3.842874765396118, "step": 256 }, { "epoch": 0.14, "learning_rate": 9.955769213049832e-07, "logits/chosen": -6.078837871551514, "logits/rejected": -6.139305114746094, "logps/chosen": -255.54531860351562, "logps/rejected": -170.29006958007812, "loss": 0.1638, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5197954177856445, "rewards/margins": 3.957782745361328, "rewards/rejected": -0.43798720836639404, "step": 257 }, { "epoch": 0.14, "learning_rate": 9.955170378204148e-07, "logits/chosen": -6.217965602874756, "logits/rejected": -6.292010307312012, "logps/chosen": -215.48184204101562, "logps/rejected": -253.30503845214844, "loss": 0.1774, "rewards/accuracies": 0.875, "rewards/chosen": 2.0325489044189453, "rewards/margins": 5.149980545043945, "rewards/rejected": -3.117432117462158, "step": 258 }, { "epoch": 0.14, "learning_rate": 9.95456753504999e-07, "logits/chosen": -6.193286418914795, "logits/rejected": -6.18550443649292, "logps/chosen": -325.5516052246094, "logps/rejected": -203.48004150390625, "loss": 0.1309, "rewards/accuracies": 1.0, "rewards/chosen": 3.132024049758911, "rewards/margins": 4.951642036437988, "rewards/rejected": -1.8196181058883667, "step": 259 }, { "epoch": 0.14, "learning_rate": 9.953960684075007e-07, "logits/chosen": -6.113389492034912, "logits/rejected": -6.1099982261657715, "logps/chosen": -321.6541748046875, "logps/rejected": -265.082763671875, "loss": 0.1487, "rewards/accuracies": 0.875, "rewards/chosen": 1.618927001953125, "rewards/margins": 4.607532501220703, "rewards/rejected": -2.988605499267578, "step": 260 }, { "epoch": 0.14, "learning_rate": 9.953349825770093e-07, "logits/chosen": -6.192946434020996, "logits/rejected": -6.242873191833496, "logps/chosen": -205.6689453125, "logps/rejected": -176.36058044433594, "loss": 0.1049, "rewards/accuracies": 0.9375, "rewards/chosen": 2.255997657775879, "rewards/margins": 5.714536666870117, "rewards/rejected": -3.4585390090942383, "step": 261 }, { "epoch": 0.15, "learning_rate": 9.952734960629375e-07, "logits/chosen": -6.093379497528076, "logits/rejected": -6.1223955154418945, "logps/chosen": -208.93276977539062, "logps/rejected": -165.42919921875, "loss": 0.1465, "rewards/accuracies": 1.0, "rewards/chosen": 2.7923197746276855, "rewards/margins": 4.5555267333984375, "rewards/rejected": -1.7632064819335938, "step": 262 }, { "epoch": 0.15, "learning_rate": 9.95211608915023e-07, "logits/chosen": -6.165628433227539, "logits/rejected": -6.244649410247803, "logps/chosen": -216.71517944335938, "logps/rejected": -221.6785430908203, "loss": 0.2128, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6652982234954834, "rewards/margins": 3.745105028152466, "rewards/rejected": -2.0798068046569824, "step": 263 }, { "epoch": 0.15, "learning_rate": 9.951493211833272e-07, "logits/chosen": -6.089705467224121, "logits/rejected": -6.165596008300781, "logps/chosen": -200.30628967285156, "logps/rejected": -219.95602416992188, "loss": 0.1616, "rewards/accuracies": 1.0, "rewards/chosen": 2.1390786170959473, "rewards/margins": 6.395192623138428, "rewards/rejected": -4.2561140060424805, "step": 264 }, { "epoch": 0.15, "learning_rate": 9.950866329182354e-07, "logits/chosen": -6.173651218414307, "logits/rejected": -6.2141218185424805, "logps/chosen": -355.3276062011719, "logps/rejected": -187.78646850585938, "loss": 0.1317, "rewards/accuracies": 0.9375, "rewards/chosen": 4.410912990570068, "rewards/margins": 5.586097717285156, "rewards/rejected": -1.175184726715088, "step": 265 }, { "epoch": 0.15, "learning_rate": 9.950235441704574e-07, "logits/chosen": -6.2289886474609375, "logits/rejected": -6.246344089508057, "logps/chosen": -278.1068420410156, "logps/rejected": -185.05047607421875, "loss": 0.1719, "rewards/accuracies": 1.0, "rewards/chosen": 3.9846110343933105, "rewards/margins": 6.756996154785156, "rewards/rejected": -2.7723846435546875, "step": 266 }, { "epoch": 0.15, "learning_rate": 9.94960054991026e-07, "logits/chosen": -6.167195796966553, "logits/rejected": -6.182229042053223, "logps/chosen": -228.73260498046875, "logps/rejected": -270.97076416015625, "loss": 0.2897, "rewards/accuracies": 0.8125, "rewards/chosen": 1.441026210784912, "rewards/margins": 4.357522010803223, "rewards/rejected": -2.9164958000183105, "step": 267 }, { "epoch": 0.15, "learning_rate": 9.948961654312985e-07, "logits/chosen": -6.246692657470703, "logits/rejected": -6.236818313598633, "logps/chosen": -336.25628662109375, "logps/rejected": -211.85189819335938, "loss": 0.1871, "rewards/accuracies": 1.0, "rewards/chosen": 2.7031729221343994, "rewards/margins": 6.410870552062988, "rewards/rejected": -3.707697868347168, "step": 268 }, { "epoch": 0.15, "learning_rate": 9.948318755429567e-07, "logits/chosen": -6.187053203582764, "logits/rejected": -6.254376411437988, "logps/chosen": -213.81805419921875, "logps/rejected": -300.1053466796875, "loss": 0.2124, "rewards/accuracies": 0.875, "rewards/chosen": 0.8722965717315674, "rewards/margins": 4.549399375915527, "rewards/rejected": -3.677102565765381, "step": 269 }, { "epoch": 0.15, "learning_rate": 9.947671853780052e-07, "logits/chosen": -6.152793884277344, "logits/rejected": -6.117624759674072, "logps/chosen": -329.7068176269531, "logps/rejected": -446.9447326660156, "loss": 0.1723, "rewards/accuracies": 0.8125, "rewards/chosen": 1.5223588943481445, "rewards/margins": 4.948300361633301, "rewards/rejected": -3.4259414672851562, "step": 270 }, { "epoch": 0.15, "learning_rate": 9.94702094988773e-07, "logits/chosen": -6.222860813140869, "logits/rejected": -6.1543354988098145, "logps/chosen": -346.3696594238281, "logps/rejected": -397.0447998046875, "loss": 0.1983, "rewards/accuracies": 0.875, "rewards/chosen": 1.3044843673706055, "rewards/margins": 3.9770987033843994, "rewards/rejected": -2.672614097595215, "step": 271 }, { "epoch": 0.15, "learning_rate": 9.946366044279126e-07, "logits/chosen": -6.182933330535889, "logits/rejected": -6.199898719787598, "logps/chosen": -325.40008544921875, "logps/rejected": -176.1597900390625, "loss": 0.1461, "rewards/accuracies": 0.875, "rewards/chosen": 3.6727237701416016, "rewards/margins": 5.170888900756836, "rewards/rejected": -1.4981653690338135, "step": 272 }, { "epoch": 0.15, "learning_rate": 9.945707137484001e-07, "logits/chosen": -6.175583839416504, "logits/rejected": -6.189727783203125, "logps/chosen": -246.3599090576172, "logps/rejected": -203.7847137451172, "loss": 0.1281, "rewards/accuracies": 1.0, "rewards/chosen": 1.910176396369934, "rewards/margins": 5.649108409881592, "rewards/rejected": -3.738931894302368, "step": 273 }, { "epoch": 0.15, "learning_rate": 9.945044230035355e-07, "logits/chosen": -6.141395568847656, "logits/rejected": -6.180420875549316, "logps/chosen": -189.5386962890625, "logps/rejected": -220.40463256835938, "loss": 0.1595, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3363206386566162, "rewards/margins": 5.130756378173828, "rewards/rejected": -3.794435501098633, "step": 274 }, { "epoch": 0.15, "learning_rate": 9.944377322469423e-07, "logits/chosen": -6.253963947296143, "logits/rejected": -6.178512096405029, "logps/chosen": -240.9012451171875, "logps/rejected": -225.3631591796875, "loss": 0.127, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4538369178771973, "rewards/margins": 5.183237075805664, "rewards/rejected": -2.729400157928467, "step": 275 }, { "epoch": 0.15, "learning_rate": 9.943706415325676e-07, "logits/chosen": -6.1818084716796875, "logits/rejected": -6.2100348472595215, "logps/chosen": -302.7386474609375, "logps/rejected": -260.9920959472656, "loss": 0.0963, "rewards/accuracies": 0.875, "rewards/chosen": 3.1019201278686523, "rewards/margins": 6.075319766998291, "rewards/rejected": -2.9733996391296387, "step": 276 }, { "epoch": 0.15, "learning_rate": 9.943031509146822e-07, "logits/chosen": -6.163376331329346, "logits/rejected": -6.171880722045898, "logps/chosen": -166.6505126953125, "logps/rejected": -156.67369079589844, "loss": 0.194, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7810962200164795, "rewards/margins": 5.447059154510498, "rewards/rejected": -3.6659631729125977, "step": 277 }, { "epoch": 0.15, "learning_rate": 9.942352604478803e-07, "logits/chosen": -6.212433815002441, "logits/rejected": -6.197973251342773, "logps/chosen": -208.39181518554688, "logps/rejected": -205.97128295898438, "loss": 0.1492, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3178365230560303, "rewards/margins": 3.7019600868225098, "rewards/rejected": -2.3841235637664795, "step": 278 }, { "epoch": 0.15, "learning_rate": 9.94166970187079e-07, "logits/chosen": -6.220452308654785, "logits/rejected": -6.2785468101501465, "logps/chosen": -252.60923767089844, "logps/rejected": -298.6199951171875, "loss": 0.1813, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2554807662963867, "rewards/margins": 5.739938259124756, "rewards/rejected": -3.484457492828369, "step": 279 }, { "epoch": 0.16, "learning_rate": 9.940982801875198e-07, "logits/chosen": -6.042468070983887, "logits/rejected": -6.099361419677734, "logps/chosen": -296.80645751953125, "logps/rejected": -367.822021484375, "loss": 0.1567, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9257993698120117, "rewards/margins": 5.952960968017578, "rewards/rejected": -3.0271615982055664, "step": 280 }, { "epoch": 0.16, "learning_rate": 9.940291905047666e-07, "logits/chosen": -6.326336860656738, "logits/rejected": -6.154934883117676, "logps/chosen": -178.51734924316406, "logps/rejected": -143.0418243408203, "loss": 0.1631, "rewards/accuracies": 0.875, "rewards/chosen": 1.1567026376724243, "rewards/margins": 4.561386585235596, "rewards/rejected": -3.404683828353882, "step": 281 }, { "epoch": 0.16, "learning_rate": 9.939597011947074e-07, "logits/chosen": -6.165666580200195, "logits/rejected": -6.122532844543457, "logps/chosen": -303.4905090332031, "logps/rejected": -199.7312774658203, "loss": 0.1604, "rewards/accuracies": 1.0, "rewards/chosen": 3.005291223526001, "rewards/margins": 5.439754009246826, "rewards/rejected": -2.4344630241394043, "step": 282 }, { "epoch": 0.16, "learning_rate": 9.93889812313553e-07, "logits/chosen": -6.211609363555908, "logits/rejected": -6.178144931793213, "logps/chosen": -298.924072265625, "logps/rejected": -305.0313720703125, "loss": 0.1338, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9172675609588623, "rewards/margins": 5.533268928527832, "rewards/rejected": -3.6160011291503906, "step": 283 }, { "epoch": 0.16, "learning_rate": 9.938195239178374e-07, "logits/chosen": -6.3352370262146, "logits/rejected": -6.263294696807861, "logps/chosen": -274.6060485839844, "logps/rejected": -211.53318786621094, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 1.9000556468963623, "rewards/margins": 6.03302001953125, "rewards/rejected": -4.132964611053467, "step": 284 }, { "epoch": 0.16, "learning_rate": 9.93748836064418e-07, "logits/chosen": -6.197998046875, "logits/rejected": -6.246679306030273, "logps/chosen": -277.4730224609375, "logps/rejected": -306.8868103027344, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": 2.8390772342681885, "rewards/margins": 5.201803207397461, "rewards/rejected": -2.3627257347106934, "step": 285 }, { "epoch": 0.16, "learning_rate": 9.936777488104749e-07, "logits/chosen": -6.2039337158203125, "logits/rejected": -6.1549177169799805, "logps/chosen": -197.35244750976562, "logps/rejected": -128.67654418945312, "loss": 0.1413, "rewards/accuracies": 1.0, "rewards/chosen": 1.5337297916412354, "rewards/margins": 4.834748268127441, "rewards/rejected": -3.301018714904785, "step": 286 }, { "epoch": 0.16, "learning_rate": 9.93606262213512e-07, "logits/chosen": -6.1297807693481445, "logits/rejected": -6.243411540985107, "logps/chosen": -255.27955627441406, "logps/rejected": -222.48345947265625, "loss": 0.1282, "rewards/accuracies": 0.9375, "rewards/chosen": 2.56942081451416, "rewards/margins": 5.584407329559326, "rewards/rejected": -3.014986753463745, "step": 287 }, { "epoch": 0.16, "learning_rate": 9.935343763313558e-07, "logits/chosen": -6.251723289489746, "logits/rejected": -6.163512229919434, "logps/chosen": -250.07359313964844, "logps/rejected": -176.28158569335938, "loss": 0.1106, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0203866958618164, "rewards/margins": 6.157003402709961, "rewards/rejected": -3.1366167068481445, "step": 288 }, { "epoch": 0.16, "learning_rate": 9.934620912221557e-07, "logits/chosen": -6.164420127868652, "logits/rejected": -6.1294050216674805, "logps/chosen": -206.1524658203125, "logps/rejected": -228.0734405517578, "loss": 0.105, "rewards/accuracies": 0.875, "rewards/chosen": 1.5826375484466553, "rewards/margins": 4.624949932098389, "rewards/rejected": -3.0423123836517334, "step": 289 }, { "epoch": 0.16, "learning_rate": 9.933894069443841e-07, "logits/chosen": -6.248263835906982, "logits/rejected": -6.161256790161133, "logps/chosen": -238.9767303466797, "logps/rejected": -203.93223571777344, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": 1.9129749536514282, "rewards/margins": 5.660186290740967, "rewards/rejected": -3.747211456298828, "step": 290 }, { "epoch": 0.16, "learning_rate": 9.933163235568366e-07, "logits/chosen": -6.0519185066223145, "logits/rejected": -6.171475410461426, "logps/chosen": -188.7727508544922, "logps/rejected": -215.62039184570312, "loss": 0.185, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9843122959136963, "rewards/margins": 5.019140720367432, "rewards/rejected": -3.0348286628723145, "step": 291 }, { "epoch": 0.16, "learning_rate": 9.932428411186314e-07, "logits/chosen": -6.051318168640137, "logits/rejected": -6.130903720855713, "logps/chosen": -167.01429748535156, "logps/rejected": -271.09564208984375, "loss": 0.111, "rewards/accuracies": 0.875, "rewards/chosen": 0.7332984805107117, "rewards/margins": 4.563808441162109, "rewards/rejected": -3.830510139465332, "step": 292 }, { "epoch": 0.16, "learning_rate": 9.93168959689209e-07, "logits/chosen": -6.143916606903076, "logits/rejected": -6.157659530639648, "logps/chosen": -339.35321044921875, "logps/rejected": -268.21551513671875, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": 2.7984347343444824, "rewards/margins": 5.560793399810791, "rewards/rejected": -2.7623589038848877, "step": 293 }, { "epoch": 0.16, "learning_rate": 9.930946793283336e-07, "logits/chosen": -6.240921974182129, "logits/rejected": -6.246933937072754, "logps/chosen": -178.18624877929688, "logps/rejected": -188.342529296875, "loss": 0.1358, "rewards/accuracies": 0.9375, "rewards/chosen": 1.404895305633545, "rewards/margins": 5.336767196655273, "rewards/rejected": -3.9318721294403076, "step": 294 }, { "epoch": 0.16, "learning_rate": 9.930200000960916e-07, "logits/chosen": -6.15095329284668, "logits/rejected": -6.238350868225098, "logps/chosen": -192.8265380859375, "logps/rejected": -268.6875915527344, "loss": 0.1782, "rewards/accuracies": 0.875, "rewards/chosen": 2.251039743423462, "rewards/margins": 6.812237739562988, "rewards/rejected": -4.561197280883789, "step": 295 }, { "epoch": 0.16, "learning_rate": 9.92944922052892e-07, "logits/chosen": -6.142652988433838, "logits/rejected": -6.200563430786133, "logps/chosen": -152.8358154296875, "logps/rejected": -208.38140869140625, "loss": 0.136, "rewards/accuracies": 0.75, "rewards/chosen": 0.11459754407405853, "rewards/margins": 4.898566722869873, "rewards/rejected": -4.783968925476074, "step": 296 }, { "epoch": 0.16, "learning_rate": 9.928694452594666e-07, "logits/chosen": -6.0744948387146, "logits/rejected": -6.2503509521484375, "logps/chosen": -451.2860107421875, "logps/rejected": -277.47906494140625, "loss": 0.1466, "rewards/accuracies": 0.875, "rewards/chosen": 2.893673896789551, "rewards/margins": 5.590200424194336, "rewards/rejected": -2.696526527404785, "step": 297 }, { "epoch": 0.17, "learning_rate": 9.927935697768697e-07, "logits/chosen": -6.203526973724365, "logits/rejected": -6.284683704376221, "logps/chosen": -304.3150634765625, "logps/rejected": -161.55844116210938, "loss": 0.1525, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4312076568603516, "rewards/margins": 6.102359294891357, "rewards/rejected": -2.671151638031006, "step": 298 }, { "epoch": 0.17, "learning_rate": 9.92717295666478e-07, "logits/chosen": -6.191102981567383, "logits/rejected": -6.155085563659668, "logps/chosen": -203.13613891601562, "logps/rejected": -228.85498046875, "loss": 0.1553, "rewards/accuracies": 0.8125, "rewards/chosen": 1.739060878753662, "rewards/margins": 4.216375827789307, "rewards/rejected": -2.4773149490356445, "step": 299 }, { "epoch": 0.17, "learning_rate": 9.926406229899907e-07, "logits/chosen": -6.1327619552612305, "logits/rejected": -6.156465530395508, "logps/chosen": -279.6263732910156, "logps/rejected": -180.8185272216797, "loss": 0.1438, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5171968936920166, "rewards/margins": 5.713810920715332, "rewards/rejected": -3.1966142654418945, "step": 300 }, { "epoch": 0.17, "learning_rate": 9.925635518094294e-07, "logits/chosen": -6.070094108581543, "logits/rejected": -6.084531307220459, "logps/chosen": -213.0826416015625, "logps/rejected": -195.67526245117188, "loss": 0.1978, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7295870780944824, "rewards/margins": 6.3753156661987305, "rewards/rejected": -3.645728349685669, "step": 301 }, { "epoch": 0.17, "learning_rate": 9.924860821871379e-07, "logits/chosen": -6.165408611297607, "logits/rejected": -6.248081207275391, "logps/chosen": -269.83978271484375, "logps/rejected": -279.12396240234375, "loss": 0.1605, "rewards/accuracies": 1.0, "rewards/chosen": 3.062493324279785, "rewards/margins": 8.11104679107666, "rewards/rejected": -5.048553466796875, "step": 302 }, { "epoch": 0.17, "learning_rate": 9.92408214185783e-07, "logits/chosen": -6.2567291259765625, "logits/rejected": -6.162806034088135, "logps/chosen": -267.15667724609375, "logps/rejected": -197.0996856689453, "loss": 0.133, "rewards/accuracies": 0.875, "rewards/chosen": 1.1603795289993286, "rewards/margins": 4.673677444458008, "rewards/rejected": -3.5132980346679688, "step": 303 }, { "epoch": 0.17, "learning_rate": 9.923299478683529e-07, "logits/chosen": -6.191572189331055, "logits/rejected": -6.215318202972412, "logps/chosen": -252.22569274902344, "logps/rejected": -211.60684204101562, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": 2.832836389541626, "rewards/margins": 7.048031806945801, "rewards/rejected": -4.215195655822754, "step": 304 }, { "epoch": 0.17, "learning_rate": 9.922512832981584e-07, "logits/chosen": -6.239014148712158, "logits/rejected": -6.148369312286377, "logps/chosen": -345.5250549316406, "logps/rejected": -180.20751953125, "loss": 0.139, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9115092754364014, "rewards/margins": 6.142221450805664, "rewards/rejected": -2.2307119369506836, "step": 305 }, { "epoch": 0.17, "learning_rate": 9.921722205388324e-07, "logits/chosen": -6.072641849517822, "logits/rejected": -6.0833282470703125, "logps/chosen": -348.01629638671875, "logps/rejected": -147.49154663085938, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/chosen": 1.7265243530273438, "rewards/margins": 4.7500996589660645, "rewards/rejected": -3.0235753059387207, "step": 306 }, { "epoch": 0.17, "learning_rate": 9.920927596543299e-07, "logits/chosen": -6.154444217681885, "logits/rejected": -6.235158920288086, "logps/chosen": -368.36962890625, "logps/rejected": -206.67611694335938, "loss": 0.1708, "rewards/accuracies": 1.0, "rewards/chosen": 4.486504554748535, "rewards/margins": 7.931435585021973, "rewards/rejected": -3.4449307918548584, "step": 307 }, { "epoch": 0.17, "learning_rate": 9.920129007089278e-07, "logits/chosen": -6.185452461242676, "logits/rejected": -6.22297477722168, "logps/chosen": -280.48004150390625, "logps/rejected": -216.99185180664062, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": 2.576580286026001, "rewards/margins": 6.578587055206299, "rewards/rejected": -4.002006530761719, "step": 308 }, { "epoch": 0.17, "learning_rate": 9.919326437672254e-07, "logits/chosen": -6.09520959854126, "logits/rejected": -6.220224857330322, "logps/chosen": -168.41275024414062, "logps/rejected": -264.30218505859375, "loss": 0.1457, "rewards/accuracies": 0.9375, "rewards/chosen": 1.587723970413208, "rewards/margins": 5.796626091003418, "rewards/rejected": -4.208902359008789, "step": 309 }, { "epoch": 0.17, "learning_rate": 9.91851988894144e-07, "logits/chosen": -6.134993553161621, "logits/rejected": -6.188841342926025, "logps/chosen": -262.6785888671875, "logps/rejected": -231.90740966796875, "loss": 0.1417, "rewards/accuracies": 1.0, "rewards/chosen": 2.945014476776123, "rewards/margins": 6.882830619812012, "rewards/rejected": -3.9378161430358887, "step": 310 }, { "epoch": 0.17, "learning_rate": 9.917709361549255e-07, "logits/chosen": -6.0362420082092285, "logits/rejected": -6.142971992492676, "logps/chosen": -255.04782104492188, "logps/rejected": -218.67169189453125, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": 3.76059627532959, "rewards/margins": 6.922657012939453, "rewards/rejected": -3.1620609760284424, "step": 311 }, { "epoch": 0.17, "learning_rate": 9.916894856151355e-07, "logits/chosen": -6.138720989227295, "logits/rejected": -6.173671722412109, "logps/chosen": -302.2479553222656, "logps/rejected": -343.3147888183594, "loss": 0.1532, "rewards/accuracies": 0.9375, "rewards/chosen": 1.328683853149414, "rewards/margins": 5.727692127227783, "rewards/rejected": -4.399008274078369, "step": 312 }, { "epoch": 0.17, "learning_rate": 9.916076373406604e-07, "logits/chosen": -6.076006889343262, "logits/rejected": -6.158059120178223, "logps/chosen": -200.1094970703125, "logps/rejected": -119.41806030273438, "loss": 0.1527, "rewards/accuracies": 0.875, "rewards/chosen": 1.6442950963974, "rewards/margins": 3.425387144088745, "rewards/rejected": -1.7810920476913452, "step": 313 }, { "epoch": 0.17, "learning_rate": 9.91525391397708e-07, "logits/chosen": -6.153073310852051, "logits/rejected": -6.198222637176514, "logps/chosen": -413.26605224609375, "logps/rejected": -375.39483642578125, "loss": 0.162, "rewards/accuracies": 0.75, "rewards/chosen": 2.3288750648498535, "rewards/margins": 4.669673442840576, "rewards/rejected": -2.3407981395721436, "step": 314 }, { "epoch": 0.17, "learning_rate": 9.914427478528085e-07, "logits/chosen": -6.191381454467773, "logits/rejected": -6.186038970947266, "logps/chosen": -245.70826721191406, "logps/rejected": -236.78973388671875, "loss": 0.1359, "rewards/accuracies": 1.0, "rewards/chosen": 1.4580602645874023, "rewards/margins": 4.828459739685059, "rewards/rejected": -3.3703997135162354, "step": 315 }, { "epoch": 0.18, "learning_rate": 9.913597067728135e-07, "logits/chosen": -6.214071750640869, "logits/rejected": -6.210634231567383, "logps/chosen": -204.2682647705078, "logps/rejected": -157.1708526611328, "loss": 0.1197, "rewards/accuracies": 0.9375, "rewards/chosen": 2.422504425048828, "rewards/margins": 5.270514965057373, "rewards/rejected": -2.848010778427124, "step": 316 }, { "epoch": 0.18, "learning_rate": 9.912762682248962e-07, "logits/chosen": -6.143237113952637, "logits/rejected": -6.118226051330566, "logps/chosen": -330.7015380859375, "logps/rejected": -399.0765686035156, "loss": 0.1721, "rewards/accuracies": 0.9375, "rewards/chosen": 2.639918088912964, "rewards/margins": 6.639889240264893, "rewards/rejected": -3.9999709129333496, "step": 317 }, { "epoch": 0.18, "learning_rate": 9.911924322765508e-07, "logits/chosen": -6.119570255279541, "logits/rejected": -6.156144618988037, "logps/chosen": -175.75840759277344, "logps/rejected": -222.65528869628906, "loss": 0.1129, "rewards/accuracies": 1.0, "rewards/chosen": 1.145658254623413, "rewards/margins": 5.772303581237793, "rewards/rejected": -4.626645088195801, "step": 318 }, { "epoch": 0.18, "learning_rate": 9.911081989955939e-07, "logits/chosen": -6.236217975616455, "logits/rejected": -6.256204605102539, "logps/chosen": -183.06607055664062, "logps/rejected": -222.81591796875, "loss": 0.1406, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5467164516448975, "rewards/margins": 5.823591232299805, "rewards/rejected": -4.2768754959106445, "step": 319 }, { "epoch": 0.18, "learning_rate": 9.910235684501627e-07, "logits/chosen": -6.144071102142334, "logits/rejected": -6.235483169555664, "logps/chosen": -275.0068664550781, "logps/rejected": -164.8978729248047, "loss": 0.1515, "rewards/accuracies": 0.9375, "rewards/chosen": 3.776179790496826, "rewards/margins": 6.160484313964844, "rewards/rejected": -2.384305000305176, "step": 320 }, { "epoch": 0.18, "learning_rate": 9.909385407087162e-07, "logits/chosen": -6.188652038574219, "logits/rejected": -6.171665668487549, "logps/chosen": -274.30792236328125, "logps/rejected": -250.94309997558594, "loss": 0.1871, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4222970008850098, "rewards/margins": 5.108797550201416, "rewards/rejected": -2.6865005493164062, "step": 321 }, { "epoch": 0.18, "learning_rate": 9.908531158400345e-07, "logits/chosen": -6.1148481369018555, "logits/rejected": -6.112892150878906, "logps/chosen": -231.732421875, "logps/rejected": -245.60003662109375, "loss": 0.1226, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5911474227905273, "rewards/margins": 4.738527774810791, "rewards/rejected": -3.1473805904388428, "step": 322 }, { "epoch": 0.18, "learning_rate": 9.907672939132194e-07, "logits/chosen": -6.184881210327148, "logits/rejected": -6.212708950042725, "logps/chosen": -283.446533203125, "logps/rejected": -177.12655639648438, "loss": 0.1339, "rewards/accuracies": 1.0, "rewards/chosen": 2.2140870094299316, "rewards/margins": 5.81157112121582, "rewards/rejected": -3.5974843502044678, "step": 323 }, { "epoch": 0.18, "learning_rate": 9.906810749976929e-07, "logits/chosen": -6.170691013336182, "logits/rejected": -6.147339344024658, "logps/chosen": -249.25222778320312, "logps/rejected": -120.1686019897461, "loss": 0.1799, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6223722696304321, "rewards/margins": 5.189864158630371, "rewards/rejected": -3.5674917697906494, "step": 324 }, { "epoch": 0.18, "learning_rate": 9.90594459163199e-07, "logits/chosen": -6.229015827178955, "logits/rejected": -6.157158374786377, "logps/chosen": -436.25701904296875, "logps/rejected": -230.99603271484375, "loss": 0.1172, "rewards/accuracies": 0.875, "rewards/chosen": 1.5249817371368408, "rewards/margins": 4.621013164520264, "rewards/rejected": -3.096031427383423, "step": 325 }, { "epoch": 0.18, "learning_rate": 9.905074464798022e-07, "logits/chosen": -6.172946929931641, "logits/rejected": -6.158473968505859, "logps/chosen": -314.9814758300781, "logps/rejected": -143.3639678955078, "loss": 0.1222, "rewards/accuracies": 1.0, "rewards/chosen": 4.560540676116943, "rewards/margins": 6.485367774963379, "rewards/rejected": -1.924826741218567, "step": 326 }, { "epoch": 0.18, "learning_rate": 9.904200370178887e-07, "logits/chosen": -6.158792495727539, "logits/rejected": -6.184123992919922, "logps/chosen": -285.7272033691406, "logps/rejected": -243.6670379638672, "loss": 0.1708, "rewards/accuracies": 1.0, "rewards/chosen": 2.730398178100586, "rewards/margins": 7.398548126220703, "rewards/rejected": -4.668150424957275, "step": 327 }, { "epoch": 0.18, "learning_rate": 9.90332230848165e-07, "logits/chosen": -6.27269172668457, "logits/rejected": -6.225458145141602, "logps/chosen": -268.75140380859375, "logps/rejected": -235.08123779296875, "loss": 0.0994, "rewards/accuracies": 0.9375, "rewards/chosen": 2.987882137298584, "rewards/margins": 6.394829750061035, "rewards/rejected": -3.406947612762451, "step": 328 }, { "epoch": 0.18, "learning_rate": 9.902440280416593e-07, "logits/chosen": -6.180708885192871, "logits/rejected": -6.118640899658203, "logps/chosen": -343.80621337890625, "logps/rejected": -204.38485717773438, "loss": 0.1116, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4597928524017334, "rewards/margins": 7.086215019226074, "rewards/rejected": -3.626422166824341, "step": 329 }, { "epoch": 0.18, "learning_rate": 9.901554286697193e-07, "logits/chosen": -6.175800323486328, "logits/rejected": -6.17018985748291, "logps/chosen": -159.21788024902344, "logps/rejected": -133.48638916015625, "loss": 0.1336, "rewards/accuracies": 0.875, "rewards/chosen": 1.3037822246551514, "rewards/margins": 3.5956759452819824, "rewards/rejected": -2.291893482208252, "step": 330 }, { "epoch": 0.18, "learning_rate": 9.90066432804015e-07, "logits/chosen": -6.175915718078613, "logits/rejected": -6.170278549194336, "logps/chosen": -264.28546142578125, "logps/rejected": -197.55126953125, "loss": 0.1595, "rewards/accuracies": 0.9375, "rewards/chosen": 4.046449661254883, "rewards/margins": 6.746700286865234, "rewards/rejected": -2.7002503871917725, "step": 331 }, { "epoch": 0.18, "learning_rate": 9.89977040516536e-07, "logits/chosen": -6.180754661560059, "logits/rejected": -6.240391731262207, "logps/chosen": -285.0645751953125, "logps/rejected": -151.4569091796875, "loss": 0.2064, "rewards/accuracies": 0.9375, "rewards/chosen": 2.716646194458008, "rewards/margins": 6.903552055358887, "rewards/rejected": -4.186905860900879, "step": 332 }, { "epoch": 0.18, "learning_rate": 9.898872518795932e-07, "logits/chosen": -6.256319522857666, "logits/rejected": -6.218266010284424, "logps/chosen": -336.7333984375, "logps/rejected": -201.0228729248047, "loss": 0.1127, "rewards/accuracies": 0.9375, "rewards/chosen": 3.272026300430298, "rewards/margins": 5.562191486358643, "rewards/rejected": -2.290165424346924, "step": 333 }, { "epoch": 0.19, "learning_rate": 9.897970669658179e-07, "logits/chosen": -6.050084114074707, "logits/rejected": -6.133731842041016, "logps/chosen": -268.3016357421875, "logps/rejected": -218.64317321777344, "loss": 0.1548, "rewards/accuracies": 1.0, "rewards/chosen": 2.5543999671936035, "rewards/margins": 6.609291076660156, "rewards/rejected": -4.054891586303711, "step": 334 }, { "epoch": 0.19, "learning_rate": 9.897064858481616e-07, "logits/chosen": -6.09243106842041, "logits/rejected": -6.148199558258057, "logps/chosen": -221.58950805664062, "logps/rejected": -298.24639892578125, "loss": 0.1664, "rewards/accuracies": 1.0, "rewards/chosen": 1.350205421447754, "rewards/margins": 8.537644386291504, "rewards/rejected": -7.18743896484375, "step": 335 }, { "epoch": 0.19, "learning_rate": 9.896155085998975e-07, "logits/chosen": -6.128321647644043, "logits/rejected": -6.217351913452148, "logps/chosen": -267.5182189941406, "logps/rejected": -226.7987823486328, "loss": 0.1289, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8164560794830322, "rewards/margins": 5.703151226043701, "rewards/rejected": -1.886695146560669, "step": 336 }, { "epoch": 0.19, "learning_rate": 9.895241352946175e-07, "logits/chosen": -6.22696590423584, "logits/rejected": -6.0973334312438965, "logps/chosen": -306.5613708496094, "logps/rejected": -197.9162139892578, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": 2.6710562705993652, "rewards/margins": 7.401001930236816, "rewards/rejected": -4.729945659637451, "step": 337 }, { "epoch": 0.19, "learning_rate": 9.894323660062353e-07, "logits/chosen": -6.104724884033203, "logits/rejected": -6.115644931793213, "logps/chosen": -387.83734130859375, "logps/rejected": -292.32244873046875, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": 2.794374942779541, "rewards/margins": 5.123919486999512, "rewards/rejected": -2.3295445442199707, "step": 338 }, { "epoch": 0.19, "learning_rate": 9.893402008089846e-07, "logits/chosen": -6.100727081298828, "logits/rejected": -6.157661437988281, "logps/chosen": -254.75521850585938, "logps/rejected": -241.91702270507812, "loss": 0.2154, "rewards/accuracies": 1.0, "rewards/chosen": 2.358642101287842, "rewards/margins": 7.648621559143066, "rewards/rejected": -5.289979457855225, "step": 339 }, { "epoch": 0.19, "learning_rate": 9.892476397774186e-07, "logits/chosen": -6.202640533447266, "logits/rejected": -6.170502185821533, "logps/chosen": -247.23919677734375, "logps/rejected": -136.15109252929688, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": 2.364410638809204, "rewards/margins": 5.043046474456787, "rewards/rejected": -2.678636074066162, "step": 340 }, { "epoch": 0.19, "learning_rate": 9.891546829864115e-07, "logits/chosen": -6.097866535186768, "logits/rejected": -6.1335554122924805, "logps/chosen": -351.26324462890625, "logps/rejected": -191.20681762695312, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": 3.5499215126037598, "rewards/margins": 6.260181427001953, "rewards/rejected": -2.7102601528167725, "step": 341 }, { "epoch": 0.19, "learning_rate": 9.890613305111572e-07, "logits/chosen": -6.063937187194824, "logits/rejected": -6.148399829864502, "logps/chosen": -261.4443054199219, "logps/rejected": -236.0340118408203, "loss": 0.1377, "rewards/accuracies": 1.0, "rewards/chosen": 2.0941386222839355, "rewards/margins": 4.719989776611328, "rewards/rejected": -2.6258511543273926, "step": 342 }, { "epoch": 0.19, "learning_rate": 9.8896758242717e-07, "logits/chosen": -6.097537994384766, "logits/rejected": -6.157896518707275, "logps/chosen": -215.0029296875, "logps/rejected": -253.79310607910156, "loss": 0.1252, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8648476600646973, "rewards/margins": 8.443144798278809, "rewards/rejected": -5.5782976150512695, "step": 343 }, { "epoch": 0.19, "learning_rate": 9.888734388102846e-07, "logits/chosen": -6.105420112609863, "logits/rejected": -6.122292518615723, "logps/chosen": -262.09521484375, "logps/rejected": -199.51007080078125, "loss": 0.1788, "rewards/accuracies": 0.875, "rewards/chosen": 1.447614312171936, "rewards/margins": 4.466113090515137, "rewards/rejected": -3.0184988975524902, "step": 344 }, { "epoch": 0.19, "learning_rate": 9.887788997366541e-07, "logits/chosen": -6.078027725219727, "logits/rejected": -6.111606597900391, "logps/chosen": -291.9843444824219, "logps/rejected": -257.94873046875, "loss": 0.1941, "rewards/accuracies": 1.0, "rewards/chosen": 2.966740608215332, "rewards/margins": 5.797212600708008, "rewards/rejected": -2.830471992492676, "step": 345 }, { "epoch": 0.19, "learning_rate": 9.886839652827533e-07, "logits/chosen": -6.199707984924316, "logits/rejected": -6.159271717071533, "logps/chosen": -347.41021728515625, "logps/rejected": -225.67291259765625, "loss": 0.2014, "rewards/accuracies": 0.875, "rewards/chosen": 3.0070345401763916, "rewards/margins": 6.473329067230225, "rewards/rejected": -3.466294288635254, "step": 346 }, { "epoch": 0.19, "learning_rate": 9.885886355253757e-07, "logits/chosen": -6.149809837341309, "logits/rejected": -6.197521686553955, "logps/chosen": -288.85479736328125, "logps/rejected": -207.00457763671875, "loss": 0.1225, "rewards/accuracies": 0.875, "rewards/chosen": 3.616553544998169, "rewards/margins": 6.775841236114502, "rewards/rejected": -3.159287691116333, "step": 347 }, { "epoch": 0.19, "learning_rate": 9.88492910541635e-07, "logits/chosen": -6.228297233581543, "logits/rejected": -6.097426891326904, "logps/chosen": -453.7963562011719, "logps/rejected": -334.21954345703125, "loss": 0.1054, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3100807666778564, "rewards/margins": 6.189229965209961, "rewards/rejected": -3.8791496753692627, "step": 348 }, { "epoch": 0.19, "learning_rate": 9.883967904089646e-07, "logits/chosen": -6.177059173583984, "logits/rejected": -6.134081840515137, "logps/chosen": -220.4542999267578, "logps/rejected": -152.80435180664062, "loss": 0.1796, "rewards/accuracies": 0.875, "rewards/chosen": 1.3668158054351807, "rewards/margins": 4.93735408782959, "rewards/rejected": -3.5705385208129883, "step": 349 }, { "epoch": 0.19, "learning_rate": 9.883002752051173e-07, "logits/chosen": -6.231980800628662, "logits/rejected": -6.157042503356934, "logps/chosen": -233.4925994873047, "logps/rejected": -211.56158447265625, "loss": 0.1789, "rewards/accuracies": 0.9375, "rewards/chosen": 1.662095308303833, "rewards/margins": 5.730605602264404, "rewards/rejected": -4.06851053237915, "step": 350 }, { "epoch": 0.19, "learning_rate": 9.882033650081656e-07, "logits/chosen": -6.127355575561523, "logits/rejected": -6.150257587432861, "logps/chosen": -269.3143310546875, "logps/rejected": -151.528076171875, "loss": 0.1623, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6398868560791016, "rewards/margins": 6.06907844543457, "rewards/rejected": -3.4291915893554688, "step": 351 }, { "epoch": 0.2, "learning_rate": 9.881060598965015e-07, "logits/chosen": -6.1489176750183105, "logits/rejected": -6.095128536224365, "logps/chosen": -191.4779052734375, "logps/rejected": -286.38458251953125, "loss": 0.1663, "rewards/accuracies": 0.8125, "rewards/chosen": 1.471542239189148, "rewards/margins": 5.241286277770996, "rewards/rejected": -3.7697441577911377, "step": 352 }, { "epoch": 0.2, "learning_rate": 9.880083599488368e-07, "logits/chosen": -6.153332710266113, "logits/rejected": -6.1143693923950195, "logps/chosen": -289.9466857910156, "logps/rejected": -210.99786376953125, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": 3.5386862754821777, "rewards/margins": 6.412633895874023, "rewards/rejected": -2.873948097229004, "step": 353 }, { "epoch": 0.2, "learning_rate": 9.879102652442023e-07, "logits/chosen": -6.1821088790893555, "logits/rejected": -6.218867301940918, "logps/chosen": -185.91720581054688, "logps/rejected": -118.94656372070312, "loss": 0.1253, "rewards/accuracies": 1.0, "rewards/chosen": 1.2982150316238403, "rewards/margins": 4.303973197937012, "rewards/rejected": -3.0057578086853027, "step": 354 }, { "epoch": 0.2, "learning_rate": 9.878117758619483e-07, "logits/chosen": -6.136451244354248, "logits/rejected": -6.229123115539551, "logps/chosen": -230.845703125, "logps/rejected": -209.46591186523438, "loss": 0.1926, "rewards/accuracies": 0.9375, "rewards/chosen": 2.456355094909668, "rewards/margins": 7.319372177124023, "rewards/rejected": -4.8630170822143555, "step": 355 }, { "epoch": 0.2, "learning_rate": 9.877128918817437e-07, "logits/chosen": -6.064380168914795, "logits/rejected": -6.09105920791626, "logps/chosen": -240.41073608398438, "logps/rejected": -229.10316467285156, "loss": 0.1568, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9153834581375122, "rewards/margins": 6.304159164428711, "rewards/rejected": -4.388775825500488, "step": 356 }, { "epoch": 0.2, "learning_rate": 9.876136133835778e-07, "logits/chosen": -6.1496992111206055, "logits/rejected": -6.164925575256348, "logps/chosen": -203.94215393066406, "logps/rejected": -124.49942779541016, "loss": 0.2602, "rewards/accuracies": 0.875, "rewards/chosen": 2.4654383659362793, "rewards/margins": 4.666162490844727, "rewards/rejected": -2.200723886489868, "step": 357 }, { "epoch": 0.2, "learning_rate": 9.875139404477582e-07, "logits/chosen": -6.161808967590332, "logits/rejected": -6.1486310958862305, "logps/chosen": -266.5404357910156, "logps/rejected": -237.3206329345703, "loss": 0.1194, "rewards/accuracies": 0.9375, "rewards/chosen": 3.052042245864868, "rewards/margins": 6.842716693878174, "rewards/rejected": -3.7906744480133057, "step": 358 }, { "epoch": 0.2, "learning_rate": 9.874138731549118e-07, "logits/chosen": -6.141172409057617, "logits/rejected": -6.132399082183838, "logps/chosen": -317.3544616699219, "logps/rejected": -209.0914306640625, "loss": 0.1519, "rewards/accuracies": 1.0, "rewards/chosen": 3.2616019248962402, "rewards/margins": 7.654623031616211, "rewards/rejected": -4.393021106719971, "step": 359 }, { "epoch": 0.2, "learning_rate": 9.873134115859845e-07, "logits/chosen": -6.044583797454834, "logits/rejected": -6.127551078796387, "logps/chosen": -324.7784423828125, "logps/rejected": -243.59197998046875, "loss": 0.1205, "rewards/accuracies": 0.9375, "rewards/chosen": 3.03710675239563, "rewards/margins": 6.4871320724487305, "rewards/rejected": -3.4500253200531006, "step": 360 }, { "epoch": 0.2, "learning_rate": 9.872125558222409e-07, "logits/chosen": -6.106557846069336, "logits/rejected": -6.086084365844727, "logps/chosen": -276.782470703125, "logps/rejected": -215.17715454101562, "loss": 0.1101, "rewards/accuracies": 1.0, "rewards/chosen": 3.7081339359283447, "rewards/margins": 7.169328689575195, "rewards/rejected": -3.4611949920654297, "step": 361 }, { "epoch": 0.2, "learning_rate": 9.87111305945265e-07, "logits/chosen": -6.114460468292236, "logits/rejected": -6.048722267150879, "logps/chosen": -309.2605285644531, "logps/rejected": -133.6256103515625, "loss": 0.1208, "rewards/accuracies": 0.875, "rewards/chosen": 1.741492748260498, "rewards/margins": 3.509610176086426, "rewards/rejected": -1.7681171894073486, "step": 362 }, { "epoch": 0.2, "learning_rate": 9.870096620369587e-07, "logits/chosen": -6.057194232940674, "logits/rejected": -6.134325981140137, "logps/chosen": -333.2855224609375, "logps/rejected": -296.2319030761719, "loss": 0.2201, "rewards/accuracies": 0.875, "rewards/chosen": 1.8883016109466553, "rewards/margins": 4.668437957763672, "rewards/rejected": -2.7801365852355957, "step": 363 }, { "epoch": 0.2, "learning_rate": 9.86907624179544e-07, "logits/chosen": -6.111608982086182, "logits/rejected": -6.167049884796143, "logps/chosen": -403.6197814941406, "logps/rejected": -128.21307373046875, "loss": 0.1106, "rewards/accuracies": 1.0, "rewards/chosen": 2.6580944061279297, "rewards/margins": 4.1634202003479, "rewards/rejected": -1.5053257942199707, "step": 364 }, { "epoch": 0.2, "learning_rate": 9.868051924555602e-07, "logits/chosen": -6.128413200378418, "logits/rejected": -6.1406989097595215, "logps/chosen": -264.5427551269531, "logps/rejected": -192.12060546875, "loss": 0.1447, "rewards/accuracies": 0.8125, "rewards/chosen": 1.6935884952545166, "rewards/margins": 4.886353969573975, "rewards/rejected": -3.192765712738037, "step": 365 }, { "epoch": 0.2, "learning_rate": 9.867023669478662e-07, "logits/chosen": -6.234226703643799, "logits/rejected": -6.246587753295898, "logps/chosen": -328.37060546875, "logps/rejected": -198.08055114746094, "loss": 0.1137, "rewards/accuracies": 0.9375, "rewards/chosen": 3.149117946624756, "rewards/margins": 6.339272975921631, "rewards/rejected": -3.190155029296875, "step": 366 }, { "epoch": 0.2, "learning_rate": 9.865991477396387e-07, "logits/chosen": -6.290513038635254, "logits/rejected": -6.262100696563721, "logps/chosen": -165.62442016601562, "logps/rejected": -167.78265380859375, "loss": 0.1113, "rewards/accuracies": 0.875, "rewards/chosen": 1.1474123001098633, "rewards/margins": 5.3448967933654785, "rewards/rejected": -4.197484493255615, "step": 367 }, { "epoch": 0.2, "learning_rate": 9.864955349143732e-07, "logits/chosen": -6.131610870361328, "logits/rejected": -6.177234172821045, "logps/chosen": -306.1328125, "logps/rejected": -252.8646240234375, "loss": 0.1254, "rewards/accuracies": 0.875, "rewards/chosen": 3.557067632675171, "rewards/margins": 7.613417148590088, "rewards/rejected": -4.056349754333496, "step": 368 }, { "epoch": 0.2, "learning_rate": 9.86391528555884e-07, "logits/chosen": -6.146757125854492, "logits/rejected": -6.130558967590332, "logps/chosen": -351.05352783203125, "logps/rejected": -180.17843627929688, "loss": 0.1499, "rewards/accuracies": 0.9375, "rewards/chosen": 3.43133807182312, "rewards/margins": 5.406378269195557, "rewards/rejected": -1.9750399589538574, "step": 369 }, { "epoch": 0.21, "learning_rate": 9.86287128748303e-07, "logits/chosen": -6.1381378173828125, "logits/rejected": -6.120584011077881, "logps/chosen": -407.4199523925781, "logps/rejected": -305.9720764160156, "loss": 0.1304, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9250730276107788, "rewards/margins": 6.421322822570801, "rewards/rejected": -4.496250152587891, "step": 370 }, { "epoch": 0.21, "learning_rate": 9.861823355760806e-07, "logits/chosen": -6.1416778564453125, "logits/rejected": -6.178136825561523, "logps/chosen": -265.4732360839844, "logps/rejected": -286.2342224121094, "loss": 0.1727, "rewards/accuracies": 0.9375, "rewards/chosen": 2.107776165008545, "rewards/margins": 5.3927001953125, "rewards/rejected": -3.284924030303955, "step": 371 }, { "epoch": 0.21, "learning_rate": 9.860771491239857e-07, "logits/chosen": -6.139047622680664, "logits/rejected": -6.201632499694824, "logps/chosen": -339.7377014160156, "logps/rejected": -389.6783447265625, "loss": 0.1225, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6839358806610107, "rewards/margins": 6.774288177490234, "rewards/rejected": -5.0903520584106445, "step": 372 }, { "epoch": 0.21, "learning_rate": 9.859715694771053e-07, "logits/chosen": -6.154051780700684, "logits/rejected": -6.162543296813965, "logps/chosen": -203.682373046875, "logps/rejected": -186.7019500732422, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": 1.1038825511932373, "rewards/margins": 7.0315327644348145, "rewards/rejected": -5.927650451660156, "step": 373 }, { "epoch": 0.21, "learning_rate": 9.858655967208438e-07, "logits/chosen": -6.129390716552734, "logits/rejected": -6.196495056152344, "logps/chosen": -277.2786865234375, "logps/rejected": -187.05203247070312, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": 2.743068218231201, "rewards/margins": 6.030062675476074, "rewards/rejected": -3.286994695663452, "step": 374 }, { "epoch": 0.21, "learning_rate": 9.857592309409247e-07, "logits/chosen": -6.026375770568848, "logits/rejected": -6.139601707458496, "logps/chosen": -548.7781982421875, "logps/rejected": -352.73675537109375, "loss": 0.1504, "rewards/accuracies": 0.875, "rewards/chosen": 0.9910093545913696, "rewards/margins": 5.3285064697265625, "rewards/rejected": -4.337497234344482, "step": 375 }, { "epoch": 0.21, "learning_rate": 9.856524722233881e-07, "logits/chosen": -6.076659202575684, "logits/rejected": -6.156828880310059, "logps/chosen": -277.9828796386719, "logps/rejected": -173.9156036376953, "loss": 0.1467, "rewards/accuracies": 0.875, "rewards/chosen": 1.9352953433990479, "rewards/margins": 5.501072406768799, "rewards/rejected": -3.565777063369751, "step": 376 }, { "epoch": 0.21, "learning_rate": 9.855453206545934e-07, "logits/chosen": -6.171263217926025, "logits/rejected": -6.196284770965576, "logps/chosen": -317.1729736328125, "logps/rejected": -216.98519897460938, "loss": 0.1198, "rewards/accuracies": 1.0, "rewards/chosen": 2.1968226432800293, "rewards/margins": 8.130172729492188, "rewards/rejected": -5.933349609375, "step": 377 }, { "epoch": 0.21, "learning_rate": 9.854377763212164e-07, "logits/chosen": -6.154395580291748, "logits/rejected": -6.087129592895508, "logps/chosen": -217.08969116210938, "logps/rejected": -182.21234130859375, "loss": 0.1443, "rewards/accuracies": 0.9375, "rewards/chosen": 2.662783145904541, "rewards/margins": 5.500368118286133, "rewards/rejected": -2.8375844955444336, "step": 378 }, { "epoch": 0.21, "learning_rate": 9.85329839310252e-07, "logits/chosen": -6.142321586608887, "logits/rejected": -6.182257652282715, "logps/chosen": -223.13059997558594, "logps/rejected": -239.03103637695312, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": 1.6251578330993652, "rewards/margins": 6.262110710144043, "rewards/rejected": -4.636953353881836, "step": 379 }, { "epoch": 0.21, "learning_rate": 9.85221509709011e-07, "logits/chosen": -6.116193771362305, "logits/rejected": -6.163656711578369, "logps/chosen": -232.28407287597656, "logps/rejected": -138.45436096191406, "loss": 0.1929, "rewards/accuracies": 0.8125, "rewards/chosen": 1.3604464530944824, "rewards/margins": 3.1839144229888916, "rewards/rejected": -1.8234679698944092, "step": 380 }, { "epoch": 0.21, "learning_rate": 9.851127876051235e-07, "logits/chosen": -6.190003395080566, "logits/rejected": -6.171816349029541, "logps/chosen": -347.7410583496094, "logps/rejected": -173.4783935546875, "loss": 0.1463, "rewards/accuracies": 0.9375, "rewards/chosen": 3.881089210510254, "rewards/margins": 6.4134521484375, "rewards/rejected": -2.532362937927246, "step": 381 }, { "epoch": 0.21, "learning_rate": 9.850036730865362e-07, "logits/chosen": -6.148687362670898, "logits/rejected": -6.148941516876221, "logps/chosen": -337.80328369140625, "logps/rejected": -300.68621826171875, "loss": 0.1832, "rewards/accuracies": 0.9375, "rewards/chosen": 2.0475995540618896, "rewards/margins": 7.269918441772461, "rewards/rejected": -5.222318649291992, "step": 382 }, { "epoch": 0.21, "learning_rate": 9.848941662415133e-07, "logits/chosen": -6.186258316040039, "logits/rejected": -6.148284435272217, "logps/chosen": -194.02215576171875, "logps/rejected": -207.01998901367188, "loss": 0.1929, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4334359169006348, "rewards/margins": 5.300998687744141, "rewards/rejected": -3.867562770843506, "step": 383 }, { "epoch": 0.21, "learning_rate": 9.847842671586366e-07, "logits/chosen": -6.125960826873779, "logits/rejected": -6.132890224456787, "logps/chosen": -283.7254943847656, "logps/rejected": -250.0480499267578, "loss": 0.0911, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4747776985168457, "rewards/margins": 7.229301452636719, "rewards/rejected": -4.754523754119873, "step": 384 }, { "epoch": 0.21, "learning_rate": 9.84673975926805e-07, "logits/chosen": -6.105648994445801, "logits/rejected": -6.201576232910156, "logps/chosen": -169.94229125976562, "logps/rejected": -192.46575927734375, "loss": 0.195, "rewards/accuracies": 0.875, "rewards/chosen": 1.595960259437561, "rewards/margins": 5.28392219543457, "rewards/rejected": -3.6879613399505615, "step": 385 }, { "epoch": 0.21, "learning_rate": 9.845632926352348e-07, "logits/chosen": -6.064892292022705, "logits/rejected": -6.140828609466553, "logps/chosen": -230.6685028076172, "logps/rejected": -210.61895751953125, "loss": 0.1678, "rewards/accuracies": 0.875, "rewards/chosen": 2.105720043182373, "rewards/margins": 4.115483283996582, "rewards/rejected": -2.009763240814209, "step": 386 }, { "epoch": 0.21, "learning_rate": 9.84452217373459e-07, "logits/chosen": -6.101828098297119, "logits/rejected": -6.143858909606934, "logps/chosen": -217.96743774414062, "logps/rejected": -122.58586120605469, "loss": 0.1185, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3875057697296143, "rewards/margins": 6.099436283111572, "rewards/rejected": -3.711930274963379, "step": 387 }, { "epoch": 0.22, "learning_rate": 9.843407502313281e-07, "logits/chosen": -6.196358680725098, "logits/rejected": -6.221885681152344, "logps/chosen": -288.3143615722656, "logps/rejected": -176.5892333984375, "loss": 0.1041, "rewards/accuracies": 0.875, "rewards/chosen": 2.1979167461395264, "rewards/margins": 6.093826770782471, "rewards/rejected": -3.8959107398986816, "step": 388 }, { "epoch": 0.22, "learning_rate": 9.842288912990095e-07, "logits/chosen": -6.010260105133057, "logits/rejected": -6.060407638549805, "logps/chosen": -178.4649200439453, "logps/rejected": -246.2576141357422, "loss": 0.1708, "rewards/accuracies": 1.0, "rewards/chosen": 2.3763515949249268, "rewards/margins": 6.483241081237793, "rewards/rejected": -4.106889724731445, "step": 389 }, { "epoch": 0.22, "learning_rate": 9.841166406669877e-07, "logits/chosen": -6.208846092224121, "logits/rejected": -6.286919116973877, "logps/chosen": -225.99169921875, "logps/rejected": -173.09706115722656, "loss": 0.1465, "rewards/accuracies": 0.8125, "rewards/chosen": 3.517144203186035, "rewards/margins": 6.728947639465332, "rewards/rejected": -3.211803913116455, "step": 390 }, { "epoch": 0.22, "learning_rate": 9.840039984260632e-07, "logits/chosen": -6.075720310211182, "logits/rejected": -6.168022632598877, "logps/chosen": -200.30845642089844, "logps/rejected": -309.4629211425781, "loss": 0.2437, "rewards/accuracies": 0.9375, "rewards/chosen": 2.00426983833313, "rewards/margins": 6.038347244262695, "rewards/rejected": -4.034077167510986, "step": 391 }, { "epoch": 0.22, "learning_rate": 9.838909646673549e-07, "logits/chosen": -6.282089710235596, "logits/rejected": -6.218657970428467, "logps/chosen": -279.44122314453125, "logps/rejected": -200.29588317871094, "loss": 0.1174, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9049625396728516, "rewards/margins": 6.295598030090332, "rewards/rejected": -3.3906354904174805, "step": 392 }, { "epoch": 0.22, "learning_rate": 9.837775394822965e-07, "logits/chosen": -6.203348159790039, "logits/rejected": -6.160443305969238, "logps/chosen": -252.39315795898438, "logps/rejected": -208.48570251464844, "loss": 0.1779, "rewards/accuracies": 1.0, "rewards/chosen": 2.5248095989227295, "rewards/margins": 5.849348545074463, "rewards/rejected": -3.3245389461517334, "step": 393 }, { "epoch": 0.22, "learning_rate": 9.8366372296264e-07, "logits/chosen": -6.249890327453613, "logits/rejected": -6.243919372558594, "logps/chosen": -230.259521484375, "logps/rejected": -158.47683715820312, "loss": 0.1737, "rewards/accuracies": 0.9375, "rewards/chosen": 2.839777946472168, "rewards/margins": 6.557085990905762, "rewards/rejected": -3.717308282852173, "step": 394 }, { "epoch": 0.22, "learning_rate": 9.835495152004524e-07, "logits/chosen": -6.185309410095215, "logits/rejected": -6.123495578765869, "logps/chosen": -327.7737731933594, "logps/rejected": -164.29591369628906, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": 4.0962629318237305, "rewards/margins": 7.172358989715576, "rewards/rejected": -3.0760958194732666, "step": 395 }, { "epoch": 0.22, "learning_rate": 9.83434916288119e-07, "logits/chosen": -6.170119285583496, "logits/rejected": -6.182114124298096, "logps/chosen": -177.10369873046875, "logps/rejected": -177.27328491210938, "loss": 0.1213, "rewards/accuracies": 1.0, "rewards/chosen": 1.9425971508026123, "rewards/margins": 6.498996734619141, "rewards/rejected": -4.556399345397949, "step": 396 }, { "epoch": 0.22, "learning_rate": 9.833199263183396e-07, "logits/chosen": -6.238332748413086, "logits/rejected": -6.1599626541137695, "logps/chosen": -210.38711547851562, "logps/rejected": -192.80148315429688, "loss": 0.2067, "rewards/accuracies": 0.9375, "rewards/chosen": 1.038034439086914, "rewards/margins": 5.339235305786133, "rewards/rejected": -4.301200866699219, "step": 397 }, { "epoch": 0.22, "learning_rate": 9.832045453841318e-07, "logits/chosen": -6.108883857727051, "logits/rejected": -6.187255859375, "logps/chosen": -242.6337432861328, "logps/rejected": -196.03948974609375, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": 2.8342742919921875, "rewards/margins": 5.836004257202148, "rewards/rejected": -3.00173020362854, "step": 398 }, { "epoch": 0.22, "learning_rate": 9.830887735788286e-07, "logits/chosen": -6.072417736053467, "logits/rejected": -6.079141616821289, "logps/chosen": -649.708984375, "logps/rejected": -451.77838134765625, "loss": 0.1049, "rewards/accuracies": 0.9375, "rewards/chosen": 3.002612829208374, "rewards/margins": 5.7434892654418945, "rewards/rejected": -2.7408766746520996, "step": 399 }, { "epoch": 0.22, "learning_rate": 9.829726109960797e-07, "logits/chosen": -6.127256393432617, "logits/rejected": -6.198369026184082, "logps/chosen": -285.30645751953125, "logps/rejected": -262.789306640625, "loss": 0.0829, "rewards/accuracies": 0.9375, "rewards/chosen": 3.138270854949951, "rewards/margins": 6.863991737365723, "rewards/rejected": -3.7257208824157715, "step": 400 }, { "epoch": 0.22, "learning_rate": 9.828560577298505e-07, "logits/chosen": -6.057912349700928, "logits/rejected": -6.151362895965576, "logps/chosen": -274.5877380371094, "logps/rejected": -229.85089111328125, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": 3.5855729579925537, "rewards/margins": 6.879945755004883, "rewards/rejected": -3.29437255859375, "step": 401 }, { "epoch": 0.22, "learning_rate": 9.827391138744229e-07, "logits/chosen": -6.147820472717285, "logits/rejected": -6.167044162750244, "logps/chosen": -218.0772247314453, "logps/rejected": -139.93807983398438, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/chosen": 1.9406840801239014, "rewards/margins": 5.560384750366211, "rewards/rejected": -3.6197009086608887, "step": 402 }, { "epoch": 0.22, "learning_rate": 9.82621779524394e-07, "logits/chosen": -6.074099540710449, "logits/rejected": -6.100383281707764, "logps/chosen": -304.87744140625, "logps/rejected": -218.06094360351562, "loss": 0.1535, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8237684965133667, "rewards/margins": 4.872592926025391, "rewards/rejected": -3.0488243103027344, "step": 403 }, { "epoch": 0.22, "learning_rate": 9.825040547746775e-07, "logits/chosen": -6.113541603088379, "logits/rejected": -6.125110626220703, "logps/chosen": -240.5567169189453, "logps/rejected": -147.43565368652344, "loss": 0.1004, "rewards/accuracies": 0.9375, "rewards/chosen": 3.833263635635376, "rewards/margins": 6.072971343994141, "rewards/rejected": -2.2397079467773438, "step": 404 }, { "epoch": 0.22, "learning_rate": 9.823859397205026e-07, "logits/chosen": -6.1254119873046875, "logits/rejected": -6.137979507446289, "logps/chosen": -527.114013671875, "logps/rejected": -273.43096923828125, "loss": 0.1353, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8195056915283203, "rewards/margins": 6.247306823730469, "rewards/rejected": -2.4278008937835693, "step": 405 }, { "epoch": 0.23, "learning_rate": 9.822674344574142e-07, "logits/chosen": -6.121859550476074, "logits/rejected": -5.956385612487793, "logps/chosen": -205.4810028076172, "logps/rejected": -155.83682250976562, "loss": 0.1429, "rewards/accuracies": 0.875, "rewards/chosen": 1.2441991567611694, "rewards/margins": 4.753612041473389, "rewards/rejected": -3.5094127655029297, "step": 406 }, { "epoch": 0.23, "learning_rate": 9.821485390812733e-07, "logits/chosen": -6.117393970489502, "logits/rejected": -6.184133529663086, "logps/chosen": -309.78314208984375, "logps/rejected": -260.56732177734375, "loss": 0.1269, "rewards/accuracies": 0.9375, "rewards/chosen": 4.119655609130859, "rewards/margins": 6.333324432373047, "rewards/rejected": -2.2136690616607666, "step": 407 }, { "epoch": 0.23, "learning_rate": 9.820292536882556e-07, "logits/chosen": -6.175124168395996, "logits/rejected": -6.2762041091918945, "logps/chosen": -243.71405029296875, "logps/rejected": -243.57855224609375, "loss": 0.0973, "rewards/accuracies": 1.0, "rewards/chosen": 2.8597605228424072, "rewards/margins": 6.897196292877197, "rewards/rejected": -4.037435531616211, "step": 408 }, { "epoch": 0.23, "learning_rate": 9.81909578374853e-07, "logits/chosen": -6.153772354125977, "logits/rejected": -6.113091468811035, "logps/chosen": -268.47137451171875, "logps/rejected": -291.4559631347656, "loss": 0.1325, "rewards/accuracies": 0.8125, "rewards/chosen": 3.027672529220581, "rewards/margins": 6.0441765785217285, "rewards/rejected": -3.0165038108825684, "step": 409 }, { "epoch": 0.23, "learning_rate": 9.817895132378723e-07, "logits/chosen": -6.183579921722412, "logits/rejected": -6.105597972869873, "logps/chosen": -286.9995422363281, "logps/rejected": -233.7751922607422, "loss": 0.1697, "rewards/accuracies": 1.0, "rewards/chosen": 2.981196880340576, "rewards/margins": 6.324825763702393, "rewards/rejected": -3.3436288833618164, "step": 410 }, { "epoch": 0.23, "learning_rate": 9.816690583744365e-07, "logits/chosen": -6.213713645935059, "logits/rejected": -6.175411224365234, "logps/chosen": -242.18284606933594, "logps/rejected": -141.793701171875, "loss": 0.1352, "rewards/accuracies": 1.0, "rewards/chosen": 3.1030452251434326, "rewards/margins": 6.83505916595459, "rewards/rejected": -3.7320139408111572, "step": 411 }, { "epoch": 0.23, "learning_rate": 9.815482138819827e-07, "logits/chosen": -6.251336097717285, "logits/rejected": -6.258684158325195, "logps/chosen": -264.013916015625, "logps/rejected": -210.6518096923828, "loss": 0.1832, "rewards/accuracies": 0.9375, "rewards/chosen": 3.120168447494507, "rewards/margins": 7.403202056884766, "rewards/rejected": -4.283033847808838, "step": 412 }, { "epoch": 0.23, "learning_rate": 9.81426979858264e-07, "logits/chosen": -6.157103061676025, "logits/rejected": -6.09609842300415, "logps/chosen": -373.6759033203125, "logps/rejected": -339.4051208496094, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": 3.1834232807159424, "rewards/margins": 7.089523792266846, "rewards/rejected": -3.906100273132324, "step": 413 }, { "epoch": 0.23, "learning_rate": 9.813053564013483e-07, "logits/chosen": -6.127376079559326, "logits/rejected": -6.122491836547852, "logps/chosen": -180.00518798828125, "logps/rejected": -189.196044921875, "loss": 0.1266, "rewards/accuracies": 0.875, "rewards/chosen": 2.4800682067871094, "rewards/margins": 6.702486991882324, "rewards/rejected": -4.222418785095215, "step": 414 }, { "epoch": 0.23, "learning_rate": 9.811833436096186e-07, "logits/chosen": -6.133454322814941, "logits/rejected": -6.1316118240356445, "logps/chosen": -208.2034454345703, "logps/rejected": -183.61544799804688, "loss": 0.1197, "rewards/accuracies": 1.0, "rewards/chosen": 2.551236867904663, "rewards/margins": 6.470276832580566, "rewards/rejected": -3.919039726257324, "step": 415 }, { "epoch": 0.23, "learning_rate": 9.810609415817726e-07, "logits/chosen": -6.156603813171387, "logits/rejected": -6.197829246520996, "logps/chosen": -219.6597137451172, "logps/rejected": -215.0107421875, "loss": 0.0859, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7179813385009766, "rewards/margins": 5.578693866729736, "rewards/rejected": -2.8607125282287598, "step": 416 }, { "epoch": 0.23, "learning_rate": 9.809381504168233e-07, "logits/chosen": -6.110377311706543, "logits/rejected": -6.095988750457764, "logps/chosen": -269.66827392578125, "logps/rejected": -189.86813354492188, "loss": 0.1087, "rewards/accuracies": 0.875, "rewards/chosen": 3.1218347549438477, "rewards/margins": 6.103032112121582, "rewards/rejected": -2.9811973571777344, "step": 417 }, { "epoch": 0.23, "learning_rate": 9.808149702140983e-07, "logits/chosen": -6.189333915710449, "logits/rejected": -6.1458845138549805, "logps/chosen": -473.4580383300781, "logps/rejected": -206.03933715820312, "loss": 0.1205, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2368087768554688, "rewards/margins": 5.491946697235107, "rewards/rejected": -3.2551379203796387, "step": 418 }, { "epoch": 0.23, "learning_rate": 9.806914010732394e-07, "logits/chosen": -6.133488655090332, "logits/rejected": -6.135101318359375, "logps/chosen": -260.67279052734375, "logps/rejected": -226.0069122314453, "loss": 0.2275, "rewards/accuracies": 0.875, "rewards/chosen": 2.481854200363159, "rewards/margins": 5.455935955047607, "rewards/rejected": -2.9740819931030273, "step": 419 }, { "epoch": 0.23, "learning_rate": 9.805674430942038e-07, "logits/chosen": -6.099483489990234, "logits/rejected": -6.082739353179932, "logps/chosen": -355.5606689453125, "logps/rejected": -197.3260498046875, "loss": 0.1859, "rewards/accuracies": 0.875, "rewards/chosen": 2.4628541469573975, "rewards/margins": 4.647974014282227, "rewards/rejected": -2.185119867324829, "step": 420 }, { "epoch": 0.23, "learning_rate": 9.80443096377263e-07, "logits/chosen": -6.163466930389404, "logits/rejected": -6.228775978088379, "logps/chosen": -287.36651611328125, "logps/rejected": -185.16493225097656, "loss": 0.2169, "rewards/accuracies": 1.0, "rewards/chosen": 3.3468191623687744, "rewards/margins": 8.14945125579834, "rewards/rejected": -4.802631378173828, "step": 421 }, { "epoch": 0.23, "learning_rate": 9.803183610230024e-07, "logits/chosen": -6.1017045974731445, "logits/rejected": -6.146064281463623, "logps/chosen": -282.001708984375, "logps/rejected": -280.70367431640625, "loss": 0.1698, "rewards/accuracies": 0.75, "rewards/chosen": 3.7704455852508545, "rewards/margins": 6.453583240509033, "rewards/rejected": -2.683137893676758, "step": 422 }, { "epoch": 0.23, "learning_rate": 9.801932371323227e-07, "logits/chosen": -6.277316093444824, "logits/rejected": -6.207168102264404, "logps/chosen": -263.04949951171875, "logps/rejected": -188.16810607910156, "loss": 0.096, "rewards/accuracies": 0.9375, "rewards/chosen": 2.076669931411743, "rewards/margins": 6.290994644165039, "rewards/rejected": -4.214324951171875, "step": 423 }, { "epoch": 0.24, "learning_rate": 9.80067724806438e-07, "logits/chosen": -6.065526485443115, "logits/rejected": -6.163200378417969, "logps/chosen": -249.66781616210938, "logps/rejected": -454.90216064453125, "loss": 0.0957, "rewards/accuracies": 0.9375, "rewards/chosen": 2.257237672805786, "rewards/margins": 7.251589298248291, "rewards/rejected": -4.994351863861084, "step": 424 }, { "epoch": 0.24, "learning_rate": 9.799418241468774e-07, "logits/chosen": -6.155393600463867, "logits/rejected": -6.23781681060791, "logps/chosen": -203.36961364746094, "logps/rejected": -263.5718994140625, "loss": 0.1379, "rewards/accuracies": 1.0, "rewards/chosen": 2.815474510192871, "rewards/margins": 7.4738311767578125, "rewards/rejected": -4.658355712890625, "step": 425 }, { "epoch": 0.24, "learning_rate": 9.798155352554836e-07, "logits/chosen": -6.105723857879639, "logits/rejected": -6.143390655517578, "logps/chosen": -237.36968994140625, "logps/rejected": -210.7153778076172, "loss": 0.1286, "rewards/accuracies": 1.0, "rewards/chosen": 3.439324378967285, "rewards/margins": 6.905327796936035, "rewards/rejected": -3.466002941131592, "step": 426 }, { "epoch": 0.24, "learning_rate": 9.796888582344133e-07, "logits/chosen": -6.094855308532715, "logits/rejected": -6.104280948638916, "logps/chosen": -287.3463134765625, "logps/rejected": -178.45162963867188, "loss": 0.1068, "rewards/accuracies": 0.875, "rewards/chosen": 3.5700128078460693, "rewards/margins": 5.802703857421875, "rewards/rejected": -2.2326912879943848, "step": 427 }, { "epoch": 0.24, "learning_rate": 9.795617931861377e-07, "logits/chosen": -6.237288475036621, "logits/rejected": -6.184139728546143, "logps/chosen": -202.50277709960938, "logps/rejected": -163.25613403320312, "loss": 0.2065, "rewards/accuracies": 1.0, "rewards/chosen": 2.1658825874328613, "rewards/margins": 5.914618492126465, "rewards/rejected": -3.7487363815307617, "step": 428 }, { "epoch": 0.24, "learning_rate": 9.794343402134412e-07, "logits/chosen": -6.1248321533203125, "logits/rejected": -6.140398979187012, "logps/chosen": -442.7035217285156, "logps/rejected": -412.28546142578125, "loss": 0.2423, "rewards/accuracies": 0.875, "rewards/chosen": 3.273824453353882, "rewards/margins": 7.618215560913086, "rewards/rejected": -4.344391345977783, "step": 429 }, { "epoch": 0.24, "learning_rate": 9.793064994194226e-07, "logits/chosen": -6.213188171386719, "logits/rejected": -6.1217803955078125, "logps/chosen": -247.79173278808594, "logps/rejected": -169.672607421875, "loss": 0.1638, "rewards/accuracies": 0.875, "rewards/chosen": 3.016284227371216, "rewards/margins": 5.935843467712402, "rewards/rejected": -2.9195594787597656, "step": 430 }, { "epoch": 0.24, "learning_rate": 9.791782709074942e-07, "logits/chosen": -6.2383131980896, "logits/rejected": -6.178727626800537, "logps/chosen": -217.31692504882812, "logps/rejected": -213.34182739257812, "loss": 0.1093, "rewards/accuracies": 0.8125, "rewards/chosen": 1.0531913042068481, "rewards/margins": 5.169083595275879, "rewards/rejected": -4.11589241027832, "step": 431 }, { "epoch": 0.24, "learning_rate": 9.790496547813817e-07, "logits/chosen": -6.090535640716553, "logits/rejected": -6.117537021636963, "logps/chosen": -207.6473388671875, "logps/rejected": -220.57777404785156, "loss": 0.1138, "rewards/accuracies": 0.8125, "rewards/chosen": 1.9075230360031128, "rewards/margins": 4.500934600830078, "rewards/rejected": -2.593411922454834, "step": 432 }, { "epoch": 0.24, "learning_rate": 9.789206511451244e-07, "logits/chosen": -6.068240642547607, "logits/rejected": -6.0823445320129395, "logps/chosen": -381.38360595703125, "logps/rejected": -292.4210205078125, "loss": 0.1874, "rewards/accuracies": 1.0, "rewards/chosen": 3.242098569869995, "rewards/margins": 7.4571452140808105, "rewards/rejected": -4.215045928955078, "step": 433 }, { "epoch": 0.24, "learning_rate": 9.787912601030758e-07, "logits/chosen": -6.072139739990234, "logits/rejected": -6.135468006134033, "logps/chosen": -259.8935546875, "logps/rejected": -265.36181640625, "loss": 0.1646, "rewards/accuracies": 1.0, "rewards/chosen": 3.205493927001953, "rewards/margins": 6.171504974365234, "rewards/rejected": -2.9660110473632812, "step": 434 }, { "epoch": 0.24, "learning_rate": 9.786614817599012e-07, "logits/chosen": -6.191239833831787, "logits/rejected": -6.141618251800537, "logps/chosen": -409.2213439941406, "logps/rejected": -414.83087158203125, "loss": 0.1526, "rewards/accuracies": 0.875, "rewards/chosen": 2.7378687858581543, "rewards/margins": 7.27205228805542, "rewards/rejected": -4.534183502197266, "step": 435 }, { "epoch": 0.24, "learning_rate": 9.78531316220581e-07, "logits/chosen": -6.097401142120361, "logits/rejected": -6.115484714508057, "logps/chosen": -567.2266235351562, "logps/rejected": -330.7457275390625, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": 2.916342258453369, "rewards/margins": 5.9041900634765625, "rewards/rejected": -2.9878478050231934, "step": 436 }, { "epoch": 0.24, "learning_rate": 9.784007635904077e-07, "logits/chosen": -6.259825706481934, "logits/rejected": -6.198639869689941, "logps/chosen": -212.49620056152344, "logps/rejected": -105.02965545654297, "loss": 0.1631, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3837530612945557, "rewards/margins": 6.116817951202393, "rewards/rejected": -3.733064889907837, "step": 437 }, { "epoch": 0.24, "learning_rate": 9.782698239749872e-07, "logits/chosen": -6.189592361450195, "logits/rejected": -6.105215072631836, "logps/chosen": -274.84442138671875, "logps/rejected": -152.00128173828125, "loss": 0.0937, "rewards/accuracies": 1.0, "rewards/chosen": 3.800882339477539, "rewards/margins": 7.169047832489014, "rewards/rejected": -3.3681654930114746, "step": 438 }, { "epoch": 0.24, "learning_rate": 9.781384974802382e-07, "logits/chosen": -6.213291168212891, "logits/rejected": -6.207550525665283, "logps/chosen": -236.90121459960938, "logps/rejected": -165.2549591064453, "loss": 0.1617, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6853842735290527, "rewards/margins": 6.946890354156494, "rewards/rejected": -4.261506080627441, "step": 439 }, { "epoch": 0.24, "learning_rate": 9.780067842123931e-07, "logits/chosen": -6.230352401733398, "logits/rejected": -6.184768199920654, "logps/chosen": -226.9215850830078, "logps/rejected": -175.8043670654297, "loss": 0.1171, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3884329795837402, "rewards/margins": 6.939168453216553, "rewards/rejected": -5.5507354736328125, "step": 440 }, { "epoch": 0.24, "learning_rate": 9.77874684277996e-07, "logits/chosen": -6.159233093261719, "logits/rejected": -6.237397193908691, "logps/chosen": -227.36099243164062, "logps/rejected": -257.4359130859375, "loss": 0.0949, "rewards/accuracies": 1.0, "rewards/chosen": 1.9725210666656494, "rewards/margins": 6.698975563049316, "rewards/rejected": -4.726454734802246, "step": 441 }, { "epoch": 0.25, "learning_rate": 9.777421977839051e-07, "logits/chosen": -6.101927757263184, "logits/rejected": -6.163566589355469, "logps/chosen": -350.6739501953125, "logps/rejected": -198.16612243652344, "loss": 0.0765, "rewards/accuracies": 0.9375, "rewards/chosen": 4.901760101318359, "rewards/margins": 7.177021026611328, "rewards/rejected": -2.2752609252929688, "step": 442 }, { "epoch": 0.25, "learning_rate": 9.776093248372903e-07, "logits/chosen": -6.048395156860352, "logits/rejected": -6.177793502807617, "logps/chosen": -333.9043884277344, "logps/rejected": -261.49713134765625, "loss": 0.1241, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6298842430114746, "rewards/margins": 7.5910186767578125, "rewards/rejected": -3.961134910583496, "step": 443 }, { "epoch": 0.25, "learning_rate": 9.774760655456345e-07, "logits/chosen": -6.077164173126221, "logits/rejected": -6.195157527923584, "logps/chosen": -332.865478515625, "logps/rejected": -242.3297119140625, "loss": 0.1433, "rewards/accuracies": 0.875, "rewards/chosen": 4.192413806915283, "rewards/margins": 7.282929420471191, "rewards/rejected": -3.09051513671875, "step": 444 }, { "epoch": 0.25, "learning_rate": 9.773424200167328e-07, "logits/chosen": -6.1953816413879395, "logits/rejected": -6.173314094543457, "logps/chosen": -545.9422607421875, "logps/rejected": -226.7327423095703, "loss": 0.1331, "rewards/accuracies": 0.9375, "rewards/chosen": 2.555873394012451, "rewards/margins": 6.040750503540039, "rewards/rejected": -3.4848766326904297, "step": 445 }, { "epoch": 0.25, "learning_rate": 9.772083883586935e-07, "logits/chosen": -6.248187065124512, "logits/rejected": -6.188684463500977, "logps/chosen": -329.71807861328125, "logps/rejected": -176.44882202148438, "loss": 0.2062, "rewards/accuracies": 0.875, "rewards/chosen": 4.294015884399414, "rewards/margins": 6.890556335449219, "rewards/rejected": -2.596540689468384, "step": 446 }, { "epoch": 0.25, "learning_rate": 9.770739706799362e-07, "logits/chosen": -6.121765613555908, "logits/rejected": -6.144112586975098, "logps/chosen": -239.84432983398438, "logps/rejected": -194.72674560546875, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": 2.1486010551452637, "rewards/margins": 7.219542980194092, "rewards/rejected": -5.07094144821167, "step": 447 }, { "epoch": 0.25, "learning_rate": 9.769391670891936e-07, "logits/chosen": -6.188878536224365, "logits/rejected": -6.135140419006348, "logps/chosen": -229.39727783203125, "logps/rejected": -159.7254638671875, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": 2.5091605186462402, "rewards/margins": 5.31627082824707, "rewards/rejected": -2.80711030960083, "step": 448 }, { "epoch": 0.25, "learning_rate": 9.768039776955102e-07, "logits/chosen": -6.124837875366211, "logits/rejected": -6.158774375915527, "logps/chosen": -255.30630493164062, "logps/rejected": -301.17578125, "loss": 0.1209, "rewards/accuracies": 0.875, "rewards/chosen": 4.149033069610596, "rewards/margins": 7.643522262573242, "rewards/rejected": -3.4944891929626465, "step": 449 }, { "epoch": 0.25, "learning_rate": 9.766684026082426e-07, "logits/chosen": -6.12068510055542, "logits/rejected": -6.158231258392334, "logps/chosen": -242.54168701171875, "logps/rejected": -160.04779052734375, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 2.3795249462127686, "rewards/margins": 6.382024765014648, "rewards/rejected": -4.002499580383301, "step": 450 }, { "epoch": 0.25, "learning_rate": 9.765324419370594e-07, "logits/chosen": -6.16224479675293, "logits/rejected": -6.102821350097656, "logps/chosen": -408.5194091796875, "logps/rejected": -162.35870361328125, "loss": 0.1309, "rewards/accuracies": 0.9375, "rewards/chosen": 4.788368225097656, "rewards/margins": 5.998832702636719, "rewards/rejected": -1.2104637622833252, "step": 451 }, { "epoch": 0.25, "learning_rate": 9.763960957919413e-07, "logits/chosen": -6.105546474456787, "logits/rejected": -6.081988334655762, "logps/chosen": -267.9845886230469, "logps/rejected": -161.68792724609375, "loss": 0.1352, "rewards/accuracies": 0.875, "rewards/chosen": 1.4971106052398682, "rewards/margins": 4.819871425628662, "rewards/rejected": -3.322761058807373, "step": 452 }, { "epoch": 0.25, "learning_rate": 9.762593642831805e-07, "logits/chosen": -6.14063024520874, "logits/rejected": -6.125726699829102, "logps/chosen": -328.2044677734375, "logps/rejected": -311.2734680175781, "loss": 0.1461, "rewards/accuracies": 0.8125, "rewards/chosen": 1.6548843383789062, "rewards/margins": 5.143726348876953, "rewards/rejected": -3.488842248916626, "step": 453 }, { "epoch": 0.25, "learning_rate": 9.761222475213812e-07, "logits/chosen": -6.138090133666992, "logits/rejected": -6.143809795379639, "logps/chosen": -324.5354309082031, "logps/rejected": -141.5960235595703, "loss": 0.118, "rewards/accuracies": 1.0, "rewards/chosen": 2.1624839305877686, "rewards/margins": 6.957640647888184, "rewards/rejected": -4.795156478881836, "step": 454 }, { "epoch": 0.25, "learning_rate": 9.759847456174587e-07, "logits/chosen": -6.334171772003174, "logits/rejected": -6.172364234924316, "logps/chosen": -441.97845458984375, "logps/rejected": -282.17584228515625, "loss": 0.1208, "rewards/accuracies": 0.8125, "rewards/chosen": 3.297438144683838, "rewards/margins": 6.401386260986328, "rewards/rejected": -3.1039485931396484, "step": 455 }, { "epoch": 0.25, "learning_rate": 9.758468586826412e-07, "logits/chosen": -6.169939041137695, "logits/rejected": -6.16922664642334, "logps/chosen": -268.70428466796875, "logps/rejected": -215.17800903320312, "loss": 0.1263, "rewards/accuracies": 0.875, "rewards/chosen": 3.5008888244628906, "rewards/margins": 7.857489109039307, "rewards/rejected": -4.356599807739258, "step": 456 }, { "epoch": 0.25, "learning_rate": 9.757085868284665e-07, "logits/chosen": -6.15043830871582, "logits/rejected": -6.141223907470703, "logps/chosen": -262.0875244140625, "logps/rejected": -164.6953887939453, "loss": 0.1239, "rewards/accuracies": 0.875, "rewards/chosen": 1.318662166595459, "rewards/margins": 5.856977462768555, "rewards/rejected": -4.5383148193359375, "step": 457 }, { "epoch": 0.25, "learning_rate": 9.75569930166785e-07, "logits/chosen": -6.1551103591918945, "logits/rejected": -6.08454704284668, "logps/chosen": -278.81439208984375, "logps/rejected": -117.91893005371094, "loss": 0.1298, "rewards/accuracies": 1.0, "rewards/chosen": 2.4022393226623535, "rewards/margins": 4.970365047454834, "rewards/rejected": -2.5681252479553223, "step": 458 }, { "epoch": 0.25, "learning_rate": 9.754308888097582e-07, "logits/chosen": -6.183053016662598, "logits/rejected": -6.184223175048828, "logps/chosen": -360.5695495605469, "logps/rejected": -267.76611328125, "loss": 0.1803, "rewards/accuracies": 0.8125, "rewards/chosen": 1.50730299949646, "rewards/margins": 5.774026870727539, "rewards/rejected": -4.2667236328125, "step": 459 }, { "epoch": 0.26, "learning_rate": 9.752914628698585e-07, "logits/chosen": -6.060819625854492, "logits/rejected": -6.171632766723633, "logps/chosen": -148.06546020507812, "logps/rejected": -204.98220825195312, "loss": 0.1168, "rewards/accuracies": 0.875, "rewards/chosen": 1.4261850118637085, "rewards/margins": 5.711833953857422, "rewards/rejected": -4.285649299621582, "step": 460 }, { "epoch": 0.26, "learning_rate": 9.751516524598696e-07, "logits/chosen": -6.225282669067383, "logits/rejected": -6.274970531463623, "logps/chosen": -484.1134948730469, "logps/rejected": -445.11712646484375, "loss": 0.081, "rewards/accuracies": 0.9375, "rewards/chosen": 2.234375, "rewards/margins": 8.412368774414062, "rewards/rejected": -6.177992820739746, "step": 461 }, { "epoch": 0.26, "learning_rate": 9.75011457692886e-07, "logits/chosen": -6.125696182250977, "logits/rejected": -5.996340751647949, "logps/chosen": -497.98907470703125, "logps/rejected": -406.04815673828125, "loss": 0.1278, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6650406122207642, "rewards/margins": 5.049563884735107, "rewards/rejected": -4.384522914886475, "step": 462 }, { "epoch": 0.26, "learning_rate": 9.748708786823136e-07, "logits/chosen": -6.221319675445557, "logits/rejected": -6.149311542510986, "logps/chosen": -172.68240356445312, "logps/rejected": -196.24981689453125, "loss": 0.1413, "rewards/accuracies": 1.0, "rewards/chosen": 1.3657097816467285, "rewards/margins": 4.883006572723389, "rewards/rejected": -3.51729679107666, "step": 463 }, { "epoch": 0.26, "learning_rate": 9.747299155418685e-07, "logits/chosen": -6.267592906951904, "logits/rejected": -6.234381198883057, "logps/chosen": -280.454833984375, "logps/rejected": -288.8922119140625, "loss": 0.1629, "rewards/accuracies": 1.0, "rewards/chosen": 3.5583181381225586, "rewards/margins": 8.441956520080566, "rewards/rejected": -4.88363790512085, "step": 464 }, { "epoch": 0.26, "learning_rate": 9.745885683855779e-07, "logits/chosen": -6.199519157409668, "logits/rejected": -6.169279098510742, "logps/chosen": -355.78546142578125, "logps/rejected": -470.106201171875, "loss": 0.1495, "rewards/accuracies": 1.0, "rewards/chosen": 2.2284088134765625, "rewards/margins": 8.709840774536133, "rewards/rejected": -6.48143196105957, "step": 465 }, { "epoch": 0.26, "learning_rate": 9.744468373277796e-07, "logits/chosen": -6.157007217407227, "logits/rejected": -6.214248180389404, "logps/chosen": -234.63412475585938, "logps/rejected": -213.2494659423828, "loss": 0.152, "rewards/accuracies": 1.0, "rewards/chosen": 2.487703323364258, "rewards/margins": 7.350833892822266, "rewards/rejected": -4.863130569458008, "step": 466 }, { "epoch": 0.26, "learning_rate": 9.743047224831217e-07, "logits/chosen": -6.075843811035156, "logits/rejected": -6.07133674621582, "logps/chosen": -221.71726989746094, "logps/rejected": -224.6005859375, "loss": 0.1393, "rewards/accuracies": 0.8125, "rewards/chosen": 1.6462368965148926, "rewards/margins": 3.964816093444824, "rewards/rejected": -2.3185791969299316, "step": 467 }, { "epoch": 0.26, "learning_rate": 9.74162223966563e-07, "logits/chosen": -6.1647162437438965, "logits/rejected": -6.192020416259766, "logps/chosen": -288.7389221191406, "logps/rejected": -306.38330078125, "loss": 0.2007, "rewards/accuracies": 0.875, "rewards/chosen": 1.1728227138519287, "rewards/margins": 7.294410705566406, "rewards/rejected": -6.121587753295898, "step": 468 }, { "epoch": 0.26, "learning_rate": 9.740193418933727e-07, "logits/chosen": -6.286660194396973, "logits/rejected": -6.210444927215576, "logps/chosen": -286.1888122558594, "logps/rejected": -176.22109985351562, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": 3.1100993156433105, "rewards/margins": 7.338729381561279, "rewards/rejected": -4.228630065917969, "step": 469 }, { "epoch": 0.26, "learning_rate": 9.738760763791302e-07, "logits/chosen": -6.12261962890625, "logits/rejected": -6.206892013549805, "logps/chosen": -246.5421905517578, "logps/rejected": -224.64089965820312, "loss": 0.1163, "rewards/accuracies": 1.0, "rewards/chosen": 1.9963898658752441, "rewards/margins": 7.028719902038574, "rewards/rejected": -5.032330513000488, "step": 470 }, { "epoch": 0.26, "learning_rate": 9.73732427539725e-07, "logits/chosen": -6.20058012008667, "logits/rejected": -6.168783187866211, "logps/chosen": -384.5362548828125, "logps/rejected": -390.533447265625, "loss": 0.1462, "rewards/accuracies": 0.875, "rewards/chosen": 2.2884061336517334, "rewards/margins": 7.392823219299316, "rewards/rejected": -5.104416847229004, "step": 471 }, { "epoch": 0.26, "learning_rate": 9.735883954913564e-07, "logits/chosen": -6.140614986419678, "logits/rejected": -6.222221851348877, "logps/chosen": -230.88230895996094, "logps/rejected": -345.58660888671875, "loss": 0.1325, "rewards/accuracies": 0.9375, "rewards/chosen": 1.586719274520874, "rewards/margins": 7.908655643463135, "rewards/rejected": -6.32193660736084, "step": 472 }, { "epoch": 0.26, "learning_rate": 9.734439803505344e-07, "logits/chosen": -6.204314708709717, "logits/rejected": -6.140636444091797, "logps/chosen": -314.546875, "logps/rejected": -441.8406066894531, "loss": 0.0993, "rewards/accuracies": 0.9375, "rewards/chosen": 2.535348415374756, "rewards/margins": 6.144204139709473, "rewards/rejected": -3.608855724334717, "step": 473 }, { "epoch": 0.26, "learning_rate": 9.732991822340785e-07, "logits/chosen": -6.204495906829834, "logits/rejected": -6.1546478271484375, "logps/chosen": -313.9527893066406, "logps/rejected": -268.72802734375, "loss": 0.1775, "rewards/accuracies": 0.875, "rewards/chosen": 0.5698843002319336, "rewards/margins": 5.0779852867126465, "rewards/rejected": -4.508100986480713, "step": 474 }, { "epoch": 0.26, "learning_rate": 9.731540012591175e-07, "logits/chosen": -6.167142868041992, "logits/rejected": -6.161559581756592, "logps/chosen": -243.310791015625, "logps/rejected": -232.71981811523438, "loss": 0.1024, "rewards/accuracies": 0.875, "rewards/chosen": 1.6680893898010254, "rewards/margins": 7.051551818847656, "rewards/rejected": -5.383462905883789, "step": 475 }, { "epoch": 0.26, "learning_rate": 9.730084375430907e-07, "logits/chosen": -6.1533660888671875, "logits/rejected": -6.056884288787842, "logps/chosen": -292.1827087402344, "logps/rejected": -435.247314453125, "loss": 0.1328, "rewards/accuracies": 0.875, "rewards/chosen": 2.2574312686920166, "rewards/margins": 5.517563343048096, "rewards/rejected": -3.260132074356079, "step": 476 }, { "epoch": 0.26, "learning_rate": 9.728624912037468e-07, "logits/chosen": -6.304618835449219, "logits/rejected": -6.1802849769592285, "logps/chosen": -222.12051391601562, "logps/rejected": -198.3790740966797, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": 2.9770946502685547, "rewards/margins": 7.194730281829834, "rewards/rejected": -4.2176361083984375, "step": 477 }, { "epoch": 0.27, "learning_rate": 9.727161623591437e-07, "logits/chosen": -6.136122703552246, "logits/rejected": -6.102808475494385, "logps/chosen": -260.13848876953125, "logps/rejected": -224.22918701171875, "loss": 0.1228, "rewards/accuracies": 0.875, "rewards/chosen": 3.061678647994995, "rewards/margins": 8.445009231567383, "rewards/rejected": -5.383330345153809, "step": 478 }, { "epoch": 0.27, "learning_rate": 9.72569451127649e-07, "logits/chosen": -6.181612968444824, "logits/rejected": -6.040815830230713, "logps/chosen": -184.01608276367188, "logps/rejected": -154.45669555664062, "loss": 0.3233, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5109946727752686, "rewards/margins": 6.049994468688965, "rewards/rejected": -4.539000511169434, "step": 479 }, { "epoch": 0.27, "learning_rate": 9.724223576279393e-07, "logits/chosen": -6.21879768371582, "logits/rejected": -6.2632598876953125, "logps/chosen": -298.9716491699219, "logps/rejected": -240.4029998779297, "loss": 0.1191, "rewards/accuracies": 1.0, "rewards/chosen": 3.7362542152404785, "rewards/margins": 8.247047424316406, "rewards/rejected": -4.510793685913086, "step": 480 }, { "epoch": 0.27, "learning_rate": 9.72274881979001e-07, "logits/chosen": -6.110412120819092, "logits/rejected": -6.187872886657715, "logps/chosen": -273.03240966796875, "logps/rejected": -233.76092529296875, "loss": 0.1528, "rewards/accuracies": 0.9375, "rewards/chosen": 4.88227653503418, "rewards/margins": 8.705138206481934, "rewards/rejected": -3.822861671447754, "step": 481 }, { "epoch": 0.27, "learning_rate": 9.72127024300129e-07, "logits/chosen": -6.093429088592529, "logits/rejected": -6.128937244415283, "logps/chosen": -158.247314453125, "logps/rejected": -259.6379699707031, "loss": 0.2611, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8969706296920776, "rewards/margins": 8.305450439453125, "rewards/rejected": -7.408479690551758, "step": 482 }, { "epoch": 0.27, "learning_rate": 9.719787847109275e-07, "logits/chosen": -6.143531799316406, "logits/rejected": -6.065769672393799, "logps/chosen": -252.33599853515625, "logps/rejected": -141.16168212890625, "loss": 0.135, "rewards/accuracies": 0.875, "rewards/chosen": 1.4031739234924316, "rewards/margins": 5.26558780670166, "rewards/rejected": -3.8624138832092285, "step": 483 }, { "epoch": 0.27, "learning_rate": 9.7183016333131e-07, "logits/chosen": -6.298156261444092, "logits/rejected": -6.255956649780273, "logps/chosen": -305.90093994140625, "logps/rejected": -121.6975326538086, "loss": 0.1073, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6014676094055176, "rewards/margins": 6.042379379272461, "rewards/rejected": -3.4409124851226807, "step": 484 }, { "epoch": 0.27, "learning_rate": 9.716811602814978e-07, "logits/chosen": -6.255924224853516, "logits/rejected": -6.24452018737793, "logps/chosen": -359.786376953125, "logps/rejected": -211.94322204589844, "loss": 0.1307, "rewards/accuracies": 0.875, "rewards/chosen": 4.153106689453125, "rewards/margins": 7.024389266967773, "rewards/rejected": -2.8712823390960693, "step": 485 }, { "epoch": 0.27, "learning_rate": 9.715317756820224e-07, "logits/chosen": -6.021261215209961, "logits/rejected": -6.019612789154053, "logps/chosen": -302.9067687988281, "logps/rejected": -312.087890625, "loss": 0.1731, "rewards/accuracies": 0.875, "rewards/chosen": 1.6845804452896118, "rewards/margins": 6.640143871307373, "rewards/rejected": -4.955563545227051, "step": 486 }, { "epoch": 0.27, "learning_rate": 9.713820096537224e-07, "logits/chosen": -6.17200231552124, "logits/rejected": -6.255337715148926, "logps/chosen": -327.22119140625, "logps/rejected": -225.92845153808594, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 3.3033628463745117, "rewards/margins": 8.201024055480957, "rewards/rejected": -4.897660732269287, "step": 487 }, { "epoch": 0.27, "learning_rate": 9.71231862317746e-07, "logits/chosen": -6.3118672370910645, "logits/rejected": -6.147855281829834, "logps/chosen": -289.497314453125, "logps/rejected": -228.22509765625, "loss": 0.1326, "rewards/accuracies": 0.8125, "rewards/chosen": 2.132859706878662, "rewards/margins": 4.952709197998047, "rewards/rejected": -2.8198492527008057, "step": 488 }, { "epoch": 0.27, "learning_rate": 9.7108133379555e-07, "logits/chosen": -6.062687397003174, "logits/rejected": -6.177463054656982, "logps/chosen": -347.86968994140625, "logps/rejected": -243.49203491210938, "loss": 0.1586, "rewards/accuracies": 0.875, "rewards/chosen": 3.4794154167175293, "rewards/margins": 6.821000099182129, "rewards/rejected": -3.3415842056274414, "step": 489 }, { "epoch": 0.27, "learning_rate": 9.709304242088982e-07, "logits/chosen": -6.066871643066406, "logits/rejected": -6.118285179138184, "logps/chosen": -258.6498107910156, "logps/rejected": -218.82308959960938, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": 2.0075836181640625, "rewards/margins": 6.2426958084106445, "rewards/rejected": -4.235112190246582, "step": 490 }, { "epoch": 0.27, "learning_rate": 9.707791336798641e-07, "logits/chosen": -6.204981803894043, "logits/rejected": -6.12919282913208, "logps/chosen": -233.21002197265625, "logps/rejected": -143.25584411621094, "loss": 0.1924, "rewards/accuracies": 1.0, "rewards/chosen": 0.9795303344726562, "rewards/margins": 5.997549057006836, "rewards/rejected": -5.01801872253418, "step": 491 }, { "epoch": 0.27, "learning_rate": 9.706274623308288e-07, "logits/chosen": -6.147150993347168, "logits/rejected": -6.109336853027344, "logps/chosen": -333.599609375, "logps/rejected": -181.25750732421875, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": 3.582629680633545, "rewards/margins": 7.323076248168945, "rewards/rejected": -3.7404463291168213, "step": 492 }, { "epoch": 0.27, "learning_rate": 9.704754102844811e-07, "logits/chosen": -6.065732479095459, "logits/rejected": -6.073663711547852, "logps/chosen": -186.30960083007812, "logps/rejected": -178.18490600585938, "loss": 0.1377, "rewards/accuracies": 1.0, "rewards/chosen": 2.4353010654449463, "rewards/margins": 7.318205833435059, "rewards/rejected": -4.882905006408691, "step": 493 }, { "epoch": 0.27, "learning_rate": 9.703229776638185e-07, "logits/chosen": -6.142109394073486, "logits/rejected": -6.190553665161133, "logps/chosen": -512.0780639648438, "logps/rejected": -378.96771240234375, "loss": 0.1321, "rewards/accuracies": 0.875, "rewards/chosen": 1.8784568309783936, "rewards/margins": 6.303449630737305, "rewards/rejected": -4.424992561340332, "step": 494 }, { "epoch": 0.27, "learning_rate": 9.701701645921457e-07, "logits/chosen": -6.137495994567871, "logits/rejected": -6.129331588745117, "logps/chosen": -214.12664794921875, "logps/rejected": -160.99365234375, "loss": 0.1286, "rewards/accuracies": 0.9375, "rewards/chosen": 2.908879280090332, "rewards/margins": 4.850286483764648, "rewards/rejected": -1.9414067268371582, "step": 495 }, { "epoch": 0.28, "learning_rate": 9.700169711930753e-07, "logits/chosen": -6.150388717651367, "logits/rejected": -6.133740425109863, "logps/chosen": -256.82684326171875, "logps/rejected": -173.89669799804688, "loss": 0.349, "rewards/accuracies": 1.0, "rewards/chosen": 3.686755895614624, "rewards/margins": 9.092775344848633, "rewards/rejected": -5.406019687652588, "step": 496 }, { "epoch": 0.28, "learning_rate": 9.698633975905278e-07, "logits/chosen": -6.166840553283691, "logits/rejected": -6.296000003814697, "logps/chosen": -305.6956481933594, "logps/rejected": -272.42376708984375, "loss": 0.1768, "rewards/accuracies": 0.875, "rewards/chosen": 4.383974075317383, "rewards/margins": 7.255768299102783, "rewards/rejected": -2.8717944622039795, "step": 497 }, { "epoch": 0.28, "learning_rate": 9.697094439087309e-07, "logits/chosen": -6.218133926391602, "logits/rejected": -6.186685562133789, "logps/chosen": -214.38714599609375, "logps/rejected": -141.884033203125, "loss": 0.1692, "rewards/accuracies": 0.9375, "rewards/chosen": 3.142040729522705, "rewards/margins": 6.073299884796143, "rewards/rejected": -2.9312591552734375, "step": 498 }, { "epoch": 0.28, "learning_rate": 9.6955511027222e-07, "logits/chosen": -6.225597381591797, "logits/rejected": -6.203853130340576, "logps/chosen": -208.4882354736328, "logps/rejected": -112.77388000488281, "loss": 0.1379, "rewards/accuracies": 1.0, "rewards/chosen": 1.2419888973236084, "rewards/margins": 4.366032600402832, "rewards/rejected": -3.1240437030792236, "step": 499 }, { "epoch": 0.28, "learning_rate": 9.694003968058383e-07, "logits/chosen": -6.119610786437988, "logits/rejected": -6.1600799560546875, "logps/chosen": -282.38763427734375, "logps/rejected": -308.7266845703125, "loss": 0.232, "rewards/accuracies": 0.9375, "rewards/chosen": 2.320021152496338, "rewards/margins": 6.548381805419922, "rewards/rejected": -4.228361129760742, "step": 500 }, { "epoch": 0.28, "learning_rate": 9.69245303634735e-07, "logits/chosen": -6.149877548217773, "logits/rejected": -6.143815040588379, "logps/chosen": -190.4169464111328, "logps/rejected": -239.87869262695312, "loss": 0.1706, "rewards/accuracies": 0.8125, "rewards/chosen": 2.0694470405578613, "rewards/margins": 5.370758056640625, "rewards/rejected": -3.3013112545013428, "step": 501 }, { "epoch": 0.28, "learning_rate": 9.690898308843675e-07, "logits/chosen": -6.083517074584961, "logits/rejected": -6.044161319732666, "logps/chosen": -278.1767578125, "logps/rejected": -235.76097106933594, "loss": 0.1297, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8372353315353394, "rewards/margins": 5.248145580291748, "rewards/rejected": -3.410910129547119, "step": 502 }, { "epoch": 0.28, "learning_rate": 9.689339786804999e-07, "logits/chosen": -6.241808891296387, "logits/rejected": -6.230091094970703, "logps/chosen": -307.4137268066406, "logps/rejected": -134.5135040283203, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": 4.156228065490723, "rewards/margins": 6.210494041442871, "rewards/rejected": -2.0542659759521484, "step": 503 }, { "epoch": 0.28, "learning_rate": 9.687777471492036e-07, "logits/chosen": -6.077905654907227, "logits/rejected": -6.141417503356934, "logps/chosen": -282.9916076660156, "logps/rejected": -247.1158447265625, "loss": 0.1423, "rewards/accuracies": 0.9375, "rewards/chosen": 4.734704971313477, "rewards/margins": 6.773510932922363, "rewards/rejected": -2.0388054847717285, "step": 504 }, { "epoch": 0.28, "learning_rate": 9.686211364168562e-07, "logits/chosen": -6.205385208129883, "logits/rejected": -6.167778968811035, "logps/chosen": -273.3856201171875, "logps/rejected": -225.85595703125, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": 3.844083786010742, "rewards/margins": 6.7781805992126465, "rewards/rejected": -2.934096574783325, "step": 505 }, { "epoch": 0.28, "learning_rate": 9.684641466101423e-07, "logits/chosen": -6.04416036605835, "logits/rejected": -6.141241550445557, "logps/chosen": -267.1047668457031, "logps/rejected": -243.28724670410156, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": 3.6707959175109863, "rewards/margins": 7.678710460662842, "rewards/rejected": -4.0079145431518555, "step": 506 }, { "epoch": 0.28, "learning_rate": 9.683067778560535e-07, "logits/chosen": -6.159636974334717, "logits/rejected": -6.19619083404541, "logps/chosen": -258.7883605957031, "logps/rejected": -269.78857421875, "loss": 0.14, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3717677593231201, "rewards/margins": 7.098457336425781, "rewards/rejected": -5.726689338684082, "step": 507 }, { "epoch": 0.28, "learning_rate": 9.681490302818873e-07, "logits/chosen": -6.178951740264893, "logits/rejected": -6.1417999267578125, "logps/chosen": -217.10586547851562, "logps/rejected": -159.909423828125, "loss": 0.1593, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6553921699523926, "rewards/margins": 6.512484073638916, "rewards/rejected": -3.8570921421051025, "step": 508 }, { "epoch": 0.28, "learning_rate": 9.679909040152482e-07, "logits/chosen": -6.152879238128662, "logits/rejected": -6.093077659606934, "logps/chosen": -266.1920166015625, "logps/rejected": -186.870361328125, "loss": 0.0853, "rewards/accuracies": 0.9375, "rewards/chosen": 3.951385021209717, "rewards/margins": 7.101144790649414, "rewards/rejected": -3.1497602462768555, "step": 509 }, { "epoch": 0.28, "learning_rate": 9.678323991840469e-07, "logits/chosen": -6.193437099456787, "logits/rejected": -6.139285087585449, "logps/chosen": -166.506103515625, "logps/rejected": -194.10781860351562, "loss": 0.1484, "rewards/accuracies": 0.8125, "rewards/chosen": 1.6817612648010254, "rewards/margins": 5.146651268005371, "rewards/rejected": -3.4648895263671875, "step": 510 }, { "epoch": 0.28, "learning_rate": 9.676735159165e-07, "logits/chosen": -6.095390319824219, "logits/rejected": -6.149693489074707, "logps/chosen": -277.25830078125, "logps/rejected": -255.19984436035156, "loss": 0.1745, "rewards/accuracies": 1.0, "rewards/chosen": 2.8043525218963623, "rewards/margins": 7.169757843017578, "rewards/rejected": -4.365405082702637, "step": 511 }, { "epoch": 0.28, "learning_rate": 9.675142543411308e-07, "logits/chosen": -6.167036056518555, "logits/rejected": -6.194899082183838, "logps/chosen": -256.0096740722656, "logps/rejected": -145.68492126464844, "loss": 0.1238, "rewards/accuracies": 1.0, "rewards/chosen": 3.987565279006958, "rewards/margins": 7.8329057693481445, "rewards/rejected": -3.8453407287597656, "step": 512 }, { "epoch": 0.28, "learning_rate": 9.673546145867676e-07, "logits/chosen": -6.15823221206665, "logits/rejected": -6.2512431144714355, "logps/chosen": -229.72122192382812, "logps/rejected": -227.36587524414062, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": 3.0440099239349365, "rewards/margins": 7.073368072509766, "rewards/rejected": -4.029358386993408, "step": 513 }, { "epoch": 0.29, "learning_rate": 9.671945967825461e-07, "logits/chosen": -6.288339614868164, "logits/rejected": -6.08655309677124, "logps/chosen": -280.424560546875, "logps/rejected": -153.46224975585938, "loss": 0.1224, "rewards/accuracies": 0.8125, "rewards/chosen": 3.4290337562561035, "rewards/margins": 6.496003150939941, "rewards/rejected": -3.066969394683838, "step": 514 }, { "epoch": 0.29, "learning_rate": 9.670342010579064e-07, "logits/chosen": -6.144603252410889, "logits/rejected": -6.200301647186279, "logps/chosen": -321.0738830566406, "logps/rejected": -312.7076110839844, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": 4.221352577209473, "rewards/margins": 7.363331317901611, "rewards/rejected": -3.1419789791107178, "step": 515 }, { "epoch": 0.29, "learning_rate": 9.668734275425954e-07, "logits/chosen": -6.123932361602783, "logits/rejected": -6.109036445617676, "logps/chosen": -314.3672180175781, "logps/rejected": -209.3293914794922, "loss": 0.0806, "rewards/accuracies": 0.8125, "rewards/chosen": 2.563575506210327, "rewards/margins": 5.445080757141113, "rewards/rejected": -2.881505250930786, "step": 516 }, { "epoch": 0.29, "learning_rate": 9.667122763666647e-07, "logits/chosen": -6.16413688659668, "logits/rejected": -6.129265308380127, "logps/chosen": -264.31011962890625, "logps/rejected": -210.07273864746094, "loss": 0.0891, "rewards/accuracies": 1.0, "rewards/chosen": 2.485328197479248, "rewards/margins": 7.4106245040893555, "rewards/rejected": -4.925295829772949, "step": 517 }, { "epoch": 0.29, "learning_rate": 9.66550747660472e-07, "logits/chosen": -6.179217338562012, "logits/rejected": -6.149456024169922, "logps/chosen": -261.366943359375, "logps/rejected": -237.65130615234375, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": 2.9126009941101074, "rewards/margins": 8.06972885131836, "rewards/rejected": -5.15712833404541, "step": 518 }, { "epoch": 0.29, "learning_rate": 9.6638884155468e-07, "logits/chosen": -6.161569595336914, "logits/rejected": -6.1253557205200195, "logps/chosen": -289.39312744140625, "logps/rejected": -161.32777404785156, "loss": 0.1592, "rewards/accuracies": 0.9375, "rewards/chosen": 3.644599199295044, "rewards/margins": 6.607910633087158, "rewards/rejected": -2.963311195373535, "step": 519 }, { "epoch": 0.29, "learning_rate": 9.66226558180257e-07, "logits/chosen": -6.140803813934326, "logits/rejected": -6.114655494689941, "logps/chosen": -228.26107788085938, "logps/rejected": -130.31307983398438, "loss": 0.2059, "rewards/accuracies": 0.75, "rewards/chosen": 1.9155288934707642, "rewards/margins": 5.152224540710449, "rewards/rejected": -3.2366952896118164, "step": 520 }, { "epoch": 0.29, "learning_rate": 9.660638976684765e-07, "logits/chosen": -6.217998027801514, "logits/rejected": -6.166038513183594, "logps/chosen": -272.6920166015625, "logps/rejected": -172.27223205566406, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": 3.951448440551758, "rewards/margins": 6.9265899658203125, "rewards/rejected": -2.9751415252685547, "step": 521 }, { "epoch": 0.29, "learning_rate": 9.659008601509166e-07, "logits/chosen": -6.079833984375, "logits/rejected": -6.13705587387085, "logps/chosen": -188.66494750976562, "logps/rejected": -279.3419494628906, "loss": 0.0808, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6239185333251953, "rewards/margins": 7.032986164093018, "rewards/rejected": -4.409067630767822, "step": 522 }, { "epoch": 0.29, "learning_rate": 9.65737445759461e-07, "logits/chosen": -6.193273544311523, "logits/rejected": -6.368896484375, "logps/chosen": -298.4354248046875, "logps/rejected": -283.71099853515625, "loss": 0.1342, "rewards/accuracies": 0.9375, "rewards/chosen": 3.709573745727539, "rewards/margins": 6.568884372711182, "rewards/rejected": -2.859311103820801, "step": 523 }, { "epoch": 0.29, "learning_rate": 9.65573654626298e-07, "logits/chosen": -6.204023838043213, "logits/rejected": -6.117292881011963, "logps/chosen": -217.39044189453125, "logps/rejected": -172.965576171875, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": 1.1731561422348022, "rewards/margins": 5.9189043045043945, "rewards/rejected": -4.745748043060303, "step": 524 }, { "epoch": 0.29, "learning_rate": 9.6540948688392e-07, "logits/chosen": -6.197424411773682, "logits/rejected": -6.087774276733398, "logps/chosen": -191.9601593017578, "logps/rejected": -133.59854125976562, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": 1.788454294204712, "rewards/margins": 5.856534004211426, "rewards/rejected": -4.068078994750977, "step": 525 }, { "epoch": 0.29, "learning_rate": 9.652449426651253e-07, "logits/chosen": -6.115753173828125, "logits/rejected": -6.103476047515869, "logps/chosen": -206.47921752929688, "logps/rejected": -212.80801391601562, "loss": 0.1331, "rewards/accuracies": 1.0, "rewards/chosen": 2.8373308181762695, "rewards/margins": 7.762137413024902, "rewards/rejected": -4.924807071685791, "step": 526 }, { "epoch": 0.29, "learning_rate": 9.650800221030158e-07, "logits/chosen": -6.1698479652404785, "logits/rejected": -6.124533176422119, "logps/chosen": -306.8234558105469, "logps/rejected": -188.43890380859375, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": 4.355559349060059, "rewards/margins": 8.507676124572754, "rewards/rejected": -4.152116298675537, "step": 527 }, { "epoch": 0.29, "learning_rate": 9.649147253309983e-07, "logits/chosen": -6.113718509674072, "logits/rejected": -6.209031105041504, "logps/chosen": -239.9146728515625, "logps/rejected": -205.63461303710938, "loss": 0.149, "rewards/accuracies": 0.875, "rewards/chosen": 2.6100926399230957, "rewards/margins": 5.550892353057861, "rewards/rejected": -2.9407997131347656, "step": 528 }, { "epoch": 0.29, "learning_rate": 9.647490524827833e-07, "logits/chosen": -6.148141860961914, "logits/rejected": -6.141280651092529, "logps/chosen": -192.80343627929688, "logps/rejected": -222.6265106201172, "loss": 0.1037, "rewards/accuracies": 0.9375, "rewards/chosen": 1.282414436340332, "rewards/margins": 6.887872695922852, "rewards/rejected": -5.605457782745361, "step": 529 }, { "epoch": 0.29, "learning_rate": 9.645830036923864e-07, "logits/chosen": -6.082411766052246, "logits/rejected": -6.127699375152588, "logps/chosen": -248.708984375, "logps/rejected": -150.51068115234375, "loss": 0.194, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2859277725219727, "rewards/margins": 5.357077598571777, "rewards/rejected": -3.0711498260498047, "step": 530 }, { "epoch": 0.29, "learning_rate": 9.644165790941267e-07, "logits/chosen": -6.1020636558532715, "logits/rejected": -6.128826141357422, "logps/chosen": -239.81927490234375, "logps/rejected": -190.54681396484375, "loss": 0.1429, "rewards/accuracies": 0.875, "rewards/chosen": 2.6107449531555176, "rewards/margins": 5.852334499359131, "rewards/rejected": -3.2415897846221924, "step": 531 }, { "epoch": 0.3, "learning_rate": 9.642497788226273e-07, "logits/chosen": -6.290470600128174, "logits/rejected": -6.171840667724609, "logps/chosen": -286.86309814453125, "logps/rejected": -160.65318298339844, "loss": 0.0992, "rewards/accuracies": 1.0, "rewards/chosen": 3.789902448654175, "rewards/margins": 7.194936752319336, "rewards/rejected": -3.405033588409424, "step": 532 }, { "epoch": 0.3, "learning_rate": 9.640826030128157e-07, "logits/chosen": -6.247890949249268, "logits/rejected": -6.1813130378723145, "logps/chosen": -278.84173583984375, "logps/rejected": -142.28628540039062, "loss": 0.2256, "rewards/accuracies": 0.875, "rewards/chosen": 2.518467426300049, "rewards/margins": 7.045354843139648, "rewards/rejected": -4.526887893676758, "step": 533 }, { "epoch": 0.3, "learning_rate": 9.639150517999223e-07, "logits/chosen": -6.091325759887695, "logits/rejected": -6.085665702819824, "logps/chosen": -391.4510803222656, "logps/rejected": -238.70556640625, "loss": 0.1277, "rewards/accuracies": 1.0, "rewards/chosen": 2.3081750869750977, "rewards/margins": 6.935734748840332, "rewards/rejected": -4.627559661865234, "step": 534 }, { "epoch": 0.3, "learning_rate": 9.637471253194821e-07, "logits/chosen": -6.20977783203125, "logits/rejected": -6.09962797164917, "logps/chosen": -275.673583984375, "logps/rejected": -181.5064239501953, "loss": 0.2069, "rewards/accuracies": 0.9375, "rewards/chosen": 2.627016305923462, "rewards/margins": 6.952593803405762, "rewards/rejected": -4.325577735900879, "step": 535 }, { "epoch": 0.3, "learning_rate": 9.635788237073332e-07, "logits/chosen": -6.0807881355285645, "logits/rejected": -6.169670104980469, "logps/chosen": -225.4248046875, "logps/rejected": -175.2964630126953, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": 4.442472457885742, "rewards/margins": 7.038171768188477, "rewards/rejected": -2.5956990718841553, "step": 536 }, { "epoch": 0.3, "learning_rate": 9.63410147099617e-07, "logits/chosen": -6.034350872039795, "logits/rejected": -6.1868085861206055, "logps/chosen": -298.09423828125, "logps/rejected": -351.814208984375, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": 2.2574167251586914, "rewards/margins": 8.66075325012207, "rewards/rejected": -6.403336524963379, "step": 537 }, { "epoch": 0.3, "learning_rate": 9.632410956327785e-07, "logits/chosen": -6.050400257110596, "logits/rejected": -6.073800563812256, "logps/chosen": -283.498779296875, "logps/rejected": -329.3812255859375, "loss": 0.1686, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5253207683563232, "rewards/margins": 5.52132511138916, "rewards/rejected": -3.996004581451416, "step": 538 }, { "epoch": 0.3, "learning_rate": 9.63071669443566e-07, "logits/chosen": -6.151094436645508, "logits/rejected": -6.150213241577148, "logps/chosen": -215.62741088867188, "logps/rejected": -244.054931640625, "loss": 0.1142, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6646010875701904, "rewards/margins": 7.308534622192383, "rewards/rejected": -5.64393424987793, "step": 539 }, { "epoch": 0.3, "learning_rate": 9.629018686690304e-07, "logits/chosen": -6.219801902770996, "logits/rejected": -6.222918510437012, "logps/chosen": -196.09913635253906, "logps/rejected": -228.72674560546875, "loss": 0.0794, "rewards/accuracies": 1.0, "rewards/chosen": 2.0557851791381836, "rewards/margins": 6.451139450073242, "rewards/rejected": -4.395354747772217, "step": 540 }, { "epoch": 0.3, "learning_rate": 9.627316934465263e-07, "logits/chosen": -6.121006488800049, "logits/rejected": -6.168787956237793, "logps/chosen": -217.71630859375, "logps/rejected": -146.42855834960938, "loss": 0.1434, "rewards/accuracies": 0.875, "rewards/chosen": 2.122831344604492, "rewards/margins": 7.022008419036865, "rewards/rejected": -4.899176597595215, "step": 541 }, { "epoch": 0.3, "learning_rate": 9.62561143913711e-07, "logits/chosen": -6.118625640869141, "logits/rejected": -6.163582801818848, "logps/chosen": -310.63275146484375, "logps/rejected": -260.4857482910156, "loss": 0.1029, "rewards/accuracies": 0.9375, "rewards/chosen": 4.636989593505859, "rewards/margins": 8.097419738769531, "rewards/rejected": -3.4604291915893555, "step": 542 }, { "epoch": 0.3, "learning_rate": 9.623902202085444e-07, "logits/chosen": -6.138749122619629, "logits/rejected": -6.16074275970459, "logps/chosen": -409.34759521484375, "logps/rejected": -308.61859130859375, "loss": 0.138, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7516229152679443, "rewards/margins": 7.752129554748535, "rewards/rejected": -5.00050687789917, "step": 543 }, { "epoch": 0.3, "learning_rate": 9.622189224692888e-07, "logits/chosen": -6.14600944519043, "logits/rejected": -6.138145446777344, "logps/chosen": -283.41510009765625, "logps/rejected": -108.485595703125, "loss": 0.1133, "rewards/accuracies": 0.9375, "rewards/chosen": 4.118766784667969, "rewards/margins": 6.263320446014404, "rewards/rejected": -2.1445541381835938, "step": 544 }, { "epoch": 0.3, "learning_rate": 9.620472508345095e-07, "logits/chosen": -6.170074462890625, "logits/rejected": -6.117996692657471, "logps/chosen": -175.06678771972656, "logps/rejected": -128.82305908203125, "loss": 0.0781, "rewards/accuracies": 0.9375, "rewards/chosen": 2.166503429412842, "rewards/margins": 7.141688346862793, "rewards/rejected": -4.975185394287109, "step": 545 }, { "epoch": 0.3, "learning_rate": 9.618752054430745e-07, "logits/chosen": -6.212990760803223, "logits/rejected": -6.190824508666992, "logps/chosen": -283.7847900390625, "logps/rejected": -166.33729553222656, "loss": 0.143, "rewards/accuracies": 1.0, "rewards/chosen": 3.21022629737854, "rewards/margins": 6.128647804260254, "rewards/rejected": -2.918421745300293, "step": 546 }, { "epoch": 0.3, "learning_rate": 9.617027864341535e-07, "logits/chosen": -6.0666399002075195, "logits/rejected": -6.090616226196289, "logps/chosen": -278.0721435546875, "logps/rejected": -151.29115295410156, "loss": 0.1027, "rewards/accuracies": 1.0, "rewards/chosen": 5.394173622131348, "rewards/margins": 7.270061492919922, "rewards/rejected": -1.8758879899978638, "step": 547 }, { "epoch": 0.3, "learning_rate": 9.615299939472188e-07, "logits/chosen": -6.084729194641113, "logits/rejected": -6.064918518066406, "logps/chosen": -308.5484619140625, "logps/rejected": -137.26318359375, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": 4.940703868865967, "rewards/margins": 7.032670021057129, "rewards/rejected": -2.091965913772583, "step": 548 }, { "epoch": 0.3, "learning_rate": 9.613568281220449e-07, "logits/chosen": -6.118406772613525, "logits/rejected": -6.068952560424805, "logps/chosen": -149.8465576171875, "logps/rejected": -174.6525115966797, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": 1.062377691268921, "rewards/margins": 5.441269874572754, "rewards/rejected": -4.378891468048096, "step": 549 }, { "epoch": 0.31, "learning_rate": 9.611832890987076e-07, "logits/chosen": -6.070160865783691, "logits/rejected": -6.112170696258545, "logps/chosen": -191.40000915527344, "logps/rejected": -236.89361572265625, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": 2.2866296768188477, "rewards/margins": 7.070341110229492, "rewards/rejected": -4.7837114334106445, "step": 550 }, { "epoch": 0.31, "learning_rate": 9.610093770175855e-07, "logits/chosen": -6.2315874099731445, "logits/rejected": -6.22998046875, "logps/chosen": -167.81976318359375, "logps/rejected": -175.28225708007812, "loss": 0.1635, "rewards/accuracies": 1.0, "rewards/chosen": 1.9677352905273438, "rewards/margins": 6.988535404205322, "rewards/rejected": -5.02079963684082, "step": 551 }, { "epoch": 0.31, "learning_rate": 9.608350920193587e-07, "logits/chosen": -6.219935417175293, "logits/rejected": -6.052756309509277, "logps/chosen": -271.42388916015625, "logps/rejected": -141.3201904296875, "loss": 0.1082, "rewards/accuracies": 0.9375, "rewards/chosen": 2.811133623123169, "rewards/margins": 6.087221145629883, "rewards/rejected": -3.2760872840881348, "step": 552 }, { "epoch": 0.31, "learning_rate": 9.606604342450083e-07, "logits/chosen": -6.121737003326416, "logits/rejected": -6.0914387702941895, "logps/chosen": -251.20518493652344, "logps/rejected": -186.66188049316406, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/chosen": 2.967036485671997, "rewards/margins": 6.785343170166016, "rewards/rejected": -3.8183064460754395, "step": 553 }, { "epoch": 0.31, "learning_rate": 9.604854038358183e-07, "logits/chosen": -6.12995719909668, "logits/rejected": -6.188076972961426, "logps/chosen": -240.97593688964844, "logps/rejected": -137.29376220703125, "loss": 0.1176, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8201231956481934, "rewards/margins": 7.629640579223633, "rewards/rejected": -3.8095171451568604, "step": 554 }, { "epoch": 0.31, "learning_rate": 9.603100009333725e-07, "logits/chosen": -6.128851890563965, "logits/rejected": -6.131591796875, "logps/chosen": -320.8419494628906, "logps/rejected": -244.07369995117188, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": 3.5153822898864746, "rewards/margins": 7.557437419891357, "rewards/rejected": -4.042055130004883, "step": 555 }, { "epoch": 0.31, "learning_rate": 9.601342256795573e-07, "logits/chosen": -6.188492298126221, "logits/rejected": -6.122400760650635, "logps/chosen": -215.8286590576172, "logps/rejected": -127.51449584960938, "loss": 0.2076, "rewards/accuracies": 1.0, "rewards/chosen": 3.063263416290283, "rewards/margins": 6.683736324310303, "rewards/rejected": -3.6204729080200195, "step": 556 }, { "epoch": 0.31, "learning_rate": 9.599580782165597e-07, "logits/chosen": -6.156156063079834, "logits/rejected": -6.173383712768555, "logps/chosen": -277.8128662109375, "logps/rejected": -245.02069091796875, "loss": 0.0802, "rewards/accuracies": 1.0, "rewards/chosen": 4.666019439697266, "rewards/margins": 7.83255672454834, "rewards/rejected": -3.166536808013916, "step": 557 }, { "epoch": 0.31, "learning_rate": 9.597815586868678e-07, "logits/chosen": -6.163208961486816, "logits/rejected": -6.290266990661621, "logps/chosen": -331.21929931640625, "logps/rejected": -204.19236755371094, "loss": 0.1439, "rewards/accuracies": 1.0, "rewards/chosen": 4.8484625816345215, "rewards/margins": 8.00944995880127, "rewards/rejected": -3.160987377166748, "step": 558 }, { "epoch": 0.31, "learning_rate": 9.59604667233271e-07, "logits/chosen": -6.115029811859131, "logits/rejected": -6.124709606170654, "logps/chosen": -292.560302734375, "logps/rejected": -248.8638458251953, "loss": 0.1611, "rewards/accuracies": 0.8125, "rewards/chosen": 2.6987996101379395, "rewards/margins": 5.13929557800293, "rewards/rejected": -2.4404959678649902, "step": 559 }, { "epoch": 0.31, "learning_rate": 9.594274039988592e-07, "logits/chosen": -6.334722995758057, "logits/rejected": -6.09508752822876, "logps/chosen": -310.6607360839844, "logps/rejected": -132.16725158691406, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": 3.9279465675354004, "rewards/margins": 7.630026340484619, "rewards/rejected": -3.702080249786377, "step": 560 }, { "epoch": 0.31, "learning_rate": 9.592497691270233e-07, "logits/chosen": -6.261887550354004, "logits/rejected": -6.246855735778809, "logps/chosen": -262.9445495605469, "logps/rejected": -271.7788391113281, "loss": 0.1144, "rewards/accuracies": 1.0, "rewards/chosen": 3.248724937438965, "rewards/margins": 8.013134002685547, "rewards/rejected": -4.764409065246582, "step": 561 }, { "epoch": 0.31, "learning_rate": 9.590717627614546e-07, "logits/chosen": -6.190260410308838, "logits/rejected": -6.062605381011963, "logps/chosen": -314.1452331542969, "logps/rejected": -100.82379150390625, "loss": 0.1535, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6622955799102783, "rewards/margins": 6.870562553405762, "rewards/rejected": -3.2082667350769043, "step": 562 }, { "epoch": 0.31, "learning_rate": 9.588933850461447e-07, "logits/chosen": -6.1308207511901855, "logits/rejected": -6.136370658874512, "logps/chosen": -253.634033203125, "logps/rejected": -193.26292419433594, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": 3.3854422569274902, "rewards/margins": 6.866676330566406, "rewards/rejected": -3.481234073638916, "step": 563 }, { "epoch": 0.31, "learning_rate": 9.587146361253867e-07, "logits/chosen": -6.192271709442139, "logits/rejected": -6.138386249542236, "logps/chosen": -195.46847534179688, "logps/rejected": -157.23941040039062, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": 2.914928674697876, "rewards/margins": 6.677742958068848, "rewards/rejected": -3.762814521789551, "step": 564 }, { "epoch": 0.31, "learning_rate": 9.585355161437723e-07, "logits/chosen": -6.245938301086426, "logits/rejected": -6.122579097747803, "logps/chosen": -246.79156494140625, "logps/rejected": -150.4551544189453, "loss": 0.0586, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2915148735046387, "rewards/margins": 8.256811141967773, "rewards/rejected": -4.965296268463135, "step": 565 }, { "epoch": 0.31, "learning_rate": 9.583560252461946e-07, "logits/chosen": -6.122226238250732, "logits/rejected": -6.181131362915039, "logps/chosen": -203.8675537109375, "logps/rejected": -198.1805419921875, "loss": 0.1383, "rewards/accuracies": 1.0, "rewards/chosen": 1.796614646911621, "rewards/margins": 5.816368579864502, "rewards/rejected": -4.019753932952881, "step": 566 }, { "epoch": 0.31, "learning_rate": 9.581761635778465e-07, "logits/chosen": -6.1775383949279785, "logits/rejected": -6.157805442810059, "logps/chosen": -429.6580810546875, "logps/rejected": -201.10113525390625, "loss": 0.1926, "rewards/accuracies": 0.9375, "rewards/chosen": 4.70403528213501, "rewards/margins": 8.130621910095215, "rewards/rejected": -3.426586151123047, "step": 567 }, { "epoch": 0.32, "learning_rate": 9.579959312842206e-07, "logits/chosen": -6.166776657104492, "logits/rejected": -6.320647239685059, "logps/chosen": -282.4844970703125, "logps/rejected": -262.7876281738281, "loss": 0.1561, "rewards/accuracies": 1.0, "rewards/chosen": 3.958009958267212, "rewards/margins": 7.536346435546875, "rewards/rejected": -3.578336238861084, "step": 568 }, { "epoch": 0.32, "learning_rate": 9.578153285111093e-07, "logits/chosen": -6.1039557456970215, "logits/rejected": -6.080148220062256, "logps/chosen": -211.20687866210938, "logps/rejected": -161.578369140625, "loss": 0.1118, "rewards/accuracies": 0.9375, "rewards/chosen": 1.84993314743042, "rewards/margins": 4.173891544342041, "rewards/rejected": -2.323958158493042, "step": 569 }, { "epoch": 0.32, "learning_rate": 9.576343554046047e-07, "logits/chosen": -6.136208534240723, "logits/rejected": -6.227239608764648, "logps/chosen": -198.65533447265625, "logps/rejected": -254.16712951660156, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": 2.2099287509918213, "rewards/margins": 6.747272968292236, "rewards/rejected": -4.537344455718994, "step": 570 }, { "epoch": 0.32, "learning_rate": 9.57453012111099e-07, "logits/chosen": -6.108669281005859, "logits/rejected": -6.198397159576416, "logps/chosen": -265.974853515625, "logps/rejected": -135.96609497070312, "loss": 0.0911, "rewards/accuracies": 0.9375, "rewards/chosen": 4.259000778198242, "rewards/margins": 7.723726749420166, "rewards/rejected": -3.464725971221924, "step": 571 }, { "epoch": 0.32, "learning_rate": 9.572712987772825e-07, "logits/chosen": -6.090296745300293, "logits/rejected": -6.245205402374268, "logps/chosen": -392.447509765625, "logps/rejected": -216.49252319335938, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": 4.919370651245117, "rewards/margins": 7.704433441162109, "rewards/rejected": -2.785062551498413, "step": 572 }, { "epoch": 0.32, "learning_rate": 9.570892155501467e-07, "logits/chosen": -6.107841968536377, "logits/rejected": -6.215850830078125, "logps/chosen": -265.84600830078125, "logps/rejected": -319.2273864746094, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": 2.8501381874084473, "rewards/margins": 9.006841659545898, "rewards/rejected": -6.156702995300293, "step": 573 }, { "epoch": 0.32, "learning_rate": 9.569067625769807e-07, "logits/chosen": -6.165061950683594, "logits/rejected": -6.226284027099609, "logps/chosen": -259.63531494140625, "logps/rejected": -285.9486999511719, "loss": 0.069, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2873940467834473, "rewards/margins": 9.650806427001953, "rewards/rejected": -6.363411903381348, "step": 574 }, { "epoch": 0.32, "learning_rate": 9.567239400053737e-07, "logits/chosen": -6.105330467224121, "logits/rejected": -6.148839950561523, "logps/chosen": -265.652587890625, "logps/rejected": -229.7642364501953, "loss": 0.0721, "rewards/accuracies": 0.9375, "rewards/chosen": 3.800694227218628, "rewards/margins": 7.694418907165527, "rewards/rejected": -3.8937244415283203, "step": 575 }, { "epoch": 0.32, "learning_rate": 9.565407479832134e-07, "logits/chosen": -6.262153625488281, "logits/rejected": -6.13336706161499, "logps/chosen": -208.92864990234375, "logps/rejected": -119.82474517822266, "loss": 0.1437, "rewards/accuracies": 1.0, "rewards/chosen": 2.5904033184051514, "rewards/margins": 6.46002721786499, "rewards/rejected": -3.869624137878418, "step": 576 }, { "epoch": 0.32, "learning_rate": 9.56357186658686e-07, "logits/chosen": -6.080566883087158, "logits/rejected": -6.186698913574219, "logps/chosen": -283.30059814453125, "logps/rejected": -279.10791015625, "loss": 0.0818, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4498510360717773, "rewards/margins": 7.268438339233398, "rewards/rejected": -3.818587303161621, "step": 577 }, { "epoch": 0.32, "learning_rate": 9.561732561802779e-07, "logits/chosen": -6.172302722930908, "logits/rejected": -6.1559157371521, "logps/chosen": -199.8704833984375, "logps/rejected": -300.7607421875, "loss": 0.17, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4945039749145508, "rewards/margins": 5.133358001708984, "rewards/rejected": -3.6388542652130127, "step": 578 }, { "epoch": 0.32, "learning_rate": 9.55988956696772e-07, "logits/chosen": -6.1113080978393555, "logits/rejected": -6.122375011444092, "logps/chosen": -180.7142333984375, "logps/rejected": -235.10931396484375, "loss": 0.1393, "rewards/accuracies": 1.0, "rewards/chosen": 1.8324743509292603, "rewards/margins": 7.886664390563965, "rewards/rejected": -6.054190158843994, "step": 579 }, { "epoch": 0.32, "learning_rate": 9.558042883572515e-07, "logits/chosen": -6.123084545135498, "logits/rejected": -6.134700775146484, "logps/chosen": -289.46527099609375, "logps/rejected": -277.4513854980469, "loss": 0.1762, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6896321773529053, "rewards/margins": 6.808422565460205, "rewards/rejected": -4.118790149688721, "step": 580 }, { "epoch": 0.32, "learning_rate": 9.556192513110973e-07, "logits/chosen": -6.029311180114746, "logits/rejected": -6.140995025634766, "logps/chosen": -221.06622314453125, "logps/rejected": -251.2128143310547, "loss": 0.1169, "rewards/accuracies": 0.875, "rewards/chosen": 1.1682369709014893, "rewards/margins": 4.689662933349609, "rewards/rejected": -3.52142596244812, "step": 581 }, { "epoch": 0.32, "learning_rate": 9.554338457079879e-07, "logits/chosen": -6.151986122131348, "logits/rejected": -6.120706081390381, "logps/chosen": -218.36676025390625, "logps/rejected": -194.06871032714844, "loss": 0.1281, "rewards/accuracies": 1.0, "rewards/chosen": 1.9853336811065674, "rewards/margins": 7.275458335876465, "rewards/rejected": -5.290124416351318, "step": 582 }, { "epoch": 0.32, "learning_rate": 9.552480716979011e-07, "logits/chosen": -6.1246843338012695, "logits/rejected": -6.222963333129883, "logps/chosen": -264.27923583984375, "logps/rejected": -242.24386596679688, "loss": 0.1149, "rewards/accuracies": 1.0, "rewards/chosen": 3.528547525405884, "rewards/margins": 8.217379570007324, "rewards/rejected": -4.688831806182861, "step": 583 }, { "epoch": 0.32, "learning_rate": 9.55061929431112e-07, "logits/chosen": -6.130334854125977, "logits/rejected": -6.114960193634033, "logps/chosen": -238.23101806640625, "logps/rejected": -213.35873413085938, "loss": 0.1125, "rewards/accuracies": 0.9375, "rewards/chosen": 3.283118724822998, "rewards/margins": 8.238401412963867, "rewards/rejected": -4.955282688140869, "step": 584 }, { "epoch": 0.32, "learning_rate": 9.548754190581937e-07, "logits/chosen": -6.067286014556885, "logits/rejected": -6.075590133666992, "logps/chosen": -287.83477783203125, "logps/rejected": -203.43417358398438, "loss": 0.1069, "rewards/accuracies": 0.875, "rewards/chosen": 2.6773314476013184, "rewards/margins": 6.434047222137451, "rewards/rejected": -3.7567155361175537, "step": 585 }, { "epoch": 0.33, "learning_rate": 9.546885407300175e-07, "logits/chosen": -6.082038879394531, "logits/rejected": -6.061506748199463, "logps/chosen": -266.072509765625, "logps/rejected": -172.41799926757812, "loss": 0.14, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7736270427703857, "rewards/margins": 5.661879539489746, "rewards/rejected": -3.8882527351379395, "step": 586 }, { "epoch": 0.33, "learning_rate": 9.545012945977513e-07, "logits/chosen": -6.150752544403076, "logits/rejected": -6.1425981521606445, "logps/chosen": -234.52317810058594, "logps/rejected": -189.80491638183594, "loss": 0.1033, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7883884906768799, "rewards/margins": 5.253413200378418, "rewards/rejected": -3.465024709701538, "step": 587 }, { "epoch": 0.33, "learning_rate": 9.543136808128617e-07, "logits/chosen": -6.264730453491211, "logits/rejected": -6.195131301879883, "logps/chosen": -220.66644287109375, "logps/rejected": -192.25103759765625, "loss": 0.0992, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4200855493545532, "rewards/margins": 6.605283260345459, "rewards/rejected": -5.185197830200195, "step": 588 }, { "epoch": 0.33, "learning_rate": 9.54125699527112e-07, "logits/chosen": -6.219130992889404, "logits/rejected": -6.191728115081787, "logps/chosen": -238.71563720703125, "logps/rejected": -183.31979370117188, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": 2.211099147796631, "rewards/margins": 6.564305782318115, "rewards/rejected": -4.353207111358643, "step": 589 }, { "epoch": 0.33, "learning_rate": 9.539373508925631e-07, "logits/chosen": -6.161739826202393, "logits/rejected": -6.146148681640625, "logps/chosen": -253.2348175048828, "logps/rejected": -159.23643493652344, "loss": 0.1572, "rewards/accuracies": 1.0, "rewards/chosen": 3.052746295928955, "rewards/margins": 7.010488510131836, "rewards/rejected": -3.9577414989471436, "step": 590 }, { "epoch": 0.33, "learning_rate": 9.537486350615727e-07, "logits/chosen": -6.087927341461182, "logits/rejected": -6.162445545196533, "logps/chosen": -163.90530395507812, "logps/rejected": -193.23240661621094, "loss": 0.0829, "rewards/accuracies": 1.0, "rewards/chosen": 1.5483163595199585, "rewards/margins": 7.110222816467285, "rewards/rejected": -5.561906814575195, "step": 591 }, { "epoch": 0.33, "learning_rate": 9.535595521867958e-07, "logits/chosen": -6.205174446105957, "logits/rejected": -6.161969184875488, "logps/chosen": -253.82728576660156, "logps/rejected": -291.6375427246094, "loss": 0.1733, "rewards/accuracies": 0.9375, "rewards/chosen": 2.542609691619873, "rewards/margins": 7.799514293670654, "rewards/rejected": -5.256904602050781, "step": 592 }, { "epoch": 0.33, "learning_rate": 9.533701024211843e-07, "logits/chosen": -6.196863651275635, "logits/rejected": -6.263934135437012, "logps/chosen": -289.0281066894531, "logps/rejected": -193.81829833984375, "loss": 0.1057, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9791617393493652, "rewards/margins": 8.591022491455078, "rewards/rejected": -6.611861228942871, "step": 593 }, { "epoch": 0.33, "learning_rate": 9.53180285917987e-07, "logits/chosen": -6.182864189147949, "logits/rejected": -6.168224811553955, "logps/chosen": -307.9858093261719, "logps/rejected": -213.48985290527344, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": 3.9307618141174316, "rewards/margins": 8.806412696838379, "rewards/rejected": -4.875650882720947, "step": 594 }, { "epoch": 0.33, "learning_rate": 9.529901028307488e-07, "logits/chosen": -6.192775726318359, "logits/rejected": -6.090634822845459, "logps/chosen": -273.4591064453125, "logps/rejected": -133.8290252685547, "loss": 0.1901, "rewards/accuracies": 0.8125, "rewards/chosen": 3.533958673477173, "rewards/margins": 5.644036293029785, "rewards/rejected": -2.1100778579711914, "step": 595 }, { "epoch": 0.33, "learning_rate": 9.527995533133117e-07, "logits/chosen": -6.018102645874023, "logits/rejected": -6.077920436859131, "logps/chosen": -484.60626220703125, "logps/rejected": -298.9256591796875, "loss": 0.1271, "rewards/accuracies": 0.875, "rewards/chosen": 3.713975191116333, "rewards/margins": 6.366461753845215, "rewards/rejected": -2.65248703956604, "step": 596 }, { "epoch": 0.33, "learning_rate": 9.526086375198143e-07, "logits/chosen": -6.156438827514648, "logits/rejected": -6.130906105041504, "logps/chosen": -438.70037841796875, "logps/rejected": -526.713134765625, "loss": 0.1372, "rewards/accuracies": 0.8125, "rewards/chosen": 1.6311671733856201, "rewards/margins": 5.154043197631836, "rewards/rejected": -3.522876024246216, "step": 597 }, { "epoch": 0.33, "learning_rate": 9.524173556046906e-07, "logits/chosen": -6.010011196136475, "logits/rejected": -6.061853408813477, "logps/chosen": -255.20346069335938, "logps/rejected": -148.90603637695312, "loss": 0.1658, "rewards/accuracies": 1.0, "rewards/chosen": 4.664862155914307, "rewards/margins": 7.94114875793457, "rewards/rejected": -3.2762861251831055, "step": 598 }, { "epoch": 0.33, "learning_rate": 9.522257077226716e-07, "logits/chosen": -6.089669227600098, "logits/rejected": -6.158654689788818, "logps/chosen": -233.57681274414062, "logps/rejected": -260.05889892578125, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 2.341308116912842, "rewards/margins": 7.155489921569824, "rewards/rejected": -4.814181804656982, "step": 599 }, { "epoch": 0.33, "learning_rate": 9.520336940287839e-07, "logits/chosen": -6.07725191116333, "logits/rejected": -6.107265472412109, "logps/chosen": -363.82452392578125, "logps/rejected": -341.2762756347656, "loss": 0.2112, "rewards/accuracies": 0.875, "rewards/chosen": 3.893527030944824, "rewards/margins": 6.345994472503662, "rewards/rejected": -2.452467203140259, "step": 600 }, { "epoch": 0.33, "learning_rate": 9.518413146783501e-07, "logits/chosen": -6.174194812774658, "logits/rejected": -6.165878772735596, "logps/chosen": -277.5762634277344, "logps/rejected": -240.14535522460938, "loss": 0.1281, "rewards/accuracies": 1.0, "rewards/chosen": 2.341766357421875, "rewards/margins": 7.0370025634765625, "rewards/rejected": -4.6952362060546875, "step": 601 }, { "epoch": 0.33, "learning_rate": 9.516485698269888e-07, "logits/chosen": -6.081261157989502, "logits/rejected": -6.2004618644714355, "logps/chosen": -304.3279113769531, "logps/rejected": -274.8487548828125, "loss": 0.1347, "rewards/accuracies": 1.0, "rewards/chosen": 3.7030558586120605, "rewards/margins": 7.629045009613037, "rewards/rejected": -3.9259891510009766, "step": 602 }, { "epoch": 0.33, "learning_rate": 9.514554596306139e-07, "logits/chosen": -6.031647205352783, "logits/rejected": -6.021945476531982, "logps/chosen": -236.98985290527344, "logps/rejected": -370.85693359375, "loss": 0.1432, "rewards/accuracies": 1.0, "rewards/chosen": 3.7965853214263916, "rewards/margins": 10.486000061035156, "rewards/rejected": -6.689414024353027, "step": 603 }, { "epoch": 0.34, "learning_rate": 9.512619842454352e-07, "logits/chosen": -6.245304584503174, "logits/rejected": -6.244408130645752, "logps/chosen": -244.32339477539062, "logps/rejected": -252.16854858398438, "loss": 0.1052, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4110922813415527, "rewards/margins": 7.920407295227051, "rewards/rejected": -5.509315013885498, "step": 604 }, { "epoch": 0.34, "learning_rate": 9.510681438279574e-07, "logits/chosen": -6.070958614349365, "logits/rejected": -6.048058986663818, "logps/chosen": -264.48138427734375, "logps/rejected": -237.13967895507812, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": 3.2711586952209473, "rewards/margins": 7.853595733642578, "rewards/rejected": -4.582437038421631, "step": 605 }, { "epoch": 0.34, "learning_rate": 9.508739385349811e-07, "logits/chosen": -6.17681884765625, "logits/rejected": -6.225823879241943, "logps/chosen": -239.25692749023438, "logps/rejected": -200.8418426513672, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": 3.2905378341674805, "rewards/margins": 7.485922336578369, "rewards/rejected": -4.195384502410889, "step": 606 }, { "epoch": 0.34, "learning_rate": 9.506793685236016e-07, "logits/chosen": -6.110422611236572, "logits/rejected": -6.147623538970947, "logps/chosen": -208.51553344726562, "logps/rejected": -255.71043395996094, "loss": 0.1576, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4243335723876953, "rewards/margins": 7.469099044799805, "rewards/rejected": -5.044765472412109, "step": 607 }, { "epoch": 0.34, "learning_rate": 9.504844339512094e-07, "logits/chosen": -6.049036026000977, "logits/rejected": -6.0867462158203125, "logps/chosen": -184.360107421875, "logps/rejected": -211.92874145507812, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/chosen": 2.3699159622192383, "rewards/margins": 8.025567054748535, "rewards/rejected": -5.655651092529297, "step": 608 }, { "epoch": 0.34, "learning_rate": 9.5028913497549e-07, "logits/chosen": -6.244020938873291, "logits/rejected": -6.153738498687744, "logps/chosen": -383.3640441894531, "logps/rejected": -409.512451171875, "loss": 0.1364, "rewards/accuracies": 0.9375, "rewards/chosen": 3.281303644180298, "rewards/margins": 6.601810455322266, "rewards/rejected": -3.320507049560547, "step": 609 }, { "epoch": 0.34, "learning_rate": 9.500934717544235e-07, "logits/chosen": -6.148037910461426, "logits/rejected": -6.200467109680176, "logps/chosen": -179.6498565673828, "logps/rejected": -269.857666015625, "loss": 0.1065, "rewards/accuracies": 0.875, "rewards/chosen": 1.4386214017868042, "rewards/margins": 7.566324234008789, "rewards/rejected": -6.127703666687012, "step": 610 }, { "epoch": 0.34, "learning_rate": 9.498974444462844e-07, "logits/chosen": -6.228267192840576, "logits/rejected": -6.211406707763672, "logps/chosen": -292.4180908203125, "logps/rejected": -162.7943115234375, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": 4.604039192199707, "rewards/margins": 7.8985137939453125, "rewards/rejected": -3.2944743633270264, "step": 611 }, { "epoch": 0.34, "learning_rate": 9.497010532096424e-07, "logits/chosen": -6.173196792602539, "logits/rejected": -6.210853099822998, "logps/chosen": -248.23419189453125, "logps/rejected": -259.43896484375, "loss": 0.1004, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7475759983062744, "rewards/margins": 8.91260814666748, "rewards/rejected": -5.165032863616943, "step": 612 }, { "epoch": 0.34, "learning_rate": 9.49504298203361e-07, "logits/chosen": -6.17827844619751, "logits/rejected": -6.126678943634033, "logps/chosen": -293.95257568359375, "logps/rejected": -314.0119323730469, "loss": 0.149, "rewards/accuracies": 0.9375, "rewards/chosen": 4.046427249908447, "rewards/margins": 7.900274276733398, "rewards/rejected": -3.8538472652435303, "step": 613 }, { "epoch": 0.34, "learning_rate": 9.493071795865981e-07, "logits/chosen": -6.103337287902832, "logits/rejected": -6.058945655822754, "logps/chosen": -296.3387451171875, "logps/rejected": -161.22044372558594, "loss": 0.1688, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8677802085876465, "rewards/margins": 7.200966835021973, "rewards/rejected": -3.3331871032714844, "step": 614 }, { "epoch": 0.34, "learning_rate": 9.49109697518806e-07, "logits/chosen": -6.129523754119873, "logits/rejected": -6.215043544769287, "logps/chosen": -335.00372314453125, "logps/rejected": -240.10708618164062, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": 3.061471462249756, "rewards/margins": 7.858141899108887, "rewards/rejected": -4.796670436859131, "step": 615 }, { "epoch": 0.34, "learning_rate": 9.489118521597304e-07, "logits/chosen": -6.229637145996094, "logits/rejected": -6.151392936706543, "logps/chosen": -286.01861572265625, "logps/rejected": -211.02001953125, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": 3.9168314933776855, "rewards/margins": 7.711551189422607, "rewards/rejected": -3.794719934463501, "step": 616 }, { "epoch": 0.34, "learning_rate": 9.487136436694117e-07, "logits/chosen": -6.120850086212158, "logits/rejected": -6.1545000076293945, "logps/chosen": -525.697265625, "logps/rejected": -327.25909423828125, "loss": 0.1415, "rewards/accuracies": 0.9375, "rewards/chosen": 2.613253116607666, "rewards/margins": 7.562692165374756, "rewards/rejected": -4.94943904876709, "step": 617 }, { "epoch": 0.34, "learning_rate": 9.485150722081833e-07, "logits/chosen": -6.194954872131348, "logits/rejected": -6.138664245605469, "logps/chosen": -311.74664306640625, "logps/rejected": -158.69668579101562, "loss": 0.1579, "rewards/accuracies": 1.0, "rewards/chosen": 4.590102195739746, "rewards/margins": 9.515308380126953, "rewards/rejected": -4.925207138061523, "step": 618 }, { "epoch": 0.34, "learning_rate": 9.483161379366725e-07, "logits/chosen": -6.162278175354004, "logits/rejected": -6.189266204833984, "logps/chosen": -218.71783447265625, "logps/rejected": -175.24154663085938, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": 2.6832473278045654, "rewards/margins": 7.6678032875061035, "rewards/rejected": -4.984556198120117, "step": 619 }, { "epoch": 0.34, "learning_rate": 9.481168410158002e-07, "logits/chosen": -6.19890022277832, "logits/rejected": -6.167575836181641, "logps/chosen": -260.1766662597656, "logps/rejected": -186.6630401611328, "loss": 0.1152, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5936999320983887, "rewards/margins": 5.2335429191589355, "rewards/rejected": -2.639842987060547, "step": 620 }, { "epoch": 0.34, "learning_rate": 9.479171816067807e-07, "logits/chosen": -5.966275691986084, "logits/rejected": -6.082179069519043, "logps/chosen": -251.84286499023438, "logps/rejected": -280.7418212890625, "loss": 0.1493, "rewards/accuracies": 0.9375, "rewards/chosen": 2.952733039855957, "rewards/margins": 7.682551383972168, "rewards/rejected": -4.729818344116211, "step": 621 }, { "epoch": 0.35, "learning_rate": 9.47717159871121e-07, "logits/chosen": -6.059700012207031, "logits/rejected": -6.0215301513671875, "logps/chosen": -281.32415771484375, "logps/rejected": -179.3712615966797, "loss": 0.119, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0416419506073, "rewards/margins": 7.179350852966309, "rewards/rejected": -4.137709140777588, "step": 622 }, { "epoch": 0.35, "learning_rate": 9.475167759706217e-07, "logits/chosen": -6.16970682144165, "logits/rejected": -6.174613952636719, "logps/chosen": -403.75677490234375, "logps/rejected": -266.5968933105469, "loss": 0.3255, "rewards/accuracies": 0.875, "rewards/chosen": 3.5700724124908447, "rewards/margins": 8.093951225280762, "rewards/rejected": -4.523878574371338, "step": 623 }, { "epoch": 0.35, "learning_rate": 9.473160300673763e-07, "logits/chosen": -6.2184953689575195, "logits/rejected": -6.1214094161987305, "logps/chosen": -340.5256652832031, "logps/rejected": -191.6681671142578, "loss": 0.154, "rewards/accuracies": 0.9375, "rewards/chosen": 2.907759189605713, "rewards/margins": 6.913349151611328, "rewards/rejected": -4.005590438842773, "step": 624 }, { "epoch": 0.35, "learning_rate": 9.471149223237711e-07, "logits/chosen": -6.037817001342773, "logits/rejected": -6.161898136138916, "logps/chosen": -209.19033813476562, "logps/rejected": -184.62252807617188, "loss": 0.134, "rewards/accuracies": 1.0, "rewards/chosen": 2.991180896759033, "rewards/margins": 6.897390842437744, "rewards/rejected": -3.906209945678711, "step": 625 }, { "epoch": 0.35, "learning_rate": 9.469134529024849e-07, "logits/chosen": -6.153458118438721, "logits/rejected": -6.112678050994873, "logps/chosen": -244.97021484375, "logps/rejected": -176.8368682861328, "loss": 0.1521, "rewards/accuracies": 1.0, "rewards/chosen": 3.5145153999328613, "rewards/margins": 7.005611419677734, "rewards/rejected": -3.491096258163452, "step": 626 }, { "epoch": 0.35, "learning_rate": 9.467116219664893e-07, "logits/chosen": -6.172712802886963, "logits/rejected": -6.119151592254639, "logps/chosen": -190.18359375, "logps/rejected": -186.1217803955078, "loss": 0.1812, "rewards/accuracies": 0.875, "rewards/chosen": 0.8336466550827026, "rewards/margins": 4.961636543273926, "rewards/rejected": -4.127989768981934, "step": 627 }, { "epoch": 0.35, "learning_rate": 9.465094296790479e-07, "logits/chosen": -6.127062797546387, "logits/rejected": -6.136512756347656, "logps/chosen": -229.67723083496094, "logps/rejected": -248.75897216796875, "loss": 0.1545, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3540053367614746, "rewards/margins": 6.241504192352295, "rewards/rejected": -3.8874988555908203, "step": 628 }, { "epoch": 0.35, "learning_rate": 9.463068762037174e-07, "logits/chosen": -5.996257305145264, "logits/rejected": -6.024906158447266, "logps/chosen": -376.6622009277344, "logps/rejected": -331.2543640136719, "loss": 0.0977, "rewards/accuracies": 1.0, "rewards/chosen": 2.9280600547790527, "rewards/margins": 6.249780654907227, "rewards/rejected": -3.321721076965332, "step": 629 }, { "epoch": 0.35, "learning_rate": 9.46103961704346e-07, "logits/chosen": -6.199478626251221, "logits/rejected": -6.086523056030273, "logps/chosen": -261.8714294433594, "logps/rejected": -231.96697998046875, "loss": 0.2145, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2057924270629883, "rewards/margins": 7.46624231338501, "rewards/rejected": -4.2604498863220215, "step": 630 }, { "epoch": 0.35, "learning_rate": 9.459006863450744e-07, "logits/chosen": -6.040960788726807, "logits/rejected": -6.116358757019043, "logps/chosen": -322.5862731933594, "logps/rejected": -268.4863586425781, "loss": 0.1431, "rewards/accuracies": 1.0, "rewards/chosen": 2.634842872619629, "rewards/margins": 8.264451026916504, "rewards/rejected": -5.629608154296875, "step": 631 }, { "epoch": 0.35, "learning_rate": 9.456970502903345e-07, "logits/chosen": -6.184334754943848, "logits/rejected": -6.14564847946167, "logps/chosen": -220.6077880859375, "logps/rejected": -237.02481079101562, "loss": 0.0891, "rewards/accuracies": 0.9375, "rewards/chosen": 2.114680051803589, "rewards/margins": 7.052677631378174, "rewards/rejected": -4.937997817993164, "step": 632 }, { "epoch": 0.35, "learning_rate": 9.454930537048507e-07, "logits/chosen": -6.107847213745117, "logits/rejected": -6.1287641525268555, "logps/chosen": -246.2913360595703, "logps/rejected": -187.86874389648438, "loss": 0.1987, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4431352615356445, "rewards/margins": 6.685600757598877, "rewards/rejected": -4.242465496063232, "step": 633 }, { "epoch": 0.35, "learning_rate": 9.452886967536388e-07, "logits/chosen": -6.134681224822998, "logits/rejected": -6.112156391143799, "logps/chosen": -257.94110107421875, "logps/rejected": -169.57568359375, "loss": 0.1871, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3684961795806885, "rewards/margins": 6.405317306518555, "rewards/rejected": -4.036820888519287, "step": 634 }, { "epoch": 0.35, "learning_rate": 9.450839796020062e-07, "logits/chosen": -6.209312915802002, "logits/rejected": -6.22847843170166, "logps/chosen": -215.71368408203125, "logps/rejected": -239.5076446533203, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": 1.6224230527877808, "rewards/margins": 8.841713905334473, "rewards/rejected": -7.219290733337402, "step": 635 }, { "epoch": 0.35, "learning_rate": 9.448789024155511e-07, "logits/chosen": -6.035607814788818, "logits/rejected": -6.126137733459473, "logps/chosen": -268.9172058105469, "logps/rejected": -217.80044555664062, "loss": 0.2124, "rewards/accuracies": 0.9375, "rewards/chosen": 4.15580940246582, "rewards/margins": 6.344704627990723, "rewards/rejected": -2.1888952255249023, "step": 636 }, { "epoch": 0.35, "learning_rate": 9.446734653601638e-07, "logits/chosen": -6.050620079040527, "logits/rejected": -6.145984649658203, "logps/chosen": -242.83868408203125, "logps/rejected": -158.26434326171875, "loss": 0.2237, "rewards/accuracies": 0.9375, "rewards/chosen": 2.355539083480835, "rewards/margins": 5.72823429107666, "rewards/rejected": -3.372695207595825, "step": 637 }, { "epoch": 0.35, "learning_rate": 9.44467668602025e-07, "logits/chosen": -6.107868194580078, "logits/rejected": -6.131823539733887, "logps/chosen": -266.462890625, "logps/rejected": -124.90760040283203, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": 3.7642855644226074, "rewards/margins": 7.159101486206055, "rewards/rejected": -3.3948163986206055, "step": 638 }, { "epoch": 0.35, "learning_rate": 9.442615123076069e-07, "logits/chosen": -6.1226301193237305, "logits/rejected": -6.156649589538574, "logps/chosen": -237.88172912597656, "logps/rejected": -252.98780822753906, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": 3.0561211109161377, "rewards/margins": 7.489344120025635, "rewards/rejected": -4.433223247528076, "step": 639 }, { "epoch": 0.36, "learning_rate": 9.440549966436722e-07, "logits/chosen": -6.092881679534912, "logits/rejected": -6.157510757446289, "logps/chosen": -143.5648956298828, "logps/rejected": -237.71400451660156, "loss": 0.1381, "rewards/accuracies": 1.0, "rewards/chosen": 1.4979217052459717, "rewards/margins": 8.648542404174805, "rewards/rejected": -7.150620937347412, "step": 640 }, { "epoch": 0.36, "learning_rate": 9.438481217772742e-07, "logits/chosen": -6.070809364318848, "logits/rejected": -6.087282657623291, "logps/chosen": -482.06292724609375, "logps/rejected": -346.0431823730469, "loss": 0.0746, "rewards/accuracies": 0.875, "rewards/chosen": 4.033655643463135, "rewards/margins": 6.450525283813477, "rewards/rejected": -2.416869640350342, "step": 641 }, { "epoch": 0.36, "learning_rate": 9.436408878757571e-07, "logits/chosen": -6.152761459350586, "logits/rejected": -6.229827880859375, "logps/chosen": -240.84231567382812, "logps/rejected": -223.46888732910156, "loss": 0.1837, "rewards/accuracies": 0.9375, "rewards/chosen": 2.303079843521118, "rewards/margins": 7.250059604644775, "rewards/rejected": -4.946979999542236, "step": 642 }, { "epoch": 0.36, "learning_rate": 9.434332951067555e-07, "logits/chosen": -6.165007591247559, "logits/rejected": -6.253281593322754, "logps/chosen": -244.71658325195312, "logps/rejected": -319.0214538574219, "loss": 0.0931, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4598498344421387, "rewards/margins": 6.606109619140625, "rewards/rejected": -5.1462602615356445, "step": 643 }, { "epoch": 0.36, "learning_rate": 9.432253436381941e-07, "logits/chosen": -6.110738277435303, "logits/rejected": -6.155429840087891, "logps/chosen": -278.2003479003906, "logps/rejected": -180.2960205078125, "loss": 0.1498, "rewards/accuracies": 1.0, "rewards/chosen": 3.701937437057495, "rewards/margins": 7.816946506500244, "rewards/rejected": -4.115009307861328, "step": 644 }, { "epoch": 0.36, "learning_rate": 9.430170336382877e-07, "logits/chosen": -6.2359843254089355, "logits/rejected": -6.129754066467285, "logps/chosen": -313.3378601074219, "logps/rejected": -136.75857543945312, "loss": 0.1029, "rewards/accuracies": 0.875, "rewards/chosen": 3.7008614540100098, "rewards/margins": 5.785926818847656, "rewards/rejected": -2.0850653648376465, "step": 645 }, { "epoch": 0.36, "learning_rate": 9.428083652755411e-07, "logits/chosen": -6.153457164764404, "logits/rejected": -6.0400261878967285, "logps/chosen": -204.69232177734375, "logps/rejected": -149.92233276367188, "loss": 0.1629, "rewards/accuracies": 0.875, "rewards/chosen": 1.8181712627410889, "rewards/margins": 4.46468448638916, "rewards/rejected": -2.6465134620666504, "step": 646 }, { "epoch": 0.36, "learning_rate": 9.425993387187497e-07, "logits/chosen": -6.0876569747924805, "logits/rejected": -6.117831230163574, "logps/chosen": -436.5406494140625, "logps/rejected": -609.0032958984375, "loss": 0.3612, "rewards/accuracies": 0.875, "rewards/chosen": 1.8090254068374634, "rewards/margins": 5.138644695281982, "rewards/rejected": -3.3296194076538086, "step": 647 }, { "epoch": 0.36, "learning_rate": 9.423899541369977e-07, "logits/chosen": -6.142005443572998, "logits/rejected": -6.133950233459473, "logps/chosen": -353.4951171875, "logps/rejected": -490.1811218261719, "loss": 0.0858, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7806615829467773, "rewards/margins": 8.366715431213379, "rewards/rejected": -5.586053848266602, "step": 648 }, { "epoch": 0.36, "learning_rate": 9.421802116996593e-07, "logits/chosen": -6.137939453125, "logits/rejected": -6.118526458740234, "logps/chosen": -481.85906982421875, "logps/rejected": -299.87152099609375, "loss": 0.2087, "rewards/accuracies": 0.875, "rewards/chosen": 2.108314037322998, "rewards/margins": 4.992038726806641, "rewards/rejected": -2.8837246894836426, "step": 649 }, { "epoch": 0.36, "learning_rate": 9.419701115763982e-07, "logits/chosen": -6.183736801147461, "logits/rejected": -6.117070198059082, "logps/chosen": -248.61181640625, "logps/rejected": -206.1510467529297, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": 2.8737611770629883, "rewards/margins": 8.86086654663086, "rewards/rejected": -5.987105846405029, "step": 650 }, { "epoch": 0.36, "learning_rate": 9.417596539371675e-07, "logits/chosen": -6.086924076080322, "logits/rejected": -6.175112247467041, "logps/chosen": -214.98704528808594, "logps/rejected": -146.018310546875, "loss": 0.1017, "rewards/accuracies": 1.0, "rewards/chosen": 3.6620681285858154, "rewards/margins": 6.807932376861572, "rewards/rejected": -3.1458637714385986, "step": 651 }, { "epoch": 0.36, "learning_rate": 9.415488389522093e-07, "logits/chosen": -6.116333484649658, "logits/rejected": -6.1751861572265625, "logps/chosen": -234.04983520507812, "logps/rejected": -244.35922241210938, "loss": 0.0793, "rewards/accuracies": 0.9375, "rewards/chosen": 2.721717357635498, "rewards/margins": 8.405662536621094, "rewards/rejected": -5.683945655822754, "step": 652 }, { "epoch": 0.36, "learning_rate": 9.413376667920547e-07, "logits/chosen": -6.127478122711182, "logits/rejected": -6.202167510986328, "logps/chosen": -201.56674194335938, "logps/rejected": -184.40475463867188, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": 3.931852340698242, "rewards/margins": 8.520572662353516, "rewards/rejected": -4.588720798492432, "step": 653 }, { "epoch": 0.36, "learning_rate": 9.411261376275244e-07, "logits/chosen": -6.253202438354492, "logits/rejected": -6.14641809463501, "logps/chosen": -317.9904479980469, "logps/rejected": -203.6920623779297, "loss": 0.255, "rewards/accuracies": 0.9375, "rewards/chosen": 4.400468826293945, "rewards/margins": 6.648378849029541, "rewards/rejected": -2.2479100227355957, "step": 654 }, { "epoch": 0.36, "learning_rate": 9.409142516297268e-07, "logits/chosen": -6.211539268493652, "logits/rejected": -6.158629417419434, "logps/chosen": -298.30255126953125, "logps/rejected": -233.82508850097656, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 3.836784601211548, "rewards/margins": 9.367177963256836, "rewards/rejected": -5.530393600463867, "step": 655 }, { "epoch": 0.36, "learning_rate": 9.4070200897006e-07, "logits/chosen": -6.219355583190918, "logits/rejected": -6.187346458435059, "logps/chosen": -248.45425415039062, "logps/rejected": -227.26593017578125, "loss": 0.1192, "rewards/accuracies": 0.8125, "rewards/chosen": 1.2490825653076172, "rewards/margins": 6.592933654785156, "rewards/rejected": -5.343851089477539, "step": 656 }, { "epoch": 0.36, "learning_rate": 9.404894098202099e-07, "logits/chosen": -6.045419692993164, "logits/rejected": -6.1190185546875, "logps/chosen": -287.6633605957031, "logps/rejected": -360.53179931640625, "loss": 0.1558, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6811771392822266, "rewards/margins": 7.3057098388671875, "rewards/rejected": -4.624533653259277, "step": 657 }, { "epoch": 0.37, "learning_rate": 9.402764543521508e-07, "logits/chosen": -6.134307861328125, "logits/rejected": -6.168745994567871, "logps/chosen": -264.5479736328125, "logps/rejected": -177.01214599609375, "loss": 0.0881, "rewards/accuracies": 1.0, "rewards/chosen": 4.906135082244873, "rewards/margins": 7.160226345062256, "rewards/rejected": -2.254091262817383, "step": 658 }, { "epoch": 0.37, "learning_rate": 9.40063142738146e-07, "logits/chosen": -6.096348762512207, "logits/rejected": -6.043329238891602, "logps/chosen": -228.33572387695312, "logps/rejected": -141.64096069335938, "loss": 0.0892, "rewards/accuracies": 0.9375, "rewards/chosen": 4.458082675933838, "rewards/margins": 7.618128299713135, "rewards/rejected": -3.160045623779297, "step": 659 }, { "epoch": 0.37, "learning_rate": 9.398494751507457e-07, "logits/chosen": -6.174238204956055, "logits/rejected": -6.169564247131348, "logps/chosen": -272.77569580078125, "logps/rejected": -235.25775146484375, "loss": 0.1463, "rewards/accuracies": 0.9375, "rewards/chosen": 2.428701162338257, "rewards/margins": 6.712494850158691, "rewards/rejected": -4.2837934494018555, "step": 660 }, { "epoch": 0.37, "learning_rate": 9.396354517627892e-07, "logits/chosen": -6.081879615783691, "logits/rejected": -6.201927185058594, "logps/chosen": -293.3031921386719, "logps/rejected": -281.8537292480469, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": 2.5108230113983154, "rewards/margins": 7.941143035888672, "rewards/rejected": -5.430319786071777, "step": 661 }, { "epoch": 0.37, "learning_rate": 9.394210727474028e-07, "logits/chosen": -6.108458518981934, "logits/rejected": -6.230710506439209, "logps/chosen": -227.11517333984375, "logps/rejected": -212.0843505859375, "loss": 0.1693, "rewards/accuracies": 0.75, "rewards/chosen": 2.721357583999634, "rewards/margins": 6.952136993408203, "rewards/rejected": -4.230780124664307, "step": 662 }, { "epoch": 0.37, "learning_rate": 9.392063382780009e-07, "logits/chosen": -6.17206335067749, "logits/rejected": -6.217511177062988, "logps/chosen": -279.31378173828125, "logps/rejected": -328.2190856933594, "loss": 0.1347, "rewards/accuracies": 1.0, "rewards/chosen": 1.6206480264663696, "rewards/margins": 9.301722526550293, "rewards/rejected": -7.681074142456055, "step": 663 }, { "epoch": 0.37, "learning_rate": 9.389912485282852e-07, "logits/chosen": -6.230871200561523, "logits/rejected": -6.187227249145508, "logps/chosen": -333.4610290527344, "logps/rejected": -300.6738586425781, "loss": 0.1291, "rewards/accuracies": 0.9375, "rewards/chosen": 3.184515953063965, "rewards/margins": 6.911686420440674, "rewards/rejected": -3.727169990539551, "step": 664 }, { "epoch": 0.37, "learning_rate": 9.387758036722449e-07, "logits/chosen": -6.1753997802734375, "logits/rejected": -6.142716884613037, "logps/chosen": -291.3757019042969, "logps/rejected": -148.0311279296875, "loss": 0.1444, "rewards/accuracies": 1.0, "rewards/chosen": 4.551909446716309, "rewards/margins": 7.717567443847656, "rewards/rejected": -3.1656579971313477, "step": 665 }, { "epoch": 0.37, "learning_rate": 9.385600038841564e-07, "logits/chosen": -6.062032699584961, "logits/rejected": -6.067647457122803, "logps/chosen": -345.30523681640625, "logps/rejected": -347.2033386230469, "loss": 0.1165, "rewards/accuracies": 0.9375, "rewards/chosen": 2.333082437515259, "rewards/margins": 7.504354953765869, "rewards/rejected": -5.171272277832031, "step": 666 }, { "epoch": 0.37, "learning_rate": 9.383438493385832e-07, "logits/chosen": -6.194644927978516, "logits/rejected": -6.214474201202393, "logps/chosen": -269.3030700683594, "logps/rejected": -308.7378234863281, "loss": 0.1366, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4173591136932373, "rewards/margins": 9.456581115722656, "rewards/rejected": -7.039222240447998, "step": 667 }, { "epoch": 0.37, "learning_rate": 9.381273402103761e-07, "logits/chosen": -6.239296913146973, "logits/rejected": -6.146302223205566, "logps/chosen": -261.1658020019531, "logps/rejected": -203.83302307128906, "loss": 0.1093, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9389162063598633, "rewards/margins": 7.713803768157959, "rewards/rejected": -4.7748870849609375, "step": 668 }, { "epoch": 0.37, "learning_rate": 9.379104766746722e-07, "logits/chosen": -6.098235130310059, "logits/rejected": -6.2031097412109375, "logps/chosen": -268.44970703125, "logps/rejected": -186.1722412109375, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 5.314560890197754, "rewards/margins": 8.504068374633789, "rewards/rejected": -3.1895084381103516, "step": 669 }, { "epoch": 0.37, "learning_rate": 9.376932589068956e-07, "logits/chosen": -6.045612335205078, "logits/rejected": -6.070651054382324, "logps/chosen": -317.395751953125, "logps/rejected": -115.06853485107422, "loss": 0.0936, "rewards/accuracies": 0.9375, "rewards/chosen": 5.530206203460693, "rewards/margins": 8.471613883972168, "rewards/rejected": -2.9414079189300537, "step": 670 }, { "epoch": 0.37, "learning_rate": 9.374756870827569e-07, "logits/chosen": -6.1316328048706055, "logits/rejected": -6.133537769317627, "logps/chosen": -288.394287109375, "logps/rejected": -129.90289306640625, "loss": 0.1529, "rewards/accuracies": 0.9375, "rewards/chosen": 3.689342975616455, "rewards/margins": 6.234066009521484, "rewards/rejected": -2.5447232723236084, "step": 671 }, { "epoch": 0.37, "learning_rate": 9.372577613782529e-07, "logits/chosen": -6.14378023147583, "logits/rejected": -6.2291412353515625, "logps/chosen": -321.97174072265625, "logps/rejected": -180.46163940429688, "loss": 0.1716, "rewards/accuracies": 0.9375, "rewards/chosen": 4.475607872009277, "rewards/margins": 7.949614524841309, "rewards/rejected": -3.4740071296691895, "step": 672 }, { "epoch": 0.37, "learning_rate": 9.370394819696672e-07, "logits/chosen": -6.235326766967773, "logits/rejected": -6.1884050369262695, "logps/chosen": -222.36569213867188, "logps/rejected": -148.617919921875, "loss": 0.0839, "rewards/accuracies": 0.9375, "rewards/chosen": 4.412635803222656, "rewards/margins": 8.098434448242188, "rewards/rejected": -3.6857991218566895, "step": 673 }, { "epoch": 0.37, "learning_rate": 9.368208490335688e-07, "logits/chosen": -6.048202991485596, "logits/rejected": -6.061593055725098, "logps/chosen": -248.485107421875, "logps/rejected": -331.7811584472656, "loss": 0.0882, "rewards/accuracies": 0.875, "rewards/chosen": 2.014183521270752, "rewards/margins": 4.187324047088623, "rewards/rejected": -2.17314076423645, "step": 674 }, { "epoch": 0.37, "learning_rate": 9.366018627468131e-07, "logits/chosen": -6.110976219177246, "logits/rejected": -6.151120662689209, "logps/chosen": -262.37957763671875, "logps/rejected": -274.47271728515625, "loss": 0.2065, "rewards/accuracies": 1.0, "rewards/chosen": 3.13633394241333, "rewards/margins": 6.906209945678711, "rewards/rejected": -3.769876480102539, "step": 675 }, { "epoch": 0.38, "learning_rate": 9.363825232865413e-07, "logits/chosen": -6.1189775466918945, "logits/rejected": -6.140850067138672, "logps/chosen": -250.4308319091797, "logps/rejected": -229.81307983398438, "loss": 0.1478, "rewards/accuracies": 0.9375, "rewards/chosen": 4.539402484893799, "rewards/margins": 9.088634490966797, "rewards/rejected": -4.549232006072998, "step": 676 }, { "epoch": 0.38, "learning_rate": 9.361628308301802e-07, "logits/chosen": -6.220818519592285, "logits/rejected": -6.1906538009643555, "logps/chosen": -193.85543823242188, "logps/rejected": -179.9879150390625, "loss": 0.0808, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3737727403640747, "rewards/margins": 5.705449104309082, "rewards/rejected": -4.331676483154297, "step": 677 }, { "epoch": 0.38, "learning_rate": 9.359427855554421e-07, "logits/chosen": -6.010313034057617, "logits/rejected": -6.126961708068848, "logps/chosen": -597.79833984375, "logps/rejected": -638.8466186523438, "loss": 0.1517, "rewards/accuracies": 0.8125, "rewards/chosen": 2.379490852355957, "rewards/margins": 5.707098960876465, "rewards/rejected": -3.327608108520508, "step": 678 }, { "epoch": 0.38, "learning_rate": 9.35722387640325e-07, "logits/chosen": -6.161664962768555, "logits/rejected": -6.148280143737793, "logps/chosen": -318.5894470214844, "logps/rejected": -147.86375427246094, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/chosen": 2.9021551609039307, "rewards/margins": 6.649904251098633, "rewards/rejected": -3.7477493286132812, "step": 679 }, { "epoch": 0.38, "learning_rate": 9.355016372631118e-07, "logits/chosen": -6.0713090896606445, "logits/rejected": -6.243159294128418, "logps/chosen": -608.193115234375, "logps/rejected": -275.7320556640625, "loss": 0.2075, "rewards/accuracies": 0.875, "rewards/chosen": 1.5484657287597656, "rewards/margins": 9.162221908569336, "rewards/rejected": -7.61375617980957, "step": 680 }, { "epoch": 0.38, "learning_rate": 9.352805346023707e-07, "logits/chosen": -6.11140251159668, "logits/rejected": -6.127127647399902, "logps/chosen": -308.6825866699219, "logps/rejected": -277.16070556640625, "loss": 0.2014, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3202319145202637, "rewards/margins": 5.34745454788208, "rewards/rejected": -3.0272231101989746, "step": 681 }, { "epoch": 0.38, "learning_rate": 9.350590798369549e-07, "logits/chosen": -6.124761581420898, "logits/rejected": -6.124488353729248, "logps/chosen": -213.46774291992188, "logps/rejected": -185.0941619873047, "loss": 0.1592, "rewards/accuracies": 0.875, "rewards/chosen": 2.4736971855163574, "rewards/margins": 6.772656440734863, "rewards/rejected": -4.298959255218506, "step": 682 }, { "epoch": 0.38, "learning_rate": 9.348372731460021e-07, "logits/chosen": -6.234807014465332, "logits/rejected": -6.204251289367676, "logps/chosen": -285.6980285644531, "logps/rejected": -183.10321044921875, "loss": 0.1571, "rewards/accuracies": 0.9375, "rewards/chosen": 3.274296760559082, "rewards/margins": 7.7912750244140625, "rewards/rejected": -4.5169782638549805, "step": 683 }, { "epoch": 0.38, "learning_rate": 9.346151147089351e-07, "logits/chosen": -6.074853420257568, "logits/rejected": -6.205709934234619, "logps/chosen": -179.88392639160156, "logps/rejected": -301.62677001953125, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": 2.914047956466675, "rewards/margins": 9.249938011169434, "rewards/rejected": -6.335890293121338, "step": 684 }, { "epoch": 0.38, "learning_rate": 9.34392604705461e-07, "logits/chosen": -6.173827648162842, "logits/rejected": -6.200650215148926, "logps/chosen": -264.980712890625, "logps/rejected": -287.44464111328125, "loss": 0.0957, "rewards/accuracies": 0.875, "rewards/chosen": 2.0392181873321533, "rewards/margins": 4.344158172607422, "rewards/rejected": -2.3049399852752686, "step": 685 }, { "epoch": 0.38, "learning_rate": 9.341697433155713e-07, "logits/chosen": -6.182460784912109, "logits/rejected": -6.0996527671813965, "logps/chosen": -324.64068603515625, "logps/rejected": -243.41424560546875, "loss": 0.1543, "rewards/accuracies": 1.0, "rewards/chosen": 3.8841805458068848, "rewards/margins": 7.8310770988464355, "rewards/rejected": -3.9468963146209717, "step": 686 }, { "epoch": 0.38, "learning_rate": 9.339465307195419e-07, "logits/chosen": -6.130373477935791, "logits/rejected": -6.163053512573242, "logps/chosen": -418.26995849609375, "logps/rejected": -281.3454895019531, "loss": 0.1442, "rewards/accuracies": 0.75, "rewards/chosen": 1.3073246479034424, "rewards/margins": 5.75638484954834, "rewards/rejected": -4.449059963226318, "step": 687 }, { "epoch": 0.38, "learning_rate": 9.337229670979325e-07, "logits/chosen": -6.153512477874756, "logits/rejected": -6.123152732849121, "logps/chosen": -238.08114624023438, "logps/rejected": -209.9005126953125, "loss": 0.1384, "rewards/accuracies": 0.9375, "rewards/chosen": 3.155343532562256, "rewards/margins": 8.385480880737305, "rewards/rejected": -5.230137348175049, "step": 688 }, { "epoch": 0.38, "learning_rate": 9.334990526315871e-07, "logits/chosen": -6.0923027992248535, "logits/rejected": -6.215046405792236, "logps/chosen": -251.9896240234375, "logps/rejected": -311.1653747558594, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": 2.658651828765869, "rewards/margins": 8.907503128051758, "rewards/rejected": -6.248851776123047, "step": 689 }, { "epoch": 0.38, "learning_rate": 9.332747875016332e-07, "logits/chosen": -6.111528396606445, "logits/rejected": -6.0468597412109375, "logps/chosen": -270.6130676269531, "logps/rejected": -149.5161590576172, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": 4.211400032043457, "rewards/margins": 7.842015266418457, "rewards/rejected": -3.6306142807006836, "step": 690 }, { "epoch": 0.38, "learning_rate": 9.330501718894821e-07, "logits/chosen": -6.264308929443359, "logits/rejected": -6.17147970199585, "logps/chosen": -294.1273498535156, "logps/rejected": -131.8824462890625, "loss": 0.1531, "rewards/accuracies": 1.0, "rewards/chosen": 3.7933566570281982, "rewards/margins": 8.52663516998291, "rewards/rejected": -4.733278751373291, "step": 691 }, { "epoch": 0.38, "learning_rate": 9.328252059768287e-07, "logits/chosen": -6.176352500915527, "logits/rejected": -6.124019145965576, "logps/chosen": -496.0931091308594, "logps/rejected": -434.1627502441406, "loss": 0.266, "rewards/accuracies": 0.8125, "rewards/chosen": 1.8216736316680908, "rewards/margins": 6.467177391052246, "rewards/rejected": -4.645503520965576, "step": 692 }, { "epoch": 0.38, "learning_rate": 9.32599889945651e-07, "logits/chosen": -6.183346748352051, "logits/rejected": -6.207963943481445, "logps/chosen": -331.67620849609375, "logps/rejected": -262.3308410644531, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 3.3208389282226562, "rewards/margins": 9.341703414916992, "rewards/rejected": -6.020864486694336, "step": 693 }, { "epoch": 0.39, "learning_rate": 9.323742239782104e-07, "logits/chosen": -6.0990519523620605, "logits/rejected": -6.189776420593262, "logps/chosen": -300.00445556640625, "logps/rejected": -231.4160614013672, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": 5.206927299499512, "rewards/margins": 9.555927276611328, "rewards/rejected": -4.348999977111816, "step": 694 }, { "epoch": 0.39, "learning_rate": 9.321482082570515e-07, "logits/chosen": -6.25160551071167, "logits/rejected": -6.171948432922363, "logps/chosen": -193.81089782714844, "logps/rejected": -135.9246368408203, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 3.53536319732666, "rewards/margins": 8.158501625061035, "rewards/rejected": -4.623138904571533, "step": 695 }, { "epoch": 0.39, "learning_rate": 9.319218429650015e-07, "logits/chosen": -6.110838413238525, "logits/rejected": -6.11324405670166, "logps/chosen": -242.07290649414062, "logps/rejected": -159.5401153564453, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": 3.8476579189300537, "rewards/margins": 8.525928497314453, "rewards/rejected": -4.6782708168029785, "step": 696 }, { "epoch": 0.39, "learning_rate": 9.316951282851706e-07, "logits/chosen": -6.165462017059326, "logits/rejected": -6.1570611000061035, "logps/chosen": -213.9104461669922, "logps/rejected": -219.02371215820312, "loss": 0.1781, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5867266654968262, "rewards/margins": 5.323990345001221, "rewards/rejected": -3.7372639179229736, "step": 697 }, { "epoch": 0.39, "learning_rate": 9.314680644009516e-07, "logits/chosen": -6.1772661209106445, "logits/rejected": -6.106390953063965, "logps/chosen": -341.9758605957031, "logps/rejected": -219.13275146484375, "loss": 0.1344, "rewards/accuracies": 1.0, "rewards/chosen": 5.007011413574219, "rewards/margins": 10.187793731689453, "rewards/rejected": -5.180782318115234, "step": 698 }, { "epoch": 0.39, "learning_rate": 9.312406514960196e-07, "logits/chosen": -6.288412570953369, "logits/rejected": -6.164281845092773, "logps/chosen": -300.04620361328125, "logps/rejected": -120.85182189941406, "loss": 0.1789, "rewards/accuracies": 0.9375, "rewards/chosen": 4.308810234069824, "rewards/margins": 8.563250541687012, "rewards/rejected": -4.2544403076171875, "step": 699 }, { "epoch": 0.39, "learning_rate": 9.310128897543323e-07, "logits/chosen": -6.14650821685791, "logits/rejected": -6.068092346191406, "logps/chosen": -283.4834899902344, "logps/rejected": -234.38096618652344, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": 2.9875948429107666, "rewards/margins": 7.72098445892334, "rewards/rejected": -4.7333903312683105, "step": 700 }, { "epoch": 0.39, "learning_rate": 9.307847793601294e-07, "logits/chosen": -6.184426307678223, "logits/rejected": -6.100467205047607, "logps/chosen": -282.3787841796875, "logps/rejected": -181.51902770996094, "loss": 0.1127, "rewards/accuracies": 1.0, "rewards/chosen": 3.637817859649658, "rewards/margins": 7.738523960113525, "rewards/rejected": -4.100706100463867, "step": 701 }, { "epoch": 0.39, "learning_rate": 9.305563204979327e-07, "logits/chosen": -6.123430252075195, "logits/rejected": -6.201633930206299, "logps/chosen": -309.794921875, "logps/rejected": -317.2446594238281, "loss": 0.2398, "rewards/accuracies": 0.875, "rewards/chosen": 3.7136988639831543, "rewards/margins": 7.400752067565918, "rewards/rejected": -3.6870527267456055, "step": 702 }, { "epoch": 0.39, "learning_rate": 9.303275133525458e-07, "logits/chosen": -6.334762096405029, "logits/rejected": -6.118695259094238, "logps/chosen": -343.4132385253906, "logps/rejected": -112.02140045166016, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": 5.291436672210693, "rewards/margins": 8.276570320129395, "rewards/rejected": -2.9851338863372803, "step": 703 }, { "epoch": 0.39, "learning_rate": 9.30098358109054e-07, "logits/chosen": -6.158136367797852, "logits/rejected": -6.127549648284912, "logps/chosen": -248.5960693359375, "logps/rejected": -144.35293579101562, "loss": 0.1018, "rewards/accuracies": 0.9375, "rewards/chosen": 3.198136806488037, "rewards/margins": 7.535111427307129, "rewards/rejected": -4.336974143981934, "step": 704 }, { "epoch": 0.39, "learning_rate": 9.298688549528244e-07, "logits/chosen": -6.054064750671387, "logits/rejected": -6.081275463104248, "logps/chosen": -167.1916046142578, "logps/rejected": -272.20733642578125, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": 1.4004955291748047, "rewards/margins": 6.719880104064941, "rewards/rejected": -5.319384574890137, "step": 705 }, { "epoch": 0.39, "learning_rate": 9.296390040695055e-07, "logits/chosen": -6.137249946594238, "logits/rejected": -6.156277656555176, "logps/chosen": -365.59637451171875, "logps/rejected": -482.3803405761719, "loss": 0.1034, "rewards/accuracies": 0.9375, "rewards/chosen": 4.519557952880859, "rewards/margins": 8.603727340698242, "rewards/rejected": -4.084169864654541, "step": 706 }, { "epoch": 0.39, "learning_rate": 9.294088056450267e-07, "logits/chosen": -6.172020435333252, "logits/rejected": -6.195448875427246, "logps/chosen": -233.73182678222656, "logps/rejected": -238.15496826171875, "loss": 0.1499, "rewards/accuracies": 1.0, "rewards/chosen": 2.0720412731170654, "rewards/margins": 6.845121383666992, "rewards/rejected": -4.773079872131348, "step": 707 }, { "epoch": 0.39, "learning_rate": 9.29178259865599e-07, "logits/chosen": -6.206693649291992, "logits/rejected": -6.097179889678955, "logps/chosen": -227.79959106445312, "logps/rejected": -145.75003051757812, "loss": 0.1249, "rewards/accuracies": 1.0, "rewards/chosen": 3.7427115440368652, "rewards/margins": 7.2391252517700195, "rewards/rejected": -3.4964139461517334, "step": 708 }, { "epoch": 0.39, "learning_rate": 9.289473669177143e-07, "logits/chosen": -6.1594157218933105, "logits/rejected": -6.141211032867432, "logps/chosen": -347.0675048828125, "logps/rejected": -228.5911102294922, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": 4.642331123352051, "rewards/margins": 9.09563159942627, "rewards/rejected": -4.453300952911377, "step": 709 }, { "epoch": 0.39, "learning_rate": 9.28716126988145e-07, "logits/chosen": -6.157484531402588, "logits/rejected": -6.132338523864746, "logps/chosen": -296.271484375, "logps/rejected": -259.8378601074219, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": 2.8920085430145264, "rewards/margins": 7.822299003601074, "rewards/rejected": -4.930290222167969, "step": 710 }, { "epoch": 0.39, "learning_rate": 9.284845402639445e-07, "logits/chosen": -6.141048908233643, "logits/rejected": -6.1987690925598145, "logps/chosen": -268.80938720703125, "logps/rejected": -261.2803955078125, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 3.5238118171691895, "rewards/margins": 9.262603759765625, "rewards/rejected": -5.738792419433594, "step": 711 }, { "epoch": 0.4, "learning_rate": 9.282526069324466e-07, "logits/chosen": -6.139723777770996, "logits/rejected": -6.1989288330078125, "logps/chosen": -259.50994873046875, "logps/rejected": -217.13583374023438, "loss": 0.0937, "rewards/accuracies": 0.9375, "rewards/chosen": 5.207658767700195, "rewards/margins": 8.245648384094238, "rewards/rejected": -3.037990093231201, "step": 712 }, { "epoch": 0.4, "learning_rate": 9.280203271812655e-07, "logits/chosen": -6.185050964355469, "logits/rejected": -6.300824165344238, "logps/chosen": -176.2664794921875, "logps/rejected": -165.8997039794922, "loss": 0.193, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4251270294189453, "rewards/margins": 7.409775257110596, "rewards/rejected": -4.98464822769165, "step": 713 }, { "epoch": 0.4, "learning_rate": 9.277877011982954e-07, "logits/chosen": -6.117770195007324, "logits/rejected": -6.084338188171387, "logps/chosen": -317.75860595703125, "logps/rejected": -177.33172607421875, "loss": 0.1483, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7681498527526855, "rewards/margins": 6.708713531494141, "rewards/rejected": -3.940563678741455, "step": 714 }, { "epoch": 0.4, "learning_rate": 9.275547291717112e-07, "logits/chosen": -6.118829727172852, "logits/rejected": -6.085391998291016, "logps/chosen": -223.66482543945312, "logps/rejected": -163.66632080078125, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": 4.254216194152832, "rewards/margins": 6.976489067077637, "rewards/rejected": -2.7222719192504883, "step": 715 }, { "epoch": 0.4, "learning_rate": 9.273214112899672e-07, "logits/chosen": -6.231943130493164, "logits/rejected": -6.132741451263428, "logps/chosen": -265.32391357421875, "logps/rejected": -132.85101318359375, "loss": 0.1327, "rewards/accuracies": 1.0, "rewards/chosen": 4.629436016082764, "rewards/margins": 8.79643726348877, "rewards/rejected": -4.167001247406006, "step": 716 }, { "epoch": 0.4, "learning_rate": 9.270877477417976e-07, "logits/chosen": -6.145716190338135, "logits/rejected": -6.107166290283203, "logps/chosen": -297.3612060546875, "logps/rejected": -216.39308166503906, "loss": 0.1836, "rewards/accuracies": 1.0, "rewards/chosen": 3.6986045837402344, "rewards/margins": 8.500170707702637, "rewards/rejected": -4.801566123962402, "step": 717 }, { "epoch": 0.4, "learning_rate": 9.268537387162159e-07, "logits/chosen": -6.153881072998047, "logits/rejected": -6.031408786773682, "logps/chosen": -372.3363952636719, "logps/rejected": -120.3449935913086, "loss": 0.0999, "rewards/accuracies": 1.0, "rewards/chosen": 4.963330268859863, "rewards/margins": 10.266254425048828, "rewards/rejected": -5.302924156188965, "step": 718 }, { "epoch": 0.4, "learning_rate": 9.266193844025156e-07, "logits/chosen": -6.11968994140625, "logits/rejected": -6.019290447235107, "logps/chosen": -236.41419982910156, "logps/rejected": -257.7601318359375, "loss": 0.154, "rewards/accuracies": 0.9375, "rewards/chosen": 2.353300094604492, "rewards/margins": 6.521975994110107, "rewards/rejected": -4.168675899505615, "step": 719 }, { "epoch": 0.4, "learning_rate": 9.263846849902695e-07, "logits/chosen": -6.097935676574707, "logits/rejected": -5.968759536743164, "logps/chosen": -343.25970458984375, "logps/rejected": -82.82450866699219, "loss": 0.1399, "rewards/accuracies": 0.9375, "rewards/chosen": 6.320001602172852, "rewards/margins": 9.102459907531738, "rewards/rejected": -2.7824580669403076, "step": 720 }, { "epoch": 0.4, "learning_rate": 9.26149640669329e-07, "logits/chosen": -6.14331579208374, "logits/rejected": -6.109249114990234, "logps/chosen": -221.67214965820312, "logps/rejected": -241.94821166992188, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": 1.280705213546753, "rewards/margins": 7.31394100189209, "rewards/rejected": -6.033236026763916, "step": 721 }, { "epoch": 0.4, "learning_rate": 9.259142516298248e-07, "logits/chosen": -6.168336868286133, "logits/rejected": -6.12797212600708, "logps/chosen": -259.6896057128906, "logps/rejected": -244.13429260253906, "loss": 0.1313, "rewards/accuracies": 1.0, "rewards/chosen": 2.9538915157318115, "rewards/margins": 8.462024688720703, "rewards/rejected": -5.5081329345703125, "step": 722 }, { "epoch": 0.4, "learning_rate": 9.256785180621667e-07, "logits/chosen": -6.086834907531738, "logits/rejected": -6.149489879608154, "logps/chosen": -265.2887878417969, "logps/rejected": -246.8849334716797, "loss": 0.1081, "rewards/accuracies": 1.0, "rewards/chosen": 2.176508665084839, "rewards/margins": 8.498468399047852, "rewards/rejected": -6.321959495544434, "step": 723 }, { "epoch": 0.4, "learning_rate": 9.254424401570426e-07, "logits/chosen": -6.020636081695557, "logits/rejected": -6.084320068359375, "logps/chosen": -254.5147247314453, "logps/rejected": -228.357666015625, "loss": 0.1056, "rewards/accuracies": 0.9375, "rewards/chosen": 3.678462505340576, "rewards/margins": 7.855762481689453, "rewards/rejected": -4.177299976348877, "step": 724 }, { "epoch": 0.4, "learning_rate": 9.252060181054199e-07, "logits/chosen": -6.115510940551758, "logits/rejected": -6.162691593170166, "logps/chosen": -456.45062255859375, "logps/rejected": -344.3853759765625, "loss": 0.0816, "rewards/accuracies": 0.9375, "rewards/chosen": 4.726279258728027, "rewards/margins": 8.07099723815918, "rewards/rejected": -3.3447182178497314, "step": 725 }, { "epoch": 0.4, "learning_rate": 9.249692520985433e-07, "logits/chosen": -6.0259833335876465, "logits/rejected": -6.121557235717773, "logps/chosen": -286.24835205078125, "logps/rejected": -355.57745361328125, "loss": 0.1596, "rewards/accuracies": 1.0, "rewards/chosen": 1.816375732421875, "rewards/margins": 7.061708450317383, "rewards/rejected": -5.245332717895508, "step": 726 }, { "epoch": 0.4, "learning_rate": 9.247321423279364e-07, "logits/chosen": -6.102288722991943, "logits/rejected": -6.098578453063965, "logps/chosen": -337.7213439941406, "logps/rejected": -162.73410034179688, "loss": 0.1632, "rewards/accuracies": 1.0, "rewards/chosen": 4.1656904220581055, "rewards/margins": 7.3717193603515625, "rewards/rejected": -3.206029176712036, "step": 727 }, { "epoch": 0.4, "learning_rate": 9.244946889854007e-07, "logits/chosen": -6.253843307495117, "logits/rejected": -6.016226768493652, "logps/chosen": -302.94384765625, "logps/rejected": -209.65354919433594, "loss": 0.149, "rewards/accuracies": 1.0, "rewards/chosen": 4.033886909484863, "rewards/margins": 8.618804931640625, "rewards/rejected": -4.584918022155762, "step": 728 }, { "epoch": 0.4, "learning_rate": 9.242568922630156e-07, "logits/chosen": -6.0954437255859375, "logits/rejected": -6.021907806396484, "logps/chosen": -258.2449035644531, "logps/rejected": -127.32172393798828, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": 5.116415977478027, "rewards/margins": 7.734076023101807, "rewards/rejected": -2.617659330368042, "step": 729 }, { "epoch": 0.41, "learning_rate": 9.240187523531382e-07, "logits/chosen": -5.962625026702881, "logits/rejected": -6.085705757141113, "logps/chosen": -330.3873596191406, "logps/rejected": -220.80894470214844, "loss": 0.1372, "rewards/accuracies": 1.0, "rewards/chosen": 5.611870765686035, "rewards/margins": 10.298730850219727, "rewards/rejected": -4.686859607696533, "step": 730 }, { "epoch": 0.41, "learning_rate": 9.237802694484034e-07, "logits/chosen": -6.218386650085449, "logits/rejected": -6.013435363769531, "logps/chosen": -301.43927001953125, "logps/rejected": -138.29519653320312, "loss": 0.2638, "rewards/accuracies": 1.0, "rewards/chosen": 4.39827823638916, "rewards/margins": 9.660018920898438, "rewards/rejected": -5.261741638183594, "step": 731 }, { "epoch": 0.41, "learning_rate": 9.235414437417234e-07, "logits/chosen": -6.04509162902832, "logits/rejected": -5.905974388122559, "logps/chosen": -362.40350341796875, "logps/rejected": -402.46197509765625, "loss": 0.2311, "rewards/accuracies": 0.875, "rewards/chosen": 4.827206611633301, "rewards/margins": 6.503277778625488, "rewards/rejected": -1.6760714054107666, "step": 732 }, { "epoch": 0.41, "learning_rate": 9.233022754262878e-07, "logits/chosen": -6.0554728507995605, "logits/rejected": -6.099010467529297, "logps/chosen": -204.8660888671875, "logps/rejected": -193.62982177734375, "loss": 0.0828, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7019925117492676, "rewards/margins": 7.233319282531738, "rewards/rejected": -3.5313267707824707, "step": 733 }, { "epoch": 0.41, "learning_rate": 9.230627646955633e-07, "logits/chosen": -6.0838775634765625, "logits/rejected": -6.105295181274414, "logps/chosen": -241.837646484375, "logps/rejected": -293.3580627441406, "loss": 0.1482, "rewards/accuracies": 0.875, "rewards/chosen": 2.223219633102417, "rewards/margins": 5.949347496032715, "rewards/rejected": -3.726128101348877, "step": 734 }, { "epoch": 0.41, "learning_rate": 9.228229117432933e-07, "logits/chosen": -6.162737846374512, "logits/rejected": -5.995007514953613, "logps/chosen": -360.9312744140625, "logps/rejected": -224.6217041015625, "loss": 0.1113, "rewards/accuracies": 0.875, "rewards/chosen": 1.9838756322860718, "rewards/margins": 5.137279033660889, "rewards/rejected": -3.1534032821655273, "step": 735 }, { "epoch": 0.41, "learning_rate": 9.225827167634987e-07, "logits/chosen": -6.022132873535156, "logits/rejected": -6.077319145202637, "logps/chosen": -173.7171630859375, "logps/rejected": -202.79672241210938, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": 1.6678502559661865, "rewards/margins": 6.641290187835693, "rewards/rejected": -4.973439693450928, "step": 736 }, { "epoch": 0.41, "learning_rate": 9.223421799504765e-07, "logits/chosen": -6.053539752960205, "logits/rejected": -6.2310004234313965, "logps/chosen": -256.3684997558594, "logps/rejected": -274.218994140625, "loss": 0.081, "rewards/accuracies": 0.9375, "rewards/chosen": 3.581714153289795, "rewards/margins": 9.460285186767578, "rewards/rejected": -5.878571510314941, "step": 737 }, { "epoch": 0.41, "learning_rate": 9.221013014988002e-07, "logits/chosen": -6.0787200927734375, "logits/rejected": -6.187920093536377, "logps/chosen": -179.19351196289062, "logps/rejected": -217.04107666015625, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": 1.576695442199707, "rewards/margins": 7.523340225219727, "rewards/rejected": -5.9466447830200195, "step": 738 }, { "epoch": 0.41, "learning_rate": 9.218600816033199e-07, "logits/chosen": -6.078987121582031, "logits/rejected": -6.132231712341309, "logps/chosen": -280.1922912597656, "logps/rejected": -266.86737060546875, "loss": 0.107, "rewards/accuracies": 0.8125, "rewards/chosen": 1.4758174419403076, "rewards/margins": 6.253841400146484, "rewards/rejected": -4.778023719787598, "step": 739 }, { "epoch": 0.41, "learning_rate": 9.216185204591622e-07, "logits/chosen": -5.983208179473877, "logits/rejected": -6.029104232788086, "logps/chosen": -253.84274291992188, "logps/rejected": -170.65042114257812, "loss": 0.0737, "rewards/accuracies": 0.9375, "rewards/chosen": 3.79293155670166, "rewards/margins": 6.733543395996094, "rewards/rejected": -2.9406118392944336, "step": 740 }, { "epoch": 0.41, "learning_rate": 9.213766182617291e-07, "logits/chosen": -6.104340076446533, "logits/rejected": -6.125178337097168, "logps/chosen": -298.81402587890625, "logps/rejected": -199.76219177246094, "loss": 0.1508, "rewards/accuracies": 0.9375, "rewards/chosen": 4.694951057434082, "rewards/margins": 9.062592506408691, "rewards/rejected": -4.367640972137451, "step": 741 }, { "epoch": 0.41, "learning_rate": 9.211343752066987e-07, "logits/chosen": -6.0084333419799805, "logits/rejected": -6.07200288772583, "logps/chosen": -216.253662109375, "logps/rejected": -170.329833984375, "loss": 0.1576, "rewards/accuracies": 1.0, "rewards/chosen": 3.0738885402679443, "rewards/margins": 8.497859954833984, "rewards/rejected": -5.423972129821777, "step": 742 }, { "epoch": 0.41, "learning_rate": 9.208917914900249e-07, "logits/chosen": -6.168215751647949, "logits/rejected": -6.196663856506348, "logps/chosen": -263.6036682128906, "logps/rejected": -214.28594970703125, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": 3.767453670501709, "rewards/margins": 9.014142036437988, "rewards/rejected": -5.2466888427734375, "step": 743 }, { "epoch": 0.41, "learning_rate": 9.206488673079373e-07, "logits/chosen": -6.0469207763671875, "logits/rejected": -6.135421276092529, "logps/chosen": -287.10272216796875, "logps/rejected": -235.1182861328125, "loss": 0.1225, "rewards/accuracies": 1.0, "rewards/chosen": 4.799840927124023, "rewards/margins": 9.414087295532227, "rewards/rejected": -4.614245891571045, "step": 744 }, { "epoch": 0.41, "learning_rate": 9.204056028569407e-07, "logits/chosen": -6.1603546142578125, "logits/rejected": -6.073156356811523, "logps/chosen": -266.9090576171875, "logps/rejected": -196.54971313476562, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": 3.696166753768921, "rewards/margins": 7.611945152282715, "rewards/rejected": -3.915778398513794, "step": 745 }, { "epoch": 0.41, "learning_rate": 9.201619983338152e-07, "logits/chosen": -6.070844650268555, "logits/rejected": -6.213879108428955, "logps/chosen": -214.40219116210938, "logps/rejected": -330.71124267578125, "loss": 0.126, "rewards/accuracies": 0.9375, "rewards/chosen": 3.364086151123047, "rewards/margins": 7.530152797698975, "rewards/rejected": -4.166067123413086, "step": 746 }, { "epoch": 0.41, "learning_rate": 9.199180539356157e-07, "logits/chosen": -6.098531723022461, "logits/rejected": -6.124833106994629, "logps/chosen": -258.28485107421875, "logps/rejected": -190.30555725097656, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": 4.51103401184082, "rewards/margins": 9.327522277832031, "rewards/rejected": -4.816487789154053, "step": 747 }, { "epoch": 0.42, "learning_rate": 9.196737698596728e-07, "logits/chosen": -6.080665588378906, "logits/rejected": -6.142753601074219, "logps/chosen": -312.3468017578125, "logps/rejected": -221.76559448242188, "loss": 0.0956, "rewards/accuracies": 0.875, "rewards/chosen": 5.703142166137695, "rewards/margins": 8.682913780212402, "rewards/rejected": -2.979771375656128, "step": 748 }, { "epoch": 0.42, "learning_rate": 9.194291463035912e-07, "logits/chosen": -6.091834545135498, "logits/rejected": -6.0323381423950195, "logps/chosen": -263.5816650390625, "logps/rejected": -161.97232055664062, "loss": 0.121, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9972095489501953, "rewards/margins": 6.838659286499023, "rewards/rejected": -2.8414499759674072, "step": 749 }, { "epoch": 0.42, "learning_rate": 9.191841834652505e-07, "logits/chosen": -5.9812211990356445, "logits/rejected": -6.057814121246338, "logps/chosen": -227.10104370117188, "logps/rejected": -207.0454864501953, "loss": 0.1726, "rewards/accuracies": 0.875, "rewards/chosen": 3.27831768989563, "rewards/margins": 8.238297462463379, "rewards/rejected": -4.959980010986328, "step": 750 }, { "epoch": 0.42, "learning_rate": 9.189388815428046e-07, "logits/chosen": -6.109200954437256, "logits/rejected": -6.129554748535156, "logps/chosen": -315.46435546875, "logps/rejected": -167.15130615234375, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 6.114748954772949, "rewards/margins": 9.603129386901855, "rewards/rejected": -3.4883806705474854, "step": 751 }, { "epoch": 0.42, "learning_rate": 9.186932407346815e-07, "logits/chosen": -5.914679527282715, "logits/rejected": -5.995996952056885, "logps/chosen": -224.29090881347656, "logps/rejected": -210.37197875976562, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": 3.7048838138580322, "rewards/margins": 7.281247615814209, "rewards/rejected": -3.576364040374756, "step": 752 }, { "epoch": 0.42, "learning_rate": 9.184472612395839e-07, "logits/chosen": -6.069769382476807, "logits/rejected": -6.072704315185547, "logps/chosen": -366.15179443359375, "logps/rejected": -238.90780639648438, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": 3.957439422607422, "rewards/margins": 7.650252342224121, "rewards/rejected": -3.6928131580352783, "step": 753 }, { "epoch": 0.42, "learning_rate": 9.182009432564879e-07, "logits/chosen": -6.108329772949219, "logits/rejected": -6.096103668212891, "logps/chosen": -264.2442626953125, "logps/rejected": -229.5881805419922, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": 3.3534693717956543, "rewards/margins": 7.3830389976501465, "rewards/rejected": -4.029569149017334, "step": 754 }, { "epoch": 0.42, "learning_rate": 9.17954286984644e-07, "logits/chosen": -6.191638946533203, "logits/rejected": -6.212657451629639, "logps/chosen": -180.7881622314453, "logps/rejected": -131.69052124023438, "loss": 0.1279, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3108338117599487, "rewards/margins": 5.99742317199707, "rewards/rejected": -4.686589241027832, "step": 755 }, { "epoch": 0.42, "learning_rate": 9.177072926235756e-07, "logits/chosen": -6.128105163574219, "logits/rejected": -6.073211193084717, "logps/chosen": -322.24713134765625, "logps/rejected": -145.02987670898438, "loss": 0.0795, "rewards/accuracies": 0.9375, "rewards/chosen": 6.068349361419678, "rewards/margins": 8.20376968383789, "rewards/rejected": -2.1354198455810547, "step": 756 }, { "epoch": 0.42, "learning_rate": 9.174599603730802e-07, "logits/chosen": -5.992038726806641, "logits/rejected": -5.972693920135498, "logps/chosen": -387.6080017089844, "logps/rejected": -183.14993286132812, "loss": 0.0775, "rewards/accuracies": 0.9375, "rewards/chosen": 6.203304290771484, "rewards/margins": 7.806984901428223, "rewards/rejected": -1.6036803722381592, "step": 757 }, { "epoch": 0.42, "learning_rate": 9.172122904332284e-07, "logits/chosen": -6.011044979095459, "logits/rejected": -6.166879653930664, "logps/chosen": -157.1174774169922, "logps/rejected": -255.82159423828125, "loss": 0.1614, "rewards/accuracies": 1.0, "rewards/chosen": 1.0025458335876465, "rewards/margins": 7.831038475036621, "rewards/rejected": -6.828492641448975, "step": 758 }, { "epoch": 0.42, "learning_rate": 9.16964283004364e-07, "logits/chosen": -6.016331672668457, "logits/rejected": -6.038015842437744, "logps/chosen": -425.4797668457031, "logps/rejected": -285.4554748535156, "loss": 0.2333, "rewards/accuracies": 0.9375, "rewards/chosen": 4.546114921569824, "rewards/margins": 6.541872978210449, "rewards/rejected": -1.9957579374313354, "step": 759 }, { "epoch": 0.42, "learning_rate": 9.167159382871038e-07, "logits/chosen": -6.158977031707764, "logits/rejected": -6.076437950134277, "logps/chosen": -375.582763671875, "logps/rejected": -202.0689697265625, "loss": 0.1617, "rewards/accuracies": 0.8125, "rewards/chosen": 3.1795144081115723, "rewards/margins": 6.358526229858398, "rewards/rejected": -3.1790122985839844, "step": 760 }, { "epoch": 0.42, "learning_rate": 9.164672564823372e-07, "logits/chosen": -6.170459270477295, "logits/rejected": -6.048398971557617, "logps/chosen": -243.85992431640625, "logps/rejected": -186.485107421875, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 2.4508790969848633, "rewards/margins": 6.643634796142578, "rewards/rejected": -4.192755699157715, "step": 761 }, { "epoch": 0.42, "learning_rate": 9.162182377912266e-07, "logits/chosen": -6.061278343200684, "logits/rejected": -6.014796257019043, "logps/chosen": -180.3480682373047, "logps/rejected": -168.0278778076172, "loss": 0.1174, "rewards/accuracies": 0.875, "rewards/chosen": 1.7247756719589233, "rewards/margins": 4.588239669799805, "rewards/rejected": -2.86346435546875, "step": 762 }, { "epoch": 0.42, "learning_rate": 9.15968882415207e-07, "logits/chosen": -6.130010604858398, "logits/rejected": -6.095508575439453, "logps/chosen": -224.50286865234375, "logps/rejected": -173.78256225585938, "loss": 0.1526, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5063433647155762, "rewards/margins": 5.606013298034668, "rewards/rejected": -4.099669933319092, "step": 763 }, { "epoch": 0.42, "learning_rate": 9.157191905559851e-07, "logits/chosen": -5.918902397155762, "logits/rejected": -6.034447193145752, "logps/chosen": -404.2789306640625, "logps/rejected": -375.1899108886719, "loss": 0.0812, "rewards/accuracies": 0.9375, "rewards/chosen": 2.0237531661987305, "rewards/margins": 8.442103385925293, "rewards/rejected": -6.418350696563721, "step": 764 }, { "epoch": 0.42, "learning_rate": 9.154691624155406e-07, "logits/chosen": -6.157670021057129, "logits/rejected": -6.2126784324646, "logps/chosen": -421.93634033203125, "logps/rejected": -366.26171875, "loss": 0.124, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9244518280029297, "rewards/margins": 9.228578567504883, "rewards/rejected": -6.304127216339111, "step": 765 }, { "epoch": 0.43, "learning_rate": 9.152187981961248e-07, "logits/chosen": -6.095183849334717, "logits/rejected": -6.212705612182617, "logps/chosen": -292.04229736328125, "logps/rejected": -331.60223388671875, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": 1.664520502090454, "rewards/margins": 7.619585037231445, "rewards/rejected": -5.95506477355957, "step": 766 }, { "epoch": 0.43, "learning_rate": 9.149680981002608e-07, "logits/chosen": -5.988295555114746, "logits/rejected": -5.984433174133301, "logps/chosen": -308.788330078125, "logps/rejected": -326.5765075683594, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": 1.548231840133667, "rewards/margins": 9.77752685546875, "rewards/rejected": -8.229294776916504, "step": 767 }, { "epoch": 0.43, "learning_rate": 9.147170623307436e-07, "logits/chosen": -6.048581123352051, "logits/rejected": -6.082336902618408, "logps/chosen": -386.07611083984375, "logps/rejected": -290.8280334472656, "loss": 0.0888, "rewards/accuracies": 0.9375, "rewards/chosen": 4.740599632263184, "rewards/margins": 9.271171569824219, "rewards/rejected": -4.530571460723877, "step": 768 }, { "epoch": 0.43, "learning_rate": 9.144656910906395e-07, "logits/chosen": -6.120183944702148, "logits/rejected": -6.022701263427734, "logps/chosen": -278.1828308105469, "logps/rejected": -174.9853057861328, "loss": 0.1979, "rewards/accuracies": 0.9375, "rewards/chosen": 3.489989995956421, "rewards/margins": 7.55324125289917, "rewards/rejected": -4.06325101852417, "step": 769 }, { "epoch": 0.43, "learning_rate": 9.142139845832863e-07, "logits/chosen": -6.062563419342041, "logits/rejected": -6.135951042175293, "logps/chosen": -254.64913940429688, "logps/rejected": -264.11090087890625, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": 4.116878032684326, "rewards/margins": 9.220258712768555, "rewards/rejected": -5.103381156921387, "step": 770 }, { "epoch": 0.43, "learning_rate": 9.139619430122934e-07, "logits/chosen": -6.144084453582764, "logits/rejected": -6.252043724060059, "logps/chosen": -266.8453063964844, "logps/rejected": -247.15072631835938, "loss": 0.2626, "rewards/accuracies": 1.0, "rewards/chosen": 3.5346310138702393, "rewards/margins": 8.472955703735352, "rewards/rejected": -4.938324451446533, "step": 771 }, { "epoch": 0.43, "learning_rate": 9.137095665815404e-07, "logits/chosen": -6.197841644287109, "logits/rejected": -6.058094024658203, "logps/chosen": -333.8558349609375, "logps/rejected": -190.19741821289062, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/chosen": 4.101949691772461, "rewards/margins": 9.111078262329102, "rewards/rejected": -5.009129047393799, "step": 772 }, { "epoch": 0.43, "learning_rate": 9.134568554951782e-07, "logits/chosen": -6.122692108154297, "logits/rejected": -6.111172676086426, "logps/chosen": -268.0708312988281, "logps/rejected": -267.3555603027344, "loss": 0.1163, "rewards/accuracies": 0.875, "rewards/chosen": 3.980194091796875, "rewards/margins": 7.456755638122559, "rewards/rejected": -3.4765613079071045, "step": 773 }, { "epoch": 0.43, "learning_rate": 9.132038099576289e-07, "logits/chosen": -5.991390705108643, "logits/rejected": -6.10852575302124, "logps/chosen": -344.15087890625, "logps/rejected": -230.64620971679688, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": 6.2105712890625, "rewards/margins": 8.80116081237793, "rewards/rejected": -2.5905892848968506, "step": 774 }, { "epoch": 0.43, "learning_rate": 9.129504301735842e-07, "logits/chosen": -6.164137840270996, "logits/rejected": -6.10960054397583, "logps/chosen": -319.8435363769531, "logps/rejected": -202.65814208984375, "loss": 0.1382, "rewards/accuracies": 1.0, "rewards/chosen": 2.0411124229431152, "rewards/margins": 5.56640625, "rewards/rejected": -3.525294303894043, "step": 775 }, { "epoch": 0.43, "learning_rate": 9.126967163480068e-07, "logits/chosen": -6.139608383178711, "logits/rejected": -6.093688488006592, "logps/chosen": -294.0291442871094, "logps/rejected": -180.90103149414062, "loss": 0.2483, "rewards/accuracies": 0.875, "rewards/chosen": 1.9423060417175293, "rewards/margins": 5.540348052978516, "rewards/rejected": -3.5980420112609863, "step": 776 }, { "epoch": 0.43, "learning_rate": 9.124426686861295e-07, "logits/chosen": -6.057552337646484, "logits/rejected": -6.050956726074219, "logps/chosen": -133.3291015625, "logps/rejected": -176.51797485351562, "loss": 0.1525, "rewards/accuracies": 0.9375, "rewards/chosen": 0.9984871745109558, "rewards/margins": 6.214117050170898, "rewards/rejected": -5.215630531311035, "step": 777 }, { "epoch": 0.43, "learning_rate": 9.121882873934552e-07, "logits/chosen": -6.029111385345459, "logits/rejected": -6.008682727813721, "logps/chosen": -493.23187255859375, "logps/rejected": -219.95147705078125, "loss": 0.1543, "rewards/accuracies": 0.875, "rewards/chosen": 2.548233985900879, "rewards/margins": 4.793907642364502, "rewards/rejected": -2.245673179626465, "step": 778 }, { "epoch": 0.43, "learning_rate": 9.119335726757563e-07, "logits/chosen": -6.23454475402832, "logits/rejected": -6.124479293823242, "logps/chosen": -283.88983154296875, "logps/rejected": -175.34397888183594, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 3.449831962585449, "rewards/margins": 7.220975875854492, "rewards/rejected": -3.771144151687622, "step": 779 }, { "epoch": 0.43, "learning_rate": 9.116785247390754e-07, "logits/chosen": -6.038861274719238, "logits/rejected": -6.148797035217285, "logps/chosen": -222.24986267089844, "logps/rejected": -306.615234375, "loss": 0.1534, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1091701984405518, "rewards/margins": 5.1803388595581055, "rewards/rejected": -4.071168422698975, "step": 780 }, { "epoch": 0.43, "learning_rate": 9.114231437897243e-07, "logits/chosen": -6.209085464477539, "logits/rejected": -5.986705780029297, "logps/chosen": -366.95684814453125, "logps/rejected": -127.41775512695312, "loss": 0.0709, "rewards/accuracies": 0.9375, "rewards/chosen": 6.030979156494141, "rewards/margins": 9.913244247436523, "rewards/rejected": -3.882265090942383, "step": 781 }, { "epoch": 0.43, "learning_rate": 9.111674300342846e-07, "logits/chosen": -6.193955421447754, "logits/rejected": -6.1107916831970215, "logps/chosen": -261.62261962890625, "logps/rejected": -166.44775390625, "loss": 0.0853, "rewards/accuracies": 0.875, "rewards/chosen": 4.332917213439941, "rewards/margins": 6.745082378387451, "rewards/rejected": -2.4121651649475098, "step": 782 }, { "epoch": 0.43, "learning_rate": 9.109113836796065e-07, "logits/chosen": -6.089321136474609, "logits/rejected": -6.0696492195129395, "logps/chosen": -220.43666076660156, "logps/rejected": -283.4960021972656, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": 2.0500288009643555, "rewards/margins": 6.9536004066467285, "rewards/rejected": -4.903571605682373, "step": 783 }, { "epoch": 0.44, "learning_rate": 9.106550049328097e-07, "logits/chosen": -6.049460411071777, "logits/rejected": -6.0619635581970215, "logps/chosen": -313.4250183105469, "logps/rejected": -129.6637725830078, "loss": 0.0955, "rewards/accuracies": 1.0, "rewards/chosen": 4.795393943786621, "rewards/margins": 7.821475505828857, "rewards/rejected": -3.0260813236236572, "step": 784 }, { "epoch": 0.44, "learning_rate": 9.103982940012827e-07, "logits/chosen": -6.123769760131836, "logits/rejected": -6.108389854431152, "logps/chosen": -161.47756958007812, "logps/rejected": -161.87774658203125, "loss": 0.11, "rewards/accuracies": 0.9375, "rewards/chosen": 2.311652183532715, "rewards/margins": 5.207001209259033, "rewards/rejected": -2.8953487873077393, "step": 785 }, { "epoch": 0.44, "learning_rate": 9.101412510926825e-07, "logits/chosen": -6.120044708251953, "logits/rejected": -5.93784761428833, "logps/chosen": -269.4009704589844, "logps/rejected": -105.55522918701172, "loss": 0.1419, "rewards/accuracies": 0.9375, "rewards/chosen": 4.092445373535156, "rewards/margins": 8.124579429626465, "rewards/rejected": -4.032134056091309, "step": 786 }, { "epoch": 0.44, "learning_rate": 9.09883876414935e-07, "logits/chosen": -6.088851451873779, "logits/rejected": -6.105160713195801, "logps/chosen": -267.3103942871094, "logps/rejected": -219.544189453125, "loss": 0.1554, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3585216999053955, "rewards/margins": 7.815218925476074, "rewards/rejected": -5.456697463989258, "step": 787 }, { "epoch": 0.44, "learning_rate": 9.096261701762342e-07, "logits/chosen": -6.108015537261963, "logits/rejected": -6.0176520347595215, "logps/chosen": -236.10653686523438, "logps/rejected": -129.24111938476562, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": 5.23223352432251, "rewards/margins": 9.87279224395752, "rewards/rejected": -4.64055871963501, "step": 788 }, { "epoch": 0.44, "learning_rate": 9.093681325850423e-07, "logits/chosen": -6.038315773010254, "logits/rejected": -5.9891862869262695, "logps/chosen": -213.58018493652344, "logps/rejected": -133.33731079101562, "loss": 0.2716, "rewards/accuracies": 1.0, "rewards/chosen": 3.6832926273345947, "rewards/margins": 6.088710784912109, "rewards/rejected": -2.4054179191589355, "step": 789 }, { "epoch": 0.44, "learning_rate": 9.091097638500897e-07, "logits/chosen": -6.054169654846191, "logits/rejected": -6.075413703918457, "logps/chosen": -227.6063995361328, "logps/rejected": -174.50714111328125, "loss": 0.1699, "rewards/accuracies": 1.0, "rewards/chosen": 2.963892698287964, "rewards/margins": 6.993749141693115, "rewards/rejected": -4.0298566818237305, "step": 790 }, { "epoch": 0.44, "learning_rate": 9.088510641803746e-07, "logits/chosen": -5.9417924880981445, "logits/rejected": -6.080036163330078, "logps/chosen": -241.58799743652344, "logps/rejected": -285.8059997558594, "loss": 0.1133, "rewards/accuracies": 0.875, "rewards/chosen": 3.5522806644439697, "rewards/margins": 8.37626838684082, "rewards/rejected": -4.823988437652588, "step": 791 }, { "epoch": 0.44, "learning_rate": 9.085920337851627e-07, "logits/chosen": -6.060581684112549, "logits/rejected": -6.1372294425964355, "logps/chosen": -222.84458923339844, "logps/rejected": -263.3702087402344, "loss": 0.1136, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5622365474700928, "rewards/margins": 8.473005294799805, "rewards/rejected": -5.910767555236816, "step": 792 }, { "epoch": 0.44, "learning_rate": 9.083326728739876e-07, "logits/chosen": -6.026924133300781, "logits/rejected": -6.079131126403809, "logps/chosen": -475.333984375, "logps/rejected": -365.531982421875, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": 2.2862048149108887, "rewards/margins": 7.5423736572265625, "rewards/rejected": -5.256168365478516, "step": 793 }, { "epoch": 0.44, "learning_rate": 9.080729816566502e-07, "logits/chosen": -6.0172119140625, "logits/rejected": -6.132256031036377, "logps/chosen": -320.59228515625, "logps/rejected": -296.8845520019531, "loss": 0.0993, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4726486206054688, "rewards/margins": 8.530129432678223, "rewards/rejected": -5.057481288909912, "step": 794 }, { "epoch": 0.44, "learning_rate": 9.07812960343218e-07, "logits/chosen": -6.07403564453125, "logits/rejected": -6.115268707275391, "logps/chosen": -232.53842163085938, "logps/rejected": -249.42831420898438, "loss": 0.1116, "rewards/accuracies": 0.9375, "rewards/chosen": 2.094299793243408, "rewards/margins": 6.7952470779418945, "rewards/rejected": -4.700947284698486, "step": 795 }, { "epoch": 0.44, "learning_rate": 9.075526091440262e-07, "logits/chosen": -6.035438060760498, "logits/rejected": -6.069631576538086, "logps/chosen": -261.5220947265625, "logps/rejected": -216.37551879882812, "loss": 0.1372, "rewards/accuracies": 0.9375, "rewards/chosen": 2.607067584991455, "rewards/margins": 8.036619186401367, "rewards/rejected": -5.429551601409912, "step": 796 }, { "epoch": 0.44, "learning_rate": 9.072919282696768e-07, "logits/chosen": -6.027279853820801, "logits/rejected": -6.055627346038818, "logps/chosen": -181.22314453125, "logps/rejected": -244.32644653320312, "loss": 0.1462, "rewards/accuracies": 1.0, "rewards/chosen": 2.8713998794555664, "rewards/margins": 8.179544448852539, "rewards/rejected": -5.308144569396973, "step": 797 }, { "epoch": 0.44, "learning_rate": 9.070309179310382e-07, "logits/chosen": -5.985651016235352, "logits/rejected": -6.024098873138428, "logps/chosen": -194.91744995117188, "logps/rejected": -184.8845672607422, "loss": 0.1708, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5281894207000732, "rewards/margins": 6.595711708068848, "rewards/rejected": -5.067522048950195, "step": 798 }, { "epoch": 0.44, "learning_rate": 9.067695783392453e-07, "logits/chosen": -6.045537948608398, "logits/rejected": -6.160036087036133, "logps/chosen": -324.77935791015625, "logps/rejected": -254.42552185058594, "loss": 0.1058, "rewards/accuracies": 1.0, "rewards/chosen": 4.511652946472168, "rewards/margins": 8.319493293762207, "rewards/rejected": -3.807840585708618, "step": 799 }, { "epoch": 0.44, "learning_rate": 9.065079097056995e-07, "logits/chosen": -6.120011806488037, "logits/rejected": -6.097523212432861, "logps/chosen": -270.7096862792969, "logps/rejected": -277.1035461425781, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": 1.4866924285888672, "rewards/margins": 7.05916690826416, "rewards/rejected": -5.572474956512451, "step": 800 }, { "epoch": 0.44, "learning_rate": 9.062459122420683e-07, "logits/chosen": -6.024872303009033, "logits/rejected": -6.014915943145752, "logps/chosen": -250.15325927734375, "logps/rejected": -224.0111541748047, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": 1.6196759939193726, "rewards/margins": 6.853560924530029, "rewards/rejected": -5.233884811401367, "step": 801 }, { "epoch": 0.45, "learning_rate": 9.059835861602853e-07, "logits/chosen": -5.971227645874023, "logits/rejected": -6.05783748626709, "logps/chosen": -268.87738037109375, "logps/rejected": -281.363037109375, "loss": 0.091, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6079912185668945, "rewards/margins": 8.491508483886719, "rewards/rejected": -4.883518218994141, "step": 802 }, { "epoch": 0.45, "learning_rate": 9.057209316725497e-07, "logits/chosen": -6.124950408935547, "logits/rejected": -6.036437511444092, "logps/chosen": -290.7763366699219, "logps/rejected": -303.3521423339844, "loss": 0.1451, "rewards/accuracies": 0.8125, "rewards/chosen": 2.549030303955078, "rewards/margins": 6.781907558441162, "rewards/rejected": -4.232876777648926, "step": 803 }, { "epoch": 0.45, "learning_rate": 9.054579489913267e-07, "logits/chosen": -6.188398361206055, "logits/rejected": -6.1967453956604, "logps/chosen": -203.08981323242188, "logps/rejected": -167.4432373046875, "loss": 0.0782, "rewards/accuracies": 0.9375, "rewards/chosen": 3.104853391647339, "rewards/margins": 7.383669376373291, "rewards/rejected": -4.278815746307373, "step": 804 }, { "epoch": 0.45, "learning_rate": 9.051946383293465e-07, "logits/chosen": -6.050601959228516, "logits/rejected": -5.956600666046143, "logps/chosen": -400.81597900390625, "logps/rejected": -215.13560485839844, "loss": 0.1381, "rewards/accuracies": 0.9375, "rewards/chosen": 5.006436347961426, "rewards/margins": 8.997549057006836, "rewards/rejected": -3.99111270904541, "step": 805 }, { "epoch": 0.45, "learning_rate": 9.049309998996051e-07, "logits/chosen": -6.108128547668457, "logits/rejected": -6.121151924133301, "logps/chosen": -241.36318969726562, "logps/rejected": -283.3583068847656, "loss": 0.1372, "rewards/accuracies": 0.875, "rewards/chosen": 2.572042942047119, "rewards/margins": 7.15032958984375, "rewards/rejected": -4.578286647796631, "step": 806 }, { "epoch": 0.45, "learning_rate": 9.046670339153633e-07, "logits/chosen": -6.1145172119140625, "logits/rejected": -6.053196907043457, "logps/chosen": -222.07391357421875, "logps/rejected": -216.14950561523438, "loss": 0.109, "rewards/accuracies": 1.0, "rewards/chosen": 2.912447452545166, "rewards/margins": 8.8062744140625, "rewards/rejected": -5.893827438354492, "step": 807 }, { "epoch": 0.45, "learning_rate": 9.044027405901469e-07, "logits/chosen": -6.155492305755615, "logits/rejected": -5.94620418548584, "logps/chosen": -272.05987548828125, "logps/rejected": -142.7749786376953, "loss": 0.2404, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1218388080596924, "rewards/margins": 7.681735515594482, "rewards/rejected": -4.559896945953369, "step": 808 }, { "epoch": 0.45, "learning_rate": 9.041381201377467e-07, "logits/chosen": -6.146156311035156, "logits/rejected": -6.031675338745117, "logps/chosen": -339.44158935546875, "logps/rejected": -167.6539764404297, "loss": 0.1371, "rewards/accuracies": 1.0, "rewards/chosen": 6.871504783630371, "rewards/margins": 9.720539093017578, "rewards/rejected": -2.8490352630615234, "step": 809 }, { "epoch": 0.45, "learning_rate": 9.038731727722179e-07, "logits/chosen": -6.139837741851807, "logits/rejected": -6.023964881896973, "logps/chosen": -509.24053955078125, "logps/rejected": -188.51577758789062, "loss": 0.131, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7774739265441895, "rewards/margins": 6.628616809844971, "rewards/rejected": -3.8511428833007812, "step": 810 }, { "epoch": 0.45, "learning_rate": 9.036078987078803e-07, "logits/chosen": -6.052670478820801, "logits/rejected": -5.961304664611816, "logps/chosen": -276.994140625, "logps/rejected": -152.17947387695312, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 4.778738975524902, "rewards/margins": 7.560942649841309, "rewards/rejected": -2.7822036743164062, "step": 811 }, { "epoch": 0.45, "learning_rate": 9.033422981593179e-07, "logits/chosen": -5.974599838256836, "logits/rejected": -6.029651165008545, "logps/chosen": -329.00750732421875, "logps/rejected": -228.82220458984375, "loss": 0.1732, "rewards/accuracies": 0.875, "rewards/chosen": 3.3766541481018066, "rewards/margins": 6.921587944030762, "rewards/rejected": -3.544933319091797, "step": 812 }, { "epoch": 0.45, "learning_rate": 9.03076371341379e-07, "logits/chosen": -6.009669303894043, "logits/rejected": -6.061643600463867, "logps/chosen": -247.57363891601562, "logps/rejected": -153.5314483642578, "loss": 0.1493, "rewards/accuracies": 1.0, "rewards/chosen": 4.274101257324219, "rewards/margins": 8.207910537719727, "rewards/rejected": -3.9338080883026123, "step": 813 }, { "epoch": 0.45, "learning_rate": 9.02810118469175e-07, "logits/chosen": -6.0681586265563965, "logits/rejected": -6.082386493682861, "logps/chosen": -367.67083740234375, "logps/rejected": -249.68521118164062, "loss": 0.0767, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9960238933563232, "rewards/margins": 8.058995246887207, "rewards/rejected": -4.062971115112305, "step": 814 }, { "epoch": 0.45, "learning_rate": 9.025435397580823e-07, "logits/chosen": -6.010396957397461, "logits/rejected": -5.948957443237305, "logps/chosen": -138.7119598388672, "logps/rejected": -134.96737670898438, "loss": 0.1549, "rewards/accuracies": 0.9375, "rewards/chosen": 2.366211414337158, "rewards/margins": 7.202910423278809, "rewards/rejected": -4.836699485778809, "step": 815 }, { "epoch": 0.45, "learning_rate": 9.022766354237399e-07, "logits/chosen": -6.014671325683594, "logits/rejected": -5.9847493171691895, "logps/chosen": -445.6752624511719, "logps/rejected": -474.88690185546875, "loss": 0.0732, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9629359245300293, "rewards/margins": 6.434779167175293, "rewards/rejected": -3.4718432426452637, "step": 816 }, { "epoch": 0.45, "learning_rate": 9.020094056820506e-07, "logits/chosen": -6.110288619995117, "logits/rejected": -6.155078887939453, "logps/chosen": -292.049072265625, "logps/rejected": -298.9111328125, "loss": 0.1693, "rewards/accuracies": 0.9375, "rewards/chosen": 3.123659372329712, "rewards/margins": 7.967240333557129, "rewards/rejected": -4.843581199645996, "step": 817 }, { "epoch": 0.45, "learning_rate": 9.017418507491805e-07, "logits/chosen": -5.982954978942871, "logits/rejected": -6.185075283050537, "logps/chosen": -227.9599609375, "logps/rejected": -264.676025390625, "loss": 0.1485, "rewards/accuracies": 0.75, "rewards/chosen": 4.369356155395508, "rewards/margins": 9.104026794433594, "rewards/rejected": -4.734671115875244, "step": 818 }, { "epoch": 0.45, "learning_rate": 9.014739708415587e-07, "logits/chosen": -6.056069374084473, "logits/rejected": -6.142364501953125, "logps/chosen": -255.0589141845703, "logps/rejected": -325.45343017578125, "loss": 0.1221, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5547778606414795, "rewards/margins": 8.091358184814453, "rewards/rejected": -5.5365800857543945, "step": 819 }, { "epoch": 0.46, "learning_rate": 9.012057661758768e-07, "logits/chosen": -6.067625045776367, "logits/rejected": -6.168066024780273, "logps/chosen": -221.49362182617188, "logps/rejected": -256.63079833984375, "loss": 0.1028, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6985511779785156, "rewards/margins": 8.74101448059082, "rewards/rejected": -6.0424628257751465, "step": 820 }, { "epoch": 0.46, "learning_rate": 9.009372369690895e-07, "logits/chosen": -6.018408298492432, "logits/rejected": -6.081243991851807, "logps/chosen": -246.72247314453125, "logps/rejected": -331.26336669921875, "loss": 0.2447, "rewards/accuracies": 0.9375, "rewards/chosen": 1.838474988937378, "rewards/margins": 6.400204658508301, "rewards/rejected": -4.561729907989502, "step": 821 }, { "epoch": 0.46, "learning_rate": 9.006683834384141e-07, "logits/chosen": -6.095917701721191, "logits/rejected": -6.032547950744629, "logps/chosen": -214.31826782226562, "logps/rejected": -139.84445190429688, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": 4.227230072021484, "rewards/margins": 8.729194641113281, "rewards/rejected": -4.501965045928955, "step": 822 }, { "epoch": 0.46, "learning_rate": 9.003992058013301e-07, "logits/chosen": -6.074545383453369, "logits/rejected": -6.108537673950195, "logps/chosen": -432.37042236328125, "logps/rejected": -192.76235961914062, "loss": 0.0713, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2434871196746826, "rewards/margins": 7.2001800537109375, "rewards/rejected": -3.956692695617676, "step": 823 }, { "epoch": 0.46, "learning_rate": 9.001297042755789e-07, "logits/chosen": -6.060349464416504, "logits/rejected": -6.112211227416992, "logps/chosen": -234.55409240722656, "logps/rejected": -264.90887451171875, "loss": 0.1596, "rewards/accuracies": 1.0, "rewards/chosen": 3.580644369125366, "rewards/margins": 5.902409553527832, "rewards/rejected": -2.321765422821045, "step": 824 }, { "epoch": 0.46, "learning_rate": 8.998598790791642e-07, "logits/chosen": -6.103640556335449, "logits/rejected": -6.317295074462891, "logps/chosen": -214.5191650390625, "logps/rejected": -288.4006652832031, "loss": 0.1814, "rewards/accuracies": 1.0, "rewards/chosen": 3.1618480682373047, "rewards/margins": 10.300455093383789, "rewards/rejected": -7.138606071472168, "step": 825 }, { "epoch": 0.46, "learning_rate": 8.995897304303518e-07, "logits/chosen": -6.030210971832275, "logits/rejected": -5.991599082946777, "logps/chosen": -482.08587646484375, "logps/rejected": -181.41043090820312, "loss": 0.1382, "rewards/accuracies": 1.0, "rewards/chosen": 4.10278844833374, "rewards/margins": 6.295504570007324, "rewards/rejected": -2.192716121673584, "step": 826 }, { "epoch": 0.46, "learning_rate": 8.993192585476685e-07, "logits/chosen": -6.024294376373291, "logits/rejected": -6.009075164794922, "logps/chosen": -353.82904052734375, "logps/rejected": -194.31808471679688, "loss": 0.06, "rewards/accuracies": 0.875, "rewards/chosen": 6.183791637420654, "rewards/margins": 9.272157669067383, "rewards/rejected": -3.0883665084838867, "step": 827 }, { "epoch": 0.46, "learning_rate": 8.990484636499029e-07, "logits/chosen": -6.132922172546387, "logits/rejected": -6.103938102722168, "logps/chosen": -277.95849609375, "logps/rejected": -176.02249145507812, "loss": 0.1741, "rewards/accuracies": 0.9375, "rewards/chosen": 5.160196304321289, "rewards/margins": 8.674287796020508, "rewards/rejected": -3.514092206954956, "step": 828 }, { "epoch": 0.46, "learning_rate": 8.98777345956105e-07, "logits/chosen": -5.93393087387085, "logits/rejected": -5.989589691162109, "logps/chosen": -156.47589111328125, "logps/rejected": -286.7480163574219, "loss": 0.1017, "rewards/accuracies": 0.9375, "rewards/chosen": 1.422878384590149, "rewards/margins": 8.155950546264648, "rewards/rejected": -6.733072280883789, "step": 829 }, { "epoch": 0.46, "learning_rate": 8.985059056855857e-07, "logits/chosen": -6.077751636505127, "logits/rejected": -6.135644912719727, "logps/chosen": -250.6182098388672, "logps/rejected": -219.73550415039062, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": 4.847991943359375, "rewards/margins": 10.20809555053711, "rewards/rejected": -5.360104084014893, "step": 830 }, { "epoch": 0.46, "learning_rate": 8.982341430579173e-07, "logits/chosen": -5.976973533630371, "logits/rejected": -6.0483880043029785, "logps/chosen": -321.6473083496094, "logps/rejected": -178.33245849609375, "loss": 0.0794, "rewards/accuracies": 1.0, "rewards/chosen": 3.2920241355895996, "rewards/margins": 8.804765701293945, "rewards/rejected": -5.5127410888671875, "step": 831 }, { "epoch": 0.46, "learning_rate": 8.979620582929317e-07, "logits/chosen": -6.038172721862793, "logits/rejected": -6.088642597198486, "logps/chosen": -377.7230529785156, "logps/rejected": -214.09056091308594, "loss": 0.1045, "rewards/accuracies": 0.9375, "rewards/chosen": 4.64923095703125, "rewards/margins": 8.043447494506836, "rewards/rejected": -3.394216775894165, "step": 832 }, { "epoch": 0.46, "learning_rate": 8.976896516107229e-07, "logits/chosen": -6.085821628570557, "logits/rejected": -6.116927623748779, "logps/chosen": -302.36505126953125, "logps/rejected": -224.56961059570312, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": 4.948205471038818, "rewards/margins": 8.288524627685547, "rewards/rejected": -3.3403189182281494, "step": 833 }, { "epoch": 0.46, "learning_rate": 8.974169232316444e-07, "logits/chosen": -6.0636372566223145, "logits/rejected": -5.933193206787109, "logps/chosen": -280.99468994140625, "logps/rejected": -161.44189453125, "loss": 0.1741, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5454301834106445, "rewards/margins": 5.107020378112793, "rewards/rejected": -1.5615899562835693, "step": 834 }, { "epoch": 0.46, "learning_rate": 8.971438733763099e-07, "logits/chosen": -6.161956310272217, "logits/rejected": -6.098660469055176, "logps/chosen": -245.61074829101562, "logps/rejected": -217.31849670410156, "loss": 0.1069, "rewards/accuracies": 1.0, "rewards/chosen": 4.743899822235107, "rewards/margins": 10.096986770629883, "rewards/rejected": -5.353087425231934, "step": 835 }, { "epoch": 0.46, "learning_rate": 8.968705022655936e-07, "logits/chosen": -6.088655471801758, "logits/rejected": -6.110435485839844, "logps/chosen": -283.7930908203125, "logps/rejected": -188.38482666015625, "loss": 0.1001, "rewards/accuracies": 1.0, "rewards/chosen": 3.733837604522705, "rewards/margins": 7.51103401184082, "rewards/rejected": -3.7771966457366943, "step": 836 }, { "epoch": 0.46, "learning_rate": 8.96596810120629e-07, "logits/chosen": -6.09984016418457, "logits/rejected": -5.9510393142700195, "logps/chosen": -210.13055419921875, "logps/rejected": -160.10498046875, "loss": 0.1194, "rewards/accuracies": 0.8125, "rewards/chosen": 2.2827773094177246, "rewards/margins": 6.605005741119385, "rewards/rejected": -4.322228908538818, "step": 837 }, { "epoch": 0.47, "learning_rate": 8.9632279716281e-07, "logits/chosen": -5.930235862731934, "logits/rejected": -5.9347453117370605, "logps/chosen": -431.6900939941406, "logps/rejected": -400.98236083984375, "loss": 0.1655, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8633437156677246, "rewards/margins": 6.648489952087402, "rewards/rejected": -3.785146474838257, "step": 838 }, { "epoch": 0.47, "learning_rate": 8.960484636137893e-07, "logits/chosen": -6.006452560424805, "logits/rejected": -5.954592227935791, "logps/chosen": -228.16281127929688, "logps/rejected": -311.282958984375, "loss": 0.0747, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2524335384368896, "rewards/margins": 8.951729774475098, "rewards/rejected": -5.699295997619629, "step": 839 }, { "epoch": 0.47, "learning_rate": 8.957738096954794e-07, "logits/chosen": -6.2062907218933105, "logits/rejected": -6.134960174560547, "logps/chosen": -297.9073791503906, "logps/rejected": -254.70343017578125, "loss": 0.2157, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2301764488220215, "rewards/margins": 8.098063468933105, "rewards/rejected": -4.867886543273926, "step": 840 }, { "epoch": 0.47, "learning_rate": 8.954988356300516e-07, "logits/chosen": -6.094696998596191, "logits/rejected": -6.06779146194458, "logps/chosen": -290.40960693359375, "logps/rejected": -164.10546875, "loss": 0.1067, "rewards/accuracies": 1.0, "rewards/chosen": 4.550548076629639, "rewards/margins": 9.594438552856445, "rewards/rejected": -5.043890476226807, "step": 841 }, { "epoch": 0.47, "learning_rate": 8.952235416399369e-07, "logits/chosen": -6.074103355407715, "logits/rejected": -6.078497409820557, "logps/chosen": -196.5069122314453, "logps/rejected": -185.23458862304688, "loss": 0.073, "rewards/accuracies": 0.875, "rewards/chosen": 1.9345186948776245, "rewards/margins": 6.709598064422607, "rewards/rejected": -4.775079250335693, "step": 842 }, { "epoch": 0.47, "learning_rate": 8.949479279478239e-07, "logits/chosen": -6.090696334838867, "logits/rejected": -6.081409931182861, "logps/chosen": -260.8499755859375, "logps/rejected": -236.8240203857422, "loss": 0.1164, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2225306034088135, "rewards/margins": 5.92186164855957, "rewards/rejected": -2.6993308067321777, "step": 843 }, { "epoch": 0.47, "learning_rate": 8.94671994776661e-07, "logits/chosen": -5.976778507232666, "logits/rejected": -5.962027549743652, "logps/chosen": -300.5640563964844, "logps/rejected": -167.19351196289062, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": 4.849679946899414, "rewards/margins": 7.099732875823975, "rewards/rejected": -2.2500529289245605, "step": 844 }, { "epoch": 0.47, "learning_rate": 8.943957423496541e-07, "logits/chosen": -5.996813774108887, "logits/rejected": -6.019984245300293, "logps/chosen": -309.94171142578125, "logps/rejected": -373.09112548828125, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": 3.9054293632507324, "rewards/margins": 8.184141159057617, "rewards/rejected": -4.278712272644043, "step": 845 }, { "epoch": 0.47, "learning_rate": 8.94119170890268e-07, "logits/chosen": -6.033357620239258, "logits/rejected": -6.10835075378418, "logps/chosen": -207.9352569580078, "logps/rejected": -269.20709228515625, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": 2.1725683212280273, "rewards/margins": 9.261373519897461, "rewards/rejected": -7.088804721832275, "step": 846 }, { "epoch": 0.47, "learning_rate": 8.938422806222252e-07, "logits/chosen": -6.004037380218506, "logits/rejected": -6.0106682777404785, "logps/chosen": -318.23980712890625, "logps/rejected": -271.02825927734375, "loss": 0.1534, "rewards/accuracies": 0.9375, "rewards/chosen": 4.22467041015625, "rewards/margins": 9.750310897827148, "rewards/rejected": -5.525640964508057, "step": 847 }, { "epoch": 0.47, "learning_rate": 8.935650717695063e-07, "logits/chosen": -6.0508832931518555, "logits/rejected": -6.0723090171813965, "logps/chosen": -289.7255554199219, "logps/rejected": -339.43914794921875, "loss": 0.0509, "rewards/accuracies": 0.9375, "rewards/chosen": 1.966230034828186, "rewards/margins": 7.659694194793701, "rewards/rejected": -5.693464279174805, "step": 848 }, { "epoch": 0.47, "learning_rate": 8.932875445563495e-07, "logits/chosen": -6.062553405761719, "logits/rejected": -6.068608283996582, "logps/chosen": -383.65325927734375, "logps/rejected": -372.2225646972656, "loss": 0.4385, "rewards/accuracies": 0.875, "rewards/chosen": 2.3130416870117188, "rewards/margins": 6.117033004760742, "rewards/rejected": -3.8039910793304443, "step": 849 }, { "epoch": 0.47, "learning_rate": 8.930096992072504e-07, "logits/chosen": -6.074253082275391, "logits/rejected": -6.149560928344727, "logps/chosen": -267.385009765625, "logps/rejected": -226.03076171875, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": 2.997344970703125, "rewards/margins": 7.898953914642334, "rewards/rejected": -4.901608943939209, "step": 850 }, { "epoch": 0.47, "learning_rate": 8.927315359469625e-07, "logits/chosen": -5.996990203857422, "logits/rejected": -5.987354278564453, "logps/chosen": -285.66998291015625, "logps/rejected": -193.98257446289062, "loss": 0.1426, "rewards/accuracies": 0.875, "rewards/chosen": 4.157785415649414, "rewards/margins": 6.947266578674316, "rewards/rejected": -2.789480686187744, "step": 851 }, { "epoch": 0.47, "learning_rate": 8.924530550004956e-07, "logits/chosen": -6.1398539543151855, "logits/rejected": -6.018101215362549, "logps/chosen": -189.32716369628906, "logps/rejected": -176.01071166992188, "loss": 0.0988, "rewards/accuracies": 0.9375, "rewards/chosen": 1.095672607421875, "rewards/margins": 6.10011625289917, "rewards/rejected": -5.004443168640137, "step": 852 }, { "epoch": 0.47, "learning_rate": 8.921742565931171e-07, "logits/chosen": -5.992966175079346, "logits/rejected": -6.0677490234375, "logps/chosen": -367.07623291015625, "logps/rejected": -139.62249755859375, "loss": 0.1495, "rewards/accuracies": 1.0, "rewards/chosen": 4.228760242462158, "rewards/margins": 7.382375717163086, "rewards/rejected": -3.153615951538086, "step": 853 }, { "epoch": 0.47, "learning_rate": 8.918951409503512e-07, "logits/chosen": -6.108186721801758, "logits/rejected": -6.132979393005371, "logps/chosen": -276.1746520996094, "logps/rejected": -202.65985107421875, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": 4.819629669189453, "rewards/margins": 10.696340560913086, "rewards/rejected": -5.876711845397949, "step": 854 }, { "epoch": 0.47, "learning_rate": 8.916157082979782e-07, "logits/chosen": -6.071115493774414, "logits/rejected": -6.117913722991943, "logps/chosen": -220.8710479736328, "logps/rejected": -134.0199432373047, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": 3.3267080783843994, "rewards/margins": 6.941452980041504, "rewards/rejected": -3.6147451400756836, "step": 855 }, { "epoch": 0.48, "learning_rate": 8.913359588620356e-07, "logits/chosen": -6.048104763031006, "logits/rejected": -5.972200870513916, "logps/chosen": -230.6852264404297, "logps/rejected": -192.9178466796875, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": 3.5976474285125732, "rewards/margins": 6.45188570022583, "rewards/rejected": -2.854238510131836, "step": 856 }, { "epoch": 0.48, "learning_rate": 8.910558928688164e-07, "logits/chosen": -6.086408615112305, "logits/rejected": -6.104702949523926, "logps/chosen": -250.6977996826172, "logps/rejected": -279.1931457519531, "loss": 0.1709, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8774285316467285, "rewards/margins": 8.45862102508545, "rewards/rejected": -5.581192970275879, "step": 857 }, { "epoch": 0.48, "learning_rate": 8.907755105448703e-07, "logits/chosen": -6.030965328216553, "logits/rejected": -6.03886079788208, "logps/chosen": -215.1977081298828, "logps/rejected": -302.7920837402344, "loss": 0.1766, "rewards/accuracies": 0.875, "rewards/chosen": 1.661760687828064, "rewards/margins": 6.959959030151367, "rewards/rejected": -5.2981977462768555, "step": 858 }, { "epoch": 0.48, "learning_rate": 8.904948121170022e-07, "logits/chosen": -6.08776330947876, "logits/rejected": -6.108325481414795, "logps/chosen": -187.6278839111328, "logps/rejected": -282.58306884765625, "loss": 0.2397, "rewards/accuracies": 0.9375, "rewards/chosen": 1.775031328201294, "rewards/margins": 7.8065314292907715, "rewards/rejected": -6.031500339508057, "step": 859 }, { "epoch": 0.48, "learning_rate": 8.902137978122735e-07, "logits/chosen": -6.059734344482422, "logits/rejected": -5.957641124725342, "logps/chosen": -288.9066162109375, "logps/rejected": -138.70518493652344, "loss": 0.0945, "rewards/accuracies": 1.0, "rewards/chosen": 3.4432544708251953, "rewards/margins": 6.331413269042969, "rewards/rejected": -2.8881587982177734, "step": 860 }, { "epoch": 0.48, "learning_rate": 8.899324678580003e-07, "logits/chosen": -6.090494155883789, "logits/rejected": -6.009712219238281, "logps/chosen": -307.5048522949219, "logps/rejected": -421.40863037109375, "loss": 0.173, "rewards/accuracies": 1.0, "rewards/chosen": 3.1673827171325684, "rewards/margins": 7.173433780670166, "rewards/rejected": -4.0060505867004395, "step": 861 }, { "epoch": 0.48, "learning_rate": 8.896508224817548e-07, "logits/chosen": -6.212299346923828, "logits/rejected": -6.039118766784668, "logps/chosen": -290.01214599609375, "logps/rejected": -217.97506713867188, "loss": 0.1933, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1812667846679688, "rewards/margins": 9.52743148803711, "rewards/rejected": -6.346165657043457, "step": 862 }, { "epoch": 0.48, "learning_rate": 8.893688619113639e-07, "logits/chosen": -6.17421817779541, "logits/rejected": -6.03590202331543, "logps/chosen": -298.06219482421875, "logps/rejected": -144.76226806640625, "loss": 0.1413, "rewards/accuracies": 0.8125, "rewards/chosen": 3.4932968616485596, "rewards/margins": 7.01220703125, "rewards/rejected": -3.5189104080200195, "step": 863 }, { "epoch": 0.48, "learning_rate": 8.890865863749093e-07, "logits/chosen": -6.097796440124512, "logits/rejected": -6.230877876281738, "logps/chosen": -275.94451904296875, "logps/rejected": -271.618896484375, "loss": 0.2146, "rewards/accuracies": 0.75, "rewards/chosen": 0.2711796164512634, "rewards/margins": 5.182984352111816, "rewards/rejected": -4.91180419921875, "step": 864 }, { "epoch": 0.48, "learning_rate": 8.888039961007281e-07, "logits/chosen": -5.948993682861328, "logits/rejected": -6.0035929679870605, "logps/chosen": -344.433349609375, "logps/rejected": -245.10601806640625, "loss": 0.0685, "rewards/accuracies": 0.9375, "rewards/chosen": 2.195500373840332, "rewards/margins": 7.030066013336182, "rewards/rejected": -4.834566116333008, "step": 865 }, { "epoch": 0.48, "learning_rate": 8.885210913174115e-07, "logits/chosen": -6.094549179077148, "logits/rejected": -6.097583770751953, "logps/chosen": -267.3152770996094, "logps/rejected": -161.25608825683594, "loss": 0.1195, "rewards/accuracies": 0.8125, "rewards/chosen": 2.8388049602508545, "rewards/margins": 5.659798622131348, "rewards/rejected": -2.8209939002990723, "step": 866 }, { "epoch": 0.48, "learning_rate": 8.882378722538051e-07, "logits/chosen": -6.056877136230469, "logits/rejected": -6.084977626800537, "logps/chosen": -207.91566467285156, "logps/rejected": -219.05384826660156, "loss": 0.1285, "rewards/accuracies": 0.9375, "rewards/chosen": 2.476167678833008, "rewards/margins": 8.625038146972656, "rewards/rejected": -6.148870468139648, "step": 867 }, { "epoch": 0.48, "learning_rate": 8.87954339139009e-07, "logits/chosen": -6.007232666015625, "logits/rejected": -6.031445503234863, "logps/chosen": -403.05877685546875, "logps/rejected": -377.6606750488281, "loss": 0.1139, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9518800973892212, "rewards/margins": 6.077029228210449, "rewards/rejected": -4.125148773193359, "step": 868 }, { "epoch": 0.48, "learning_rate": 8.876704922023772e-07, "logits/chosen": -6.061877727508545, "logits/rejected": -6.058808326721191, "logps/chosen": -149.4529266357422, "logps/rejected": -296.6505126953125, "loss": 0.1213, "rewards/accuracies": 1.0, "rewards/chosen": 2.0469627380371094, "rewards/margins": 8.95372486114502, "rewards/rejected": -6.90676212310791, "step": 869 }, { "epoch": 0.48, "learning_rate": 8.873863316735175e-07, "logits/chosen": -6.033123970031738, "logits/rejected": -5.967067241668701, "logps/chosen": -239.16061401367188, "logps/rejected": -444.63262939453125, "loss": 0.0877, "rewards/accuracies": 0.9375, "rewards/chosen": 2.343663215637207, "rewards/margins": 7.186145782470703, "rewards/rejected": -4.842482566833496, "step": 870 }, { "epoch": 0.48, "learning_rate": 8.871018577822915e-07, "logits/chosen": -6.073770999908447, "logits/rejected": -6.043847560882568, "logps/chosen": -315.0035400390625, "logps/rejected": -294.74664306640625, "loss": 0.1061, "rewards/accuracies": 0.9375, "rewards/chosen": 3.733703136444092, "rewards/margins": 6.9114580154418945, "rewards/rejected": -3.177755355834961, "step": 871 }, { "epoch": 0.48, "learning_rate": 8.868170707588142e-07, "logits/chosen": -6.116209030151367, "logits/rejected": -6.060998916625977, "logps/chosen": -242.0023193359375, "logps/rejected": -150.129150390625, "loss": 0.2029, "rewards/accuracies": 1.0, "rewards/chosen": 3.7222936153411865, "rewards/margins": 7.097454071044922, "rewards/rejected": -3.3751604557037354, "step": 872 }, { "epoch": 0.48, "learning_rate": 8.865319708334538e-07, "logits/chosen": -5.989092826843262, "logits/rejected": -6.0160698890686035, "logps/chosen": -177.13436889648438, "logps/rejected": -242.78314208984375, "loss": 0.1838, "rewards/accuracies": 0.875, "rewards/chosen": 1.8956981897354126, "rewards/margins": 6.607552528381348, "rewards/rejected": -4.711853981018066, "step": 873 }, { "epoch": 0.49, "learning_rate": 8.862465582368319e-07, "logits/chosen": -6.078321933746338, "logits/rejected": -6.093864440917969, "logps/chosen": -267.6138916015625, "logps/rejected": -234.47877502441406, "loss": 0.0774, "rewards/accuracies": 0.875, "rewards/chosen": 3.9951086044311523, "rewards/margins": 8.656991958618164, "rewards/rejected": -4.661883354187012, "step": 874 }, { "epoch": 0.49, "learning_rate": 8.859608331998229e-07, "logits/chosen": -6.188248157501221, "logits/rejected": -6.067914962768555, "logps/chosen": -205.62510681152344, "logps/rejected": -215.01516723632812, "loss": 0.1454, "rewards/accuracies": 0.875, "rewards/chosen": 2.6416163444519043, "rewards/margins": 8.118297576904297, "rewards/rejected": -5.476680755615234, "step": 875 }, { "epoch": 0.49, "learning_rate": 8.856747959535536e-07, "logits/chosen": -6.082720756530762, "logits/rejected": -6.022894382476807, "logps/chosen": -300.8619384765625, "logps/rejected": -366.0984802246094, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 2.5266611576080322, "rewards/margins": 10.945878982543945, "rewards/rejected": -8.419218063354492, "step": 876 }, { "epoch": 0.49, "learning_rate": 8.853884467294038e-07, "logits/chosen": -6.068367958068848, "logits/rejected": -5.9861602783203125, "logps/chosen": -225.40829467773438, "logps/rejected": -102.93605041503906, "loss": 0.096, "rewards/accuracies": 1.0, "rewards/chosen": 3.8792505264282227, "rewards/margins": 8.380216598510742, "rewards/rejected": -4.5009660720825195, "step": 877 }, { "epoch": 0.49, "learning_rate": 8.851017857590056e-07, "logits/chosen": -6.093664169311523, "logits/rejected": -6.059864044189453, "logps/chosen": -273.3231201171875, "logps/rejected": -215.33779907226562, "loss": 0.1361, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6957316398620605, "rewards/margins": 8.2228364944458, "rewards/rejected": -6.527105331420898, "step": 878 }, { "epoch": 0.49, "learning_rate": 8.84814813274243e-07, "logits/chosen": -5.941673278808594, "logits/rejected": -6.032391548156738, "logps/chosen": -357.34661865234375, "logps/rejected": -414.7855529785156, "loss": 0.0957, "rewards/accuracies": 0.9375, "rewards/chosen": 4.371154308319092, "rewards/margins": 9.112201690673828, "rewards/rejected": -4.741046905517578, "step": 879 }, { "epoch": 0.49, "learning_rate": 8.845275295072523e-07, "logits/chosen": -6.064865589141846, "logits/rejected": -6.048277854919434, "logps/chosen": -237.04574584960938, "logps/rejected": -237.2377166748047, "loss": 0.0738, "rewards/accuracies": 1.0, "rewards/chosen": 3.7268450260162354, "rewards/margins": 7.431762218475342, "rewards/rejected": -3.7049169540405273, "step": 880 }, { "epoch": 0.49, "learning_rate": 8.842399346904214e-07, "logits/chosen": -6.093265056610107, "logits/rejected": -6.145652770996094, "logps/chosen": -259.3392333984375, "logps/rejected": -269.5951232910156, "loss": 0.1937, "rewards/accuracies": 0.875, "rewards/chosen": 3.5797195434570312, "rewards/margins": 8.149308204650879, "rewards/rejected": -4.569588661193848, "step": 881 }, { "epoch": 0.49, "learning_rate": 8.839520290563898e-07, "logits/chosen": -6.097153663635254, "logits/rejected": -6.1163763999938965, "logps/chosen": -364.1341857910156, "logps/rejected": -271.23846435546875, "loss": 0.0918, "rewards/accuracies": 0.9375, "rewards/chosen": 5.773977279663086, "rewards/margins": 9.383922576904297, "rewards/rejected": -3.609945297241211, "step": 882 }, { "epoch": 0.49, "learning_rate": 8.836638128380485e-07, "logits/chosen": -6.065730094909668, "logits/rejected": -6.146162986755371, "logps/chosen": -226.07305908203125, "logps/rejected": -273.28369140625, "loss": 0.1657, "rewards/accuracies": 1.0, "rewards/chosen": 2.1385059356689453, "rewards/margins": 7.277004241943359, "rewards/rejected": -5.138497829437256, "step": 883 }, { "epoch": 0.49, "learning_rate": 8.8337528626854e-07, "logits/chosen": -6.037668704986572, "logits/rejected": -6.099139213562012, "logps/chosen": -354.3026123046875, "logps/rejected": -229.7716522216797, "loss": 0.1118, "rewards/accuracies": 1.0, "rewards/chosen": 5.32166862487793, "rewards/margins": 10.779128074645996, "rewards/rejected": -5.457459449768066, "step": 884 }, { "epoch": 0.49, "learning_rate": 8.830864495812573e-07, "logits/chosen": -6.116034507751465, "logits/rejected": -6.143840789794922, "logps/chosen": -262.42864990234375, "logps/rejected": -199.62425231933594, "loss": 0.1435, "rewards/accuracies": 1.0, "rewards/chosen": 5.51588249206543, "rewards/margins": 10.304750442504883, "rewards/rejected": -4.788867473602295, "step": 885 }, { "epoch": 0.49, "learning_rate": 8.827973030098446e-07, "logits/chosen": -6.056812286376953, "logits/rejected": -6.08138370513916, "logps/chosen": -231.82005310058594, "logps/rejected": -196.2194366455078, "loss": 0.038, "rewards/accuracies": 0.9375, "rewards/chosen": 3.526252269744873, "rewards/margins": 7.472598075866699, "rewards/rejected": -3.946345806121826, "step": 886 }, { "epoch": 0.49, "learning_rate": 8.825078467881968e-07, "logits/chosen": -6.01331090927124, "logits/rejected": -6.167982578277588, "logps/chosen": -221.2479248046875, "logps/rejected": -287.1504821777344, "loss": 0.1263, "rewards/accuracies": 1.0, "rewards/chosen": 3.735262632369995, "rewards/margins": 9.414759635925293, "rewards/rejected": -5.679496765136719, "step": 887 }, { "epoch": 0.49, "learning_rate": 8.822180811504593e-07, "logits/chosen": -6.138681411743164, "logits/rejected": -5.972278594970703, "logps/chosen": -238.31622314453125, "logps/rejected": -116.98393249511719, "loss": 0.0972, "rewards/accuracies": 1.0, "rewards/chosen": 2.3486599922180176, "rewards/margins": 5.919708728790283, "rewards/rejected": -3.5710487365722656, "step": 888 }, { "epoch": 0.49, "learning_rate": 8.819280063310274e-07, "logits/chosen": -5.997912406921387, "logits/rejected": -5.987307548522949, "logps/chosen": -225.22796630859375, "logps/rejected": -221.40158081054688, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": 4.125665664672852, "rewards/margins": 9.687132835388184, "rewards/rejected": -5.561467170715332, "step": 889 }, { "epoch": 0.49, "learning_rate": 8.81637622564547e-07, "logits/chosen": -6.03338623046875, "logits/rejected": -6.134432792663574, "logps/chosen": -350.3159484863281, "logps/rejected": -264.2432861328125, "loss": 0.1674, "rewards/accuracies": 0.9375, "rewards/chosen": 5.125955581665039, "rewards/margins": 8.933907508850098, "rewards/rejected": -3.8079519271850586, "step": 890 }, { "epoch": 0.49, "learning_rate": 8.813469300859134e-07, "logits/chosen": -6.054321765899658, "logits/rejected": -6.13383674621582, "logps/chosen": -259.8888244628906, "logps/rejected": -332.85797119140625, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": 2.384873390197754, "rewards/margins": 11.081871032714844, "rewards/rejected": -8.696996688842773, "step": 891 }, { "epoch": 0.5, "learning_rate": 8.810559291302724e-07, "logits/chosen": -5.961924076080322, "logits/rejected": -6.049365043640137, "logps/chosen": -275.2284851074219, "logps/rejected": -252.3112335205078, "loss": 0.0952, "rewards/accuracies": 0.875, "rewards/chosen": 2.819375514984131, "rewards/margins": 6.897953987121582, "rewards/rejected": -4.078578948974609, "step": 892 }, { "epoch": 0.5, "learning_rate": 8.807646199330185e-07, "logits/chosen": -6.028183937072754, "logits/rejected": -6.200468063354492, "logps/chosen": -197.4179229736328, "logps/rejected": -278.4293212890625, "loss": 0.0966, "rewards/accuracies": 0.8125, "rewards/chosen": 2.5223388671875, "rewards/margins": 7.611967086791992, "rewards/rejected": -5.089629173278809, "step": 893 }, { "epoch": 0.5, "learning_rate": 8.804730027297959e-07, "logits/chosen": -6.03397798538208, "logits/rejected": -6.138638019561768, "logps/chosen": -277.82489013671875, "logps/rejected": -235.16403198242188, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 4.2530670166015625, "rewards/margins": 8.636983871459961, "rewards/rejected": -4.38391637802124, "step": 894 }, { "epoch": 0.5, "learning_rate": 8.801810777564982e-07, "logits/chosen": -6.106746196746826, "logits/rejected": -6.141480445861816, "logps/chosen": -249.22142028808594, "logps/rejected": -199.20787048339844, "loss": 0.198, "rewards/accuracies": 1.0, "rewards/chosen": 3.386754035949707, "rewards/margins": 8.914708137512207, "rewards/rejected": -5.527953624725342, "step": 895 }, { "epoch": 0.5, "learning_rate": 8.798888452492674e-07, "logits/chosen": -6.053955554962158, "logits/rejected": -6.043476104736328, "logps/chosen": -317.4023132324219, "logps/rejected": -152.52667236328125, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": 4.497253894805908, "rewards/margins": 8.641721725463867, "rewards/rejected": -4.144468307495117, "step": 896 }, { "epoch": 0.5, "learning_rate": 8.795963054444947e-07, "logits/chosen": -6.140640735626221, "logits/rejected": -6.178797245025635, "logps/chosen": -335.10504150390625, "logps/rejected": -282.946044921875, "loss": 0.1638, "rewards/accuracies": 1.0, "rewards/chosen": 4.485469818115234, "rewards/margins": 9.718730926513672, "rewards/rejected": -5.2332611083984375, "step": 897 }, { "epoch": 0.5, "learning_rate": 8.793034585788198e-07, "logits/chosen": -6.033228397369385, "logits/rejected": -6.018833160400391, "logps/chosen": -263.0472412109375, "logps/rejected": -176.8916015625, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": 3.570873260498047, "rewards/margins": 7.606788158416748, "rewards/rejected": -4.035914897918701, "step": 898 }, { "epoch": 0.5, "learning_rate": 8.790103048891307e-07, "logits/chosen": -6.188422203063965, "logits/rejected": -6.08475399017334, "logps/chosen": -242.1334991455078, "logps/rejected": -282.0115661621094, "loss": 0.1553, "rewards/accuracies": 1.0, "rewards/chosen": 1.901000738143921, "rewards/margins": 11.144109725952148, "rewards/rejected": -9.243110656738281, "step": 899 }, { "epoch": 0.5, "learning_rate": 8.787168446125637e-07, "logits/chosen": -6.100378036499023, "logits/rejected": -6.15850830078125, "logps/chosen": -306.8464660644531, "logps/rejected": -252.01609802246094, "loss": 0.0957, "rewards/accuracies": 0.9375, "rewards/chosen": 3.413966655731201, "rewards/margins": 8.639751434326172, "rewards/rejected": -5.225784778594971, "step": 900 }, { "epoch": 0.5, "learning_rate": 8.784230779865027e-07, "logits/chosen": -6.128490447998047, "logits/rejected": -6.205029487609863, "logps/chosen": -282.90924072265625, "logps/rejected": -329.27398681640625, "loss": 0.0872, "rewards/accuracies": 0.9375, "rewards/chosen": 3.907926082611084, "rewards/margins": 9.556699752807617, "rewards/rejected": -5.648774147033691, "step": 901 }, { "epoch": 0.5, "learning_rate": 8.781290052485801e-07, "logits/chosen": -6.005298614501953, "logits/rejected": -6.031425476074219, "logps/chosen": -308.5400390625, "logps/rejected": -358.3555908203125, "loss": 0.1292, "rewards/accuracies": 0.9375, "rewards/chosen": 2.160630226135254, "rewards/margins": 7.575787544250488, "rewards/rejected": -5.415157318115234, "step": 902 }, { "epoch": 0.5, "learning_rate": 8.778346266366756e-07, "logits/chosen": -6.088057994842529, "logits/rejected": -6.072858810424805, "logps/chosen": -278.923095703125, "logps/rejected": -240.4122314453125, "loss": 0.071, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1166343688964844, "rewards/margins": 7.554737091064453, "rewards/rejected": -5.4381022453308105, "step": 903 }, { "epoch": 0.5, "learning_rate": 8.77539942388916e-07, "logits/chosen": -6.115749835968018, "logits/rejected": -6.034824371337891, "logps/chosen": -178.51568603515625, "logps/rejected": -137.43978881835938, "loss": 0.1887, "rewards/accuracies": 0.9375, "rewards/chosen": 2.23138165473938, "rewards/margins": 6.533794403076172, "rewards/rejected": -4.302413463592529, "step": 904 }, { "epoch": 0.5, "learning_rate": 8.772449527436758e-07, "logits/chosen": -6.128143310546875, "logits/rejected": -6.112307548522949, "logps/chosen": -294.8726806640625, "logps/rejected": -167.50009155273438, "loss": 0.149, "rewards/accuracies": 0.9375, "rewards/chosen": 3.626821517944336, "rewards/margins": 8.032505989074707, "rewards/rejected": -4.405684471130371, "step": 905 }, { "epoch": 0.5, "learning_rate": 8.769496579395763e-07, "logits/chosen": -6.040465831756592, "logits/rejected": -6.107337951660156, "logps/chosen": -403.17144775390625, "logps/rejected": -217.63473510742188, "loss": 0.0686, "rewards/accuracies": 0.875, "rewards/chosen": 3.429452657699585, "rewards/margins": 6.714498996734619, "rewards/rejected": -3.285046100616455, "step": 906 }, { "epoch": 0.5, "learning_rate": 8.766540582154857e-07, "logits/chosen": -6.209981918334961, "logits/rejected": -6.122732639312744, "logps/chosen": -268.83013916015625, "logps/rejected": -204.02554321289062, "loss": 0.0957, "rewards/accuracies": 1.0, "rewards/chosen": 2.3975720405578613, "rewards/margins": 9.108088493347168, "rewards/rejected": -6.710515975952148, "step": 907 }, { "epoch": 0.5, "learning_rate": 8.76358153810519e-07, "logits/chosen": -6.0865960121154785, "logits/rejected": -6.006633758544922, "logps/chosen": -219.4733428955078, "logps/rejected": -210.09555053710938, "loss": 0.1311, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6497244834899902, "rewards/margins": 5.444741249084473, "rewards/rejected": -2.795016288757324, "step": 908 }, { "epoch": 0.5, "learning_rate": 8.760619449640372e-07, "logits/chosen": -6.1140618324279785, "logits/rejected": -6.215268135070801, "logps/chosen": -292.04638671875, "logps/rejected": -229.26385498046875, "loss": 0.0724, "rewards/accuracies": 0.9375, "rewards/chosen": 3.55850887298584, "rewards/margins": 8.00179672241211, "rewards/rejected": -4.4432878494262695, "step": 909 }, { "epoch": 0.51, "learning_rate": 8.757654319156482e-07, "logits/chosen": -6.061601161956787, "logits/rejected": -6.153131484985352, "logps/chosen": -330.6020202636719, "logps/rejected": -293.7030944824219, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": 6.521590232849121, "rewards/margins": 12.332352638244629, "rewards/rejected": -5.810762405395508, "step": 910 }, { "epoch": 0.51, "learning_rate": 8.754686149052057e-07, "logits/chosen": -6.094214916229248, "logits/rejected": -6.1223883628845215, "logps/chosen": -318.4533996582031, "logps/rejected": -256.71319580078125, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": 3.6365838050842285, "rewards/margins": 8.449272155761719, "rewards/rejected": -4.812687873840332, "step": 911 }, { "epoch": 0.51, "learning_rate": 8.75171494172809e-07, "logits/chosen": -5.9706268310546875, "logits/rejected": -5.995001792907715, "logps/chosen": -361.293212890625, "logps/rejected": -231.8260498046875, "loss": 0.2963, "rewards/accuracies": 0.875, "rewards/chosen": 3.302685022354126, "rewards/margins": 5.798103332519531, "rewards/rejected": -2.495418071746826, "step": 912 }, { "epoch": 0.51, "learning_rate": 8.748740699588033e-07, "logits/chosen": -6.144740581512451, "logits/rejected": -6.106675148010254, "logps/chosen": -191.16729736328125, "logps/rejected": -209.36561584472656, "loss": 0.0858, "rewards/accuracies": 0.875, "rewards/chosen": 0.7277817726135254, "rewards/margins": 7.56052827835083, "rewards/rejected": -6.832746505737305, "step": 913 }, { "epoch": 0.51, "learning_rate": 8.745763425037795e-07, "logits/chosen": -6.1086602210998535, "logits/rejected": -6.099296569824219, "logps/chosen": -245.81532287597656, "logps/rejected": -130.70654296875, "loss": 0.1674, "rewards/accuracies": 0.875, "rewards/chosen": 3.2815845012664795, "rewards/margins": 6.527441501617432, "rewards/rejected": -3.245857000350952, "step": 914 }, { "epoch": 0.51, "learning_rate": 8.742783120485737e-07, "logits/chosen": -6.001606464385986, "logits/rejected": -6.116796493530273, "logps/chosen": -349.0835876464844, "logps/rejected": -233.34864807128906, "loss": 0.1862, "rewards/accuracies": 0.9375, "rewards/chosen": 5.008444786071777, "rewards/margins": 7.512418746948242, "rewards/rejected": -2.503974437713623, "step": 915 }, { "epoch": 0.51, "learning_rate": 8.739799788342668e-07, "logits/chosen": -6.011168003082275, "logits/rejected": -6.080544471740723, "logps/chosen": -218.36395263671875, "logps/rejected": -198.7637939453125, "loss": 0.1876, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7355406284332275, "rewards/margins": 7.8238091468811035, "rewards/rejected": -4.088268280029297, "step": 916 }, { "epoch": 0.51, "learning_rate": 8.736813431021848e-07, "logits/chosen": -6.065601825714111, "logits/rejected": -6.001230239868164, "logps/chosen": -138.3395538330078, "logps/rejected": -135.4157257080078, "loss": 0.1499, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4445664882659912, "rewards/margins": 5.208317756652832, "rewards/rejected": -3.763751268386841, "step": 917 }, { "epoch": 0.51, "learning_rate": 8.733824050938986e-07, "logits/chosen": -6.152259826660156, "logits/rejected": -6.021184921264648, "logps/chosen": -299.0706481933594, "logps/rejected": -131.36074829101562, "loss": 0.0837, "rewards/accuracies": 0.9375, "rewards/chosen": 5.778386116027832, "rewards/margins": 9.117568016052246, "rewards/rejected": -3.3391823768615723, "step": 918 }, { "epoch": 0.51, "learning_rate": 8.730831650512233e-07, "logits/chosen": -6.030030250549316, "logits/rejected": -5.968084335327148, "logps/chosen": -192.6510009765625, "logps/rejected": -100.53213500976562, "loss": 0.0628, "rewards/accuracies": 0.9375, "rewards/chosen": 4.248754978179932, "rewards/margins": 7.358892440795898, "rewards/rejected": -3.110137701034546, "step": 919 }, { "epoch": 0.51, "learning_rate": 8.727836232162186e-07, "logits/chosen": -6.050478935241699, "logits/rejected": -6.092695236206055, "logps/chosen": -358.98065185546875, "logps/rejected": -379.155517578125, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": 3.447890520095825, "rewards/margins": 8.174931526184082, "rewards/rejected": -4.727041244506836, "step": 920 }, { "epoch": 0.51, "learning_rate": 8.724837798311882e-07, "logits/chosen": -6.106820106506348, "logits/rejected": -6.0915422439575195, "logps/chosen": -338.54052734375, "logps/rejected": -137.01055908203125, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/chosen": 3.5342307090759277, "rewards/margins": 7.929624080657959, "rewards/rejected": -4.395393371582031, "step": 921 }, { "epoch": 0.51, "learning_rate": 8.721836351386796e-07, "logits/chosen": -6.052647590637207, "logits/rejected": -6.0215253829956055, "logps/chosen": -237.1790008544922, "logps/rejected": -332.0692138671875, "loss": 0.0883, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3454766273498535, "rewards/margins": 8.252630233764648, "rewards/rejected": -4.907153606414795, "step": 922 }, { "epoch": 0.51, "learning_rate": 8.718831893814842e-07, "logits/chosen": -5.936583042144775, "logits/rejected": -6.060250282287598, "logps/chosen": -280.9782409667969, "logps/rejected": -166.09124755859375, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": 2.0360207557678223, "rewards/margins": 7.002890586853027, "rewards/rejected": -4.966869831085205, "step": 923 }, { "epoch": 0.51, "learning_rate": 8.71582442802637e-07, "logits/chosen": -6.053940773010254, "logits/rejected": -6.077616214752197, "logps/chosen": -232.14279174804688, "logps/rejected": -139.94314575195312, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": 3.0191612243652344, "rewards/margins": 7.4725565910339355, "rewards/rejected": -4.453395366668701, "step": 924 }, { "epoch": 0.51, "learning_rate": 8.712813956454159e-07, "logits/chosen": -6.063910484313965, "logits/rejected": -6.128617763519287, "logps/chosen": -201.27023315429688, "logps/rejected": -358.62750244140625, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": 2.445892333984375, "rewards/margins": 10.706507682800293, "rewards/rejected": -8.260616302490234, "step": 925 }, { "epoch": 0.51, "learning_rate": 8.709800481533427e-07, "logits/chosen": -6.316894054412842, "logits/rejected": -6.102489471435547, "logps/chosen": -340.5710144042969, "logps/rejected": -184.08363342285156, "loss": 0.0666, "rewards/accuracies": 0.9375, "rewards/chosen": 4.644523620605469, "rewards/margins": 8.077905654907227, "rewards/rejected": -3.433382272720337, "step": 926 }, { "epoch": 0.51, "learning_rate": 8.706784005701813e-07, "logits/chosen": -6.040139675140381, "logits/rejected": -6.060258865356445, "logps/chosen": -440.6451416015625, "logps/rejected": -349.844970703125, "loss": 0.1897, "rewards/accuracies": 1.0, "rewards/chosen": 3.587313413619995, "rewards/margins": 9.608963966369629, "rewards/rejected": -6.021650314331055, "step": 927 }, { "epoch": 0.52, "learning_rate": 8.703764531399391e-07, "logits/chosen": -6.062858581542969, "logits/rejected": -6.046159267425537, "logps/chosen": -201.91168212890625, "logps/rejected": -302.52508544921875, "loss": 0.0709, "rewards/accuracies": 1.0, "rewards/chosen": 0.946198582649231, "rewards/margins": 7.712523460388184, "rewards/rejected": -6.766324996948242, "step": 928 }, { "epoch": 0.52, "learning_rate": 8.700742061068656e-07, "logits/chosen": -6.089160442352295, "logits/rejected": -6.141940116882324, "logps/chosen": -298.2307434082031, "logps/rejected": -292.01959228515625, "loss": 0.0922, "rewards/accuracies": 1.0, "rewards/chosen": 4.160654544830322, "rewards/margins": 8.171039581298828, "rewards/rejected": -4.0103840827941895, "step": 929 }, { "epoch": 0.52, "learning_rate": 8.697716597154526e-07, "logits/chosen": -6.002208232879639, "logits/rejected": -5.957746505737305, "logps/chosen": -276.42626953125, "logps/rejected": -182.77490234375, "loss": 0.0759, "rewards/accuracies": 0.875, "rewards/chosen": 2.93318510055542, "rewards/margins": 6.568999290466309, "rewards/rejected": -3.6358141899108887, "step": 930 }, { "epoch": 0.52, "learning_rate": 8.694688142104345e-07, "logits/chosen": -6.1579694747924805, "logits/rejected": -6.119706153869629, "logps/chosen": -410.6029968261719, "logps/rejected": -227.28443908691406, "loss": 0.0957, "rewards/accuracies": 1.0, "rewards/chosen": 5.805343151092529, "rewards/margins": 10.446690559387207, "rewards/rejected": -4.6413469314575195, "step": 931 }, { "epoch": 0.52, "learning_rate": 8.691656698367871e-07, "logits/chosen": -6.059299468994141, "logits/rejected": -6.042227268218994, "logps/chosen": -346.71044921875, "logps/rejected": -221.25111389160156, "loss": 0.1436, "rewards/accuracies": 0.875, "rewards/chosen": 2.4479527473449707, "rewards/margins": 6.135810375213623, "rewards/rejected": -3.687857151031494, "step": 932 }, { "epoch": 0.52, "learning_rate": 8.688622268397285e-07, "logits/chosen": -6.019150257110596, "logits/rejected": -6.085504531860352, "logps/chosen": -178.30239868164062, "logps/rejected": -339.47991943359375, "loss": 0.1499, "rewards/accuracies": 1.0, "rewards/chosen": 1.0961052179336548, "rewards/margins": 9.11821174621582, "rewards/rejected": -8.022106170654297, "step": 933 }, { "epoch": 0.52, "learning_rate": 8.685584854647179e-07, "logits/chosen": -6.080018997192383, "logits/rejected": -6.103069305419922, "logps/chosen": -272.63690185546875, "logps/rejected": -232.5760498046875, "loss": 0.1734, "rewards/accuracies": 1.0, "rewards/chosen": 1.758530855178833, "rewards/margins": 7.831353187561035, "rewards/rejected": -6.072822570800781, "step": 934 }, { "epoch": 0.52, "learning_rate": 8.682544459574561e-07, "logits/chosen": -6.161831378936768, "logits/rejected": -6.032742977142334, "logps/chosen": -333.3594970703125, "logps/rejected": -436.1448974609375, "loss": 0.1803, "rewards/accuracies": 0.875, "rewards/chosen": 1.4744679927825928, "rewards/margins": 5.274588584899902, "rewards/rejected": -3.8001205921173096, "step": 935 }, { "epoch": 0.52, "learning_rate": 8.679501085638851e-07, "logits/chosen": -6.122840881347656, "logits/rejected": -6.062246799468994, "logps/chosen": -177.54730224609375, "logps/rejected": -224.76918029785156, "loss": 0.0795, "rewards/accuracies": 0.875, "rewards/chosen": 1.0517128705978394, "rewards/margins": 8.545878410339355, "rewards/rejected": -7.494165420532227, "step": 936 }, { "epoch": 0.52, "learning_rate": 8.676454735301877e-07, "logits/chosen": -6.125982284545898, "logits/rejected": -6.197983741760254, "logps/chosen": -152.5364990234375, "logps/rejected": -183.06317138671875, "loss": 0.1659, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4710800647735596, "rewards/margins": 6.719622611999512, "rewards/rejected": -5.248542308807373, "step": 937 }, { "epoch": 0.52, "learning_rate": 8.673405411027877e-07, "logits/chosen": -6.112830638885498, "logits/rejected": -5.982642650604248, "logps/chosen": -257.4549255371094, "logps/rejected": -143.47872924804688, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": 2.5017428398132324, "rewards/margins": 8.791422843933105, "rewards/rejected": -6.289680480957031, "step": 938 }, { "epoch": 0.52, "learning_rate": 8.670353115283491e-07, "logits/chosen": -6.174715995788574, "logits/rejected": -6.09304141998291, "logps/chosen": -187.88189697265625, "logps/rejected": -208.05516052246094, "loss": 0.1059, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5768003463745117, "rewards/margins": 8.069175720214844, "rewards/rejected": -6.492375373840332, "step": 939 }, { "epoch": 0.52, "learning_rate": 8.667297850537767e-07, "logits/chosen": -6.122961044311523, "logits/rejected": -6.037968635559082, "logps/chosen": -342.246337890625, "logps/rejected": -179.9786376953125, "loss": 0.0931, "rewards/accuracies": 0.9375, "rewards/chosen": 3.854971170425415, "rewards/margins": 7.382735252380371, "rewards/rejected": -3.527763843536377, "step": 940 }, { "epoch": 0.52, "learning_rate": 8.66423961926215e-07, "logits/chosen": -6.092538833618164, "logits/rejected": -6.142636299133301, "logps/chosen": -254.5118865966797, "logps/rejected": -216.10379028320312, "loss": 0.1294, "rewards/accuracies": 0.9375, "rewards/chosen": 3.080872058868408, "rewards/margins": 7.726169586181641, "rewards/rejected": -4.645297527313232, "step": 941 }, { "epoch": 0.52, "learning_rate": 8.661178423930491e-07, "logits/chosen": -6.201349258422852, "logits/rejected": -6.135807037353516, "logps/chosen": -344.016845703125, "logps/rejected": -455.29791259765625, "loss": 0.1489, "rewards/accuracies": 0.9375, "rewards/chosen": 3.373141288757324, "rewards/margins": 8.563385963439941, "rewards/rejected": -5.190244674682617, "step": 942 }, { "epoch": 0.52, "learning_rate": 8.658114267019031e-07, "logits/chosen": -6.082934379577637, "logits/rejected": -6.208596229553223, "logps/chosen": -340.862548828125, "logps/rejected": -214.89596557617188, "loss": 0.2033, "rewards/accuracies": 0.9375, "rewards/chosen": 5.63924503326416, "rewards/margins": 8.377729415893555, "rewards/rejected": -2.7384846210479736, "step": 943 }, { "epoch": 0.52, "learning_rate": 8.655047151006411e-07, "logits/chosen": -6.174600601196289, "logits/rejected": -6.150313377380371, "logps/chosen": -165.252685546875, "logps/rejected": -204.98826599121094, "loss": 0.1542, "rewards/accuracies": 1.0, "rewards/chosen": 1.4737968444824219, "rewards/margins": 6.319183826446533, "rewards/rejected": -4.8453874588012695, "step": 944 }, { "epoch": 0.52, "learning_rate": 8.651977078373669e-07, "logits/chosen": -6.054029941558838, "logits/rejected": -6.17495059967041, "logps/chosen": -246.63986206054688, "logps/rejected": -215.3654022216797, "loss": 0.0924, "rewards/accuracies": 0.9375, "rewards/chosen": 3.396981954574585, "rewards/margins": 8.3783540725708, "rewards/rejected": -4.981372833251953, "step": 945 }, { "epoch": 0.53, "learning_rate": 8.648904051604226e-07, "logits/chosen": -6.0722198486328125, "logits/rejected": -6.117853164672852, "logps/chosen": -580.7009887695312, "logps/rejected": -308.53533935546875, "loss": 0.142, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6519052982330322, "rewards/margins": 8.548792839050293, "rewards/rejected": -4.89688777923584, "step": 946 }, { "epoch": 0.53, "learning_rate": 8.645828073183901e-07, "logits/chosen": -5.933415412902832, "logits/rejected": -5.959549903869629, "logps/chosen": -143.9924774169922, "logps/rejected": -127.21443176269531, "loss": 0.0474, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7742042541503906, "rewards/margins": 5.818575859069824, "rewards/rejected": -4.044371128082275, "step": 947 }, { "epoch": 0.53, "learning_rate": 8.642749145600896e-07, "logits/chosen": -6.162103652954102, "logits/rejected": -6.0052690505981445, "logps/chosen": -276.9605712890625, "logps/rejected": -158.9975128173828, "loss": 0.0775, "rewards/accuracies": 1.0, "rewards/chosen": 3.3092446327209473, "rewards/margins": 7.596729278564453, "rewards/rejected": -4.287485122680664, "step": 948 }, { "epoch": 0.53, "learning_rate": 8.639667271345798e-07, "logits/chosen": -6.0828447341918945, "logits/rejected": -6.206306457519531, "logps/chosen": -204.82237243652344, "logps/rejected": -302.0823974609375, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": 3.2563138008117676, "rewards/margins": 9.769547462463379, "rewards/rejected": -6.5132341384887695, "step": 949 }, { "epoch": 0.53, "learning_rate": 8.636582452911581e-07, "logits/chosen": -5.999475955963135, "logits/rejected": -6.0081024169921875, "logps/chosen": -282.0269775390625, "logps/rejected": -162.02366638183594, "loss": 0.0816, "rewards/accuracies": 0.875, "rewards/chosen": 4.333992004394531, "rewards/margins": 7.70344352722168, "rewards/rejected": -3.3694522380828857, "step": 950 }, { "epoch": 0.53, "learning_rate": 8.633494692793599e-07, "logits/chosen": -6.246095657348633, "logits/rejected": -6.164856910705566, "logps/chosen": -208.70684814453125, "logps/rejected": -211.98358154296875, "loss": 0.1626, "rewards/accuracies": 1.0, "rewards/chosen": 2.5909364223480225, "rewards/margins": 8.701234817504883, "rewards/rejected": -6.110299110412598, "step": 951 }, { "epoch": 0.53, "learning_rate": 8.630403993489586e-07, "logits/chosen": -6.086622714996338, "logits/rejected": -6.057507514953613, "logps/chosen": -337.48394775390625, "logps/rejected": -156.041015625, "loss": 0.1555, "rewards/accuracies": 1.0, "rewards/chosen": 4.898967742919922, "rewards/margins": 9.65659236907959, "rewards/rejected": -4.75762414932251, "step": 952 }, { "epoch": 0.53, "learning_rate": 8.627310357499651e-07, "logits/chosen": -6.035940647125244, "logits/rejected": -6.094386577606201, "logps/chosen": -246.42877197265625, "logps/rejected": -233.535888671875, "loss": 0.0943, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4232916831970215, "rewards/margins": 8.075368881225586, "rewards/rejected": -4.652077674865723, "step": 953 }, { "epoch": 0.53, "learning_rate": 8.624213787326282e-07, "logits/chosen": -6.1344757080078125, "logits/rejected": -6.109796047210693, "logps/chosen": -211.04405212402344, "logps/rejected": -415.5338134765625, "loss": 0.0576, "rewards/accuracies": 0.9375, "rewards/chosen": 1.3027710914611816, "rewards/margins": 7.355828762054443, "rewards/rejected": -6.053057670593262, "step": 954 }, { "epoch": 0.53, "learning_rate": 8.62111428547434e-07, "logits/chosen": -6.097352981567383, "logits/rejected": -6.047719478607178, "logps/chosen": -276.1925048828125, "logps/rejected": -149.64593505859375, "loss": 0.1155, "rewards/accuracies": 1.0, "rewards/chosen": 5.768824577331543, "rewards/margins": 9.349690437316895, "rewards/rejected": -3.5808653831481934, "step": 955 }, { "epoch": 0.53, "learning_rate": 8.618011854451054e-07, "logits/chosen": -6.00544548034668, "logits/rejected": -5.988583087921143, "logps/chosen": -223.61776733398438, "logps/rejected": -233.53733825683594, "loss": 0.1749, "rewards/accuracies": 1.0, "rewards/chosen": 3.4449093341827393, "rewards/margins": 7.439711570739746, "rewards/rejected": -3.994802474975586, "step": 956 }, { "epoch": 0.53, "learning_rate": 8.614906496766028e-07, "logits/chosen": -6.216385841369629, "logits/rejected": -6.126654624938965, "logps/chosen": -270.7296142578125, "logps/rejected": -185.99986267089844, "loss": 0.1755, "rewards/accuracies": 1.0, "rewards/chosen": 4.267377853393555, "rewards/margins": 10.162614822387695, "rewards/rejected": -5.895237445831299, "step": 957 }, { "epoch": 0.53, "learning_rate": 8.611798214931228e-07, "logits/chosen": -6.111264228820801, "logits/rejected": -6.059797763824463, "logps/chosen": -206.81466674804688, "logps/rejected": -249.1523895263672, "loss": 0.1443, "rewards/accuracies": 0.875, "rewards/chosen": 2.3787147998809814, "rewards/margins": 9.979655265808105, "rewards/rejected": -7.600940704345703, "step": 958 }, { "epoch": 0.53, "learning_rate": 8.608687011460988e-07, "logits/chosen": -6.110211372375488, "logits/rejected": -6.025620937347412, "logps/chosen": -167.7454376220703, "logps/rejected": -145.51914978027344, "loss": 0.1211, "rewards/accuracies": 0.875, "rewards/chosen": 1.9318939447402954, "rewards/margins": 7.474842071533203, "rewards/rejected": -5.542947769165039, "step": 959 }, { "epoch": 0.53, "learning_rate": 8.605572888872008e-07, "logits/chosen": -6.080013751983643, "logits/rejected": -6.142392158508301, "logps/chosen": -315.021728515625, "logps/rejected": -232.65914916992188, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": 3.634575128555298, "rewards/margins": 8.021797180175781, "rewards/rejected": -4.387221813201904, "step": 960 }, { "epoch": 0.53, "learning_rate": 8.602455849683342e-07, "logits/chosen": -5.989071846008301, "logits/rejected": -6.000075817108154, "logps/chosen": -213.77978515625, "logps/rejected": -179.39163208007812, "loss": 0.2201, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9505717754364014, "rewards/margins": 4.727374076843262, "rewards/rejected": -2.7768023014068604, "step": 961 }, { "epoch": 0.53, "learning_rate": 8.599335896416411e-07, "logits/chosen": -6.129282474517822, "logits/rejected": -5.94862174987793, "logps/chosen": -261.0104064941406, "logps/rejected": -143.31610107421875, "loss": 0.1396, "rewards/accuracies": 0.875, "rewards/chosen": 2.249391555786133, "rewards/margins": 6.198086738586426, "rewards/rejected": -3.9486942291259766, "step": 962 }, { "epoch": 0.53, "learning_rate": 8.59621303159499e-07, "logits/chosen": -6.110588073730469, "logits/rejected": -6.089471340179443, "logps/chosen": -247.384765625, "logps/rejected": -208.1514434814453, "loss": 0.1166, "rewards/accuracies": 1.0, "rewards/chosen": 3.66013240814209, "rewards/margins": 8.128166198730469, "rewards/rejected": -4.468033313751221, "step": 963 }, { "epoch": 0.54, "learning_rate": 8.593087257745206e-07, "logits/chosen": -6.00665283203125, "logits/rejected": -6.093001365661621, "logps/chosen": -318.9318542480469, "logps/rejected": -265.8369140625, "loss": 0.0832, "rewards/accuracies": 1.0, "rewards/chosen": 4.532879829406738, "rewards/margins": 10.096622467041016, "rewards/rejected": -5.563742637634277, "step": 964 }, { "epoch": 0.54, "learning_rate": 8.589958577395546e-07, "logits/chosen": -6.080345153808594, "logits/rejected": -6.052530765533447, "logps/chosen": -272.8042297363281, "logps/rejected": -288.50555419921875, "loss": 0.0811, "rewards/accuracies": 0.9375, "rewards/chosen": 3.050786256790161, "rewards/margins": 8.628995895385742, "rewards/rejected": -5.578209400177002, "step": 965 }, { "epoch": 0.54, "learning_rate": 8.586826993076845e-07, "logits/chosen": -6.082822322845459, "logits/rejected": -6.089057445526123, "logps/chosen": -218.76824951171875, "logps/rejected": -176.22447204589844, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": 3.1388421058654785, "rewards/margins": 8.552971839904785, "rewards/rejected": -5.414130210876465, "step": 966 }, { "epoch": 0.54, "learning_rate": 8.583692507322282e-07, "logits/chosen": -6.086842060089111, "logits/rejected": -6.037171363830566, "logps/chosen": -440.9912109375, "logps/rejected": -270.2554931640625, "loss": 0.1214, "rewards/accuracies": 0.9375, "rewards/chosen": 2.441267967224121, "rewards/margins": 7.254035949707031, "rewards/rejected": -4.81276798248291, "step": 967 }, { "epoch": 0.54, "learning_rate": 8.580555122667392e-07, "logits/chosen": -6.055644989013672, "logits/rejected": -6.133266448974609, "logps/chosen": -213.13555908203125, "logps/rejected": -309.33026123046875, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": 2.163424015045166, "rewards/margins": 9.812017440795898, "rewards/rejected": -7.648592948913574, "step": 968 }, { "epoch": 0.54, "learning_rate": 8.577414841650048e-07, "logits/chosen": -6.094929218292236, "logits/rejected": -6.094304084777832, "logps/chosen": -183.44973754882812, "logps/rejected": -228.74893188476562, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": 1.5125319957733154, "rewards/margins": 8.785411834716797, "rewards/rejected": -7.2728800773620605, "step": 969 }, { "epoch": 0.54, "learning_rate": 8.574271666810469e-07, "logits/chosen": -6.062626838684082, "logits/rejected": -6.002470016479492, "logps/chosen": -275.470703125, "logps/rejected": -200.9104766845703, "loss": 0.0917, "rewards/accuracies": 0.9375, "rewards/chosen": 5.411420822143555, "rewards/margins": 10.54881477355957, "rewards/rejected": -5.137393474578857, "step": 970 }, { "epoch": 0.54, "learning_rate": 8.571125600691213e-07, "logits/chosen": -6.174204349517822, "logits/rejected": -6.127342224121094, "logps/chosen": -361.8542785644531, "logps/rejected": -232.205078125, "loss": 0.1581, "rewards/accuracies": 0.9375, "rewards/chosen": 5.024289131164551, "rewards/margins": 8.525688171386719, "rewards/rejected": -3.5013985633850098, "step": 971 }, { "epoch": 0.54, "learning_rate": 8.56797664583718e-07, "logits/chosen": -6.184189796447754, "logits/rejected": -6.112346649169922, "logps/chosen": -207.99530029296875, "logps/rejected": -165.47109985351562, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": 1.5036015510559082, "rewards/margins": 7.740414619445801, "rewards/rejected": -6.236813545227051, "step": 972 }, { "epoch": 0.54, "learning_rate": 8.564824804795607e-07, "logits/chosen": -5.99798059463501, "logits/rejected": -6.16435432434082, "logps/chosen": -257.35113525390625, "logps/rejected": -174.77017211914062, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 3.922835350036621, "rewards/margins": 7.437314987182617, "rewards/rejected": -3.514479875564575, "step": 973 }, { "epoch": 0.54, "learning_rate": 8.561670080116056e-07, "logits/chosen": -6.037534713745117, "logits/rejected": -6.142971038818359, "logps/chosen": -291.63165283203125, "logps/rejected": -262.9277038574219, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": 1.4238884449005127, "rewards/margins": 7.499478340148926, "rewards/rejected": -6.075590133666992, "step": 974 }, { "epoch": 0.54, "learning_rate": 8.558512474350436e-07, "logits/chosen": -6.1492438316345215, "logits/rejected": -6.115976810455322, "logps/chosen": -200.95489501953125, "logps/rejected": -163.6702423095703, "loss": 0.1718, "rewards/accuracies": 1.0, "rewards/chosen": 3.5612263679504395, "rewards/margins": 8.151769638061523, "rewards/rejected": -4.590542793273926, "step": 975 }, { "epoch": 0.54, "learning_rate": 8.555351990052979e-07, "logits/chosen": -6.122307777404785, "logits/rejected": -6.025392532348633, "logps/chosen": -294.64691162109375, "logps/rejected": -172.58770751953125, "loss": 0.1011, "rewards/accuracies": 0.9375, "rewards/chosen": 4.528719902038574, "rewards/margins": 9.169065475463867, "rewards/rejected": -4.640345573425293, "step": 976 }, { "epoch": 0.54, "learning_rate": 8.552188629780244e-07, "logits/chosen": -6.076466083526611, "logits/rejected": -6.09858512878418, "logps/chosen": -281.52606201171875, "logps/rejected": -224.40155029296875, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": 4.6139936447143555, "rewards/margins": 9.807899475097656, "rewards/rejected": -5.193905830383301, "step": 977 }, { "epoch": 0.54, "learning_rate": 8.549022396091117e-07, "logits/chosen": -6.118803977966309, "logits/rejected": -6.1203508377075195, "logps/chosen": -276.19525146484375, "logps/rejected": -188.5348358154297, "loss": 0.2514, "rewards/accuracies": 1.0, "rewards/chosen": 2.641049861907959, "rewards/margins": 8.153768539428711, "rewards/rejected": -5.512718200683594, "step": 978 }, { "epoch": 0.54, "learning_rate": 8.545853291546814e-07, "logits/chosen": -6.11532735824585, "logits/rejected": -6.188718795776367, "logps/chosen": -253.86422729492188, "logps/rejected": -167.2735595703125, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": 5.1261186599731445, "rewards/margins": 9.0311279296875, "rewards/rejected": -3.9050092697143555, "step": 979 }, { "epoch": 0.54, "learning_rate": 8.542681318710867e-07, "logits/chosen": -6.079742908477783, "logits/rejected": -6.080710411071777, "logps/chosen": -146.07003784179688, "logps/rejected": -294.6082458496094, "loss": 0.154, "rewards/accuracies": 1.0, "rewards/chosen": 0.40471982955932617, "rewards/margins": 7.201087951660156, "rewards/rejected": -6.796368598937988, "step": 980 }, { "epoch": 0.54, "learning_rate": 8.539506480149129e-07, "logits/chosen": -6.065600872039795, "logits/rejected": -6.108728408813477, "logps/chosen": -232.643798828125, "logps/rejected": -238.24298095703125, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": 4.922659873962402, "rewards/margins": 7.674108982086182, "rewards/rejected": -2.7514493465423584, "step": 981 }, { "epoch": 0.55, "learning_rate": 8.536328778429776e-07, "logits/chosen": -6.081585884094238, "logits/rejected": -6.06520938873291, "logps/chosen": -236.771484375, "logps/rejected": -177.97796630859375, "loss": 0.114, "rewards/accuracies": 1.0, "rewards/chosen": 2.2438714504241943, "rewards/margins": 7.408615589141846, "rewards/rejected": -5.164743900299072, "step": 982 }, { "epoch": 0.55, "learning_rate": 8.533148216123293e-07, "logits/chosen": -6.0889129638671875, "logits/rejected": -6.087824821472168, "logps/chosen": -327.41827392578125, "logps/rejected": -238.63668823242188, "loss": 0.0975, "rewards/accuracies": 0.9375, "rewards/chosen": 3.272712230682373, "rewards/margins": 9.852269172668457, "rewards/rejected": -6.579556941986084, "step": 983 }, { "epoch": 0.55, "learning_rate": 8.529964795802484e-07, "logits/chosen": -6.146811008453369, "logits/rejected": -6.1106743812561035, "logps/chosen": -258.4364013671875, "logps/rejected": -346.88543701171875, "loss": 0.09, "rewards/accuracies": 0.875, "rewards/chosen": 2.4260454177856445, "rewards/margins": 9.075974464416504, "rewards/rejected": -6.649929046630859, "step": 984 }, { "epoch": 0.55, "learning_rate": 8.526778520042465e-07, "logits/chosen": -6.099720478057861, "logits/rejected": -6.063031196594238, "logps/chosen": -256.7463684082031, "logps/rejected": -208.32070922851562, "loss": 0.1502, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6125946044921875, "rewards/margins": 7.651473045349121, "rewards/rejected": -5.038878440856934, "step": 985 }, { "epoch": 0.55, "learning_rate": 8.523589391420659e-07, "logits/chosen": -6.089155673980713, "logits/rejected": -6.123330116271973, "logps/chosen": -243.88682556152344, "logps/rejected": -233.28021240234375, "loss": 0.1216, "rewards/accuracies": 0.875, "rewards/chosen": 1.892558217048645, "rewards/margins": 6.138571739196777, "rewards/rejected": -4.246013164520264, "step": 986 }, { "epoch": 0.55, "learning_rate": 8.520397412516795e-07, "logits/chosen": -6.201366424560547, "logits/rejected": -6.047648906707764, "logps/chosen": -201.57969665527344, "logps/rejected": -161.47064208984375, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": 2.476132869720459, "rewards/margins": 8.739216804504395, "rewards/rejected": -6.2630839347839355, "step": 987 }, { "epoch": 0.55, "learning_rate": 8.517202585912915e-07, "logits/chosen": -6.296269416809082, "logits/rejected": -6.208191394805908, "logps/chosen": -287.322265625, "logps/rejected": -246.34645080566406, "loss": 0.1518, "rewards/accuracies": 0.875, "rewards/chosen": 3.1438238620758057, "rewards/margins": 7.408665657043457, "rewards/rejected": -4.2648420333862305, "step": 988 }, { "epoch": 0.55, "learning_rate": 8.514004914193358e-07, "logits/chosen": -6.090151309967041, "logits/rejected": -6.049017906188965, "logps/chosen": -373.2897033691406, "logps/rejected": -359.7154235839844, "loss": 0.1372, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4075169563293457, "rewards/margins": 6.340484619140625, "rewards/rejected": -2.9329676628112793, "step": 989 }, { "epoch": 0.55, "learning_rate": 8.510804399944767e-07, "logits/chosen": -6.078717231750488, "logits/rejected": -6.2108473777771, "logps/chosen": -297.4354553222656, "logps/rejected": -237.14596557617188, "loss": 0.0947, "rewards/accuracies": 0.875, "rewards/chosen": 4.216681480407715, "rewards/margins": 7.156315803527832, "rewards/rejected": -2.9396347999572754, "step": 990 }, { "epoch": 0.55, "learning_rate": 8.507601045756084e-07, "logits/chosen": -6.083149433135986, "logits/rejected": -6.075432300567627, "logps/chosen": -253.8882598876953, "logps/rejected": -244.56057739257812, "loss": 0.0826, "rewards/accuracies": 0.9375, "rewards/chosen": 3.760679244995117, "rewards/margins": 7.9092607498168945, "rewards/rejected": -4.148580551147461, "step": 991 }, { "epoch": 0.55, "learning_rate": 8.504394854218548e-07, "logits/chosen": -6.062779426574707, "logits/rejected": -6.031650066375732, "logps/chosen": -289.1352844238281, "logps/rejected": -169.29946899414062, "loss": 0.0767, "rewards/accuracies": 1.0, "rewards/chosen": 7.088157653808594, "rewards/margins": 9.629281997680664, "rewards/rejected": -2.541123867034912, "step": 992 }, { "epoch": 0.55, "learning_rate": 8.501185827925695e-07, "logits/chosen": -6.09207820892334, "logits/rejected": -6.14821195602417, "logps/chosen": -293.908203125, "logps/rejected": -251.03060913085938, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": 3.993107318878174, "rewards/margins": 9.282014846801758, "rewards/rejected": -5.288907527923584, "step": 993 }, { "epoch": 0.55, "learning_rate": 8.497973969473349e-07, "logits/chosen": -6.177901268005371, "logits/rejected": -6.153402805328369, "logps/chosen": -222.5386962890625, "logps/rejected": -223.96463012695312, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": 3.740891933441162, "rewards/margins": 7.574563980102539, "rewards/rejected": -3.833672523498535, "step": 994 }, { "epoch": 0.55, "learning_rate": 8.49475928145963e-07, "logits/chosen": -6.067638397216797, "logits/rejected": -6.059797763824463, "logps/chosen": -274.0416564941406, "logps/rejected": -194.80642700195312, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": 3.9957809448242188, "rewards/margins": 7.819552898406982, "rewards/rejected": -3.8237719535827637, "step": 995 }, { "epoch": 0.55, "learning_rate": 8.491541766484946e-07, "logits/chosen": -6.057993412017822, "logits/rejected": -6.006321907043457, "logps/chosen": -253.95379638671875, "logps/rejected": -143.56442260742188, "loss": 0.2126, "rewards/accuracies": 0.9375, "rewards/chosen": 3.568972587585449, "rewards/margins": 5.902911186218262, "rewards/rejected": -2.3339385986328125, "step": 996 }, { "epoch": 0.55, "learning_rate": 8.48832142715199e-07, "logits/chosen": -6.092041492462158, "logits/rejected": -6.115187168121338, "logps/chosen": -356.6314392089844, "logps/rejected": -398.20672607421875, "loss": 0.2184, "rewards/accuracies": 0.875, "rewards/chosen": 1.5356777906417847, "rewards/margins": 6.6323041915893555, "rewards/rejected": -5.096627235412598, "step": 997 }, { "epoch": 0.55, "learning_rate": 8.485098266065743e-07, "logits/chosen": -6.122196197509766, "logits/rejected": -6.151808261871338, "logps/chosen": -210.888916015625, "logps/rejected": -228.2406005859375, "loss": 0.0497, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8121812343597412, "rewards/margins": 9.590176582336426, "rewards/rejected": -7.777995586395264, "step": 998 }, { "epoch": 0.55, "learning_rate": 8.481872285833464e-07, "logits/chosen": -6.136103630065918, "logits/rejected": -6.052691459655762, "logps/chosen": -279.6268615722656, "logps/rejected": -172.88677978515625, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": 4.634238243103027, "rewards/margins": 10.3872652053833, "rewards/rejected": -5.753026485443115, "step": 999 }, { "epoch": 0.56, "learning_rate": 8.478643489064695e-07, "logits/chosen": -6.128993988037109, "logits/rejected": -6.242142677307129, "logps/chosen": -295.5755615234375, "logps/rejected": -251.72634887695312, "loss": 0.1106, "rewards/accuracies": 0.9375, "rewards/chosen": 3.146484136581421, "rewards/margins": 7.509942054748535, "rewards/rejected": -4.363458633422852, "step": 1000 }, { "epoch": 0.56, "learning_rate": 8.475411878371257e-07, "logits/chosen": -6.096008777618408, "logits/rejected": -6.156581878662109, "logps/chosen": -233.27587890625, "logps/rejected": -257.0662536621094, "loss": 0.0961, "rewards/accuracies": 1.0, "rewards/chosen": 5.730852127075195, "rewards/margins": 9.784895896911621, "rewards/rejected": -4.054043769836426, "step": 1001 }, { "epoch": 0.56, "learning_rate": 8.472177456367245e-07, "logits/chosen": -6.04449987411499, "logits/rejected": -6.077629566192627, "logps/chosen": -417.8230285644531, "logps/rejected": -280.805908203125, "loss": 0.1303, "rewards/accuracies": 0.875, "rewards/chosen": 2.309673309326172, "rewards/margins": 5.853078842163086, "rewards/rejected": -3.543405532836914, "step": 1002 }, { "epoch": 0.56, "learning_rate": 8.468940225669031e-07, "logits/chosen": -5.985193252563477, "logits/rejected": -6.054416179656982, "logps/chosen": -234.13941955566406, "logps/rejected": -175.62741088867188, "loss": 0.1018, "rewards/accuracies": 1.0, "rewards/chosen": 4.689852237701416, "rewards/margins": 7.894060134887695, "rewards/rejected": -3.2042076587677, "step": 1003 }, { "epoch": 0.56, "learning_rate": 8.465700188895257e-07, "logits/chosen": -6.068649768829346, "logits/rejected": -6.0630412101745605, "logps/chosen": -276.7752685546875, "logps/rejected": -232.32379150390625, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": 3.8837451934814453, "rewards/margins": 8.975566864013672, "rewards/rejected": -5.091821193695068, "step": 1004 }, { "epoch": 0.56, "learning_rate": 8.462457348666834e-07, "logits/chosen": -6.123279094696045, "logits/rejected": -6.058923244476318, "logps/chosen": -282.07659912109375, "logps/rejected": -420.69818115234375, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": 2.7112584114074707, "rewards/margins": 8.808965682983398, "rewards/rejected": -6.097707271575928, "step": 1005 }, { "epoch": 0.56, "learning_rate": 8.459211707606944e-07, "logits/chosen": -6.221673965454102, "logits/rejected": -6.032009601593018, "logps/chosen": -267.7088623046875, "logps/rejected": -123.87884521484375, "loss": 0.0785, "rewards/accuracies": 1.0, "rewards/chosen": 4.493976593017578, "rewards/margins": 8.2493314743042, "rewards/rejected": -3.7553551197052, "step": 1006 }, { "epoch": 0.56, "learning_rate": 8.45596326834103e-07, "logits/chosen": -6.028262138366699, "logits/rejected": -6.071428298950195, "logps/chosen": -246.5548095703125, "logps/rejected": -244.2744598388672, "loss": 0.1052, "rewards/accuracies": 1.0, "rewards/chosen": 4.679087162017822, "rewards/margins": 10.771502494812012, "rewards/rejected": -6.092414855957031, "step": 1007 }, { "epoch": 0.56, "learning_rate": 8.452712033496803e-07, "logits/chosen": -6.17669677734375, "logits/rejected": -6.164555549621582, "logps/chosen": -271.77264404296875, "logps/rejected": -204.01345825195312, "loss": 0.1762, "rewards/accuracies": 1.0, "rewards/chosen": 3.4933438301086426, "rewards/margins": 9.437177658081055, "rewards/rejected": -5.943833351135254, "step": 1008 }, { "epoch": 0.56, "learning_rate": 8.449458005704233e-07, "logits/chosen": -6.040768623352051, "logits/rejected": -6.005390167236328, "logps/chosen": -270.34759521484375, "logps/rejected": -112.69155883789062, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": 4.600653648376465, "rewards/margins": 8.064019203186035, "rewards/rejected": -3.463365077972412, "step": 1009 }, { "epoch": 0.56, "learning_rate": 8.446201187595551e-07, "logits/chosen": -6.080146312713623, "logits/rejected": -6.131264686584473, "logps/chosen": -297.76849365234375, "logps/rejected": -307.06396484375, "loss": 0.1199, "rewards/accuracies": 0.875, "rewards/chosen": 3.7206435203552246, "rewards/margins": 7.996771812438965, "rewards/rejected": -4.276127815246582, "step": 1010 }, { "epoch": 0.56, "learning_rate": 8.442941581805243e-07, "logits/chosen": -6.095742225646973, "logits/rejected": -6.103157043457031, "logps/chosen": -213.1395263671875, "logps/rejected": -217.32717895507812, "loss": 0.1398, "rewards/accuracies": 0.9375, "rewards/chosen": 2.274935245513916, "rewards/margins": 7.054439544677734, "rewards/rejected": -4.77950382232666, "step": 1011 }, { "epoch": 0.56, "learning_rate": 8.43967919097005e-07, "logits/chosen": -6.0043721199035645, "logits/rejected": -6.056396961212158, "logps/chosen": -518.6679077148438, "logps/rejected": -345.4638671875, "loss": 0.109, "rewards/accuracies": 0.9375, "rewards/chosen": 3.890838146209717, "rewards/margins": 9.585893630981445, "rewards/rejected": -5.69505500793457, "step": 1012 }, { "epoch": 0.56, "learning_rate": 8.436414017728969e-07, "logits/chosen": -6.011142253875732, "logits/rejected": -5.913460731506348, "logps/chosen": -319.5745544433594, "logps/rejected": -250.4364013671875, "loss": 0.0688, "rewards/accuracies": 1.0, "rewards/chosen": 2.279360771179199, "rewards/margins": 7.0776686668396, "rewards/rejected": -4.798307418823242, "step": 1013 }, { "epoch": 0.56, "learning_rate": 8.433146064723243e-07, "logits/chosen": -6.047643184661865, "logits/rejected": -6.126669406890869, "logps/chosen": -202.17041015625, "logps/rejected": -232.86660766601562, "loss": 0.1925, "rewards/accuracies": 0.875, "rewards/chosen": 1.041749119758606, "rewards/margins": 6.916635513305664, "rewards/rejected": -5.874886512756348, "step": 1014 }, { "epoch": 0.56, "learning_rate": 8.429875334596368e-07, "logits/chosen": -5.954748630523682, "logits/rejected": -5.957712173461914, "logps/chosen": -481.1158142089844, "logps/rejected": -431.546875, "loss": 0.2484, "rewards/accuracies": 0.6875, "rewards/chosen": 4.704934120178223, "rewards/margins": 6.260160446166992, "rewards/rejected": -1.5552258491516113, "step": 1015 }, { "epoch": 0.56, "learning_rate": 8.426601829994083e-07, "logits/chosen": -6.098687171936035, "logits/rejected": -6.211813449859619, "logps/chosen": -197.02955627441406, "logps/rejected": -249.43023681640625, "loss": 0.1425, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4615271091461182, "rewards/margins": 7.526735305786133, "rewards/rejected": -6.065208435058594, "step": 1016 }, { "epoch": 0.56, "learning_rate": 8.423325553564375e-07, "logits/chosen": -5.935191631317139, "logits/rejected": -5.954459190368652, "logps/chosen": -328.2978515625, "logps/rejected": -101.91423034667969, "loss": 0.1633, "rewards/accuracies": 1.0, "rewards/chosen": 3.7630412578582764, "rewards/margins": 6.865618705749512, "rewards/rejected": -3.1025781631469727, "step": 1017 }, { "epoch": 0.57, "learning_rate": 8.42004650795747e-07, "logits/chosen": -6.140911102294922, "logits/rejected": -6.045834541320801, "logps/chosen": -247.38882446289062, "logps/rejected": -181.0020751953125, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": 2.936220645904541, "rewards/margins": 8.793134689331055, "rewards/rejected": -5.856914520263672, "step": 1018 }, { "epoch": 0.57, "learning_rate": 8.416764695825834e-07, "logits/chosen": -5.983673095703125, "logits/rejected": -6.021848201751709, "logps/chosen": -278.8241882324219, "logps/rejected": -397.9767761230469, "loss": 0.1388, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8605246543884277, "rewards/margins": 7.328566551208496, "rewards/rejected": -3.4680416584014893, "step": 1019 }, { "epoch": 0.57, "learning_rate": 8.413480119824173e-07, "logits/chosen": -6.236453533172607, "logits/rejected": -6.130363464355469, "logps/chosen": -221.4014892578125, "logps/rejected": -198.94827270507812, "loss": 0.0526, "rewards/accuracies": 0.9375, "rewards/chosen": 1.525424838066101, "rewards/margins": 9.760568618774414, "rewards/rejected": -8.235143661499023, "step": 1020 }, { "epoch": 0.57, "learning_rate": 8.410192782609428e-07, "logits/chosen": -6.066558837890625, "logits/rejected": -6.1154890060424805, "logps/chosen": -281.9203796386719, "logps/rejected": -217.4858856201172, "loss": 0.1424, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7548840045928955, "rewards/margins": 8.171481132507324, "rewards/rejected": -4.416597366333008, "step": 1021 }, { "epoch": 0.57, "learning_rate": 8.406902686840773e-07, "logits/chosen": -6.064456462860107, "logits/rejected": -6.1109418869018555, "logps/chosen": -300.984375, "logps/rejected": -315.47515869140625, "loss": 0.0854, "rewards/accuracies": 0.9375, "rewards/chosen": 1.981658935546875, "rewards/margins": 8.352401733398438, "rewards/rejected": -6.370742321014404, "step": 1022 }, { "epoch": 0.57, "learning_rate": 8.403609835179612e-07, "logits/chosen": -6.0420684814453125, "logits/rejected": -6.105473041534424, "logps/chosen": -310.16351318359375, "logps/rejected": -367.2919921875, "loss": 0.2377, "rewards/accuracies": 0.9375, "rewards/chosen": 5.738905906677246, "rewards/margins": 10.958192825317383, "rewards/rejected": -5.219287395477295, "step": 1023 }, { "epoch": 0.57, "learning_rate": 8.400314230289582e-07, "logits/chosen": -6.031220436096191, "logits/rejected": -6.10631799697876, "logps/chosen": -177.28817749023438, "logps/rejected": -173.50540161132812, "loss": 0.2872, "rewards/accuracies": 1.0, "rewards/chosen": 2.608532667160034, "rewards/margins": 9.11413288116455, "rewards/rejected": -6.5055999755859375, "step": 1024 }, { "epoch": 0.57, "learning_rate": 8.397015874836546e-07, "logits/chosen": -6.037949085235596, "logits/rejected": -6.053393840789795, "logps/chosen": -235.31671142578125, "logps/rejected": -161.90025329589844, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 3.030801296234131, "rewards/margins": 7.380684852600098, "rewards/rejected": -4.349883079528809, "step": 1025 }, { "epoch": 0.57, "learning_rate": 8.393714771488589e-07, "logits/chosen": -6.059126853942871, "logits/rejected": -6.019997596740723, "logps/chosen": -417.7149963378906, "logps/rejected": -333.95489501953125, "loss": 0.2438, "rewards/accuracies": 0.9375, "rewards/chosen": 5.351978302001953, "rewards/margins": 8.302471160888672, "rewards/rejected": -2.950493335723877, "step": 1026 }, { "epoch": 0.57, "learning_rate": 8.390410922916022e-07, "logits/chosen": -6.099283695220947, "logits/rejected": -6.116852283477783, "logps/chosen": -263.10980224609375, "logps/rejected": -185.14686584472656, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": 3.0266926288604736, "rewards/margins": 8.559209823608398, "rewards/rejected": -5.532517433166504, "step": 1027 }, { "epoch": 0.57, "learning_rate": 8.387104331791375e-07, "logits/chosen": -6.0406060218811035, "logits/rejected": -6.0065598487854, "logps/chosen": -618.3265380859375, "logps/rejected": -599.6515502929688, "loss": 0.1511, "rewards/accuracies": 0.875, "rewards/chosen": 3.0676441192626953, "rewards/margins": 6.424025535583496, "rewards/rejected": -3.3563809394836426, "step": 1028 }, { "epoch": 0.57, "learning_rate": 8.383795000789397e-07, "logits/chosen": -6.1146650314331055, "logits/rejected": -6.185565948486328, "logps/chosen": -353.14990234375, "logps/rejected": -265.9854736328125, "loss": 0.1716, "rewards/accuracies": 1.0, "rewards/chosen": 3.997877359390259, "rewards/margins": 7.256226062774658, "rewards/rejected": -3.2583487033843994, "step": 1029 }, { "epoch": 0.57, "learning_rate": 8.380482932587054e-07, "logits/chosen": -6.091364860534668, "logits/rejected": -6.2341532707214355, "logps/chosen": -236.77809143066406, "logps/rejected": -307.848876953125, "loss": 0.1766, "rewards/accuracies": 0.9375, "rewards/chosen": 2.477937936782837, "rewards/margins": 9.29813003540039, "rewards/rejected": -6.820192337036133, "step": 1030 }, { "epoch": 0.57, "learning_rate": 8.377168129863524e-07, "logits/chosen": -6.057624816894531, "logits/rejected": -6.118899345397949, "logps/chosen": -261.8293151855469, "logps/rejected": -237.30960083007812, "loss": 0.0982, "rewards/accuracies": 0.875, "rewards/chosen": 2.9667186737060547, "rewards/margins": 7.9374871253967285, "rewards/rejected": -4.970768451690674, "step": 1031 }, { "epoch": 0.57, "learning_rate": 8.373850595300201e-07, "logits/chosen": -6.119987964630127, "logits/rejected": -6.047914981842041, "logps/chosen": -233.82630920410156, "logps/rejected": -221.5707244873047, "loss": 0.0992, "rewards/accuracies": 0.9375, "rewards/chosen": 2.300137519836426, "rewards/margins": 7.686337471008301, "rewards/rejected": -5.386199951171875, "step": 1032 }, { "epoch": 0.57, "learning_rate": 8.370530331580685e-07, "logits/chosen": -6.050333499908447, "logits/rejected": -6.172735214233398, "logps/chosen": -267.5522766113281, "logps/rejected": -242.84957885742188, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": 2.9196596145629883, "rewards/margins": 10.08393669128418, "rewards/rejected": -7.164277076721191, "step": 1033 }, { "epoch": 0.57, "learning_rate": 8.367207341390785e-07, "logits/chosen": -6.228893280029297, "logits/rejected": -6.138602256774902, "logps/chosen": -232.18572998046875, "logps/rejected": -163.32870483398438, "loss": 0.2357, "rewards/accuracies": 1.0, "rewards/chosen": 3.2191433906555176, "rewards/margins": 8.830099105834961, "rewards/rejected": -5.610954761505127, "step": 1034 }, { "epoch": 0.57, "learning_rate": 8.363881627418515e-07, "logits/chosen": -6.042507648468018, "logits/rejected": -5.97994327545166, "logps/chosen": -249.4932098388672, "logps/rejected": -132.14376831054688, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": 3.9525067806243896, "rewards/margins": 8.431564331054688, "rewards/rejected": -4.479057312011719, "step": 1035 }, { "epoch": 0.58, "learning_rate": 8.360553192354092e-07, "logits/chosen": -6.189111232757568, "logits/rejected": -6.034914970397949, "logps/chosen": -444.06884765625, "logps/rejected": -234.46798706054688, "loss": 0.0828, "rewards/accuracies": 1.0, "rewards/chosen": 5.860688209533691, "rewards/margins": 10.497258186340332, "rewards/rejected": -4.636569976806641, "step": 1036 }, { "epoch": 0.58, "learning_rate": 8.357222038889938e-07, "logits/chosen": -6.217987060546875, "logits/rejected": -6.137872695922852, "logps/chosen": -282.30322265625, "logps/rejected": -223.59324645996094, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": 4.002612113952637, "rewards/margins": 9.417407989501953, "rewards/rejected": -5.414795875549316, "step": 1037 }, { "epoch": 0.58, "learning_rate": 8.353888169720668e-07, "logits/chosen": -6.066972255706787, "logits/rejected": -6.069156646728516, "logps/chosen": -315.7365417480469, "logps/rejected": -176.4014892578125, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": 5.852876663208008, "rewards/margins": 10.397254943847656, "rewards/rejected": -4.544378280639648, "step": 1038 }, { "epoch": 0.58, "learning_rate": 8.3505515875431e-07, "logits/chosen": -6.049164772033691, "logits/rejected": -6.016626358032227, "logps/chosen": -266.4462890625, "logps/rejected": -394.17877197265625, "loss": 0.1595, "rewards/accuracies": 0.9375, "rewards/chosen": 4.639700889587402, "rewards/margins": 10.522415161132812, "rewards/rejected": -5.882715225219727, "step": 1039 }, { "epoch": 0.58, "learning_rate": 8.347212295056239e-07, "logits/chosen": -6.1670708656311035, "logits/rejected": -6.052176475524902, "logps/chosen": -359.6260070800781, "logps/rejected": -255.99874877929688, "loss": 0.0909, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7441112995147705, "rewards/margins": 7.313457489013672, "rewards/rejected": -3.5693464279174805, "step": 1040 }, { "epoch": 0.58, "learning_rate": 8.343870294961289e-07, "logits/chosen": -6.0474653244018555, "logits/rejected": -6.155452728271484, "logps/chosen": -228.31182861328125, "logps/rejected": -322.62579345703125, "loss": 0.1681, "rewards/accuracies": 1.0, "rewards/chosen": 3.212524652481079, "rewards/margins": 10.059717178344727, "rewards/rejected": -6.847192764282227, "step": 1041 }, { "epoch": 0.58, "learning_rate": 8.34052558996164e-07, "logits/chosen": -6.13066291809082, "logits/rejected": -6.0151801109313965, "logps/chosen": -294.0337829589844, "logps/rejected": -227.86888122558594, "loss": 0.1642, "rewards/accuracies": 1.0, "rewards/chosen": 3.039324998855591, "rewards/margins": 7.491637229919434, "rewards/rejected": -4.4523115158081055, "step": 1042 }, { "epoch": 0.58, "learning_rate": 8.337178182762874e-07, "logits/chosen": -6.122186660766602, "logits/rejected": -6.181934833526611, "logps/chosen": -199.34564208984375, "logps/rejected": -192.93896484375, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": 1.558536410331726, "rewards/margins": 8.313051223754883, "rewards/rejected": -6.754514694213867, "step": 1043 }, { "epoch": 0.58, "learning_rate": 8.333828076072758e-07, "logits/chosen": -6.016533374786377, "logits/rejected": -6.054002285003662, "logps/chosen": -317.56085205078125, "logps/rejected": -234.1451416015625, "loss": 0.2512, "rewards/accuracies": 0.875, "rewards/chosen": 4.393125534057617, "rewards/margins": 9.439367294311523, "rewards/rejected": -5.046241760253906, "step": 1044 }, { "epoch": 0.58, "learning_rate": 8.330475272601239e-07, "logits/chosen": -5.994156360626221, "logits/rejected": -6.069057464599609, "logps/chosen": -244.51409912109375, "logps/rejected": -244.71011352539062, "loss": 0.126, "rewards/accuracies": 0.9375, "rewards/chosen": 4.145280838012695, "rewards/margins": 8.68636417388916, "rewards/rejected": -4.541083335876465, "step": 1045 }, { "epoch": 0.58, "learning_rate": 8.327119775060447e-07, "logits/chosen": -6.072657585144043, "logits/rejected": -6.039957046508789, "logps/chosen": -283.09893798828125, "logps/rejected": -167.06558227539062, "loss": 0.1123, "rewards/accuracies": 1.0, "rewards/chosen": 5.3008713722229, "rewards/margins": 7.115846157073975, "rewards/rejected": -1.8149751424789429, "step": 1046 }, { "epoch": 0.58, "learning_rate": 8.323761586164694e-07, "logits/chosen": -6.065644264221191, "logits/rejected": -6.0727763175964355, "logps/chosen": -260.01287841796875, "logps/rejected": -190.4319610595703, "loss": 0.079, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9656572341918945, "rewards/margins": 8.366264343261719, "rewards/rejected": -4.400606632232666, "step": 1047 }, { "epoch": 0.58, "learning_rate": 8.320400708630468e-07, "logits/chosen": -6.040584564208984, "logits/rejected": -6.085400581359863, "logps/chosen": -292.1056823730469, "logps/rejected": -207.79603576660156, "loss": 0.1106, "rewards/accuracies": 1.0, "rewards/chosen": 4.645409107208252, "rewards/margins": 8.276715278625488, "rewards/rejected": -3.6313066482543945, "step": 1048 }, { "epoch": 0.58, "learning_rate": 8.317037145176427e-07, "logits/chosen": -6.052261829376221, "logits/rejected": -6.11367654800415, "logps/chosen": -319.95208740234375, "logps/rejected": -271.25677490234375, "loss": 0.1676, "rewards/accuracies": 0.8125, "rewards/chosen": 3.4581377506256104, "rewards/margins": 9.216348648071289, "rewards/rejected": -5.7582106590271, "step": 1049 }, { "epoch": 0.58, "learning_rate": 8.31367089852341e-07, "logits/chosen": -6.109805583953857, "logits/rejected": -6.030033588409424, "logps/chosen": -253.20838928222656, "logps/rejected": -137.85208129882812, "loss": 0.1049, "rewards/accuracies": 1.0, "rewards/chosen": 2.934891700744629, "rewards/margins": 6.898618698120117, "rewards/rejected": -3.9637272357940674, "step": 1050 }, { "epoch": 0.58, "learning_rate": 8.310301971394422e-07, "logits/chosen": -6.033871650695801, "logits/rejected": -6.063182353973389, "logps/chosen": -278.42156982421875, "logps/rejected": -253.25247192382812, "loss": 0.1189, "rewards/accuracies": 0.875, "rewards/chosen": 3.394479990005493, "rewards/margins": 7.106695175170898, "rewards/rejected": -3.7122151851654053, "step": 1051 }, { "epoch": 0.58, "learning_rate": 8.306930366514635e-07, "logits/chosen": -6.072514533996582, "logits/rejected": -6.201259136199951, "logps/chosen": -235.60818481445312, "logps/rejected": -348.791015625, "loss": 0.1477, "rewards/accuracies": 1.0, "rewards/chosen": 3.339733600616455, "rewards/margins": 9.641655921936035, "rewards/rejected": -6.301921844482422, "step": 1052 }, { "epoch": 0.58, "learning_rate": 8.303556086611389e-07, "logits/chosen": -5.954116344451904, "logits/rejected": -5.975019931793213, "logps/chosen": -223.05364990234375, "logps/rejected": -144.85595703125, "loss": 0.258, "rewards/accuracies": 1.0, "rewards/chosen": 2.92120099067688, "rewards/margins": 6.7808637619018555, "rewards/rejected": -3.8596625328063965, "step": 1053 }, { "epoch": 0.59, "learning_rate": 8.300179134414187e-07, "logits/chosen": -6.127859115600586, "logits/rejected": -6.12407112121582, "logps/chosen": -190.85098266601562, "logps/rejected": -227.01255798339844, "loss": 0.1466, "rewards/accuracies": 0.9375, "rewards/chosen": 3.486219882965088, "rewards/margins": 7.395968437194824, "rewards/rejected": -3.9097487926483154, "step": 1054 }, { "epoch": 0.59, "learning_rate": 8.296799512654694e-07, "logits/chosen": -6.144261837005615, "logits/rejected": -5.993209362030029, "logps/chosen": -357.25750732421875, "logps/rejected": -216.38592529296875, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": 5.867090225219727, "rewards/margins": 9.624114990234375, "rewards/rejected": -3.757024049758911, "step": 1055 }, { "epoch": 0.59, "learning_rate": 8.293417224066736e-07, "logits/chosen": -6.098045349121094, "logits/rejected": -6.111726760864258, "logps/chosen": -201.05271911621094, "logps/rejected": -187.20965576171875, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": 2.934701681137085, "rewards/margins": 7.566896915435791, "rewards/rejected": -4.632195472717285, "step": 1056 }, { "epoch": 0.59, "learning_rate": 8.290032271386293e-07, "logits/chosen": -6.117771148681641, "logits/rejected": -6.051225662231445, "logps/chosen": -224.09066772460938, "logps/rejected": -242.12672424316406, "loss": 0.3174, "rewards/accuracies": 0.875, "rewards/chosen": 1.316799283027649, "rewards/margins": 6.130516529083252, "rewards/rejected": -4.813717365264893, "step": 1057 }, { "epoch": 0.59, "learning_rate": 8.286644657351504e-07, "logits/chosen": -6.260326862335205, "logits/rejected": -6.033742427825928, "logps/chosen": -292.0521545410156, "logps/rejected": -165.01821899414062, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": 3.278460741043091, "rewards/margins": 9.18847370147705, "rewards/rejected": -5.910013198852539, "step": 1058 }, { "epoch": 0.59, "learning_rate": 8.283254384702658e-07, "logits/chosen": -5.991489410400391, "logits/rejected": -6.002531051635742, "logps/chosen": -233.9945831298828, "logps/rejected": -227.93008422851562, "loss": 0.1345, "rewards/accuracies": 0.875, "rewards/chosen": 2.5543103218078613, "rewards/margins": 7.646369934082031, "rewards/rejected": -5.092059135437012, "step": 1059 }, { "epoch": 0.59, "learning_rate": 8.279861456182194e-07, "logits/chosen": -6.051688194274902, "logits/rejected": -6.002199649810791, "logps/chosen": -617.1204223632812, "logps/rejected": -251.8156280517578, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": 8.197909355163574, "rewards/margins": 12.504674911499023, "rewards/rejected": -4.306764602661133, "step": 1060 }, { "epoch": 0.59, "learning_rate": 8.2764658745347e-07, "logits/chosen": -6.020668029785156, "logits/rejected": -5.914614677429199, "logps/chosen": -206.41148376464844, "logps/rejected": -172.85604858398438, "loss": 0.0914, "rewards/accuracies": 1.0, "rewards/chosen": 2.5160255432128906, "rewards/margins": 7.318474769592285, "rewards/rejected": -4.8024492263793945, "step": 1061 }, { "epoch": 0.59, "learning_rate": 8.273067642506913e-07, "logits/chosen": -6.043506622314453, "logits/rejected": -6.124425411224365, "logps/chosen": -181.53546142578125, "logps/rejected": -293.2760314941406, "loss": 0.1836, "rewards/accuracies": 0.875, "rewards/chosen": 3.1585464477539062, "rewards/margins": 9.398677825927734, "rewards/rejected": -6.240131378173828, "step": 1062 }, { "epoch": 0.59, "learning_rate": 8.269666762847711e-07, "logits/chosen": -6.046955108642578, "logits/rejected": -6.044978141784668, "logps/chosen": -213.70098876953125, "logps/rejected": -203.41055297851562, "loss": 0.1514, "rewards/accuracies": 0.9375, "rewards/chosen": 3.167379856109619, "rewards/margins": 8.01948356628418, "rewards/rejected": -4.852104663848877, "step": 1063 }, { "epoch": 0.59, "learning_rate": 8.266263238308115e-07, "logits/chosen": -5.900177001953125, "logits/rejected": -5.968143463134766, "logps/chosen": -284.643310546875, "logps/rejected": -191.70787048339844, "loss": 0.0709, "rewards/accuracies": 0.9375, "rewards/chosen": 2.690063714981079, "rewards/margins": 6.732784271240234, "rewards/rejected": -4.042720317840576, "step": 1064 }, { "epoch": 0.59, "learning_rate": 8.262857071641283e-07, "logits/chosen": -6.1501030921936035, "logits/rejected": -6.231736183166504, "logps/chosen": -295.1238708496094, "logps/rejected": -301.2281188964844, "loss": 0.0859, "rewards/accuracies": 1.0, "rewards/chosen": 3.044539451599121, "rewards/margins": 11.313722610473633, "rewards/rejected": -8.269183158874512, "step": 1065 }, { "epoch": 0.59, "learning_rate": 8.259448265602512e-07, "logits/chosen": -6.040938854217529, "logits/rejected": -6.081604957580566, "logps/chosen": -271.4258117675781, "logps/rejected": -174.24349975585938, "loss": 0.1345, "rewards/accuracies": 1.0, "rewards/chosen": 4.263580322265625, "rewards/margins": 8.769886016845703, "rewards/rejected": -4.506305694580078, "step": 1066 }, { "epoch": 0.59, "learning_rate": 8.256036822949236e-07, "logits/chosen": -6.141528129577637, "logits/rejected": -6.128284454345703, "logps/chosen": -434.8712158203125, "logps/rejected": -241.76498413085938, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": 3.9808294773101807, "rewards/margins": 7.633964538574219, "rewards/rejected": -3.653134822845459, "step": 1067 }, { "epoch": 0.59, "learning_rate": 8.252622746441021e-07, "logits/chosen": -6.110534191131592, "logits/rejected": -6.124307632446289, "logps/chosen": -238.97021484375, "logps/rejected": -225.6280975341797, "loss": 0.1122, "rewards/accuracies": 0.9375, "rewards/chosen": 2.402200222015381, "rewards/margins": 7.905332565307617, "rewards/rejected": -5.503131866455078, "step": 1068 }, { "epoch": 0.59, "learning_rate": 8.249206038839558e-07, "logits/chosen": -6.024777412414551, "logits/rejected": -6.150127410888672, "logps/chosen": -304.1200866699219, "logps/rejected": -313.4581298828125, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": 4.106658458709717, "rewards/margins": 8.242372512817383, "rewards/rejected": -4.135714054107666, "step": 1069 }, { "epoch": 0.59, "learning_rate": 8.245786702908674e-07, "logits/chosen": -5.953618049621582, "logits/rejected": -6.012453079223633, "logps/chosen": -228.71685791015625, "logps/rejected": -188.85662841796875, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": 3.5496459007263184, "rewards/margins": 9.309991836547852, "rewards/rejected": -5.760345458984375, "step": 1070 }, { "epoch": 0.59, "learning_rate": 8.242364741414319e-07, "logits/chosen": -5.9773759841918945, "logits/rejected": -5.9821343421936035, "logps/chosen": -229.57518005371094, "logps/rejected": -149.5858154296875, "loss": 0.061, "rewards/accuracies": 0.9375, "rewards/chosen": 3.559817314147949, "rewards/margins": 7.547682762145996, "rewards/rejected": -3.987865686416626, "step": 1071 }, { "epoch": 0.6, "learning_rate": 8.238940157124567e-07, "logits/chosen": -6.065321445465088, "logits/rejected": -6.059556007385254, "logps/chosen": -285.3388366699219, "logps/rejected": -278.04559326171875, "loss": 0.1121, "rewards/accuracies": 1.0, "rewards/chosen": 2.623286247253418, "rewards/margins": 8.609963417053223, "rewards/rejected": -5.9866766929626465, "step": 1072 }, { "epoch": 0.6, "learning_rate": 8.235512952809612e-07, "logits/chosen": -6.129952430725098, "logits/rejected": -6.040441989898682, "logps/chosen": -216.180908203125, "logps/rejected": -142.51002502441406, "loss": 0.2805, "rewards/accuracies": 1.0, "rewards/chosen": 3.94512939453125, "rewards/margins": 8.66243839263916, "rewards/rejected": -4.717308521270752, "step": 1073 }, { "epoch": 0.6, "learning_rate": 8.232083131241769e-07, "logits/chosen": -6.044152736663818, "logits/rejected": -6.1406731605529785, "logps/chosen": -261.8125, "logps/rejected": -179.39639282226562, "loss": 0.1219, "rewards/accuracies": 0.875, "rewards/chosen": 2.9655423164367676, "rewards/margins": 8.832748413085938, "rewards/rejected": -5.867206573486328, "step": 1074 }, { "epoch": 0.6, "learning_rate": 8.228650695195472e-07, "logits/chosen": -6.112764835357666, "logits/rejected": -6.157624244689941, "logps/chosen": -238.75917053222656, "logps/rejected": -210.69723510742188, "loss": 0.0501, "rewards/accuracies": 1.0, "rewards/chosen": 3.8742432594299316, "rewards/margins": 7.321263313293457, "rewards/rejected": -3.4470202922821045, "step": 1075 }, { "epoch": 0.6, "learning_rate": 8.225215647447263e-07, "logits/chosen": -5.918902397155762, "logits/rejected": -5.991105079650879, "logps/chosen": -239.54225158691406, "logps/rejected": -234.2740478515625, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": 4.075347423553467, "rewards/margins": 9.59113597869873, "rewards/rejected": -5.515789031982422, "step": 1076 }, { "epoch": 0.6, "learning_rate": 8.221777990775808e-07, "logits/chosen": -6.053345680236816, "logits/rejected": -5.997647285461426, "logps/chosen": -236.25546264648438, "logps/rejected": -157.51153564453125, "loss": 0.1094, "rewards/accuracies": 0.9375, "rewards/chosen": 2.108376979827881, "rewards/margins": 6.224863052368164, "rewards/rejected": -4.116486549377441, "step": 1077 }, { "epoch": 0.6, "learning_rate": 8.218337727961872e-07, "logits/chosen": -6.0418500900268555, "logits/rejected": -6.038497447967529, "logps/chosen": -419.7406005859375, "logps/rejected": -195.04522705078125, "loss": 0.0647, "rewards/accuracies": 0.9375, "rewards/chosen": 6.410067081451416, "rewards/margins": 9.118255615234375, "rewards/rejected": -2.708188772201538, "step": 1078 }, { "epoch": 0.6, "learning_rate": 8.214894861788335e-07, "logits/chosen": -6.103893280029297, "logits/rejected": -5.963909149169922, "logps/chosen": -246.08221435546875, "logps/rejected": -114.3473892211914, "loss": 0.1499, "rewards/accuracies": 1.0, "rewards/chosen": 2.7071876525878906, "rewards/margins": 7.163538932800293, "rewards/rejected": -4.456351280212402, "step": 1079 }, { "epoch": 0.6, "learning_rate": 8.21144939504018e-07, "logits/chosen": -6.0116987228393555, "logits/rejected": -5.9878926277160645, "logps/chosen": -201.50375366210938, "logps/rejected": -160.1256561279297, "loss": 0.0802, "rewards/accuracies": 0.8125, "rewards/chosen": 2.5341696739196777, "rewards/margins": 6.051645278930664, "rewards/rejected": -3.5174753665924072, "step": 1080 }, { "epoch": 0.6, "learning_rate": 8.208001330504495e-07, "logits/chosen": -6.093554496765137, "logits/rejected": -6.180202484130859, "logps/chosen": -176.83334350585938, "logps/rejected": -197.8236541748047, "loss": 0.1529, "rewards/accuracies": 1.0, "rewards/chosen": 1.6317138671875, "rewards/margins": 7.545279502868652, "rewards/rejected": -5.9135661125183105, "step": 1081 }, { "epoch": 0.6, "learning_rate": 8.204550670970468e-07, "logits/chosen": -5.978993892669678, "logits/rejected": -6.021770477294922, "logps/chosen": -211.88088989257812, "logps/rejected": -198.30699157714844, "loss": 0.0897, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9909034967422485, "rewards/margins": 7.510677337646484, "rewards/rejected": -5.519773960113525, "step": 1082 }, { "epoch": 0.6, "learning_rate": 8.201097419229388e-07, "logits/chosen": -6.2415242195129395, "logits/rejected": -6.123587608337402, "logps/chosen": -211.02001953125, "logps/rejected": -249.5972900390625, "loss": 0.153, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5072565078735352, "rewards/margins": 9.151080131530762, "rewards/rejected": -7.643823623657227, "step": 1083 }, { "epoch": 0.6, "learning_rate": 8.197641578074641e-07, "logits/chosen": -6.092808723449707, "logits/rejected": -6.124863624572754, "logps/chosen": -280.7645263671875, "logps/rejected": -255.4720458984375, "loss": 0.2224, "rewards/accuracies": 0.9375, "rewards/chosen": 6.2195658683776855, "rewards/margins": 9.983743667602539, "rewards/rejected": -3.7641782760620117, "step": 1084 }, { "epoch": 0.6, "learning_rate": 8.194183150301705e-07, "logits/chosen": -6.023285388946533, "logits/rejected": -5.991503715515137, "logps/chosen": -313.76806640625, "logps/rejected": -273.16180419921875, "loss": 0.0569, "rewards/accuracies": 0.875, "rewards/chosen": 4.9581804275512695, "rewards/margins": 8.532318115234375, "rewards/rejected": -3.5741379261016846, "step": 1085 }, { "epoch": 0.6, "learning_rate": 8.190722138708151e-07, "logits/chosen": -6.120821952819824, "logits/rejected": -6.183749675750732, "logps/chosen": -309.873779296875, "logps/rejected": -308.4891662597656, "loss": 0.2149, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3106722831726074, "rewards/margins": 9.415361404418945, "rewards/rejected": -6.1046881675720215, "step": 1086 }, { "epoch": 0.6, "learning_rate": 8.187258546093644e-07, "logits/chosen": -6.059454917907715, "logits/rejected": -5.986688613891602, "logps/chosen": -306.1138916015625, "logps/rejected": -142.91607666015625, "loss": 0.127, "rewards/accuracies": 0.875, "rewards/chosen": 3.1363539695739746, "rewards/margins": 7.545896053314209, "rewards/rejected": -4.409542083740234, "step": 1087 }, { "epoch": 0.6, "learning_rate": 8.18379237525993e-07, "logits/chosen": -6.065793514251709, "logits/rejected": -6.122668266296387, "logps/chosen": -223.53074645996094, "logps/rejected": -161.00506591796875, "loss": 0.1384, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8767645359039307, "rewards/margins": 6.755867004394531, "rewards/rejected": -3.879102945327759, "step": 1088 }, { "epoch": 0.6, "learning_rate": 8.180323629010848e-07, "logits/chosen": -6.103419780731201, "logits/rejected": -6.098438262939453, "logps/chosen": -263.38775634765625, "logps/rejected": -131.55361938476562, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": 6.269506931304932, "rewards/margins": 9.399690628051758, "rewards/rejected": -3.130183219909668, "step": 1089 }, { "epoch": 0.61, "learning_rate": 8.176852310152314e-07, "logits/chosen": -6.125283241271973, "logits/rejected": -6.159388065338135, "logps/chosen": -306.70611572265625, "logps/rejected": -210.32577514648438, "loss": 0.1022, "rewards/accuracies": 0.9375, "rewards/chosen": 3.13387393951416, "rewards/margins": 8.444878578186035, "rewards/rejected": -5.311005115509033, "step": 1090 }, { "epoch": 0.61, "learning_rate": 8.173378421492329e-07, "logits/chosen": -5.951584339141846, "logits/rejected": -5.957973003387451, "logps/chosen": -281.56793212890625, "logps/rejected": -266.5608215332031, "loss": 0.1046, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3378257751464844, "rewards/margins": 7.604853630065918, "rewards/rejected": -4.267027854919434, "step": 1091 }, { "epoch": 0.61, "learning_rate": 8.169901965840971e-07, "logits/chosen": -6.033692836761475, "logits/rejected": -6.177189826965332, "logps/chosen": -314.9477233886719, "logps/rejected": -286.337158203125, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": 5.575843811035156, "rewards/margins": 11.478199005126953, "rewards/rejected": -5.902354717254639, "step": 1092 }, { "epoch": 0.61, "learning_rate": 8.166422946010395e-07, "logits/chosen": -6.103214263916016, "logits/rejected": -6.024356365203857, "logps/chosen": -195.48251342773438, "logps/rejected": -136.72955322265625, "loss": 0.0753, "rewards/accuracies": 1.0, "rewards/chosen": 3.7881546020507812, "rewards/margins": 8.92214298248291, "rewards/rejected": -5.133988380432129, "step": 1093 }, { "epoch": 0.61, "learning_rate": 8.16294136481483e-07, "logits/chosen": -6.017100811004639, "logits/rejected": -6.009147644042969, "logps/chosen": -271.70831298828125, "logps/rejected": -194.93601989746094, "loss": 0.1554, "rewards/accuracies": 0.9375, "rewards/chosen": 4.471423149108887, "rewards/margins": 7.924104690551758, "rewards/rejected": -3.452681064605713, "step": 1094 }, { "epoch": 0.61, "learning_rate": 8.159457225070576e-07, "logits/chosen": -6.034609317779541, "logits/rejected": -6.075691223144531, "logps/chosen": -301.23126220703125, "logps/rejected": -213.65805053710938, "loss": 0.1084, "rewards/accuracies": 0.9375, "rewards/chosen": 3.376908779144287, "rewards/margins": 8.292683601379395, "rewards/rejected": -4.915774345397949, "step": 1095 }, { "epoch": 0.61, "learning_rate": 8.155970529596005e-07, "logits/chosen": -5.919774055480957, "logits/rejected": -5.9543890953063965, "logps/chosen": -393.3760986328125, "logps/rejected": -261.1364440917969, "loss": 0.0831, "rewards/accuracies": 0.875, "rewards/chosen": 4.254932403564453, "rewards/margins": 8.027334213256836, "rewards/rejected": -3.77240252494812, "step": 1096 }, { "epoch": 0.61, "learning_rate": 8.152481281211556e-07, "logits/chosen": -6.0676164627075195, "logits/rejected": -6.065086364746094, "logps/chosen": -243.3832244873047, "logps/rejected": -222.44224548339844, "loss": 0.1364, "rewards/accuracies": 0.9375, "rewards/chosen": 4.635822296142578, "rewards/margins": 8.677939414978027, "rewards/rejected": -4.042117118835449, "step": 1097 }, { "epoch": 0.61, "learning_rate": 8.14898948273973e-07, "logits/chosen": -6.149594306945801, "logits/rejected": -6.042490005493164, "logps/chosen": -302.32611083984375, "logps/rejected": -167.7694549560547, "loss": 0.0858, "rewards/accuracies": 1.0, "rewards/chosen": 2.7119534015655518, "rewards/margins": 8.861345291137695, "rewards/rejected": -6.149392127990723, "step": 1098 }, { "epoch": 0.61, "learning_rate": 8.145495137005096e-07, "logits/chosen": -6.037805080413818, "logits/rejected": -6.148012161254883, "logps/chosen": -318.3827209472656, "logps/rejected": -231.08767700195312, "loss": 0.0542, "rewards/accuracies": 0.9375, "rewards/chosen": 4.023009300231934, "rewards/margins": 7.6525068283081055, "rewards/rejected": -3.6294984817504883, "step": 1099 }, { "epoch": 0.61, "learning_rate": 8.141998246834277e-07, "logits/chosen": -6.100918769836426, "logits/rejected": -6.283461093902588, "logps/chosen": -204.7171173095703, "logps/rejected": -310.837890625, "loss": 0.1354, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5097999572753906, "rewards/margins": 10.120144844055176, "rewards/rejected": -8.610344886779785, "step": 1100 }, { "epoch": 0.61, "learning_rate": 8.138498815055957e-07, "logits/chosen": -6.0902910232543945, "logits/rejected": -6.048587799072266, "logps/chosen": -299.11016845703125, "logps/rejected": -253.73910522460938, "loss": 0.1062, "rewards/accuracies": 0.9375, "rewards/chosen": 4.884589195251465, "rewards/margins": 9.108903884887695, "rewards/rejected": -4.2243146896362305, "step": 1101 }, { "epoch": 0.61, "learning_rate": 8.134996844500879e-07, "logits/chosen": -6.074222087860107, "logits/rejected": -5.97291374206543, "logps/chosen": -511.09417724609375, "logps/rejected": -266.2562255859375, "loss": 0.0904, "rewards/accuracies": 1.0, "rewards/chosen": 5.9473748207092285, "rewards/margins": 9.671131134033203, "rewards/rejected": -3.7237563133239746, "step": 1102 }, { "epoch": 0.61, "learning_rate": 8.131492338001838e-07, "logits/chosen": -6.018893241882324, "logits/rejected": -6.106936454772949, "logps/chosen": -225.46844482421875, "logps/rejected": -303.7918701171875, "loss": 0.1031, "rewards/accuracies": 0.9375, "rewards/chosen": 2.590646266937256, "rewards/margins": 10.757279396057129, "rewards/rejected": -8.166633605957031, "step": 1103 }, { "epoch": 0.61, "learning_rate": 8.12798529839368e-07, "logits/chosen": -6.067305088043213, "logits/rejected": -6.02083158493042, "logps/chosen": -253.14569091796875, "logps/rejected": -232.61932373046875, "loss": 0.1119, "rewards/accuracies": 0.875, "rewards/chosen": 3.402919292449951, "rewards/margins": 9.607854843139648, "rewards/rejected": -6.204935073852539, "step": 1104 }, { "epoch": 0.61, "learning_rate": 8.124475728513296e-07, "logits/chosen": -6.22822380065918, "logits/rejected": -6.078846454620361, "logps/chosen": -283.9096984863281, "logps/rejected": -151.97698974609375, "loss": 0.1348, "rewards/accuracies": 1.0, "rewards/chosen": 3.1583714485168457, "rewards/margins": 10.53658676147461, "rewards/rejected": -7.378214359283447, "step": 1105 }, { "epoch": 0.61, "learning_rate": 8.12096363119963e-07, "logits/chosen": -5.970790863037109, "logits/rejected": -6.038588523864746, "logps/chosen": -354.22113037109375, "logps/rejected": -185.0518035888672, "loss": 0.0806, "rewards/accuracies": 0.9375, "rewards/chosen": 5.062350273132324, "rewards/margins": 9.513622283935547, "rewards/rejected": -4.451272010803223, "step": 1106 }, { "epoch": 0.61, "learning_rate": 8.117449009293668e-07, "logits/chosen": -6.015563011169434, "logits/rejected": -6.056793212890625, "logps/chosen": -276.23468017578125, "logps/rejected": -226.13465881347656, "loss": 0.096, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4283864498138428, "rewards/margins": 7.505777835845947, "rewards/rejected": -4.077391624450684, "step": 1107 }, { "epoch": 0.62, "learning_rate": 8.113931865638437e-07, "logits/chosen": -6.054025650024414, "logits/rejected": -6.095461845397949, "logps/chosen": -213.632080078125, "logps/rejected": -237.14413452148438, "loss": 0.1596, "rewards/accuracies": 0.9375, "rewards/chosen": 2.868543863296509, "rewards/margins": 8.690404891967773, "rewards/rejected": -5.821861267089844, "step": 1108 }, { "epoch": 0.62, "learning_rate": 8.110412203079007e-07, "logits/chosen": -6.0366315841674805, "logits/rejected": -6.039833068847656, "logps/chosen": -518.2492065429688, "logps/rejected": -259.1172790527344, "loss": 0.131, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9282140731811523, "rewards/margins": 9.86404037475586, "rewards/rejected": -5.935825347900391, "step": 1109 }, { "epoch": 0.62, "learning_rate": 8.10689002446248e-07, "logits/chosen": -6.036911964416504, "logits/rejected": -6.060551643371582, "logps/chosen": -215.97439575195312, "logps/rejected": -108.17390441894531, "loss": 0.106, "rewards/accuracies": 1.0, "rewards/chosen": 5.846142768859863, "rewards/margins": 8.963006973266602, "rewards/rejected": -3.1168644428253174, "step": 1110 }, { "epoch": 0.62, "learning_rate": 8.103365332638e-07, "logits/chosen": -5.993443965911865, "logits/rejected": -5.9762420654296875, "logps/chosen": -240.16177368164062, "logps/rejected": -168.79208374023438, "loss": 0.0703, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1944146156311035, "rewards/margins": 6.578033924102783, "rewards/rejected": -3.383619546890259, "step": 1111 }, { "epoch": 0.62, "learning_rate": 8.09983813045674e-07, "logits/chosen": -5.949184417724609, "logits/rejected": -5.913346290588379, "logps/chosen": -293.96185302734375, "logps/rejected": -319.9906311035156, "loss": 0.0906, "rewards/accuracies": 0.875, "rewards/chosen": 1.9778194427490234, "rewards/margins": 5.442646503448486, "rewards/rejected": -3.464827060699463, "step": 1112 }, { "epoch": 0.62, "learning_rate": 8.096308420771907e-07, "logits/chosen": -6.137996196746826, "logits/rejected": -6.039612770080566, "logps/chosen": -243.51800537109375, "logps/rejected": -129.6521453857422, "loss": 0.6224, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5073018074035645, "rewards/margins": 7.281027793884277, "rewards/rejected": -3.773725748062134, "step": 1113 }, { "epoch": 0.62, "learning_rate": 8.092776206438729e-07, "logits/chosen": -6.104245185852051, "logits/rejected": -6.024772644042969, "logps/chosen": -259.22259521484375, "logps/rejected": -245.98281860351562, "loss": 0.149, "rewards/accuracies": 0.875, "rewards/chosen": 2.5697555541992188, "rewards/margins": 5.734617233276367, "rewards/rejected": -3.1648619174957275, "step": 1114 }, { "epoch": 0.62, "learning_rate": 8.089241490314467e-07, "logits/chosen": -5.963727951049805, "logits/rejected": -6.002569198608398, "logps/chosen": -238.9496612548828, "logps/rejected": -162.58685302734375, "loss": 0.1264, "rewards/accuracies": 1.0, "rewards/chosen": 4.355733871459961, "rewards/margins": 9.712972640991211, "rewards/rejected": -5.35723876953125, "step": 1115 }, { "epoch": 0.62, "learning_rate": 8.085704275258403e-07, "logits/chosen": -6.095206260681152, "logits/rejected": -5.997029781341553, "logps/chosen": -401.4620056152344, "logps/rejected": -200.79209899902344, "loss": 0.0893, "rewards/accuracies": 0.875, "rewards/chosen": 3.1220474243164062, "rewards/margins": 8.042863845825195, "rewards/rejected": -4.920815944671631, "step": 1116 }, { "epoch": 0.62, "learning_rate": 8.082164564131844e-07, "logits/chosen": -6.174099445343018, "logits/rejected": -6.1214728355407715, "logps/chosen": -299.1026611328125, "logps/rejected": -244.0281982421875, "loss": 0.1765, "rewards/accuracies": 0.875, "rewards/chosen": 2.3777244091033936, "rewards/margins": 7.280400276184082, "rewards/rejected": -4.902675628662109, "step": 1117 }, { "epoch": 0.62, "learning_rate": 8.07862235979811e-07, "logits/chosen": -6.013698577880859, "logits/rejected": -6.1146368980407715, "logps/chosen": -427.6639709472656, "logps/rejected": -349.8815002441406, "loss": 0.1369, "rewards/accuracies": 0.9375, "rewards/chosen": 3.325230121612549, "rewards/margins": 10.048351287841797, "rewards/rejected": -6.723121166229248, "step": 1118 }, { "epoch": 0.62, "learning_rate": 8.075077665122543e-07, "logits/chosen": -5.947376251220703, "logits/rejected": -5.992820739746094, "logps/chosen": -243.7882843017578, "logps/rejected": -176.50039672851562, "loss": 0.1155, "rewards/accuracies": 0.875, "rewards/chosen": 5.1540608406066895, "rewards/margins": 9.379914283752441, "rewards/rejected": -4.225853443145752, "step": 1119 }, { "epoch": 0.62, "learning_rate": 8.071530482972495e-07, "logits/chosen": -5.9960856437683105, "logits/rejected": -6.094660758972168, "logps/chosen": -346.7715148925781, "logps/rejected": -258.17633056640625, "loss": 0.1343, "rewards/accuracies": 1.0, "rewards/chosen": 3.1848983764648438, "rewards/margins": 7.804778099060059, "rewards/rejected": -4.619880199432373, "step": 1120 }, { "epoch": 0.62, "learning_rate": 8.067980816217335e-07, "logits/chosen": -6.099969863891602, "logits/rejected": -6.098663330078125, "logps/chosen": -290.4445495605469, "logps/rejected": -294.5204162597656, "loss": 0.0784, "rewards/accuracies": 1.0, "rewards/chosen": 4.152972221374512, "rewards/margins": 10.212297439575195, "rewards/rejected": -6.059325218200684, "step": 1121 }, { "epoch": 0.62, "learning_rate": 8.064428667728437e-07, "logits/chosen": -6.003690242767334, "logits/rejected": -6.058723449707031, "logps/chosen": -297.42034912109375, "logps/rejected": -341.7375793457031, "loss": 0.1468, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1679391860961914, "rewards/margins": 6.450918197631836, "rewards/rejected": -4.2829790115356445, "step": 1122 }, { "epoch": 0.62, "learning_rate": 8.060874040379189e-07, "logits/chosen": -6.043740749359131, "logits/rejected": -6.057499885559082, "logps/chosen": -212.67782592773438, "logps/rejected": -254.10104370117188, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": 3.6948206424713135, "rewards/margins": 9.82315731048584, "rewards/rejected": -6.1283369064331055, "step": 1123 }, { "epoch": 0.62, "learning_rate": 8.057316937044976e-07, "logits/chosen": -6.042202472686768, "logits/rejected": -5.991865158081055, "logps/chosen": -275.21893310546875, "logps/rejected": -141.9148712158203, "loss": 0.1475, "rewards/accuracies": 1.0, "rewards/chosen": 5.138075828552246, "rewards/margins": 8.965892791748047, "rewards/rejected": -3.827816963195801, "step": 1124 }, { "epoch": 0.62, "learning_rate": 8.05375736060319e-07, "logits/chosen": -6.152252197265625, "logits/rejected": -6.001944541931152, "logps/chosen": -155.4302520751953, "logps/rejected": -121.5499496459961, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": 1.8283233642578125, "rewards/margins": 6.168185710906982, "rewards/rejected": -4.339861869812012, "step": 1125 }, { "epoch": 0.63, "learning_rate": 8.050195313933228e-07, "logits/chosen": -6.231130599975586, "logits/rejected": -6.012393951416016, "logps/chosen": -301.17633056640625, "logps/rejected": -200.6704559326172, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": 3.225977659225464, "rewards/margins": 9.250001907348633, "rewards/rejected": -6.02402400970459, "step": 1126 }, { "epoch": 0.63, "learning_rate": 8.046630799916475e-07, "logits/chosen": -6.064586162567139, "logits/rejected": -6.008525848388672, "logps/chosen": -336.1375732421875, "logps/rejected": -265.62017822265625, "loss": 0.1582, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7599697113037109, "rewards/margins": 8.251876831054688, "rewards/rejected": -7.491907119750977, "step": 1127 }, { "epoch": 0.63, "learning_rate": 8.043063821436322e-07, "logits/chosen": -6.1505584716796875, "logits/rejected": -6.206721305847168, "logps/chosen": -213.83326721191406, "logps/rejected": -326.9692077636719, "loss": 0.0782, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6623613238334656, "rewards/margins": 8.056880950927734, "rewards/rejected": -7.394519329071045, "step": 1128 }, { "epoch": 0.63, "learning_rate": 8.039494381378145e-07, "logits/chosen": -5.944568157196045, "logits/rejected": -5.969659328460693, "logps/chosen": -394.49224853515625, "logps/rejected": -404.88372802734375, "loss": 0.2359, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2914505004882812, "rewards/margins": 6.442939758300781, "rewards/rejected": -4.1514892578125, "step": 1129 }, { "epoch": 0.63, "learning_rate": 8.035922482629318e-07, "logits/chosen": -6.114164352416992, "logits/rejected": -5.975306510925293, "logps/chosen": -166.92662048339844, "logps/rejected": -132.9229736328125, "loss": 0.1492, "rewards/accuracies": 1.0, "rewards/chosen": 1.3103938102722168, "rewards/margins": 8.231557846069336, "rewards/rejected": -6.921163558959961, "step": 1130 }, { "epoch": 0.63, "learning_rate": 8.032348128079203e-07, "logits/chosen": -6.044055461883545, "logits/rejected": -6.103265762329102, "logps/chosen": -263.30712890625, "logps/rejected": -276.2400207519531, "loss": 0.1473, "rewards/accuracies": 0.875, "rewards/chosen": 1.906916856765747, "rewards/margins": 6.3724260330200195, "rewards/rejected": -4.465508937835693, "step": 1131 }, { "epoch": 0.63, "learning_rate": 8.028771320619143e-07, "logits/chosen": -5.9868364334106445, "logits/rejected": -6.060549259185791, "logps/chosen": -340.0048828125, "logps/rejected": -489.2795715332031, "loss": 0.1754, "rewards/accuracies": 0.9375, "rewards/chosen": 2.938750743865967, "rewards/margins": 9.99929428100586, "rewards/rejected": -7.060544013977051, "step": 1132 }, { "epoch": 0.63, "learning_rate": 8.02519206314247e-07, "logits/chosen": -6.044644832611084, "logits/rejected": -5.992549419403076, "logps/chosen": -312.49560546875, "logps/rejected": -202.3363037109375, "loss": 0.183, "rewards/accuracies": 0.9375, "rewards/chosen": 4.348934173583984, "rewards/margins": 9.39487075805664, "rewards/rejected": -5.04593563079834, "step": 1133 }, { "epoch": 0.63, "learning_rate": 8.021610358544498e-07, "logits/chosen": -6.144941806793213, "logits/rejected": -6.111791610717773, "logps/chosen": -262.5271911621094, "logps/rejected": -186.29885864257812, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": 3.448601722717285, "rewards/margins": 8.406960487365723, "rewards/rejected": -4.9583587646484375, "step": 1134 }, { "epoch": 0.63, "learning_rate": 8.018026209722518e-07, "logits/chosen": -6.0041422843933105, "logits/rejected": -6.035800457000732, "logps/chosen": -243.78077697753906, "logps/rejected": -234.10916137695312, "loss": 0.1788, "rewards/accuracies": 0.875, "rewards/chosen": 2.524371862411499, "rewards/margins": 7.450647354125977, "rewards/rejected": -4.926275253295898, "step": 1135 }, { "epoch": 0.63, "learning_rate": 8.0144396195758e-07, "logits/chosen": -6.003296375274658, "logits/rejected": -5.98722505569458, "logps/chosen": -281.1911926269531, "logps/rejected": -173.6881561279297, "loss": 0.1257, "rewards/accuracies": 0.875, "rewards/chosen": 4.462518692016602, "rewards/margins": 8.628705978393555, "rewards/rejected": -4.166187286376953, "step": 1136 }, { "epoch": 0.63, "learning_rate": 8.010850591005589e-07, "logits/chosen": -6.004449367523193, "logits/rejected": -6.00582218170166, "logps/chosen": -264.0318603515625, "logps/rejected": -185.863525390625, "loss": 0.0809, "rewards/accuracies": 1.0, "rewards/chosen": 3.5787582397460938, "rewards/margins": 8.713605880737305, "rewards/rejected": -5.134847640991211, "step": 1137 }, { "epoch": 0.63, "learning_rate": 8.007259126915101e-07, "logits/chosen": -6.047478199005127, "logits/rejected": -6.007688999176025, "logps/chosen": -281.62451171875, "logps/rejected": -282.93597412109375, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": 3.110609769821167, "rewards/margins": 10.823173522949219, "rewards/rejected": -7.7125630378723145, "step": 1138 }, { "epoch": 0.63, "learning_rate": 8.003665230209521e-07, "logits/chosen": -6.070169448852539, "logits/rejected": -6.034229755401611, "logps/chosen": -256.228515625, "logps/rejected": -158.34481811523438, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": 5.413362503051758, "rewards/margins": 9.318279266357422, "rewards/rejected": -3.904916286468506, "step": 1139 }, { "epoch": 0.63, "learning_rate": 8.000068903796006e-07, "logits/chosen": -6.133431911468506, "logits/rejected": -6.010963439941406, "logps/chosen": -402.2516174316406, "logps/rejected": -188.2210235595703, "loss": 0.142, "rewards/accuracies": 1.0, "rewards/chosen": 7.4976348876953125, "rewards/margins": 10.068317413330078, "rewards/rejected": -2.570683002471924, "step": 1140 }, { "epoch": 0.63, "learning_rate": 7.996470150583676e-07, "logits/chosen": -6.098150253295898, "logits/rejected": -6.023287296295166, "logps/chosen": -575.9285888671875, "logps/rejected": -338.2773132324219, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": 4.484109401702881, "rewards/margins": 10.556451797485352, "rewards/rejected": -6.0723419189453125, "step": 1141 }, { "epoch": 0.63, "learning_rate": 7.992868973483615e-07, "logits/chosen": -6.195915699005127, "logits/rejected": -6.081768989562988, "logps/chosen": -267.5556945800781, "logps/rejected": -181.7349853515625, "loss": 0.1421, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0968194007873535, "rewards/margins": 8.246270179748535, "rewards/rejected": -5.14945125579834, "step": 1142 }, { "epoch": 0.63, "learning_rate": 7.989265375408864e-07, "logits/chosen": -6.0011138916015625, "logits/rejected": -5.995405197143555, "logps/chosen": -271.343017578125, "logps/rejected": -230.9110565185547, "loss": 0.1379, "rewards/accuracies": 0.875, "rewards/chosen": 4.623308181762695, "rewards/margins": 8.698301315307617, "rewards/rejected": -4.074993133544922, "step": 1143 }, { "epoch": 0.64, "learning_rate": 7.985659359274428e-07, "logits/chosen": -6.034908294677734, "logits/rejected": -5.91158390045166, "logps/chosen": -295.335205078125, "logps/rejected": -168.4410400390625, "loss": 0.1491, "rewards/accuracies": 1.0, "rewards/chosen": 4.798348903656006, "rewards/margins": 7.035645484924316, "rewards/rejected": -2.2372965812683105, "step": 1144 }, { "epoch": 0.64, "learning_rate": 7.982050927997262e-07, "logits/chosen": -6.169565677642822, "logits/rejected": -6.127771377563477, "logps/chosen": -245.8062744140625, "logps/rejected": -198.82406616210938, "loss": 0.1762, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8408727645874023, "rewards/margins": 7.042374610900879, "rewards/rejected": -4.201501846313477, "step": 1145 }, { "epoch": 0.64, "learning_rate": 7.97844008449628e-07, "logits/chosen": -5.984594345092773, "logits/rejected": -6.017289638519287, "logps/chosen": -384.2126770019531, "logps/rejected": -224.22084045410156, "loss": 0.2583, "rewards/accuracies": 1.0, "rewards/chosen": 5.138515472412109, "rewards/margins": 7.934027671813965, "rewards/rejected": -2.7955121994018555, "step": 1146 }, { "epoch": 0.64, "learning_rate": 7.974826831692347e-07, "logits/chosen": -6.124039649963379, "logits/rejected": -6.2075653076171875, "logps/chosen": -296.3076171875, "logps/rejected": -237.9260711669922, "loss": 0.1295, "rewards/accuracies": 1.0, "rewards/chosen": 3.5620920658111572, "rewards/margins": 8.267481803894043, "rewards/rejected": -4.705389022827148, "step": 1147 }, { "epoch": 0.64, "learning_rate": 7.971211172508274e-07, "logits/chosen": -6.080069065093994, "logits/rejected": -6.009673118591309, "logps/chosen": -224.90585327148438, "logps/rejected": -114.15572357177734, "loss": 0.1794, "rewards/accuracies": 1.0, "rewards/chosen": 2.7220664024353027, "rewards/margins": 6.491966247558594, "rewards/rejected": -3.76990008354187, "step": 1148 }, { "epoch": 0.64, "learning_rate": 7.967593109868816e-07, "logits/chosen": -6.058512210845947, "logits/rejected": -6.068697452545166, "logps/chosen": -231.45101928710938, "logps/rejected": -251.49139404296875, "loss": 0.106, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9920506477355957, "rewards/margins": 8.609760284423828, "rewards/rejected": -5.617709159851074, "step": 1149 }, { "epoch": 0.64, "learning_rate": 7.96397264670068e-07, "logits/chosen": -5.906046390533447, "logits/rejected": -6.00606632232666, "logps/chosen": -388.7838134765625, "logps/rejected": -283.4466247558594, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": 4.3606719970703125, "rewards/margins": 9.595909118652344, "rewards/rejected": -5.235237121582031, "step": 1150 }, { "epoch": 0.64, "learning_rate": 7.960349785932509e-07, "logits/chosen": -6.040643692016602, "logits/rejected": -6.08192777633667, "logps/chosen": -222.4943389892578, "logps/rejected": -290.24346923828125, "loss": 0.1241, "rewards/accuracies": 0.875, "rewards/chosen": 2.3201727867126465, "rewards/margins": 8.48751163482666, "rewards/rejected": -6.1673383712768555, "step": 1151 }, { "epoch": 0.64, "learning_rate": 7.956724530494887e-07, "logits/chosen": -6.033901214599609, "logits/rejected": -6.056934356689453, "logps/chosen": -216.14295959472656, "logps/rejected": -223.46200561523438, "loss": 0.1999, "rewards/accuracies": 0.9375, "rewards/chosen": 4.892193794250488, "rewards/margins": 8.406733512878418, "rewards/rejected": -3.514539957046509, "step": 1152 }, { "epoch": 0.64, "learning_rate": 7.953096883320336e-07, "logits/chosen": -6.080511093139648, "logits/rejected": -6.003183364868164, "logps/chosen": -331.5466613769531, "logps/rejected": -223.01458740234375, "loss": 0.1304, "rewards/accuracies": 0.9375, "rewards/chosen": 3.920295238494873, "rewards/margins": 8.327749252319336, "rewards/rejected": -4.407454013824463, "step": 1153 }, { "epoch": 0.64, "learning_rate": 7.949466847343311e-07, "logits/chosen": -5.988302707672119, "logits/rejected": -5.99582052230835, "logps/chosen": -201.29629516601562, "logps/rejected": -246.4649658203125, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": 2.8073654174804688, "rewards/margins": 7.142858505249023, "rewards/rejected": -4.335493087768555, "step": 1154 }, { "epoch": 0.64, "learning_rate": 7.9458344255002e-07, "logits/chosen": -5.973379611968994, "logits/rejected": -5.986441612243652, "logps/chosen": -432.5057373046875, "logps/rejected": -273.1411437988281, "loss": 0.1116, "rewards/accuracies": 0.9375, "rewards/chosen": 3.94163179397583, "rewards/margins": 6.7886223793029785, "rewards/rejected": -2.8469905853271484, "step": 1155 }, { "epoch": 0.64, "learning_rate": 7.942199620729323e-07, "logits/chosen": -6.082533836364746, "logits/rejected": -5.992378234863281, "logps/chosen": -234.26144409179688, "logps/rejected": -95.78205871582031, "loss": 0.0639, "rewards/accuracies": 0.9375, "rewards/chosen": 5.173867702484131, "rewards/margins": 8.400843620300293, "rewards/rejected": -3.226975440979004, "step": 1156 }, { "epoch": 0.64, "learning_rate": 7.938562435970924e-07, "logits/chosen": -6.0542893409729, "logits/rejected": -6.029457092285156, "logps/chosen": -269.94378662109375, "logps/rejected": -191.1302490234375, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": 5.156626224517822, "rewards/margins": 9.647424697875977, "rewards/rejected": -4.490798473358154, "step": 1157 }, { "epoch": 0.64, "learning_rate": 7.934922874167173e-07, "logits/chosen": -6.049599647521973, "logits/rejected": -6.05776309967041, "logps/chosen": -402.8728942871094, "logps/rejected": -485.4306640625, "loss": 0.1315, "rewards/accuracies": 0.875, "rewards/chosen": 1.9085428714752197, "rewards/margins": 7.0615739822387695, "rewards/rejected": -5.153031826019287, "step": 1158 }, { "epoch": 0.64, "learning_rate": 7.931280938262168e-07, "logits/chosen": -6.109054088592529, "logits/rejected": -6.01610803604126, "logps/chosen": -266.4964904785156, "logps/rejected": -180.5867919921875, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": 4.237823486328125, "rewards/margins": 8.665630340576172, "rewards/rejected": -4.427807331085205, "step": 1159 }, { "epoch": 0.64, "learning_rate": 7.92763663120192e-07, "logits/chosen": -6.121335983276367, "logits/rejected": -6.12330961227417, "logps/chosen": -286.0820007324219, "logps/rejected": -312.43731689453125, "loss": 0.1623, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7074203491210938, "rewards/margins": 9.429570198059082, "rewards/rejected": -5.7221503257751465, "step": 1160 }, { "epoch": 0.64, "learning_rate": 7.923989955934362e-07, "logits/chosen": -5.9861345291137695, "logits/rejected": -5.995604515075684, "logps/chosen": -222.2103271484375, "logps/rejected": -175.33251953125, "loss": 0.0854, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7136073112487793, "rewards/margins": 5.858940124511719, "rewards/rejected": -3.1453328132629395, "step": 1161 }, { "epoch": 0.65, "learning_rate": 7.920340915409342e-07, "logits/chosen": -6.031702041625977, "logits/rejected": -6.020167350769043, "logps/chosen": -164.8861083984375, "logps/rejected": -212.04925537109375, "loss": 0.1526, "rewards/accuracies": 1.0, "rewards/chosen": 2.149869441986084, "rewards/margins": 7.0298752784729, "rewards/rejected": -4.880005836486816, "step": 1162 }, { "epoch": 0.65, "learning_rate": 7.91668951257862e-07, "logits/chosen": -5.981973171234131, "logits/rejected": -6.091398239135742, "logps/chosen": -263.5066223144531, "logps/rejected": -278.67510986328125, "loss": 0.1383, "rewards/accuracies": 0.9375, "rewards/chosen": 4.3452067375183105, "rewards/margins": 8.7535982131958, "rewards/rejected": -4.408390998840332, "step": 1163 }, { "epoch": 0.65, "learning_rate": 7.91303575039587e-07, "logits/chosen": -5.984492301940918, "logits/rejected": -6.026705265045166, "logps/chosen": -498.7265625, "logps/rejected": -697.2202758789062, "loss": 0.1439, "rewards/accuracies": 0.8125, "rewards/chosen": 3.1126482486724854, "rewards/margins": 8.158661842346191, "rewards/rejected": -5.046013355255127, "step": 1164 }, { "epoch": 0.65, "learning_rate": 7.909379631816673e-07, "logits/chosen": -6.119069576263428, "logits/rejected": -6.074241638183594, "logps/chosen": -270.8763732910156, "logps/rejected": -254.1182861328125, "loss": 0.0915, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7539384365081787, "rewards/margins": 8.304045677185059, "rewards/rejected": -5.550107002258301, "step": 1165 }, { "epoch": 0.65, "learning_rate": 7.905721159798513e-07, "logits/chosen": -6.105257511138916, "logits/rejected": -6.109031677246094, "logps/chosen": -167.95851135253906, "logps/rejected": -230.12457275390625, "loss": 0.1832, "rewards/accuracies": 0.875, "rewards/chosen": 3.0683555603027344, "rewards/margins": 7.954100608825684, "rewards/rejected": -4.885745048522949, "step": 1166 }, { "epoch": 0.65, "learning_rate": 7.902060337300784e-07, "logits/chosen": -6.121033668518066, "logits/rejected": -5.931591033935547, "logps/chosen": -519.4784545898438, "logps/rejected": -351.2183532714844, "loss": 0.1685, "rewards/accuracies": 0.875, "rewards/chosen": 2.137507915496826, "rewards/margins": 4.424890518188477, "rewards/rejected": -2.287382125854492, "step": 1167 }, { "epoch": 0.65, "learning_rate": 7.898397167284776e-07, "logits/chosen": -6.0370378494262695, "logits/rejected": -6.070746421813965, "logps/chosen": -159.52200317382812, "logps/rejected": -254.0201873779297, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 1.2597057819366455, "rewards/margins": 7.167049407958984, "rewards/rejected": -5.907343864440918, "step": 1168 }, { "epoch": 0.65, "learning_rate": 7.89473165271368e-07, "logits/chosen": -6.129443168640137, "logits/rejected": -6.044654846191406, "logps/chosen": -240.97747802734375, "logps/rejected": -175.38882446289062, "loss": 0.1145, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8731143474578857, "rewards/margins": 7.880565643310547, "rewards/rejected": -5.007451057434082, "step": 1169 }, { "epoch": 0.65, "learning_rate": 7.891063796552584e-07, "logits/chosen": -6.0528717041015625, "logits/rejected": -6.100050926208496, "logps/chosen": -342.8522033691406, "logps/rejected": -170.41427612304688, "loss": 0.1756, "rewards/accuracies": 1.0, "rewards/chosen": 6.982723712921143, "rewards/margins": 10.970176696777344, "rewards/rejected": -3.9874534606933594, "step": 1170 }, { "epoch": 0.65, "learning_rate": 7.887393601768467e-07, "logits/chosen": -6.027454853057861, "logits/rejected": -5.940363883972168, "logps/chosen": -492.4783630371094, "logps/rejected": -284.6382751464844, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": 3.8375487327575684, "rewards/margins": 9.194808959960938, "rewards/rejected": -5.357260704040527, "step": 1171 }, { "epoch": 0.65, "learning_rate": 7.883721071330206e-07, "logits/chosen": -6.045798301696777, "logits/rejected": -5.895071029663086, "logps/chosen": -259.4369201660156, "logps/rejected": -165.07177734375, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": 2.630918025970459, "rewards/margins": 8.249153137207031, "rewards/rejected": -5.618235111236572, "step": 1172 }, { "epoch": 0.65, "learning_rate": 7.880046208208562e-07, "logits/chosen": -6.028651237487793, "logits/rejected": -6.008401393890381, "logps/chosen": -202.655517578125, "logps/rejected": -265.33544921875, "loss": 0.2051, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2577033042907715, "rewards/margins": 6.62693452835083, "rewards/rejected": -4.3692307472229, "step": 1173 }, { "epoch": 0.65, "learning_rate": 7.876369015376185e-07, "logits/chosen": -5.947914123535156, "logits/rejected": -6.037431240081787, "logps/chosen": -535.6672973632812, "logps/rejected": -308.87969970703125, "loss": 0.1987, "rewards/accuracies": 1.0, "rewards/chosen": 2.2612311840057373, "rewards/margins": 5.3181843757629395, "rewards/rejected": -3.056952953338623, "step": 1174 }, { "epoch": 0.65, "learning_rate": 7.872689495807607e-07, "logits/chosen": -5.90370512008667, "logits/rejected": -5.971344947814941, "logps/chosen": -208.5885009765625, "logps/rejected": -382.5224304199219, "loss": 0.1336, "rewards/accuracies": 1.0, "rewards/chosen": 3.5921506881713867, "rewards/margins": 6.873295307159424, "rewards/rejected": -3.281144618988037, "step": 1175 }, { "epoch": 0.65, "learning_rate": 7.869007652479247e-07, "logits/chosen": -6.038125038146973, "logits/rejected": -6.043197154998779, "logps/chosen": -158.95697021484375, "logps/rejected": -169.10252380371094, "loss": 0.1162, "rewards/accuracies": 0.9375, "rewards/chosen": 2.434225082397461, "rewards/margins": 6.48309326171875, "rewards/rejected": -4.048867702484131, "step": 1176 }, { "epoch": 0.65, "learning_rate": 7.865323488369398e-07, "logits/chosen": -6.211824893951416, "logits/rejected": -6.055037021636963, "logps/chosen": -339.81646728515625, "logps/rejected": -145.66018676757812, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": 5.721136569976807, "rewards/margins": 9.688755989074707, "rewards/rejected": -3.967618942260742, "step": 1177 }, { "epoch": 0.65, "learning_rate": 7.861637006458237e-07, "logits/chosen": -6.015261173248291, "logits/rejected": -6.034191131591797, "logps/chosen": -262.6189880371094, "logps/rejected": -172.20449829101562, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": 4.297458171844482, "rewards/margins": 8.218559265136719, "rewards/rejected": -3.9211015701293945, "step": 1178 }, { "epoch": 0.65, "learning_rate": 7.857948209727809e-07, "logits/chosen": -6.089540004730225, "logits/rejected": -6.001619338989258, "logps/chosen": -428.8447265625, "logps/rejected": -250.74798583984375, "loss": 0.1648, "rewards/accuracies": 1.0, "rewards/chosen": 3.760552406311035, "rewards/margins": 8.62395191192627, "rewards/rejected": -4.863398551940918, "step": 1179 }, { "epoch": 0.66, "learning_rate": 7.854257101162036e-07, "logits/chosen": -6.066147804260254, "logits/rejected": -6.017991542816162, "logps/chosen": -288.4270935058594, "logps/rejected": -231.92962646484375, "loss": 0.1159, "rewards/accuracies": 0.9375, "rewards/chosen": 5.143889427185059, "rewards/margins": 10.547615051269531, "rewards/rejected": -5.4037251472473145, "step": 1180 }, { "epoch": 0.66, "learning_rate": 7.85056368374671e-07, "logits/chosen": -5.918069362640381, "logits/rejected": -5.978036403656006, "logps/chosen": -537.0894775390625, "logps/rejected": -351.43719482421875, "loss": 0.13, "rewards/accuracies": 0.9375, "rewards/chosen": 4.007433891296387, "rewards/margins": 8.884529113769531, "rewards/rejected": -4.8770952224731445, "step": 1181 }, { "epoch": 0.66, "learning_rate": 7.846867960469485e-07, "logits/chosen": -6.008528709411621, "logits/rejected": -6.0528059005737305, "logps/chosen": -282.26715087890625, "logps/rejected": -285.90185546875, "loss": 0.1431, "rewards/accuracies": 0.875, "rewards/chosen": 3.7472167015075684, "rewards/margins": 8.393765449523926, "rewards/rejected": -4.646549224853516, "step": 1182 }, { "epoch": 0.66, "learning_rate": 7.843169934319889e-07, "logits/chosen": -6.004192352294922, "logits/rejected": -6.0207109451293945, "logps/chosen": -381.4297180175781, "logps/rejected": -304.3707580566406, "loss": 0.1248, "rewards/accuracies": 1.0, "rewards/chosen": 5.957542419433594, "rewards/margins": 9.821344375610352, "rewards/rejected": -3.863801956176758, "step": 1183 }, { "epoch": 0.66, "learning_rate": 7.839469608289306e-07, "logits/chosen": -6.0031280517578125, "logits/rejected": -6.075742244720459, "logps/chosen": -213.11276245117188, "logps/rejected": -325.0307312011719, "loss": 0.0846, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6206207275390625, "rewards/margins": 9.03106689453125, "rewards/rejected": -7.4104461669921875, "step": 1184 }, { "epoch": 0.66, "learning_rate": 7.835766985370985e-07, "logits/chosen": -6.036684513092041, "logits/rejected": -6.08386754989624, "logps/chosen": -289.41943359375, "logps/rejected": -270.5193786621094, "loss": 0.0772, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1329619884490967, "rewards/margins": 8.665365219116211, "rewards/rejected": -5.532402992248535, "step": 1185 }, { "epoch": 0.66, "learning_rate": 7.832062068560029e-07, "logits/chosen": -6.132328033447266, "logits/rejected": -6.125805377960205, "logps/chosen": -239.3499298095703, "logps/rejected": -315.3807373046875, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": 4.024970054626465, "rewards/margins": 9.971384048461914, "rewards/rejected": -5.946413993835449, "step": 1186 }, { "epoch": 0.66, "learning_rate": 7.828354860853399e-07, "logits/chosen": -6.030989646911621, "logits/rejected": -6.066253662109375, "logps/chosen": -254.76580810546875, "logps/rejected": -188.99807739257812, "loss": 0.0619, "rewards/accuracies": 0.9375, "rewards/chosen": 5.168130874633789, "rewards/margins": 8.916954040527344, "rewards/rejected": -3.748823642730713, "step": 1187 }, { "epoch": 0.66, "learning_rate": 7.824645365249909e-07, "logits/chosen": -6.274206638336182, "logits/rejected": -6.04735803604126, "logps/chosen": -219.7831268310547, "logps/rejected": -132.09759521484375, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": 2.643317222595215, "rewards/margins": 7.453429222106934, "rewards/rejected": -4.8101115226745605, "step": 1188 }, { "epoch": 0.66, "learning_rate": 7.820933584750223e-07, "logits/chosen": -6.066831588745117, "logits/rejected": -6.026650428771973, "logps/chosen": -513.912109375, "logps/rejected": -339.07086181640625, "loss": 0.0468, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0254054069519043, "rewards/margins": 8.193268775939941, "rewards/rejected": -5.167862892150879, "step": 1189 }, { "epoch": 0.66, "learning_rate": 7.817219522356853e-07, "logits/chosen": -6.2094244956970215, "logits/rejected": -5.947554111480713, "logps/chosen": -302.4980773925781, "logps/rejected": -137.2402801513672, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 5.998326301574707, "rewards/margins": 11.716205596923828, "rewards/rejected": -5.7178802490234375, "step": 1190 }, { "epoch": 0.66, "learning_rate": 7.813503181074157e-07, "logits/chosen": -6.016927242279053, "logits/rejected": -6.051432132720947, "logps/chosen": -196.46835327148438, "logps/rejected": -142.93716430664062, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 2.5015597343444824, "rewards/margins": 8.441475868225098, "rewards/rejected": -5.939916133880615, "step": 1191 }, { "epoch": 0.66, "learning_rate": 7.809784563908339e-07, "logits/chosen": -6.1039137840271, "logits/rejected": -6.124124050140381, "logps/chosen": -239.0525665283203, "logps/rejected": -228.24444580078125, "loss": 0.1598, "rewards/accuracies": 1.0, "rewards/chosen": 3.7647533416748047, "rewards/margins": 9.610077857971191, "rewards/rejected": -5.845324516296387, "step": 1192 }, { "epoch": 0.66, "learning_rate": 7.806063673867439e-07, "logits/chosen": -6.059421062469482, "logits/rejected": -6.017416000366211, "logps/chosen": -242.19671630859375, "logps/rejected": -162.75125122070312, "loss": 0.1239, "rewards/accuracies": 0.875, "rewards/chosen": 5.507048606872559, "rewards/margins": 6.821662902832031, "rewards/rejected": -1.3146138191223145, "step": 1193 }, { "epoch": 0.66, "learning_rate": 7.802340513961341e-07, "logits/chosen": -6.0805277824401855, "logits/rejected": -6.0517168045043945, "logps/chosen": -163.3236541748047, "logps/rejected": -137.0320587158203, "loss": 0.0968, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7248021364212036, "rewards/margins": 6.463040351867676, "rewards/rejected": -4.738238334655762, "step": 1194 }, { "epoch": 0.66, "learning_rate": 7.798615087201761e-07, "logits/chosen": -6.021080493927002, "logits/rejected": -6.107555389404297, "logps/chosen": -268.97540283203125, "logps/rejected": -258.123779296875, "loss": 0.1861, "rewards/accuracies": 0.9375, "rewards/chosen": 2.169187068939209, "rewards/margins": 7.926180362701416, "rewards/rejected": -5.756993293762207, "step": 1195 }, { "epoch": 0.66, "learning_rate": 7.79488739660225e-07, "logits/chosen": -6.059578895568848, "logits/rejected": -5.994654655456543, "logps/chosen": -248.28907775878906, "logps/rejected": -214.66986083984375, "loss": 0.115, "rewards/accuracies": 0.875, "rewards/chosen": 2.9491379261016846, "rewards/margins": 8.37936782836914, "rewards/rejected": -5.430230140686035, "step": 1196 }, { "epoch": 0.66, "learning_rate": 7.791157445178192e-07, "logits/chosen": -5.971706390380859, "logits/rejected": -5.989594459533691, "logps/chosen": -232.90274047851562, "logps/rejected": -195.41583251953125, "loss": 0.182, "rewards/accuracies": 1.0, "rewards/chosen": 2.8244755268096924, "rewards/margins": 7.992788314819336, "rewards/rejected": -5.168312072753906, "step": 1197 }, { "epoch": 0.67, "learning_rate": 7.787425235946797e-07, "logits/chosen": -6.084181785583496, "logits/rejected": -6.116888999938965, "logps/chosen": -319.7836608886719, "logps/rejected": -292.400634765625, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 3.632826328277588, "rewards/margins": 9.574518203735352, "rewards/rejected": -5.9416913986206055, "step": 1198 }, { "epoch": 0.67, "learning_rate": 7.783690771927103e-07, "logits/chosen": -6.085404872894287, "logits/rejected": -6.026186466217041, "logps/chosen": -255.34010314941406, "logps/rejected": -188.79473876953125, "loss": 0.0893, "rewards/accuracies": 0.9375, "rewards/chosen": 4.451929092407227, "rewards/margins": 8.333044052124023, "rewards/rejected": -3.881115198135376, "step": 1199 }, { "epoch": 0.67, "learning_rate": 7.77995405613997e-07, "logits/chosen": -5.956398010253906, "logits/rejected": -5.971601486206055, "logps/chosen": -181.76580810546875, "logps/rejected": -192.927734375, "loss": 0.1243, "rewards/accuracies": 0.875, "rewards/chosen": 3.182033061981201, "rewards/margins": 7.009710311889648, "rewards/rejected": -3.8276777267456055, "step": 1200 }, { "epoch": 0.67, "learning_rate": 7.776215091608085e-07, "logits/chosen": -6.026005744934082, "logits/rejected": -6.152562618255615, "logps/chosen": -249.16050720214844, "logps/rejected": -268.29827880859375, "loss": 0.1094, "rewards/accuracies": 1.0, "rewards/chosen": 3.530717611312866, "rewards/margins": 9.519245147705078, "rewards/rejected": -5.988528251647949, "step": 1201 }, { "epoch": 0.67, "learning_rate": 7.772473881355946e-07, "logits/chosen": -6.058902263641357, "logits/rejected": -5.979567050933838, "logps/chosen": -233.57904052734375, "logps/rejected": -176.82937622070312, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": 3.0475621223449707, "rewards/margins": 6.958279609680176, "rewards/rejected": -3.910717487335205, "step": 1202 }, { "epoch": 0.67, "learning_rate": 7.768730428409874e-07, "logits/chosen": -6.084634304046631, "logits/rejected": -6.065637111663818, "logps/chosen": -217.53744506835938, "logps/rejected": -199.98004150390625, "loss": 0.1155, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9900732040405273, "rewards/margins": 7.750467300415039, "rewards/rejected": -4.760394096374512, "step": 1203 }, { "epoch": 0.67, "learning_rate": 7.764984735798002e-07, "logits/chosen": -6.052178382873535, "logits/rejected": -6.065422058105469, "logps/chosen": -246.4319610595703, "logps/rejected": -146.5522003173828, "loss": 0.1167, "rewards/accuracies": 1.0, "rewards/chosen": 5.67214822769165, "rewards/margins": 11.479593276977539, "rewards/rejected": -5.807445049285889, "step": 1204 }, { "epoch": 0.67, "learning_rate": 7.761236806550271e-07, "logits/chosen": -5.980053901672363, "logits/rejected": -5.931022644042969, "logps/chosen": -234.06629943847656, "logps/rejected": -169.33584594726562, "loss": 0.0509, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6890816688537598, "rewards/margins": 9.27683162689209, "rewards/rejected": -5.587749481201172, "step": 1205 }, { "epoch": 0.67, "learning_rate": 7.757486643698439e-07, "logits/chosen": -5.96361780166626, "logits/rejected": -6.127939224243164, "logps/chosen": -206.3937225341797, "logps/rejected": -294.6588439941406, "loss": 0.1618, "rewards/accuracies": 0.9375, "rewards/chosen": 1.2059731483459473, "rewards/margins": 7.929281234741211, "rewards/rejected": -6.723308086395264, "step": 1206 }, { "epoch": 0.67, "learning_rate": 7.753734250276066e-07, "logits/chosen": -6.069872856140137, "logits/rejected": -6.115445137023926, "logps/chosen": -210.33255004882812, "logps/rejected": -181.84185791015625, "loss": 0.1224, "rewards/accuracies": 0.9375, "rewards/chosen": 3.053764581680298, "rewards/margins": 7.673853397369385, "rewards/rejected": -4.620088577270508, "step": 1207 }, { "epoch": 0.67, "learning_rate": 7.749979629318514e-07, "logits/chosen": -6.006708145141602, "logits/rejected": -6.063028335571289, "logps/chosen": -240.41729736328125, "logps/rejected": -216.97024536132812, "loss": 0.1525, "rewards/accuracies": 0.9375, "rewards/chosen": 3.794862747192383, "rewards/margins": 9.926589012145996, "rewards/rejected": -6.13172721862793, "step": 1208 }, { "epoch": 0.67, "learning_rate": 7.746222783862955e-07, "logits/chosen": -6.083585739135742, "logits/rejected": -6.052544593811035, "logps/chosen": -344.3232116699219, "logps/rejected": -143.09555053710938, "loss": 0.3855, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6933953762054443, "rewards/margins": 8.769853591918945, "rewards/rejected": -5.076457977294922, "step": 1209 }, { "epoch": 0.67, "learning_rate": 7.742463716948349e-07, "logits/chosen": -6.040247440338135, "logits/rejected": -6.03994083404541, "logps/chosen": -255.66371154785156, "logps/rejected": -254.69749450683594, "loss": 0.0563, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7555850744247437, "rewards/margins": 8.083250999450684, "rewards/rejected": -6.327666282653809, "step": 1210 }, { "epoch": 0.67, "learning_rate": 7.738702431615463e-07, "logits/chosen": -6.01219367980957, "logits/rejected": -6.019203186035156, "logps/chosen": -331.9346923828125, "logps/rejected": -393.698486328125, "loss": 0.1292, "rewards/accuracies": 0.8125, "rewards/chosen": 2.9733433723449707, "rewards/margins": 6.153546333312988, "rewards/rejected": -3.180203437805176, "step": 1211 }, { "epoch": 0.67, "learning_rate": 7.734938930906853e-07, "logits/chosen": -6.147035598754883, "logits/rejected": -6.0834832191467285, "logps/chosen": -246.96987915039062, "logps/rejected": -222.15650939941406, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": 3.0383403301239014, "rewards/margins": 9.135601997375488, "rewards/rejected": -6.097261905670166, "step": 1212 }, { "epoch": 0.67, "learning_rate": 7.731173217866872e-07, "logits/chosen": -5.997942924499512, "logits/rejected": -5.987437725067139, "logps/chosen": -194.0274200439453, "logps/rejected": -206.93072509765625, "loss": 0.0589, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5580341219902039, "rewards/margins": 6.75298547744751, "rewards/rejected": -6.19495153427124, "step": 1213 }, { "epoch": 0.67, "learning_rate": 7.727405295541656e-07, "logits/chosen": -6.0375823974609375, "logits/rejected": -6.095705032348633, "logps/chosen": -326.5381164550781, "logps/rejected": -214.58990478515625, "loss": 0.1368, "rewards/accuracies": 1.0, "rewards/chosen": 5.53263521194458, "rewards/margins": 9.908272743225098, "rewards/rejected": -4.375638008117676, "step": 1214 }, { "epoch": 0.67, "learning_rate": 7.723635166979132e-07, "logits/chosen": -6.017529487609863, "logits/rejected": -6.100589752197266, "logps/chosen": -249.15213012695312, "logps/rejected": -226.11624145507812, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/chosen": 2.669546604156494, "rewards/margins": 10.702880859375, "rewards/rejected": -8.033333778381348, "step": 1215 }, { "epoch": 0.68, "learning_rate": 7.719862835229013e-07, "logits/chosen": -6.024231433868408, "logits/rejected": -6.063839435577393, "logps/chosen": -230.5096435546875, "logps/rejected": -208.66976928710938, "loss": 0.1814, "rewards/accuracies": 0.9375, "rewards/chosen": 4.89434814453125, "rewards/margins": 11.19705581665039, "rewards/rejected": -6.302707195281982, "step": 1216 }, { "epoch": 0.68, "learning_rate": 7.716088303342789e-07, "logits/chosen": -6.13828182220459, "logits/rejected": -6.085996150970459, "logps/chosen": -297.681640625, "logps/rejected": -282.640869140625, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 5.348306179046631, "rewards/margins": 11.012192726135254, "rewards/rejected": -5.663885116577148, "step": 1217 }, { "epoch": 0.68, "learning_rate": 7.712311574373738e-07, "logits/chosen": -6.008792400360107, "logits/rejected": -6.112948417663574, "logps/chosen": -210.1962890625, "logps/rejected": -250.1320037841797, "loss": 0.1199, "rewards/accuracies": 0.9375, "rewards/chosen": 2.568221092224121, "rewards/margins": 9.742046356201172, "rewards/rejected": -7.173824787139893, "step": 1218 }, { "epoch": 0.68, "learning_rate": 7.708532651376905e-07, "logits/chosen": -6.120410919189453, "logits/rejected": -5.980255603790283, "logps/chosen": -339.27606201171875, "logps/rejected": -169.07872009277344, "loss": 0.044, "rewards/accuracies": 0.875, "rewards/chosen": 2.1633400917053223, "rewards/margins": 6.527525901794434, "rewards/rejected": -4.364185810089111, "step": 1219 }, { "epoch": 0.68, "learning_rate": 7.704751537409117e-07, "logits/chosen": -6.189390182495117, "logits/rejected": -6.077666759490967, "logps/chosen": -422.68634033203125, "logps/rejected": -336.44073486328125, "loss": 0.1487, "rewards/accuracies": 0.9375, "rewards/chosen": 4.750012397766113, "rewards/margins": 9.235153198242188, "rewards/rejected": -4.485140800476074, "step": 1220 }, { "epoch": 0.68, "learning_rate": 7.700968235528973e-07, "logits/chosen": -6.145337104797363, "logits/rejected": -6.071909427642822, "logps/chosen": -553.6163940429688, "logps/rejected": -510.75970458984375, "loss": 0.1755, "rewards/accuracies": 0.9375, "rewards/chosen": 5.302380561828613, "rewards/margins": 7.774694919586182, "rewards/rejected": -2.4723143577575684, "step": 1221 }, { "epoch": 0.68, "learning_rate": 7.69718274879684e-07, "logits/chosen": -6.1768693923950195, "logits/rejected": -6.129026412963867, "logps/chosen": -276.1714172363281, "logps/rejected": -237.2880401611328, "loss": 0.1665, "rewards/accuracies": 0.875, "rewards/chosen": 3.388388156890869, "rewards/margins": 7.237593173980713, "rewards/rejected": -3.849205493927002, "step": 1222 }, { "epoch": 0.68, "learning_rate": 7.693395080274849e-07, "logits/chosen": -5.980221748352051, "logits/rejected": -5.925684928894043, "logps/chosen": -252.9662628173828, "logps/rejected": -153.64129638671875, "loss": 0.1133, "rewards/accuracies": 1.0, "rewards/chosen": 4.556216239929199, "rewards/margins": 9.673234939575195, "rewards/rejected": -5.117018699645996, "step": 1223 }, { "epoch": 0.68, "learning_rate": 7.689605233026903e-07, "logits/chosen": -6.122570037841797, "logits/rejected": -6.064699172973633, "logps/chosen": -246.4972381591797, "logps/rejected": -216.98788452148438, "loss": 0.0794, "rewards/accuracies": 0.9375, "rewards/chosen": 3.656938314437866, "rewards/margins": 9.612564086914062, "rewards/rejected": -5.955625534057617, "step": 1224 }, { "epoch": 0.68, "learning_rate": 7.685813210118663e-07, "logits/chosen": -6.022034645080566, "logits/rejected": -5.98155403137207, "logps/chosen": -414.5570068359375, "logps/rejected": -341.95867919921875, "loss": 0.1033, "rewards/accuracies": 0.9375, "rewards/chosen": 4.473957061767578, "rewards/margins": 8.32880687713623, "rewards/rejected": -3.8548502922058105, "step": 1225 }, { "epoch": 0.68, "learning_rate": 7.682019014617553e-07, "logits/chosen": -6.092354774475098, "logits/rejected": -6.091029167175293, "logps/chosen": -275.5772705078125, "logps/rejected": -212.093505859375, "loss": 0.1195, "rewards/accuracies": 0.9375, "rewards/chosen": 3.463554859161377, "rewards/margins": 8.785066604614258, "rewards/rejected": -5.321512222290039, "step": 1226 }, { "epoch": 0.68, "learning_rate": 7.678222649592747e-07, "logits/chosen": -5.989055156707764, "logits/rejected": -6.003298759460449, "logps/chosen": -237.74472045898438, "logps/rejected": -229.43130493164062, "loss": 0.1255, "rewards/accuracies": 0.8125, "rewards/chosen": 1.8970046043395996, "rewards/margins": 6.667085647583008, "rewards/rejected": -4.770081520080566, "step": 1227 }, { "epoch": 0.68, "learning_rate": 7.674424118115184e-07, "logits/chosen": -6.089328765869141, "logits/rejected": -6.156499862670898, "logps/chosen": -229.0035400390625, "logps/rejected": -237.7056121826172, "loss": 0.1425, "rewards/accuracies": 0.9375, "rewards/chosen": 4.916130542755127, "rewards/margins": 8.443757057189941, "rewards/rejected": -3.5276267528533936, "step": 1228 }, { "epoch": 0.68, "learning_rate": 7.670623423257547e-07, "logits/chosen": -6.027183532714844, "logits/rejected": -6.094977855682373, "logps/chosen": -282.1151428222656, "logps/rejected": -244.297119140625, "loss": 0.2006, "rewards/accuracies": 0.9375, "rewards/chosen": 3.617676258087158, "rewards/margins": 10.164041519165039, "rewards/rejected": -6.546365737915039, "step": 1229 }, { "epoch": 0.68, "learning_rate": 7.666820568094275e-07, "logits/chosen": -5.979430675506592, "logits/rejected": -6.12558650970459, "logps/chosen": -440.69696044921875, "logps/rejected": -417.7516784667969, "loss": 0.1055, "rewards/accuracies": 0.875, "rewards/chosen": 1.081465721130371, "rewards/margins": 5.416512966156006, "rewards/rejected": -4.335047245025635, "step": 1230 }, { "epoch": 0.68, "learning_rate": 7.66301555570155e-07, "logits/chosen": -6.143494606018066, "logits/rejected": -6.026765823364258, "logps/chosen": -456.3009338378906, "logps/rejected": -211.10781860351562, "loss": 0.1365, "rewards/accuracies": 0.875, "rewards/chosen": 1.9375841617584229, "rewards/margins": 7.587225914001465, "rewards/rejected": -5.649641990661621, "step": 1231 }, { "epoch": 0.68, "learning_rate": 7.659208389157305e-07, "logits/chosen": -6.047993183135986, "logits/rejected": -6.0531744956970215, "logps/chosen": -268.51837158203125, "logps/rejected": -264.01678466796875, "loss": 0.1433, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9178062677383423, "rewards/margins": 6.202149391174316, "rewards/rejected": -4.284343719482422, "step": 1232 }, { "epoch": 0.68, "learning_rate": 7.65539907154121e-07, "logits/chosen": -6.023699760437012, "logits/rejected": -6.080851078033447, "logps/chosen": -225.87298583984375, "logps/rejected": -192.4325408935547, "loss": 0.1891, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5432403087615967, "rewards/margins": 6.886648178100586, "rewards/rejected": -4.34340763092041, "step": 1233 }, { "epoch": 0.69, "learning_rate": 7.651587605934675e-07, "logits/chosen": -6.092856407165527, "logits/rejected": -6.091909885406494, "logps/chosen": -351.75616455078125, "logps/rejected": -243.72171020507812, "loss": 0.073, "rewards/accuracies": 1.0, "rewards/chosen": 5.948168754577637, "rewards/margins": 10.719989776611328, "rewards/rejected": -4.771821975708008, "step": 1234 }, { "epoch": 0.69, "learning_rate": 7.64777399542085e-07, "logits/chosen": -5.9947614669799805, "logits/rejected": -6.050779819488525, "logps/chosen": -200.66986083984375, "logps/rejected": -254.62002563476562, "loss": 0.1212, "rewards/accuracies": 1.0, "rewards/chosen": 3.927788019180298, "rewards/margins": 11.014246940612793, "rewards/rejected": -7.086458683013916, "step": 1235 }, { "epoch": 0.69, "learning_rate": 7.643958243084619e-07, "logits/chosen": -6.0730085372924805, "logits/rejected": -5.988588333129883, "logps/chosen": -169.52304077148438, "logps/rejected": -209.30775451660156, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": 2.0738706588745117, "rewards/margins": 8.138635635375977, "rewards/rejected": -6.064764976501465, "step": 1236 }, { "epoch": 0.69, "learning_rate": 7.640140352012601e-07, "logits/chosen": -6.146800518035889, "logits/rejected": -6.010822772979736, "logps/chosen": -204.39671325683594, "logps/rejected": -226.9562225341797, "loss": 0.165, "rewards/accuracies": 0.9375, "rewards/chosen": 1.945433497428894, "rewards/margins": 6.153772354125977, "rewards/rejected": -4.208338737487793, "step": 1237 }, { "epoch": 0.69, "learning_rate": 7.63632032529314e-07, "logits/chosen": -6.057021141052246, "logits/rejected": -5.966213226318359, "logps/chosen": -254.92398071289062, "logps/rejected": -173.32150268554688, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": 4.659049034118652, "rewards/margins": 7.765229225158691, "rewards/rejected": -3.1061806678771973, "step": 1238 }, { "epoch": 0.69, "learning_rate": 7.632498166016313e-07, "logits/chosen": -6.104081153869629, "logits/rejected": -6.049587249755859, "logps/chosen": -198.3680419921875, "logps/rejected": -207.7176055908203, "loss": 0.0354, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5645997524261475, "rewards/margins": 8.000565528869629, "rewards/rejected": -5.4359660148620605, "step": 1239 }, { "epoch": 0.69, "learning_rate": 7.628673877273918e-07, "logits/chosen": -6.030216217041016, "logits/rejected": -6.051486968994141, "logps/chosen": -297.72906494140625, "logps/rejected": -281.1402587890625, "loss": 0.1086, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9521539211273193, "rewards/margins": 10.026297569274902, "rewards/rejected": -6.074143886566162, "step": 1240 }, { "epoch": 0.69, "learning_rate": 7.624847462159478e-07, "logits/chosen": -6.012128829956055, "logits/rejected": -6.099501132965088, "logps/chosen": -178.7427520751953, "logps/rejected": -205.02886962890625, "loss": 0.1195, "rewards/accuracies": 1.0, "rewards/chosen": 2.976414918899536, "rewards/margins": 9.313131332397461, "rewards/rejected": -6.336716651916504, "step": 1241 }, { "epoch": 0.69, "learning_rate": 7.621018923768234e-07, "logits/chosen": -6.055964469909668, "logits/rejected": -6.020269393920898, "logps/chosen": -544.48291015625, "logps/rejected": -344.1703796386719, "loss": 0.1679, "rewards/accuracies": 0.8125, "rewards/chosen": 7.756343841552734, "rewards/margins": 7.526614189147949, "rewards/rejected": 0.22972941398620605, "step": 1242 }, { "epoch": 0.69, "learning_rate": 7.617188265197148e-07, "logits/chosen": -5.959918022155762, "logits/rejected": -6.0409369468688965, "logps/chosen": -187.77113342285156, "logps/rejected": -248.5814666748047, "loss": 0.1125, "rewards/accuracies": 1.0, "rewards/chosen": 1.3308436870574951, "rewards/margins": 6.465399742126465, "rewards/rejected": -5.134555816650391, "step": 1243 }, { "epoch": 0.69, "learning_rate": 7.61335548954489e-07, "logits/chosen": -6.08536958694458, "logits/rejected": -6.029773712158203, "logps/chosen": -230.22427368164062, "logps/rejected": -232.3740997314453, "loss": 0.0838, "rewards/accuracies": 1.0, "rewards/chosen": 3.573425054550171, "rewards/margins": 9.952119827270508, "rewards/rejected": -6.378695487976074, "step": 1244 }, { "epoch": 0.69, "learning_rate": 7.609520599911853e-07, "logits/chosen": -6.022909641265869, "logits/rejected": -5.938808441162109, "logps/chosen": -222.46707153320312, "logps/rejected": -98.29277038574219, "loss": 0.1862, "rewards/accuracies": 1.0, "rewards/chosen": 4.010836601257324, "rewards/margins": 7.910682201385498, "rewards/rejected": -3.899845838546753, "step": 1245 }, { "epoch": 0.69, "learning_rate": 7.60568359940013e-07, "logits/chosen": -6.064361095428467, "logits/rejected": -6.09885311126709, "logps/chosen": -277.94384765625, "logps/rejected": -283.54803466796875, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": 3.31642746925354, "rewards/margins": 10.577644348144531, "rewards/rejected": -7.261216163635254, "step": 1246 }, { "epoch": 0.69, "learning_rate": 7.601844491113528e-07, "logits/chosen": -6.1194634437561035, "logits/rejected": -6.037926197052002, "logps/chosen": -312.023681640625, "logps/rejected": -262.29522705078125, "loss": 0.0933, "rewards/accuracies": 0.9375, "rewards/chosen": 4.028916358947754, "rewards/margins": 10.115239143371582, "rewards/rejected": -6.086323261260986, "step": 1247 }, { "epoch": 0.69, "learning_rate": 7.598003278157557e-07, "logits/chosen": -6.0903706550598145, "logits/rejected": -6.101524829864502, "logps/chosen": -283.0379333496094, "logps/rejected": -174.52713012695312, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": 5.033945560455322, "rewards/margins": 9.4891357421875, "rewards/rejected": -4.455190181732178, "step": 1248 }, { "epoch": 0.69, "learning_rate": 7.594159963639428e-07, "logits/chosen": -6.117640972137451, "logits/rejected": -6.111990451812744, "logps/chosen": -237.90956115722656, "logps/rejected": -228.25392150878906, "loss": 0.2681, "rewards/accuracies": 0.875, "rewards/chosen": 2.2993855476379395, "rewards/margins": 6.1567792892456055, "rewards/rejected": -3.857393741607666, "step": 1249 }, { "epoch": 0.69, "learning_rate": 7.590314550668053e-07, "logits/chosen": -6.104964256286621, "logits/rejected": -6.161600589752197, "logps/chosen": -213.23550415039062, "logps/rejected": -251.20530700683594, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": 2.7263431549072266, "rewards/margins": 7.238973140716553, "rewards/rejected": -4.512630462646484, "step": 1250 }, { "epoch": 0.69, "learning_rate": 7.586467042354044e-07, "logits/chosen": -5.9843950271606445, "logits/rejected": -5.9774627685546875, "logps/chosen": -196.26730346679688, "logps/rejected": -209.3619384765625, "loss": 0.0975, "rewards/accuracies": 0.9375, "rewards/chosen": 2.444270610809326, "rewards/margins": 8.710357666015625, "rewards/rejected": -6.266087055206299, "step": 1251 }, { "epoch": 0.7, "learning_rate": 7.582617441809702e-07, "logits/chosen": -6.173245906829834, "logits/rejected": -6.08177375793457, "logps/chosen": -363.104248046875, "logps/rejected": -175.60867309570312, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": 6.179861068725586, "rewards/margins": 9.274555206298828, "rewards/rejected": -3.0946950912475586, "step": 1252 }, { "epoch": 0.7, "learning_rate": 7.578765752149028e-07, "logits/chosen": -6.090093612670898, "logits/rejected": -6.071500778198242, "logps/chosen": -296.2977600097656, "logps/rejected": -230.9355926513672, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": 5.10618257522583, "rewards/margins": 8.208000183105469, "rewards/rejected": -3.1018176078796387, "step": 1253 }, { "epoch": 0.7, "learning_rate": 7.574911976487708e-07, "logits/chosen": -5.990859508514404, "logits/rejected": -6.169576168060303, "logps/chosen": -352.32989501953125, "logps/rejected": -477.6534423828125, "loss": 0.1463, "rewards/accuracies": 0.9375, "rewards/chosen": 4.241082668304443, "rewards/margins": 9.688407897949219, "rewards/rejected": -5.447325229644775, "step": 1254 }, { "epoch": 0.7, "learning_rate": 7.571056117943115e-07, "logits/chosen": -6.026872158050537, "logits/rejected": -6.044693470001221, "logps/chosen": -276.06292724609375, "logps/rejected": -152.87554931640625, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": 4.360499382019043, "rewards/margins": 9.825261116027832, "rewards/rejected": -5.464761257171631, "step": 1255 }, { "epoch": 0.7, "learning_rate": 7.567198179634311e-07, "logits/chosen": -6.066892623901367, "logits/rejected": -6.013187885284424, "logps/chosen": -283.98486328125, "logps/rejected": -128.4271697998047, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": 7.978456497192383, "rewards/margins": 10.65440559387207, "rewards/rejected": -2.675950288772583, "step": 1256 }, { "epoch": 0.7, "learning_rate": 7.563338164682035e-07, "logits/chosen": -6.016378402709961, "logits/rejected": -6.025440216064453, "logps/chosen": -284.5599365234375, "logps/rejected": -307.2975158691406, "loss": 0.084, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7246860265731812, "rewards/margins": 7.488491058349609, "rewards/rejected": -5.763805389404297, "step": 1257 }, { "epoch": 0.7, "learning_rate": 7.559476076208711e-07, "logits/chosen": -6.11720609664917, "logits/rejected": -6.101117134094238, "logps/chosen": -335.2707214355469, "logps/rejected": -161.04360961914062, "loss": 0.1595, "rewards/accuracies": 0.9375, "rewards/chosen": 4.169426918029785, "rewards/margins": 8.431354522705078, "rewards/rejected": -4.261927127838135, "step": 1258 }, { "epoch": 0.7, "learning_rate": 7.555611917338434e-07, "logits/chosen": -6.048216342926025, "logits/rejected": -6.0256266593933105, "logps/chosen": -213.81362915039062, "logps/rejected": -241.48252868652344, "loss": 0.0855, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6212613582611084, "rewards/margins": 8.589630126953125, "rewards/rejected": -5.968369483947754, "step": 1259 }, { "epoch": 0.7, "learning_rate": 7.55174569119698e-07, "logits/chosen": -6.033987045288086, "logits/rejected": -6.173727989196777, "logps/chosen": -276.880126953125, "logps/rejected": -250.60621643066406, "loss": 0.1133, "rewards/accuracies": 1.0, "rewards/chosen": 3.202655792236328, "rewards/margins": 7.996343612670898, "rewards/rejected": -4.793687343597412, "step": 1260 }, { "epoch": 0.7, "learning_rate": 7.547877400911798e-07, "logits/chosen": -6.127286911010742, "logits/rejected": -6.121720314025879, "logps/chosen": -194.90884399414062, "logps/rejected": -171.88507080078125, "loss": 0.0828, "rewards/accuracies": 0.9375, "rewards/chosen": 2.0091497898101807, "rewards/margins": 7.856366157531738, "rewards/rejected": -5.847217082977295, "step": 1261 }, { "epoch": 0.7, "learning_rate": 7.544007049611998e-07, "logits/chosen": -6.133922576904297, "logits/rejected": -6.020505905151367, "logps/chosen": -304.26458740234375, "logps/rejected": -197.94822692871094, "loss": 0.1301, "rewards/accuracies": 1.0, "rewards/chosen": 4.81516170501709, "rewards/margins": 7.851722717285156, "rewards/rejected": -3.0365610122680664, "step": 1262 }, { "epoch": 0.7, "learning_rate": 7.540134640428365e-07, "logits/chosen": -6.086077690124512, "logits/rejected": -6.024439334869385, "logps/chosen": -345.9918518066406, "logps/rejected": -208.7952117919922, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": 2.7781991958618164, "rewards/margins": 8.058347702026367, "rewards/rejected": -5.280148506164551, "step": 1263 }, { "epoch": 0.7, "learning_rate": 7.536260176493347e-07, "logits/chosen": -6.110701560974121, "logits/rejected": -6.119073867797852, "logps/chosen": -407.656494140625, "logps/rejected": -513.7734985351562, "loss": 0.1142, "rewards/accuracies": 0.875, "rewards/chosen": 2.8181254863739014, "rewards/margins": 9.45882511138916, "rewards/rejected": -6.64069938659668, "step": 1264 }, { "epoch": 0.7, "learning_rate": 7.532383660941052e-07, "logits/chosen": -6.0326247215271, "logits/rejected": -6.0713958740234375, "logps/chosen": -418.81878662109375, "logps/rejected": -237.8697967529297, "loss": 0.143, "rewards/accuracies": 0.875, "rewards/chosen": 2.5160675048828125, "rewards/margins": 5.512253284454346, "rewards/rejected": -2.996185779571533, "step": 1265 }, { "epoch": 0.7, "learning_rate": 7.528505096907253e-07, "logits/chosen": -6.006389617919922, "logits/rejected": -6.136787414550781, "logps/chosen": -275.0121765136719, "logps/rejected": -255.6889190673828, "loss": 0.0665, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8266441822052, "rewards/margins": 9.526358604431152, "rewards/rejected": -5.699714660644531, "step": 1266 }, { "epoch": 0.7, "learning_rate": 7.52462448752937e-07, "logits/chosen": -5.996214389801025, "logits/rejected": -6.097535133361816, "logps/chosen": -583.373779296875, "logps/rejected": -468.58062744140625, "loss": 0.1221, "rewards/accuracies": 0.8125, "rewards/chosen": 3.5021729469299316, "rewards/margins": 6.536679267883301, "rewards/rejected": -3.034506320953369, "step": 1267 }, { "epoch": 0.7, "learning_rate": 7.520741835946491e-07, "logits/chosen": -6.090686798095703, "logits/rejected": -6.021792888641357, "logps/chosen": -223.2567138671875, "logps/rejected": -181.69415283203125, "loss": 0.2802, "rewards/accuracies": 1.0, "rewards/chosen": 2.1621663570404053, "rewards/margins": 8.106904983520508, "rewards/rejected": -5.944738388061523, "step": 1268 }, { "epoch": 0.7, "learning_rate": 7.516857145299341e-07, "logits/chosen": -6.056577682495117, "logits/rejected": -6.101611614227295, "logps/chosen": -282.2320861816406, "logps/rejected": -173.68386840820312, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": 4.3100481033325195, "rewards/margins": 9.719593048095703, "rewards/rejected": -5.409544944763184, "step": 1269 }, { "epoch": 0.71, "learning_rate": 7.512970418730308e-07, "logits/chosen": -6.143049716949463, "logits/rejected": -6.163585186004639, "logps/chosen": -263.18853759765625, "logps/rejected": -212.05941772460938, "loss": 0.2087, "rewards/accuracies": 0.9375, "rewards/chosen": 4.406957626342773, "rewards/margins": 10.129213333129883, "rewards/rejected": -5.722255229949951, "step": 1270 }, { "epoch": 0.71, "learning_rate": 7.509081659383416e-07, "logits/chosen": -6.180408477783203, "logits/rejected": -6.066322326660156, "logps/chosen": -296.0860900878906, "logps/rejected": -193.8563232421875, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": 4.463028907775879, "rewards/margins": 9.991432189941406, "rewards/rejected": -5.5284037590026855, "step": 1271 }, { "epoch": 0.71, "learning_rate": 7.505190870404343e-07, "logits/chosen": -6.075692653656006, "logits/rejected": -6.127617835998535, "logps/chosen": -197.1459503173828, "logps/rejected": -192.86737060546875, "loss": 0.1311, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7538795471191406, "rewards/margins": 8.655632019042969, "rewards/rejected": -5.901752471923828, "step": 1272 }, { "epoch": 0.71, "learning_rate": 7.501298054940402e-07, "logits/chosen": -6.090029239654541, "logits/rejected": -6.036351203918457, "logps/chosen": -226.0718536376953, "logps/rejected": -194.29144287109375, "loss": 0.2716, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5542447566986084, "rewards/margins": 7.707350254058838, "rewards/rejected": -5.153105735778809, "step": 1273 }, { "epoch": 0.71, "learning_rate": 7.497403216140546e-07, "logits/chosen": -6.059592247009277, "logits/rejected": -5.986201286315918, "logps/chosen": -269.3030700683594, "logps/rejected": -259.2023620605469, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": 1.8770246505737305, "rewards/margins": 6.784247398376465, "rewards/rejected": -4.907222747802734, "step": 1274 }, { "epoch": 0.71, "learning_rate": 7.493506357155367e-07, "logits/chosen": -6.1528754234313965, "logits/rejected": -5.9641194343566895, "logps/chosen": -340.250732421875, "logps/rejected": -122.87653350830078, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": 4.66175651550293, "rewards/margins": 8.299947738647461, "rewards/rejected": -3.6381914615631104, "step": 1275 }, { "epoch": 0.71, "learning_rate": 7.489607481137092e-07, "logits/chosen": -6.012228488922119, "logits/rejected": -6.050632476806641, "logps/chosen": -238.22247314453125, "logps/rejected": -209.54287719726562, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": 3.1374948024749756, "rewards/margins": 7.586928367614746, "rewards/rejected": -4.449432849884033, "step": 1276 }, { "epoch": 0.71, "learning_rate": 7.485706591239575e-07, "logits/chosen": -6.003012657165527, "logits/rejected": -6.084948539733887, "logps/chosen": -329.1686096191406, "logps/rejected": -305.75604248046875, "loss": 0.159, "rewards/accuracies": 0.875, "rewards/chosen": 4.627331733703613, "rewards/margins": 8.805228233337402, "rewards/rejected": -4.177896499633789, "step": 1277 }, { "epoch": 0.71, "learning_rate": 7.481803690618304e-07, "logits/chosen": -6.154013156890869, "logits/rejected": -6.070377349853516, "logps/chosen": -203.79312133789062, "logps/rejected": -162.1273956298828, "loss": 0.1377, "rewards/accuracies": 0.875, "rewards/chosen": 4.473545551300049, "rewards/margins": 9.61819076538086, "rewards/rejected": -5.144646167755127, "step": 1278 }, { "epoch": 0.71, "learning_rate": 7.477898782430389e-07, "logits/chosen": -6.07352352142334, "logits/rejected": -5.995216369628906, "logps/chosen": -522.3631591796875, "logps/rejected": -327.83721923828125, "loss": 0.1564, "rewards/accuracies": 0.9375, "rewards/chosen": 3.018306016921997, "rewards/margins": 8.323955535888672, "rewards/rejected": -5.305649280548096, "step": 1279 }, { "epoch": 0.71, "learning_rate": 7.473991869834569e-07, "logits/chosen": -6.049894332885742, "logits/rejected": -6.101380348205566, "logps/chosen": -328.11602783203125, "logps/rejected": -171.67919921875, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": 5.264794826507568, "rewards/margins": 9.87697982788086, "rewards/rejected": -4.612185001373291, "step": 1280 }, { "epoch": 0.71, "learning_rate": 7.4700829559912e-07, "logits/chosen": -5.976825714111328, "logits/rejected": -6.198343276977539, "logps/chosen": -261.7573547363281, "logps/rejected": -315.869384765625, "loss": 0.0956, "rewards/accuracies": 0.9375, "rewards/chosen": 3.37076473236084, "rewards/margins": 10.401010513305664, "rewards/rejected": -7.030245780944824, "step": 1281 }, { "epoch": 0.71, "learning_rate": 7.46617204406226e-07, "logits/chosen": -6.079501152038574, "logits/rejected": -6.010014533996582, "logps/chosen": -300.9473876953125, "logps/rejected": -287.81158447265625, "loss": 0.1948, "rewards/accuracies": 0.9375, "rewards/chosen": 5.472265720367432, "rewards/margins": 11.50198745727539, "rewards/rejected": -6.029720306396484, "step": 1282 }, { "epoch": 0.71, "learning_rate": 7.46225913721134e-07, "logits/chosen": -6.015846252441406, "logits/rejected": -6.058208465576172, "logps/chosen": -354.6758728027344, "logps/rejected": -357.9071960449219, "loss": 0.0956, "rewards/accuracies": 0.875, "rewards/chosen": 5.4889302253723145, "rewards/margins": 8.833581924438477, "rewards/rejected": -3.3446521759033203, "step": 1283 }, { "epoch": 0.71, "learning_rate": 7.458344238603647e-07, "logits/chosen": -5.973452568054199, "logits/rejected": -5.993824481964111, "logps/chosen": -223.3433837890625, "logps/rejected": -158.99771118164062, "loss": 0.1195, "rewards/accuracies": 0.875, "rewards/chosen": 3.420649766921997, "rewards/margins": 7.23799991607666, "rewards/rejected": -3.8173508644104004, "step": 1284 }, { "epoch": 0.71, "learning_rate": 7.454427351405999e-07, "logits/chosen": -6.061602592468262, "logits/rejected": -6.120564937591553, "logps/chosen": -330.190673828125, "logps/rejected": -245.74314880371094, "loss": 0.0939, "rewards/accuracies": 0.9375, "rewards/chosen": 6.268910884857178, "rewards/margins": 9.406506538391113, "rewards/rejected": -3.1375958919525146, "step": 1285 }, { "epoch": 0.71, "learning_rate": 7.450508478786822e-07, "logits/chosen": -6.0419535636901855, "logits/rejected": -5.94632625579834, "logps/chosen": -295.6313781738281, "logps/rejected": -232.17210388183594, "loss": 0.2792, "rewards/accuracies": 0.875, "rewards/chosen": 4.884203910827637, "rewards/margins": 7.561223030090332, "rewards/rejected": -2.6770195960998535, "step": 1286 }, { "epoch": 0.71, "learning_rate": 7.446587623916149e-07, "logits/chosen": -5.994877338409424, "logits/rejected": -5.964996814727783, "logps/chosen": -341.1622619628906, "logps/rejected": -206.17263793945312, "loss": 0.1071, "rewards/accuracies": 0.875, "rewards/chosen": 5.9621806144714355, "rewards/margins": 9.43132209777832, "rewards/rejected": -3.4691410064697266, "step": 1287 }, { "epoch": 0.72, "learning_rate": 7.442664789965616e-07, "logits/chosen": -6.062853813171387, "logits/rejected": -6.113408088684082, "logps/chosen": -301.3484802246094, "logps/rejected": -269.7041320800781, "loss": 0.0957, "rewards/accuracies": 1.0, "rewards/chosen": 5.580715656280518, "rewards/margins": 9.5714111328125, "rewards/rejected": -3.990694522857666, "step": 1288 }, { "epoch": 0.72, "learning_rate": 7.43873998010846e-07, "logits/chosen": -6.16969633102417, "logits/rejected": -6.106876373291016, "logps/chosen": -161.3182830810547, "logps/rejected": -159.58340454101562, "loss": 0.4367, "rewards/accuracies": 1.0, "rewards/chosen": 2.836488962173462, "rewards/margins": 9.114100456237793, "rewards/rejected": -6.27761173248291, "step": 1289 }, { "epoch": 0.72, "learning_rate": 7.434813197519513e-07, "logits/chosen": -6.2147111892700195, "logits/rejected": -6.13381290435791, "logps/chosen": -247.98287963867188, "logps/rejected": -174.22462463378906, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": 1.983750581741333, "rewards/margins": 7.779447555541992, "rewards/rejected": -5.795697212219238, "step": 1290 }, { "epoch": 0.72, "learning_rate": 7.430884445375212e-07, "logits/chosen": -6.092342376708984, "logits/rejected": -6.031964302062988, "logps/chosen": -264.0797424316406, "logps/rejected": -215.65872192382812, "loss": 0.1075, "rewards/accuracies": 0.875, "rewards/chosen": 2.3136582374572754, "rewards/margins": 6.783572196960449, "rewards/rejected": -4.469914436340332, "step": 1291 }, { "epoch": 0.72, "learning_rate": 7.426953726853573e-07, "logits/chosen": -5.9855756759643555, "logits/rejected": -5.961250305175781, "logps/chosen": -243.06207275390625, "logps/rejected": -197.2215118408203, "loss": 0.0993, "rewards/accuracies": 0.875, "rewards/chosen": 3.6760730743408203, "rewards/margins": 7.9862236976623535, "rewards/rejected": -4.310150623321533, "step": 1292 }, { "epoch": 0.72, "learning_rate": 7.423021045134217e-07, "logits/chosen": -6.188447952270508, "logits/rejected": -6.130696773529053, "logps/chosen": -249.34022521972656, "logps/rejected": -165.44979858398438, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": 3.5435678958892822, "rewards/margins": 9.756319046020508, "rewards/rejected": -6.212751388549805, "step": 1293 }, { "epoch": 0.72, "learning_rate": 7.419086403398343e-07, "logits/chosen": -6.058134078979492, "logits/rejected": -6.1007585525512695, "logps/chosen": -341.04876708984375, "logps/rejected": -204.39102172851562, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 6.098655700683594, "rewards/margins": 11.169706344604492, "rewards/rejected": -5.071051597595215, "step": 1294 }, { "epoch": 0.72, "learning_rate": 7.415149804828741e-07, "logits/chosen": -6.081541061401367, "logits/rejected": -5.9772772789001465, "logps/chosen": -374.8544921875, "logps/rejected": -168.22769165039062, "loss": 0.1303, "rewards/accuracies": 1.0, "rewards/chosen": 4.347777366638184, "rewards/margins": 8.462928771972656, "rewards/rejected": -4.115152359008789, "step": 1295 }, { "epoch": 0.72, "learning_rate": 7.411211252609783e-07, "logits/chosen": -6.07320499420166, "logits/rejected": -6.087054252624512, "logps/chosen": -203.90582275390625, "logps/rejected": -240.23309326171875, "loss": 0.2162, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0993611812591553, "rewards/margins": 7.126766204833984, "rewards/rejected": -6.027405738830566, "step": 1296 }, { "epoch": 0.72, "learning_rate": 7.407270749927419e-07, "logits/chosen": -6.067856788635254, "logits/rejected": -6.097687721252441, "logps/chosen": -271.35882568359375, "logps/rejected": -227.03570556640625, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": 4.463667392730713, "rewards/margins": 10.85399341583252, "rewards/rejected": -6.390326499938965, "step": 1297 }, { "epoch": 0.72, "learning_rate": 7.403328299969179e-07, "logits/chosen": -6.091115951538086, "logits/rejected": -6.112998008728027, "logps/chosen": -301.1331481933594, "logps/rejected": -269.97308349609375, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": 5.054299354553223, "rewards/margins": 10.88804817199707, "rewards/rejected": -5.833748817443848, "step": 1298 }, { "epoch": 0.72, "learning_rate": 7.399383905924165e-07, "logits/chosen": -6.0120954513549805, "logits/rejected": -6.055952548980713, "logps/chosen": -314.5677490234375, "logps/rejected": -260.017578125, "loss": 0.0734, "rewards/accuracies": 0.9375, "rewards/chosen": 4.574370384216309, "rewards/margins": 8.470233917236328, "rewards/rejected": -3.8958640098571777, "step": 1299 }, { "epoch": 0.72, "learning_rate": 7.395437570983057e-07, "logits/chosen": -5.984044075012207, "logits/rejected": -6.063026428222656, "logps/chosen": -408.0760803222656, "logps/rejected": -285.37738037109375, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": 5.3096418380737305, "rewards/margins": 9.076921463012695, "rewards/rejected": -3.7672789096832275, "step": 1300 }, { "epoch": 0.72, "learning_rate": 7.391489298338098e-07, "logits/chosen": -6.045022487640381, "logits/rejected": -6.09623384475708, "logps/chosen": -142.22528076171875, "logps/rejected": -271.81976318359375, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": 1.9600423574447632, "rewards/margins": 7.31329345703125, "rewards/rejected": -5.353250503540039, "step": 1301 }, { "epoch": 0.72, "learning_rate": 7.38753909118311e-07, "logits/chosen": -5.968980312347412, "logits/rejected": -5.951439380645752, "logps/chosen": -545.4443359375, "logps/rejected": -543.6364135742188, "loss": 0.0841, "rewards/accuracies": 0.8125, "rewards/chosen": 0.7920769453048706, "rewards/margins": 6.176349639892578, "rewards/rejected": -5.384273052215576, "step": 1302 }, { "epoch": 0.72, "learning_rate": 7.383586952713465e-07, "logits/chosen": -6.117459297180176, "logits/rejected": -6.06411075592041, "logps/chosen": -342.7870788574219, "logps/rejected": -189.27420043945312, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": 4.134057998657227, "rewards/margins": 9.451963424682617, "rewards/rejected": -5.317906379699707, "step": 1303 }, { "epoch": 0.72, "learning_rate": 7.379632886126107e-07, "logits/chosen": -6.015042304992676, "logits/rejected": -6.113739967346191, "logps/chosen": -332.63037109375, "logps/rejected": -190.0571746826172, "loss": 0.1515, "rewards/accuracies": 0.9375, "rewards/chosen": 4.95468807220459, "rewards/margins": 8.710943222045898, "rewards/rejected": -3.7562551498413086, "step": 1304 }, { "epoch": 0.72, "learning_rate": 7.375676894619537e-07, "logits/chosen": -6.0642194747924805, "logits/rejected": -6.159219741821289, "logps/chosen": -376.7021789550781, "logps/rejected": -215.40992736816406, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 5.153777122497559, "rewards/margins": 9.50890064239502, "rewards/rejected": -4.355123519897461, "step": 1305 }, { "epoch": 0.73, "learning_rate": 7.371718981393814e-07, "logits/chosen": -6.097156524658203, "logits/rejected": -6.197620391845703, "logps/chosen": -173.43418884277344, "logps/rejected": -258.51141357421875, "loss": 0.1409, "rewards/accuracies": 1.0, "rewards/chosen": 2.7210404872894287, "rewards/margins": 9.161616325378418, "rewards/rejected": -6.44057559967041, "step": 1306 }, { "epoch": 0.73, "learning_rate": 7.36775914965055e-07, "logits/chosen": -6.11170768737793, "logits/rejected": -6.104549884796143, "logps/chosen": -342.8865966796875, "logps/rejected": -185.5748291015625, "loss": 0.1615, "rewards/accuracies": 0.875, "rewards/chosen": 3.5030517578125, "rewards/margins": 8.056623458862305, "rewards/rejected": -4.553571701049805, "step": 1307 }, { "epoch": 0.73, "learning_rate": 7.363797402592911e-07, "logits/chosen": -5.960229873657227, "logits/rejected": -5.972378253936768, "logps/chosen": -326.4179992675781, "logps/rejected": -203.1566925048828, "loss": 0.1445, "rewards/accuracies": 0.9375, "rewards/chosen": 5.336518287658691, "rewards/margins": 7.342599868774414, "rewards/rejected": -2.0060811042785645, "step": 1308 }, { "epoch": 0.73, "learning_rate": 7.35983374342561e-07, "logits/chosen": -6.081458568572998, "logits/rejected": -6.0055437088012695, "logps/chosen": -371.691650390625, "logps/rejected": -200.1259002685547, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 6.3098063468933105, "rewards/margins": 10.426277160644531, "rewards/rejected": -4.1164703369140625, "step": 1309 }, { "epoch": 0.73, "learning_rate": 7.35586817535491e-07, "logits/chosen": -6.041460990905762, "logits/rejected": -6.020700454711914, "logps/chosen": -193.36468505859375, "logps/rejected": -151.43218994140625, "loss": 0.1115, "rewards/accuracies": 1.0, "rewards/chosen": 1.4696576595306396, "rewards/margins": 7.257951736450195, "rewards/rejected": -5.788293838500977, "step": 1310 }, { "epoch": 0.73, "learning_rate": 7.351900701588612e-07, "logits/chosen": -6.058887958526611, "logits/rejected": -6.0949201583862305, "logps/chosen": -217.85836791992188, "logps/rejected": -195.53944396972656, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 2.6411044597625732, "rewards/margins": 8.413302421569824, "rewards/rejected": -5.77219820022583, "step": 1311 }, { "epoch": 0.73, "learning_rate": 7.347931325336065e-07, "logits/chosen": -6.198208808898926, "logits/rejected": -6.075082778930664, "logps/chosen": -273.5542297363281, "logps/rejected": -224.71881103515625, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 3.1581456661224365, "rewards/margins": 9.615480422973633, "rewards/rejected": -6.457334518432617, "step": 1312 }, { "epoch": 0.73, "learning_rate": 7.343960049808155e-07, "logits/chosen": -6.138808250427246, "logits/rejected": -6.105430603027344, "logps/chosen": -358.4767150878906, "logps/rejected": -212.4351806640625, "loss": 0.11, "rewards/accuracies": 1.0, "rewards/chosen": 4.82252311706543, "rewards/margins": 9.908756256103516, "rewards/rejected": -5.086233615875244, "step": 1313 }, { "epoch": 0.73, "learning_rate": 7.339986878217302e-07, "logits/chosen": -6.03812313079834, "logits/rejected": -6.096261024475098, "logps/chosen": -288.17279052734375, "logps/rejected": -398.2716369628906, "loss": 0.0679, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0780210494995117, "rewards/margins": 7.662692546844482, "rewards/rejected": -4.584671497344971, "step": 1314 }, { "epoch": 0.73, "learning_rate": 7.336011813777462e-07, "logits/chosen": -6.072235107421875, "logits/rejected": -6.1248250007629395, "logps/chosen": -234.3385009765625, "logps/rejected": -173.1555938720703, "loss": 0.1018, "rewards/accuracies": 1.0, "rewards/chosen": 3.0748374462127686, "rewards/margins": 7.985086441040039, "rewards/rejected": -4.910248756408691, "step": 1315 }, { "epoch": 0.73, "learning_rate": 7.332034859704123e-07, "logits/chosen": -6.087615966796875, "logits/rejected": -6.042573928833008, "logps/chosen": -209.08045959472656, "logps/rejected": -161.58187866210938, "loss": 0.1217, "rewards/accuracies": 0.9375, "rewards/chosen": 2.234314441680908, "rewards/margins": 7.791162490844727, "rewards/rejected": -5.556848526000977, "step": 1316 }, { "epoch": 0.73, "learning_rate": 7.3280560192143e-07, "logits/chosen": -6.027186870574951, "logits/rejected": -6.07853889465332, "logps/chosen": -273.8988342285156, "logps/rejected": -274.3017883300781, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": 5.747946739196777, "rewards/margins": 12.101722717285156, "rewards/rejected": -6.353776931762695, "step": 1317 }, { "epoch": 0.73, "learning_rate": 7.324075295526531e-07, "logits/chosen": -6.097906112670898, "logits/rejected": -6.035480499267578, "logps/chosen": -258.7633361816406, "logps/rejected": -238.7749786376953, "loss": 0.1036, "rewards/accuracies": 0.9375, "rewards/chosen": 4.4261016845703125, "rewards/margins": 9.497491836547852, "rewards/rejected": -5.071390151977539, "step": 1318 }, { "epoch": 0.73, "learning_rate": 7.320092691860885e-07, "logits/chosen": -6.001631736755371, "logits/rejected": -6.051450252532959, "logps/chosen": -304.95147705078125, "logps/rejected": -163.02871704101562, "loss": 0.1365, "rewards/accuracies": 1.0, "rewards/chosen": 3.8732266426086426, "rewards/margins": 10.180585861206055, "rewards/rejected": -6.3073577880859375, "step": 1319 }, { "epoch": 0.73, "learning_rate": 7.316108211438945e-07, "logits/chosen": -6.066781997680664, "logits/rejected": -6.168417453765869, "logps/chosen": -265.70098876953125, "logps/rejected": -279.6372375488281, "loss": 0.0841, "rewards/accuracies": 1.0, "rewards/chosen": 4.189809799194336, "rewards/margins": 12.451412200927734, "rewards/rejected": -8.261602401733398, "step": 1320 }, { "epoch": 0.73, "learning_rate": 7.312121857483815e-07, "logits/chosen": -6.295483589172363, "logits/rejected": -6.050955772399902, "logps/chosen": -201.21615600585938, "logps/rejected": -115.73289489746094, "loss": 0.109, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9018619060516357, "rewards/margins": 7.750657081604004, "rewards/rejected": -5.848795413970947, "step": 1321 }, { "epoch": 0.73, "learning_rate": 7.308133633220114e-07, "logits/chosen": -6.004981994628906, "logits/rejected": -6.144278049468994, "logps/chosen": -217.1898651123047, "logps/rejected": -309.65753173828125, "loss": 0.1419, "rewards/accuracies": 0.9375, "rewards/chosen": 3.053128480911255, "rewards/margins": 9.881279945373535, "rewards/rejected": -6.828151226043701, "step": 1322 }, { "epoch": 0.73, "learning_rate": 7.304143541873974e-07, "logits/chosen": -5.990900993347168, "logits/rejected": -6.0124101638793945, "logps/chosen": -253.15586853027344, "logps/rejected": -203.74606323242188, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": 4.299021244049072, "rewards/margins": 8.307391166687012, "rewards/rejected": -4.008369445800781, "step": 1323 }, { "epoch": 0.74, "learning_rate": 7.300151586673036e-07, "logits/chosen": -6.02937650680542, "logits/rejected": -6.0081281661987305, "logps/chosen": -216.66143798828125, "logps/rejected": -226.1572265625, "loss": 0.1943, "rewards/accuracies": 0.9375, "rewards/chosen": 2.544370651245117, "rewards/margins": 10.55185317993164, "rewards/rejected": -8.007482528686523, "step": 1324 }, { "epoch": 0.74, "learning_rate": 7.296157770846451e-07, "logits/chosen": -6.111717700958252, "logits/rejected": -6.02163553237915, "logps/chosen": -234.42074584960938, "logps/rejected": -217.5285186767578, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 1.987913966178894, "rewards/margins": 10.390345573425293, "rewards/rejected": -8.40243148803711, "step": 1325 }, { "epoch": 0.74, "learning_rate": 7.292162097624873e-07, "logits/chosen": -6.032763481140137, "logits/rejected": -6.054285049438477, "logps/chosen": -481.7970886230469, "logps/rejected": -445.13873291015625, "loss": 0.1183, "rewards/accuracies": 0.9375, "rewards/chosen": 2.524782657623291, "rewards/margins": 4.5496110916137695, "rewards/rejected": -2.0248284339904785, "step": 1326 }, { "epoch": 0.74, "learning_rate": 7.288164570240461e-07, "logits/chosen": -6.1121368408203125, "logits/rejected": -6.028587341308594, "logps/chosen": -304.45758056640625, "logps/rejected": -367.71820068359375, "loss": 0.0808, "rewards/accuracies": 1.0, "rewards/chosen": 4.74758768081665, "rewards/margins": 10.898886680603027, "rewards/rejected": -6.151299476623535, "step": 1327 }, { "epoch": 0.74, "learning_rate": 7.284165191926871e-07, "logits/chosen": -6.038816452026367, "logits/rejected": -6.064770221710205, "logps/chosen": -145.205810546875, "logps/rejected": -136.92855834960938, "loss": 0.1901, "rewards/accuracies": 0.875, "rewards/chosen": 2.0623409748077393, "rewards/margins": 7.282182216644287, "rewards/rejected": -5.219841480255127, "step": 1328 }, { "epoch": 0.74, "learning_rate": 7.280163965919259e-07, "logits/chosen": -6.039548873901367, "logits/rejected": -6.063363552093506, "logps/chosen": -214.8475799560547, "logps/rejected": -191.8903350830078, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": 2.9767873287200928, "rewards/margins": 8.224809646606445, "rewards/rejected": -5.248022556304932, "step": 1329 }, { "epoch": 0.74, "learning_rate": 7.276160895454273e-07, "logits/chosen": -6.206977844238281, "logits/rejected": -6.106070518493652, "logps/chosen": -325.3150634765625, "logps/rejected": -303.6671142578125, "loss": 0.1841, "rewards/accuracies": 0.9375, "rewards/chosen": 1.263213872909546, "rewards/margins": 7.115237712860107, "rewards/rejected": -5.852024078369141, "step": 1330 }, { "epoch": 0.74, "learning_rate": 7.272155983770054e-07, "logits/chosen": -6.211255073547363, "logits/rejected": -6.150696754455566, "logps/chosen": -333.2089538574219, "logps/rejected": -218.98460388183594, "loss": 0.6345, "rewards/accuracies": 0.9375, "rewards/chosen": 5.611711502075195, "rewards/margins": 10.661273002624512, "rewards/rejected": -5.049561500549316, "step": 1331 }, { "epoch": 0.74, "learning_rate": 7.268149234106233e-07, "logits/chosen": -6.030642032623291, "logits/rejected": -6.204184055328369, "logps/chosen": -224.16204833984375, "logps/rejected": -308.8463134765625, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 2.4671316146850586, "rewards/margins": 11.207184791564941, "rewards/rejected": -8.740053176879883, "step": 1332 }, { "epoch": 0.74, "learning_rate": 7.264140649703927e-07, "logits/chosen": -6.1266350746154785, "logits/rejected": -6.086054801940918, "logps/chosen": -266.61468505859375, "logps/rejected": -225.689208984375, "loss": 0.1262, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0485897064208984, "rewards/margins": 9.53434944152832, "rewards/rejected": -6.485759735107422, "step": 1333 }, { "epoch": 0.74, "learning_rate": 7.26013023380574e-07, "logits/chosen": -6.032636642456055, "logits/rejected": -6.140746116638184, "logps/chosen": -259.97705078125, "logps/rejected": -269.1328125, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 4.135635852813721, "rewards/margins": 9.588680267333984, "rewards/rejected": -5.453044891357422, "step": 1334 }, { "epoch": 0.74, "learning_rate": 7.256117989655749e-07, "logits/chosen": -5.961981773376465, "logits/rejected": -5.974255561828613, "logps/chosen": -357.022705078125, "logps/rejected": -591.6638793945312, "loss": 0.0926, "rewards/accuracies": 0.875, "rewards/chosen": 2.957172393798828, "rewards/margins": 7.646937370300293, "rewards/rejected": -4.689764976501465, "step": 1335 }, { "epoch": 0.74, "learning_rate": 7.252103920499522e-07, "logits/chosen": -6.0311174392700195, "logits/rejected": -5.9963765144348145, "logps/chosen": -237.17308044433594, "logps/rejected": -207.33526611328125, "loss": 0.0595, "rewards/accuracies": 0.9375, "rewards/chosen": 3.172402858734131, "rewards/margins": 9.180929183959961, "rewards/rejected": -6.00852632522583, "step": 1336 }, { "epoch": 0.74, "learning_rate": 7.248088029584094e-07, "logits/chosen": -6.065291881561279, "logits/rejected": -6.033791542053223, "logps/chosen": -297.9627380371094, "logps/rejected": -167.19918823242188, "loss": 0.1738, "rewards/accuracies": 0.875, "rewards/chosen": 5.224691867828369, "rewards/margins": 9.82826042175293, "rewards/rejected": -4.6035685539245605, "step": 1337 }, { "epoch": 0.74, "learning_rate": 7.244070320157979e-07, "logits/chosen": -5.95734167098999, "logits/rejected": -5.968779563903809, "logps/chosen": -221.9677734375, "logps/rejected": -188.1582794189453, "loss": 0.085, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6872506141662598, "rewards/margins": 7.32805871963501, "rewards/rejected": -4.64080810546875, "step": 1338 }, { "epoch": 0.74, "learning_rate": 7.240050795471156e-07, "logits/chosen": -6.045138359069824, "logits/rejected": -6.034846305847168, "logps/chosen": -184.6343231201172, "logps/rejected": -191.22796630859375, "loss": 0.2366, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7345716953277588, "rewards/margins": 9.438708305358887, "rewards/rejected": -7.704135894775391, "step": 1339 }, { "epoch": 0.74, "learning_rate": 7.236029458775082e-07, "logits/chosen": -6.124875068664551, "logits/rejected": -6.0756025314331055, "logps/chosen": -319.8560791015625, "logps/rejected": -212.65318298339844, "loss": 0.1146, "rewards/accuracies": 0.9375, "rewards/chosen": 1.6557502746582031, "rewards/margins": 8.032660484313965, "rewards/rejected": -6.37691068649292, "step": 1340 }, { "epoch": 0.74, "learning_rate": 7.232006313322668e-07, "logits/chosen": -6.087249755859375, "logits/rejected": -5.994450569152832, "logps/chosen": -222.14382934570312, "logps/rejected": -185.34278869628906, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 3.6049318313598633, "rewards/margins": 9.380369186401367, "rewards/rejected": -5.775437355041504, "step": 1341 }, { "epoch": 0.75, "learning_rate": 7.227981362368299e-07, "logits/chosen": -6.052980422973633, "logits/rejected": -5.9567790031433105, "logps/chosen": -247.1732177734375, "logps/rejected": -152.46937561035156, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": 2.568974494934082, "rewards/margins": 10.704643249511719, "rewards/rejected": -8.135669708251953, "step": 1342 }, { "epoch": 0.75, "learning_rate": 7.223954609167811e-07, "logits/chosen": -5.983013153076172, "logits/rejected": -5.922849178314209, "logps/chosen": -251.6832275390625, "logps/rejected": -365.8275451660156, "loss": 0.0905, "rewards/accuracies": 0.8125, "rewards/chosen": 3.497265577316284, "rewards/margins": 7.385248184204102, "rewards/rejected": -3.8879826068878174, "step": 1343 }, { "epoch": 0.75, "learning_rate": 7.219926056978507e-07, "logits/chosen": -6.203547477722168, "logits/rejected": -5.989952564239502, "logps/chosen": -442.38323974609375, "logps/rejected": -358.493408203125, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": 4.563877105712891, "rewards/margins": 7.854157447814941, "rewards/rejected": -3.290280342102051, "step": 1344 }, { "epoch": 0.75, "learning_rate": 7.215895709059139e-07, "logits/chosen": -6.133031368255615, "logits/rejected": -5.940506458282471, "logps/chosen": -410.0890197753906, "logps/rejected": -229.2003631591797, "loss": 0.0997, "rewards/accuracies": 0.9375, "rewards/chosen": 4.202350616455078, "rewards/margins": 9.028924942016602, "rewards/rejected": -4.826574325561523, "step": 1345 }, { "epoch": 0.75, "learning_rate": 7.211863568669912e-07, "logits/chosen": -6.067340850830078, "logits/rejected": -6.057849884033203, "logps/chosen": -225.21583557128906, "logps/rejected": -277.852294921875, "loss": 0.096, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6193923950195312, "rewards/margins": 9.529121398925781, "rewards/rejected": -6.90972900390625, "step": 1346 }, { "epoch": 0.75, "learning_rate": 7.207829639072483e-07, "logits/chosen": -6.065158367156982, "logits/rejected": -5.909739017486572, "logps/chosen": -198.5718994140625, "logps/rejected": -211.87344360351562, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 3.3498740196228027, "rewards/margins": 10.008403778076172, "rewards/rejected": -6.658529281616211, "step": 1347 }, { "epoch": 0.75, "learning_rate": 7.203793923529956e-07, "logits/chosen": -6.027858734130859, "logits/rejected": -6.058905601501465, "logps/chosen": -308.4850769042969, "logps/rejected": -236.74647521972656, "loss": 0.083, "rewards/accuracies": 1.0, "rewards/chosen": 2.6541175842285156, "rewards/margins": 8.686245918273926, "rewards/rejected": -6.03212833404541, "step": 1348 }, { "epoch": 0.75, "learning_rate": 7.19975642530688e-07, "logits/chosen": -6.064692497253418, "logits/rejected": -6.118101119995117, "logps/chosen": -212.9950714111328, "logps/rejected": -270.95758056640625, "loss": 0.1241, "rewards/accuracies": 1.0, "rewards/chosen": 3.503786563873291, "rewards/margins": 9.81353759765625, "rewards/rejected": -6.309751510620117, "step": 1349 }, { "epoch": 0.75, "learning_rate": 7.195717147669244e-07, "logits/chosen": -6.202317237854004, "logits/rejected": -6.161434173583984, "logps/chosen": -291.2039489746094, "logps/rejected": -190.4608917236328, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 3.539247512817383, "rewards/margins": 10.423436164855957, "rewards/rejected": -6.884188652038574, "step": 1350 }, { "epoch": 0.75, "learning_rate": 7.191676093884478e-07, "logits/chosen": -6.147848606109619, "logits/rejected": -5.987669467926025, "logps/chosen": -260.390380859375, "logps/rejected": -174.11868286132812, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": 4.968995094299316, "rewards/margins": 10.167647361755371, "rewards/rejected": -5.198653221130371, "step": 1351 }, { "epoch": 0.75, "learning_rate": 7.187633267221449e-07, "logits/chosen": -6.025554656982422, "logits/rejected": -6.175818920135498, "logps/chosen": -280.4809875488281, "logps/rejected": -333.0943603515625, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": 3.6408333778381348, "rewards/margins": 9.058094024658203, "rewards/rejected": -5.41726016998291, "step": 1352 }, { "epoch": 0.75, "learning_rate": 7.183588670950455e-07, "logits/chosen": -6.070987701416016, "logits/rejected": -5.99968957901001, "logps/chosen": -268.5097351074219, "logps/rejected": -183.3932342529297, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": 3.092446804046631, "rewards/margins": 7.916715621948242, "rewards/rejected": -4.8242692947387695, "step": 1353 }, { "epoch": 0.75, "learning_rate": 7.179542308343232e-07, "logits/chosen": -6.188262462615967, "logits/rejected": -6.074052333831787, "logps/chosen": -338.7012939453125, "logps/rejected": -180.14102172851562, "loss": 0.1595, "rewards/accuracies": 0.9375, "rewards/chosen": 4.914703845977783, "rewards/margins": 9.60867691040039, "rewards/rejected": -4.693972587585449, "step": 1354 }, { "epoch": 0.75, "learning_rate": 7.175494182672939e-07, "logits/chosen": -6.037590980529785, "logits/rejected": -6.1067304611206055, "logps/chosen": -269.209228515625, "logps/rejected": -282.5416564941406, "loss": 0.1126, "rewards/accuracies": 0.9375, "rewards/chosen": 3.159806966781616, "rewards/margins": 7.767567157745361, "rewards/rejected": -4.607760429382324, "step": 1355 }, { "epoch": 0.75, "learning_rate": 7.171444297214162e-07, "logits/chosen": -6.1827392578125, "logits/rejected": -6.173981189727783, "logps/chosen": -339.9375915527344, "logps/rejected": -202.85379028320312, "loss": 0.1467, "rewards/accuracies": 0.875, "rewards/chosen": 4.798175811767578, "rewards/margins": 7.422446250915527, "rewards/rejected": -2.624269962310791, "step": 1356 }, { "epoch": 0.75, "learning_rate": 7.167392655242915e-07, "logits/chosen": -6.101621627807617, "logits/rejected": -6.073042392730713, "logps/chosen": -147.62896728515625, "logps/rejected": -75.28791809082031, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 1.8413028717041016, "rewards/margins": 5.6495361328125, "rewards/rejected": -3.8082332611083984, "step": 1357 }, { "epoch": 0.75, "learning_rate": 7.163339260036623e-07, "logits/chosen": -6.073856830596924, "logits/rejected": -6.093197345733643, "logps/chosen": -270.16204833984375, "logps/rejected": -220.8931427001953, "loss": 0.1261, "rewards/accuracies": 1.0, "rewards/chosen": 4.11351203918457, "rewards/margins": 10.855968475341797, "rewards/rejected": -6.742456912994385, "step": 1358 }, { "epoch": 0.75, "learning_rate": 7.15928411487414e-07, "logits/chosen": -6.342689514160156, "logits/rejected": -6.053016185760498, "logps/chosen": -320.3722839355469, "logps/rejected": -191.14739990234375, "loss": 0.0784, "rewards/accuracies": 0.9375, "rewards/chosen": 4.693699836730957, "rewards/margins": 8.371573448181152, "rewards/rejected": -3.677873134613037, "step": 1359 }, { "epoch": 0.76, "learning_rate": 7.155227223035731e-07, "logits/chosen": -6.143305778503418, "logits/rejected": -6.035642147064209, "logps/chosen": -324.173828125, "logps/rejected": -237.31246948242188, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": 3.3211746215820312, "rewards/margins": 8.702857971191406, "rewards/rejected": -5.381682872772217, "step": 1360 }, { "epoch": 0.76, "learning_rate": 7.151168587803074e-07, "logits/chosen": -6.127578258514404, "logits/rejected": -5.892776012420654, "logps/chosen": -190.60430908203125, "logps/rejected": -191.74298095703125, "loss": 0.0656, "rewards/accuracies": 0.875, "rewards/chosen": 0.6226956844329834, "rewards/margins": 9.48497200012207, "rewards/rejected": -8.862275123596191, "step": 1361 }, { "epoch": 0.76, "learning_rate": 7.147108212459256e-07, "logits/chosen": -6.0494489669799805, "logits/rejected": -5.9936933517456055, "logps/chosen": -260.217529296875, "logps/rejected": -123.3944091796875, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": 5.480798721313477, "rewards/margins": 8.133153915405273, "rewards/rejected": -2.6523547172546387, "step": 1362 }, { "epoch": 0.76, "learning_rate": 7.143046100288776e-07, "logits/chosen": -6.136012554168701, "logits/rejected": -6.061535358428955, "logps/chosen": -335.3572082519531, "logps/rejected": -230.79551696777344, "loss": 0.1012, "rewards/accuracies": 0.9375, "rewards/chosen": 4.15491247177124, "rewards/margins": 7.782021522521973, "rewards/rejected": -3.6271090507507324, "step": 1363 }, { "epoch": 0.76, "learning_rate": 7.13898225457753e-07, "logits/chosen": -6.039066791534424, "logits/rejected": -6.024312496185303, "logps/chosen": -219.21302795410156, "logps/rejected": -194.6749267578125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 3.6984598636627197, "rewards/margins": 10.483619689941406, "rewards/rejected": -6.785159587860107, "step": 1364 }, { "epoch": 0.76, "learning_rate": 7.134916678612825e-07, "logits/chosen": -6.091155052185059, "logits/rejected": -5.939361095428467, "logps/chosen": -224.564208984375, "logps/rejected": -93.30662536621094, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": 3.500605344772339, "rewards/margins": 7.757795333862305, "rewards/rejected": -4.257190227508545, "step": 1365 }, { "epoch": 0.76, "learning_rate": 7.130849375683361e-07, "logits/chosen": -6.027637958526611, "logits/rejected": -6.004227161407471, "logps/chosen": -290.65948486328125, "logps/rejected": -280.4742736816406, "loss": 0.1118, "rewards/accuracies": 1.0, "rewards/chosen": 2.218742609024048, "rewards/margins": 9.099035263061523, "rewards/rejected": -6.880293369293213, "step": 1366 }, { "epoch": 0.76, "learning_rate": 7.126780349079241e-07, "logits/chosen": -5.986345291137695, "logits/rejected": -6.007540702819824, "logps/chosen": -166.31619262695312, "logps/rejected": -220.24142456054688, "loss": 0.113, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3777432441711426, "rewards/margins": 10.111302375793457, "rewards/rejected": -7.733559608459473, "step": 1367 }, { "epoch": 0.76, "learning_rate": 7.122709602091956e-07, "logits/chosen": -6.012782573699951, "logits/rejected": -6.013812065124512, "logps/chosen": -310.06365966796875, "logps/rejected": -213.94522094726562, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": 2.992772102355957, "rewards/margins": 7.451608657836914, "rewards/rejected": -4.458836555480957, "step": 1368 }, { "epoch": 0.76, "learning_rate": 7.118637138014395e-07, "logits/chosen": -6.071951866149902, "logits/rejected": -6.103793621063232, "logps/chosen": -328.6159362792969, "logps/rejected": -317.91339111328125, "loss": 0.1579, "rewards/accuracies": 1.0, "rewards/chosen": 4.029630184173584, "rewards/margins": 10.137107849121094, "rewards/rejected": -6.107477188110352, "step": 1369 }, { "epoch": 0.76, "learning_rate": 7.114562960140829e-07, "logits/chosen": -6.058844566345215, "logits/rejected": -5.97899055480957, "logps/chosen": -195.7847442626953, "logps/rejected": -263.77032470703125, "loss": 0.1165, "rewards/accuracies": 0.9375, "rewards/chosen": 1.658060073852539, "rewards/margins": 10.799760818481445, "rewards/rejected": -9.141700744628906, "step": 1370 }, { "epoch": 0.76, "learning_rate": 7.110487071766923e-07, "logits/chosen": -6.125411033630371, "logits/rejected": -6.047447681427002, "logps/chosen": -232.73199462890625, "logps/rejected": -258.67987060546875, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": 3.4023754596710205, "rewards/margins": 9.969518661499023, "rewards/rejected": -6.567142486572266, "step": 1371 }, { "epoch": 0.76, "learning_rate": 7.106409476189717e-07, "logits/chosen": -6.070623397827148, "logits/rejected": -6.175521373748779, "logps/chosen": -265.2828369140625, "logps/rejected": -251.43894958496094, "loss": 0.363, "rewards/accuracies": 1.0, "rewards/chosen": 3.1157872676849365, "rewards/margins": 8.005983352661133, "rewards/rejected": -4.890196323394775, "step": 1372 }, { "epoch": 0.76, "learning_rate": 7.102330176707639e-07, "logits/chosen": -5.981566429138184, "logits/rejected": -6.111678600311279, "logps/chosen": -151.10443115234375, "logps/rejected": -189.6289520263672, "loss": 0.1003, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9500110149383545, "rewards/margins": 6.465855121612549, "rewards/rejected": -4.515844821929932, "step": 1373 }, { "epoch": 0.76, "learning_rate": 7.098249176620495e-07, "logits/chosen": -6.077847480773926, "logits/rejected": -6.128878593444824, "logps/chosen": -270.5472412109375, "logps/rejected": -160.19386291503906, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/chosen": 5.154694557189941, "rewards/margins": 10.998868942260742, "rewards/rejected": -5.844175338745117, "step": 1374 }, { "epoch": 0.76, "learning_rate": 7.09416647922946e-07, "logits/chosen": -5.983907699584961, "logits/rejected": -5.979390621185303, "logps/chosen": -302.01434326171875, "logps/rejected": -155.2790069580078, "loss": 0.1117, "rewards/accuracies": 1.0, "rewards/chosen": 5.997051239013672, "rewards/margins": 8.487147331237793, "rewards/rejected": -2.4900965690612793, "step": 1375 }, { "epoch": 0.76, "learning_rate": 7.090082087837091e-07, "logits/chosen": -6.021796226501465, "logits/rejected": -6.069650650024414, "logps/chosen": -209.50497436523438, "logps/rejected": -172.66781616210938, "loss": 0.0688, "rewards/accuracies": 1.0, "rewards/chosen": 3.8919754028320312, "rewards/margins": 8.643095016479492, "rewards/rejected": -4.751119613647461, "step": 1376 }, { "epoch": 0.76, "learning_rate": 7.085996005747308e-07, "logits/chosen": -6.035234451293945, "logits/rejected": -6.031132698059082, "logps/chosen": -285.725830078125, "logps/rejected": -243.60324096679688, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": 4.526556968688965, "rewards/margins": 9.003499984741211, "rewards/rejected": -4.476942539215088, "step": 1377 }, { "epoch": 0.77, "learning_rate": 7.081908236265401e-07, "logits/chosen": -6.1443586349487305, "logits/rejected": -6.037471771240234, "logps/chosen": -289.89959716796875, "logps/rejected": -99.58477783203125, "loss": 0.0712, "rewards/accuracies": 0.9375, "rewards/chosen": 4.69611930847168, "rewards/margins": 8.758322715759277, "rewards/rejected": -4.0622029304504395, "step": 1378 }, { "epoch": 0.77, "learning_rate": 7.077818782698028e-07, "logits/chosen": -6.2739105224609375, "logits/rejected": -5.960155963897705, "logps/chosen": -278.5445861816406, "logps/rejected": -100.51150512695312, "loss": 0.2256, "rewards/accuracies": 1.0, "rewards/chosen": 3.0493323802948, "rewards/margins": 7.940944194793701, "rewards/rejected": -4.8916120529174805, "step": 1379 }, { "epoch": 0.77, "learning_rate": 7.073727648353205e-07, "logits/chosen": -6.125566005706787, "logits/rejected": -6.0791335105896, "logps/chosen": -278.09368896484375, "logps/rejected": -167.3604736328125, "loss": 0.1275, "rewards/accuracies": 1.0, "rewards/chosen": 4.106603622436523, "rewards/margins": 8.767158508300781, "rewards/rejected": -4.660555362701416, "step": 1380 }, { "epoch": 0.77, "learning_rate": 7.06963483654031e-07, "logits/chosen": -6.028334617614746, "logits/rejected": -5.977977752685547, "logps/chosen": -189.5796661376953, "logps/rejected": -160.3148956298828, "loss": 0.1301, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0674694776535034, "rewards/margins": 6.597287178039551, "rewards/rejected": -5.529817581176758, "step": 1381 }, { "epoch": 0.77, "learning_rate": 7.065540350570077e-07, "logits/chosen": -6.087738990783691, "logits/rejected": -6.033205986022949, "logps/chosen": -286.96453857421875, "logps/rejected": -229.64317321777344, "loss": 0.2408, "rewards/accuracies": 0.8125, "rewards/chosen": 5.024192810058594, "rewards/margins": 7.5096940994262695, "rewards/rejected": -2.485501527786255, "step": 1382 }, { "epoch": 0.77, "learning_rate": 7.061444193754595e-07, "logits/chosen": -6.030710697174072, "logits/rejected": -5.9840312004089355, "logps/chosen": -255.11167907714844, "logps/rejected": -181.9347381591797, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": 2.765686511993408, "rewards/margins": 7.820613861083984, "rewards/rejected": -5.054927349090576, "step": 1383 }, { "epoch": 0.77, "learning_rate": 7.057346369407304e-07, "logits/chosen": -6.029199123382568, "logits/rejected": -6.16962194442749, "logps/chosen": -383.7835388183594, "logps/rejected": -344.0092468261719, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": 3.077986240386963, "rewards/margins": 8.02470588684082, "rewards/rejected": -4.946719169616699, "step": 1384 }, { "epoch": 0.77, "learning_rate": 7.053246880842991e-07, "logits/chosen": -6.178139686584473, "logits/rejected": -6.088767051696777, "logps/chosen": -504.03717041015625, "logps/rejected": -306.3965759277344, "loss": 0.1394, "rewards/accuracies": 0.875, "rewards/chosen": 6.406309604644775, "rewards/margins": 8.374695777893066, "rewards/rejected": -1.9683860540390015, "step": 1385 }, { "epoch": 0.77, "learning_rate": 7.049145731377794e-07, "logits/chosen": -5.98462438583374, "logits/rejected": -6.049999237060547, "logps/chosen": -235.57582092285156, "logps/rejected": -173.9741973876953, "loss": 0.0847, "rewards/accuracies": 0.9375, "rewards/chosen": 3.401129722595215, "rewards/margins": 6.435771465301514, "rewards/rejected": -3.034641981124878, "step": 1386 }, { "epoch": 0.77, "learning_rate": 7.045042924329189e-07, "logits/chosen": -5.973904609680176, "logits/rejected": -5.995218276977539, "logps/chosen": -424.8649597167969, "logps/rejected": -399.3345947265625, "loss": 0.1801, "rewards/accuracies": 0.9375, "rewards/chosen": 4.982008457183838, "rewards/margins": 9.051424980163574, "rewards/rejected": -4.069416522979736, "step": 1387 }, { "epoch": 0.77, "learning_rate": 7.040938463015997e-07, "logits/chosen": -6.1959452629089355, "logits/rejected": -6.12241792678833, "logps/chosen": -275.55889892578125, "logps/rejected": -193.1356201171875, "loss": 0.0734, "rewards/accuracies": 0.875, "rewards/chosen": 3.824281692504883, "rewards/margins": 8.382363319396973, "rewards/rejected": -4.55808162689209, "step": 1388 }, { "epoch": 0.77, "learning_rate": 7.036832350758377e-07, "logits/chosen": -6.040772438049316, "logits/rejected": -6.053961753845215, "logps/chosen": -251.57614135742188, "logps/rejected": -287.55816650390625, "loss": 0.0997, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1320853233337402, "rewards/margins": 10.337739944458008, "rewards/rejected": -8.20565414428711, "step": 1389 }, { "epoch": 0.77, "learning_rate": 7.032724590877821e-07, "logits/chosen": -6.033738613128662, "logits/rejected": -6.187263011932373, "logps/chosen": -316.8111877441406, "logps/rejected": -322.9998474121094, "loss": 0.2025, "rewards/accuracies": 0.9375, "rewards/chosen": 4.997002124786377, "rewards/margins": 10.212957382202148, "rewards/rejected": -5.2159552574157715, "step": 1390 }, { "epoch": 0.77, "learning_rate": 7.028615186697153e-07, "logits/chosen": -6.144097328186035, "logits/rejected": -6.045859336853027, "logps/chosen": -257.6662292480469, "logps/rejected": -205.64915466308594, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": 4.441778659820557, "rewards/margins": 9.94100570678711, "rewards/rejected": -5.499227523803711, "step": 1391 }, { "epoch": 0.77, "learning_rate": 7.024504141540532e-07, "logits/chosen": -5.957516670227051, "logits/rejected": -6.067089557647705, "logps/chosen": -442.4953918457031, "logps/rejected": -253.59547424316406, "loss": 0.0841, "rewards/accuracies": 1.0, "rewards/chosen": 7.423131942749023, "rewards/margins": 10.118366241455078, "rewards/rejected": -2.6952340602874756, "step": 1392 }, { "epoch": 0.77, "learning_rate": 7.020391458733441e-07, "logits/chosen": -6.022036552429199, "logits/rejected": -6.037942886352539, "logps/chosen": -168.08607482910156, "logps/rejected": -163.5162353515625, "loss": 0.0702, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2869677543640137, "rewards/margins": 6.173643112182617, "rewards/rejected": -3.8866751194000244, "step": 1393 }, { "epoch": 0.77, "learning_rate": 7.016277141602685e-07, "logits/chosen": -6.051993370056152, "logits/rejected": -6.243185043334961, "logps/chosen": -244.00619506835938, "logps/rejected": -249.9722900390625, "loss": 0.0991, "rewards/accuracies": 0.875, "rewards/chosen": 2.502530097961426, "rewards/margins": 8.83254623413086, "rewards/rejected": -6.330016136169434, "step": 1394 }, { "epoch": 0.77, "learning_rate": 7.012161193476398e-07, "logits/chosen": -6.051778793334961, "logits/rejected": -6.1035237312316895, "logps/chosen": -207.08815002441406, "logps/rejected": -230.94570922851562, "loss": 0.1255, "rewards/accuracies": 1.0, "rewards/chosen": 1.997448205947876, "rewards/margins": 8.58229923248291, "rewards/rejected": -6.584850788116455, "step": 1395 }, { "epoch": 0.78, "learning_rate": 7.008043617684028e-07, "logits/chosen": -6.063898086547852, "logits/rejected": -5.988157749176025, "logps/chosen": -375.78021240234375, "logps/rejected": -243.9337615966797, "loss": 0.1238, "rewards/accuracies": 0.8125, "rewards/chosen": 2.9254889488220215, "rewards/margins": 6.831324100494385, "rewards/rejected": -3.9058356285095215, "step": 1396 }, { "epoch": 0.78, "learning_rate": 7.003924417556343e-07, "logits/chosen": -6.061966419219971, "logits/rejected": -6.075843811035156, "logps/chosen": -211.1681671142578, "logps/rejected": -174.83114624023438, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": 2.8906283378601074, "rewards/margins": 8.771059036254883, "rewards/rejected": -5.880430221557617, "step": 1397 }, { "epoch": 0.78, "learning_rate": 6.99980359642542e-07, "logits/chosen": -6.049656867980957, "logits/rejected": -6.11716890335083, "logps/chosen": -334.75897216796875, "logps/rejected": -233.36749267578125, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": 5.277340412139893, "rewards/margins": 10.433923721313477, "rewards/rejected": -5.156582832336426, "step": 1398 }, { "epoch": 0.78, "learning_rate": 6.995681157624651e-07, "logits/chosen": -6.013607025146484, "logits/rejected": -6.106748580932617, "logps/chosen": -204.61370849609375, "logps/rejected": -279.8631591796875, "loss": 0.0841, "rewards/accuracies": 0.9375, "rewards/chosen": 2.825883388519287, "rewards/margins": 11.475889205932617, "rewards/rejected": -8.650006294250488, "step": 1399 }, { "epoch": 0.78, "learning_rate": 6.991557104488738e-07, "logits/chosen": -6.072766304016113, "logits/rejected": -6.013133525848389, "logps/chosen": -338.1749267578125, "logps/rejected": -200.5375518798828, "loss": 0.1599, "rewards/accuracies": 1.0, "rewards/chosen": 5.780627250671387, "rewards/margins": 10.533499717712402, "rewards/rejected": -4.752871513366699, "step": 1400 }, { "epoch": 0.78, "learning_rate": 6.987431440353686e-07, "logits/chosen": -6.017533779144287, "logits/rejected": -6.075963497161865, "logps/chosen": -212.59686279296875, "logps/rejected": -356.5498046875, "loss": 0.0996, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9465763568878174, "rewards/margins": 7.251452445983887, "rewards/rejected": -5.304876327514648, "step": 1401 }, { "epoch": 0.78, "learning_rate": 6.983304168556802e-07, "logits/chosen": -6.1310858726501465, "logits/rejected": -6.066628456115723, "logps/chosen": -393.19415283203125, "logps/rejected": -244.12884521484375, "loss": 0.0716, "rewards/accuracies": 0.9375, "rewards/chosen": 5.553382873535156, "rewards/margins": 9.498783111572266, "rewards/rejected": -3.9454004764556885, "step": 1402 }, { "epoch": 0.78, "learning_rate": 6.979175292436699e-07, "logits/chosen": -5.945898532867432, "logits/rejected": -5.959759712219238, "logps/chosen": -506.98529052734375, "logps/rejected": -276.83343505859375, "loss": 0.1411, "rewards/accuracies": 0.875, "rewards/chosen": 3.2649292945861816, "rewards/margins": 6.844979763031006, "rewards/rejected": -3.580049991607666, "step": 1403 }, { "epoch": 0.78, "learning_rate": 6.975044815333281e-07, "logits/chosen": -6.092831134796143, "logits/rejected": -6.020350456237793, "logps/chosen": -223.03616333007812, "logps/rejected": -309.6962890625, "loss": 0.319, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7989377975463867, "rewards/margins": 8.589644432067871, "rewards/rejected": -5.790707111358643, "step": 1404 }, { "epoch": 0.78, "learning_rate": 6.970912740587751e-07, "logits/chosen": -6.087954998016357, "logits/rejected": -6.038308620452881, "logps/chosen": -230.54843139648438, "logps/rejected": -175.5337371826172, "loss": 0.1429, "rewards/accuracies": 1.0, "rewards/chosen": 2.6565628051757812, "rewards/margins": 7.969485282897949, "rewards/rejected": -5.312921524047852, "step": 1405 }, { "epoch": 0.78, "learning_rate": 6.966779071542604e-07, "logits/chosen": -6.031346797943115, "logits/rejected": -6.063976287841797, "logps/chosen": -247.11044311523438, "logps/rejected": -352.00335693359375, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": 2.420100212097168, "rewards/margins": 10.821763038635254, "rewards/rejected": -8.401662826538086, "step": 1406 }, { "epoch": 0.78, "learning_rate": 6.962643811541626e-07, "logits/chosen": -6.097916126251221, "logits/rejected": -6.11122989654541, "logps/chosen": -309.63116455078125, "logps/rejected": -184.1705322265625, "loss": 0.14, "rewards/accuracies": 1.0, "rewards/chosen": 5.487165451049805, "rewards/margins": 9.325078010559082, "rewards/rejected": -3.8379123210906982, "step": 1407 }, { "epoch": 0.78, "learning_rate": 6.958506963929884e-07, "logits/chosen": -5.967081069946289, "logits/rejected": -5.962502479553223, "logps/chosen": -184.56394958496094, "logps/rejected": -211.9400634765625, "loss": 0.193, "rewards/accuracies": 1.0, "rewards/chosen": 2.8720860481262207, "rewards/margins": 8.054615020751953, "rewards/rejected": -5.182528972625732, "step": 1408 }, { "epoch": 0.78, "learning_rate": 6.954368532053738e-07, "logits/chosen": -6.078205108642578, "logits/rejected": -6.0064191818237305, "logps/chosen": -244.19454956054688, "logps/rejected": -145.0478973388672, "loss": 0.1792, "rewards/accuracies": 1.0, "rewards/chosen": 6.47467565536499, "rewards/margins": 9.28497314453125, "rewards/rejected": -2.8102989196777344, "step": 1409 }, { "epoch": 0.78, "learning_rate": 6.950228519260822e-07, "logits/chosen": -5.989204406738281, "logits/rejected": -6.013781547546387, "logps/chosen": -247.00938415527344, "logps/rejected": -190.20550537109375, "loss": 0.1637, "rewards/accuracies": 1.0, "rewards/chosen": 2.8201828002929688, "rewards/margins": 7.821065902709961, "rewards/rejected": -5.000883102416992, "step": 1410 }, { "epoch": 0.78, "learning_rate": 6.946086928900053e-07, "logits/chosen": -6.0209269523620605, "logits/rejected": -6.0377349853515625, "logps/chosen": -277.72344970703125, "logps/rejected": -321.469970703125, "loss": 0.0742, "rewards/accuracies": 0.9375, "rewards/chosen": 2.548733949661255, "rewards/margins": 7.521518707275391, "rewards/rejected": -4.972784519195557, "step": 1411 }, { "epoch": 0.78, "learning_rate": 6.941943764321622e-07, "logits/chosen": -6.058125972747803, "logits/rejected": -6.022767543792725, "logps/chosen": -304.62799072265625, "logps/rejected": -457.22412109375, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 3.2653563022613525, "rewards/margins": 7.910794258117676, "rewards/rejected": -4.645437717437744, "step": 1412 }, { "epoch": 0.78, "learning_rate": 6.937799028876996e-07, "logits/chosen": -6.063399314880371, "logits/rejected": -6.10997200012207, "logps/chosen": -235.42138671875, "logps/rejected": -239.85311889648438, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 3.0917506217956543, "rewards/margins": 7.379978179931641, "rewards/rejected": -4.288227081298828, "step": 1413 }, { "epoch": 0.79, "learning_rate": 6.93365272591891e-07, "logits/chosen": -6.003730773925781, "logits/rejected": -6.0014495849609375, "logps/chosen": -362.4808349609375, "logps/rejected": -168.1205291748047, "loss": 0.1466, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7496886253356934, "rewards/margins": 6.379487991333008, "rewards/rejected": -3.6297996044158936, "step": 1414 }, { "epoch": 0.79, "learning_rate": 6.929504858801366e-07, "logits/chosen": -6.004327297210693, "logits/rejected": -6.124914169311523, "logps/chosen": -245.3143310546875, "logps/rejected": -293.0394592285156, "loss": 0.14, "rewards/accuracies": 0.875, "rewards/chosen": 6.888875961303711, "rewards/margins": 10.227558135986328, "rewards/rejected": -3.3386831283569336, "step": 1415 }, { "epoch": 0.79, "learning_rate": 6.925355430879636e-07, "logits/chosen": -6.061826229095459, "logits/rejected": -6.096845626831055, "logps/chosen": -251.9625701904297, "logps/rejected": -151.36578369140625, "loss": 0.1894, "rewards/accuracies": 1.0, "rewards/chosen": 4.37583065032959, "rewards/margins": 7.907534599304199, "rewards/rejected": -3.5317041873931885, "step": 1416 }, { "epoch": 0.79, "learning_rate": 6.921204445510253e-07, "logits/chosen": -6.101950645446777, "logits/rejected": -6.142802715301514, "logps/chosen": -268.0932312011719, "logps/rejected": -223.0537872314453, "loss": 0.2277, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7643959522247314, "rewards/margins": 8.277070999145508, "rewards/rejected": -4.512675762176514, "step": 1417 }, { "epoch": 0.79, "learning_rate": 6.917051906051005e-07, "logits/chosen": -5.981034278869629, "logits/rejected": -6.061359405517578, "logps/chosen": -321.8081970214844, "logps/rejected": -248.7979278564453, "loss": 0.1384, "rewards/accuracies": 0.9375, "rewards/chosen": 5.404831409454346, "rewards/margins": 8.307886123657227, "rewards/rejected": -2.903054714202881, "step": 1418 }, { "epoch": 0.79, "learning_rate": 6.912897815860942e-07, "logits/chosen": -5.949730396270752, "logits/rejected": -6.068449974060059, "logps/chosen": -227.39773559570312, "logps/rejected": -195.53125, "loss": 0.0857, "rewards/accuracies": 0.875, "rewards/chosen": 3.6816599369049072, "rewards/margins": 8.130945205688477, "rewards/rejected": -4.449285507202148, "step": 1419 }, { "epoch": 0.79, "learning_rate": 6.908742178300369e-07, "logits/chosen": -6.080801963806152, "logits/rejected": -6.033605098724365, "logps/chosen": -282.88140869140625, "logps/rejected": -123.51484680175781, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": 4.745979309082031, "rewards/margins": 8.636303901672363, "rewards/rejected": -3.890324592590332, "step": 1420 }, { "epoch": 0.79, "learning_rate": 6.904584996730837e-07, "logits/chosen": -5.9843854904174805, "logits/rejected": -5.98454475402832, "logps/chosen": -274.2420654296875, "logps/rejected": -182.39999389648438, "loss": 0.2261, "rewards/accuracies": 0.9375, "rewards/chosen": 7.142513275146484, "rewards/margins": 9.65526008605957, "rewards/rejected": -2.5127463340759277, "step": 1421 }, { "epoch": 0.79, "learning_rate": 6.900426274515155e-07, "logits/chosen": -6.023632049560547, "logits/rejected": -5.963802814483643, "logps/chosen": -259.6689453125, "logps/rejected": -244.3707733154297, "loss": 0.0943, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8155786991119385, "rewards/margins": 8.894298553466797, "rewards/rejected": -5.078719615936279, "step": 1422 }, { "epoch": 0.79, "learning_rate": 6.896266015017369e-07, "logits/chosen": -5.993277549743652, "logits/rejected": -6.034170627593994, "logps/chosen": -172.14501953125, "logps/rejected": -207.51014709472656, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 2.292076587677002, "rewards/margins": 10.636287689208984, "rewards/rejected": -8.344210624694824, "step": 1423 }, { "epoch": 0.79, "learning_rate": 6.892104221602777e-07, "logits/chosen": -6.028767108917236, "logits/rejected": -5.996218681335449, "logps/chosen": -200.1058807373047, "logps/rejected": -184.36334228515625, "loss": 0.0766, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3673644065856934, "rewards/margins": 6.937259674072266, "rewards/rejected": -4.569895267486572, "step": 1424 }, { "epoch": 0.79, "learning_rate": 6.887940897637907e-07, "logits/chosen": -6.02112340927124, "logits/rejected": -6.061664581298828, "logps/chosen": -259.9549560546875, "logps/rejected": -212.77781677246094, "loss": 0.0725, "rewards/accuracies": 1.0, "rewards/chosen": 4.727875709533691, "rewards/margins": 8.519977569580078, "rewards/rejected": -3.792102336883545, "step": 1425 }, { "epoch": 0.79, "learning_rate": 6.883776046490537e-07, "logits/chosen": -6.031931400299072, "logits/rejected": -6.016203880310059, "logps/chosen": -148.73397827148438, "logps/rejected": -175.6664581298828, "loss": 0.1202, "rewards/accuracies": 0.9375, "rewards/chosen": 2.086768627166748, "rewards/margins": 7.321413993835449, "rewards/rejected": -5.234644889831543, "step": 1426 }, { "epoch": 0.79, "learning_rate": 6.879609671529674e-07, "logits/chosen": -6.005404949188232, "logits/rejected": -6.0299530029296875, "logps/chosen": -185.9849853515625, "logps/rejected": -176.24012756347656, "loss": 0.1062, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3543701171875, "rewards/margins": 7.752780914306641, "rewards/rejected": -5.398411273956299, "step": 1427 }, { "epoch": 0.79, "learning_rate": 6.875441776125556e-07, "logits/chosen": -6.01747989654541, "logits/rejected": -6.084235668182373, "logps/chosen": -239.0260009765625, "logps/rejected": -237.40078735351562, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": 5.651514053344727, "rewards/margins": 9.587220191955566, "rewards/rejected": -3.935706615447998, "step": 1428 }, { "epoch": 0.79, "learning_rate": 6.871272363649656e-07, "logits/chosen": -6.053633689880371, "logits/rejected": -6.111542701721191, "logps/chosen": -208.09933471679688, "logps/rejected": -362.01177978515625, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": 1.9408422708511353, "rewards/margins": 8.82948112487793, "rewards/rejected": -6.888639450073242, "step": 1429 }, { "epoch": 0.79, "learning_rate": 6.867101437474672e-07, "logits/chosen": -6.0620927810668945, "logits/rejected": -6.064268112182617, "logps/chosen": -213.6414337158203, "logps/rejected": -239.34356689453125, "loss": 0.1386, "rewards/accuracies": 1.0, "rewards/chosen": 4.208807468414307, "rewards/margins": 11.655021667480469, "rewards/rejected": -7.4462151527404785, "step": 1430 }, { "epoch": 0.79, "learning_rate": 6.862929000974522e-07, "logits/chosen": -6.12476921081543, "logits/rejected": -5.963036060333252, "logps/chosen": -302.5382995605469, "logps/rejected": -141.02398681640625, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": 3.41206955909729, "rewards/margins": 8.586894989013672, "rewards/rejected": -5.174825668334961, "step": 1431 }, { "epoch": 0.8, "learning_rate": 6.858755057524354e-07, "logits/chosen": -6.0347371101379395, "logits/rejected": -6.083566665649414, "logps/chosen": -230.4484100341797, "logps/rejected": -165.91595458984375, "loss": 0.0964, "rewards/accuracies": 0.9375, "rewards/chosen": 3.783538341522217, "rewards/margins": 9.657254219055176, "rewards/rejected": -5.873715400695801, "step": 1432 }, { "epoch": 0.8, "learning_rate": 6.854579610500529e-07, "logits/chosen": -6.009468078613281, "logits/rejected": -5.99528169631958, "logps/chosen": -184.81146240234375, "logps/rejected": -173.541259765625, "loss": 0.1142, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5185363292694092, "rewards/margins": 9.264116287231445, "rewards/rejected": -7.745580673217773, "step": 1433 }, { "epoch": 0.8, "learning_rate": 6.850402663280626e-07, "logits/chosen": -6.083688735961914, "logits/rejected": -6.064659595489502, "logps/chosen": -305.0957336425781, "logps/rejected": -191.49119567871094, "loss": 0.0562, "rewards/accuracies": 0.9375, "rewards/chosen": 4.190209865570068, "rewards/margins": 6.907886505126953, "rewards/rejected": -2.7176766395568848, "step": 1434 }, { "epoch": 0.8, "learning_rate": 6.84622421924344e-07, "logits/chosen": -6.074461936950684, "logits/rejected": -6.197417736053467, "logps/chosen": -187.24879455566406, "logps/rejected": -229.85159301757812, "loss": 0.0986, "rewards/accuracies": 1.0, "rewards/chosen": 2.394407272338867, "rewards/margins": 9.103700637817383, "rewards/rejected": -6.709293365478516, "step": 1435 }, { "epoch": 0.8, "learning_rate": 6.842044281768969e-07, "logits/chosen": -6.038440704345703, "logits/rejected": -6.060318946838379, "logps/chosen": -276.129150390625, "logps/rejected": -324.8747863769531, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": 5.401124000549316, "rewards/margins": 9.890104293823242, "rewards/rejected": -4.488980770111084, "step": 1436 }, { "epoch": 0.8, "learning_rate": 6.837862854238433e-07, "logits/chosen": -6.078960418701172, "logits/rejected": -5.949121475219727, "logps/chosen": -283.1185607910156, "logps/rejected": -179.88327026367188, "loss": 0.162, "rewards/accuracies": 0.875, "rewards/chosen": 3.9618406295776367, "rewards/margins": 7.443095684051514, "rewards/rejected": -3.481255054473877, "step": 1437 }, { "epoch": 0.8, "learning_rate": 6.833679940034239e-07, "logits/chosen": -6.076961040496826, "logits/rejected": -6.0995192527771, "logps/chosen": -348.430908203125, "logps/rejected": -198.14039611816406, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": 6.940587043762207, "rewards/margins": 10.83799934387207, "rewards/rejected": -3.8974132537841797, "step": 1438 }, { "epoch": 0.8, "learning_rate": 6.829495542540013e-07, "logits/chosen": -6.079804420471191, "logits/rejected": -6.038825035095215, "logps/chosen": -362.19976806640625, "logps/rejected": -342.4893798828125, "loss": 0.2112, "rewards/accuracies": 0.875, "rewards/chosen": 3.5997753143310547, "rewards/margins": 7.802933692932129, "rewards/rejected": -4.203158855438232, "step": 1439 }, { "epoch": 0.8, "learning_rate": 6.82530966514057e-07, "logits/chosen": -5.990476131439209, "logits/rejected": -6.079122543334961, "logps/chosen": -196.23193359375, "logps/rejected": -204.21011352539062, "loss": 0.1084, "rewards/accuracies": 1.0, "rewards/chosen": 4.5843377113342285, "rewards/margins": 10.367134094238281, "rewards/rejected": -5.782797336578369, "step": 1440 }, { "epoch": 0.8, "learning_rate": 6.821122311221931e-07, "logits/chosen": -6.083323955535889, "logits/rejected": -6.174132347106934, "logps/chosen": -274.97393798828125, "logps/rejected": -181.14389038085938, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": 4.520473957061768, "rewards/margins": 7.583676815032959, "rewards/rejected": -3.0632033348083496, "step": 1441 }, { "epoch": 0.8, "learning_rate": 6.816933484171302e-07, "logits/chosen": -6.084748268127441, "logits/rejected": -6.015445232391357, "logps/chosen": -235.4429168701172, "logps/rejected": -191.06370544433594, "loss": 0.0991, "rewards/accuracies": 1.0, "rewards/chosen": 3.349029541015625, "rewards/margins": 9.997099876403809, "rewards/rejected": -6.648070335388184, "step": 1442 }, { "epoch": 0.8, "learning_rate": 6.81274318737709e-07, "logits/chosen": -6.1235551834106445, "logits/rejected": -6.0470685958862305, "logps/chosen": -320.794921875, "logps/rejected": -258.6234130859375, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": 4.239457130432129, "rewards/margins": 11.68571662902832, "rewards/rejected": -7.44625997543335, "step": 1443 }, { "epoch": 0.8, "learning_rate": 6.808551424228883e-07, "logits/chosen": -6.095592975616455, "logits/rejected": -6.077411651611328, "logps/chosen": -238.30197143554688, "logps/rejected": -202.46527099609375, "loss": 0.1198, "rewards/accuracies": 1.0, "rewards/chosen": 4.7935380935668945, "rewards/margins": 8.691986083984375, "rewards/rejected": -3.8984484672546387, "step": 1444 }, { "epoch": 0.8, "learning_rate": 6.804358198117459e-07, "logits/chosen": -6.065978050231934, "logits/rejected": -6.012004852294922, "logps/chosen": -180.2494354248047, "logps/rejected": -167.6892852783203, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": 4.128413200378418, "rewards/margins": 7.8849077224731445, "rewards/rejected": -3.7564949989318848, "step": 1445 }, { "epoch": 0.8, "learning_rate": 6.800163512434779e-07, "logits/chosen": -6.093088150024414, "logits/rejected": -6.059597492218018, "logps/chosen": -204.48187255859375, "logps/rejected": -165.95452880859375, "loss": 0.1583, "rewards/accuracies": 1.0, "rewards/chosen": 4.189722061157227, "rewards/margins": 8.290176391601562, "rewards/rejected": -4.100454807281494, "step": 1446 }, { "epoch": 0.8, "learning_rate": 6.795967370573985e-07, "logits/chosen": -6.100433349609375, "logits/rejected": -5.9732747077941895, "logps/chosen": -365.7115478515625, "logps/rejected": -354.37298583984375, "loss": 0.1748, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5878381729125977, "rewards/margins": 8.686935424804688, "rewards/rejected": -5.09909725189209, "step": 1447 }, { "epoch": 0.8, "learning_rate": 6.791769775929395e-07, "logits/chosen": -6.014057159423828, "logits/rejected": -5.991609573364258, "logps/chosen": -240.34109497070312, "logps/rejected": -201.99700927734375, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": 4.1615190505981445, "rewards/margins": 8.778528213500977, "rewards/rejected": -4.617008686065674, "step": 1448 }, { "epoch": 0.8, "learning_rate": 6.787570731896505e-07, "logits/chosen": -5.989819049835205, "logits/rejected": -6.157070636749268, "logps/chosen": -258.9277648925781, "logps/rejected": -445.72076416015625, "loss": 0.0869, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9556996822357178, "rewards/margins": 9.462995529174805, "rewards/rejected": -5.507295608520508, "step": 1449 }, { "epoch": 0.8, "learning_rate": 6.783370241871982e-07, "logits/chosen": -5.98892879486084, "logits/rejected": -6.0236358642578125, "logps/chosen": -267.2408142089844, "logps/rejected": -251.838134765625, "loss": 0.1348, "rewards/accuracies": 1.0, "rewards/chosen": 3.375676155090332, "rewards/margins": 8.497501373291016, "rewards/rejected": -5.121824741363525, "step": 1450 }, { "epoch": 0.81, "learning_rate": 6.779168309253662e-07, "logits/chosen": -6.208059787750244, "logits/rejected": -6.102479934692383, "logps/chosen": -321.77850341796875, "logps/rejected": -178.5479278564453, "loss": 0.0804, "rewards/accuracies": 1.0, "rewards/chosen": 6.135748863220215, "rewards/margins": 9.33074951171875, "rewards/rejected": -3.195000171661377, "step": 1451 }, { "epoch": 0.81, "learning_rate": 6.774964937440549e-07, "logits/chosen": -5.975876808166504, "logits/rejected": -5.948160171508789, "logps/chosen": -282.64959716796875, "logps/rejected": -165.46792602539062, "loss": 0.0838, "rewards/accuracies": 1.0, "rewards/chosen": 3.4763808250427246, "rewards/margins": 6.728830337524414, "rewards/rejected": -3.2524495124816895, "step": 1452 }, { "epoch": 0.81, "learning_rate": 6.77076012983281e-07, "logits/chosen": -6.115548610687256, "logits/rejected": -6.085720062255859, "logps/chosen": -306.37847900390625, "logps/rejected": -256.3692932128906, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 5.126550674438477, "rewards/margins": 12.62614917755127, "rewards/rejected": -7.499599456787109, "step": 1453 }, { "epoch": 0.81, "learning_rate": 6.766553889831776e-07, "logits/chosen": -5.981795310974121, "logits/rejected": -6.113895893096924, "logps/chosen": -253.09500122070312, "logps/rejected": -274.5882873535156, "loss": 0.3002, "rewards/accuracies": 1.0, "rewards/chosen": 3.6040937900543213, "rewards/margins": 11.687889099121094, "rewards/rejected": -8.083795547485352, "step": 1454 }, { "epoch": 0.81, "learning_rate": 6.762346220839932e-07, "logits/chosen": -6.018502235412598, "logits/rejected": -6.14133358001709, "logps/chosen": -319.9573059082031, "logps/rejected": -236.5071563720703, "loss": 0.1685, "rewards/accuracies": 1.0, "rewards/chosen": 4.065950393676758, "rewards/margins": 8.568302154541016, "rewards/rejected": -4.502351760864258, "step": 1455 }, { "epoch": 0.81, "learning_rate": 6.758137126260926e-07, "logits/chosen": -6.09340238571167, "logits/rejected": -6.079553604125977, "logps/chosen": -359.01318359375, "logps/rejected": -237.42196655273438, "loss": 0.1373, "rewards/accuracies": 0.9375, "rewards/chosen": 4.4088969230651855, "rewards/margins": 9.601430892944336, "rewards/rejected": -5.192534446716309, "step": 1456 }, { "epoch": 0.81, "learning_rate": 6.753926609499552e-07, "logits/chosen": -6.139950752258301, "logits/rejected": -6.069728851318359, "logps/chosen": -361.2205810546875, "logps/rejected": -383.296875, "loss": 0.0616, "rewards/accuracies": 0.9375, "rewards/chosen": 3.935922622680664, "rewards/margins": 10.00815486907959, "rewards/rejected": -6.072231769561768, "step": 1457 }, { "epoch": 0.81, "learning_rate": 6.749714673961759e-07, "logits/chosen": -5.968595027923584, "logits/rejected": -6.037608623504639, "logps/chosen": -166.68409729003906, "logps/rejected": -186.7183837890625, "loss": 0.1849, "rewards/accuracies": 1.0, "rewards/chosen": 1.9225680828094482, "rewards/margins": 8.475505828857422, "rewards/rejected": -6.5529375076293945, "step": 1458 }, { "epoch": 0.81, "learning_rate": 6.745501323054639e-07, "logits/chosen": -6.070195198059082, "logits/rejected": -6.078049182891846, "logps/chosen": -220.77325439453125, "logps/rejected": -170.77783203125, "loss": 0.1038, "rewards/accuracies": 1.0, "rewards/chosen": 5.940371513366699, "rewards/margins": 9.914987564086914, "rewards/rejected": -3.9746158123016357, "step": 1459 }, { "epoch": 0.81, "learning_rate": 6.741286560186435e-07, "logits/chosen": -6.1232123374938965, "logits/rejected": -6.057173252105713, "logps/chosen": -311.5325012207031, "logps/rejected": -166.7673797607422, "loss": 0.1019, "rewards/accuracies": 0.875, "rewards/chosen": 4.373503684997559, "rewards/margins": 8.7137451171875, "rewards/rejected": -4.340240955352783, "step": 1460 }, { "epoch": 0.81, "learning_rate": 6.73707038876653e-07, "logits/chosen": -6.074114799499512, "logits/rejected": -6.109468460083008, "logps/chosen": -254.2091522216797, "logps/rejected": -196.37802124023438, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": 4.140955924987793, "rewards/margins": 7.9759039878845215, "rewards/rejected": -3.8349483013153076, "step": 1461 }, { "epoch": 0.81, "learning_rate": 6.732852812205442e-07, "logits/chosen": -6.114867210388184, "logits/rejected": -6.041839122772217, "logps/chosen": -274.8901672363281, "logps/rejected": -162.8756866455078, "loss": 0.1362, "rewards/accuracies": 1.0, "rewards/chosen": 5.0407795906066895, "rewards/margins": 8.565729141235352, "rewards/rejected": -3.524949789047241, "step": 1462 }, { "epoch": 0.81, "learning_rate": 6.728633833914834e-07, "logits/chosen": -6.042699813842773, "logits/rejected": -6.0483903884887695, "logps/chosen": -295.65399169921875, "logps/rejected": -235.32254028320312, "loss": 0.1843, "rewards/accuracies": 1.0, "rewards/chosen": 4.470395565032959, "rewards/margins": 12.02309799194336, "rewards/rejected": -7.552703380584717, "step": 1463 }, { "epoch": 0.81, "learning_rate": 6.724413457307496e-07, "logits/chosen": -6.020845413208008, "logits/rejected": -6.1215996742248535, "logps/chosen": -124.5847396850586, "logps/rejected": -215.82608032226562, "loss": 0.3017, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1173217296600342, "rewards/margins": 7.984800338745117, "rewards/rejected": -6.867478370666504, "step": 1464 }, { "epoch": 0.81, "learning_rate": 6.720191685797349e-07, "logits/chosen": -6.0630784034729, "logits/rejected": -6.015294551849365, "logps/chosen": -156.95323181152344, "logps/rejected": -159.0855255126953, "loss": 0.115, "rewards/accuracies": 1.0, "rewards/chosen": 2.1392629146575928, "rewards/margins": 7.701141357421875, "rewards/rejected": -5.561878204345703, "step": 1465 }, { "epoch": 0.81, "learning_rate": 6.715968522799448e-07, "logits/chosen": -6.052424430847168, "logits/rejected": -6.0743913650512695, "logps/chosen": -236.26776123046875, "logps/rejected": -186.9967041015625, "loss": 0.1282, "rewards/accuracies": 1.0, "rewards/chosen": 4.501534461975098, "rewards/margins": 8.899417877197266, "rewards/rejected": -4.397883415222168, "step": 1466 }, { "epoch": 0.81, "learning_rate": 6.711743971729966e-07, "logits/chosen": -6.0402607917785645, "logits/rejected": -6.040450096130371, "logps/chosen": -266.1409912109375, "logps/rejected": -205.03463745117188, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": 4.260885238647461, "rewards/margins": 9.35067081451416, "rewards/rejected": -5.089785575866699, "step": 1467 }, { "epoch": 0.81, "learning_rate": 6.707518036006208e-07, "logits/chosen": -6.187680721282959, "logits/rejected": -6.244836807250977, "logps/chosen": -269.59564208984375, "logps/rejected": -191.52980041503906, "loss": 0.1339, "rewards/accuracies": 0.9375, "rewards/chosen": 4.133073806762695, "rewards/margins": 8.350425720214844, "rewards/rejected": -4.217351913452148, "step": 1468 }, { "epoch": 0.82, "learning_rate": 6.703290719046591e-07, "logits/chosen": -6.087540626525879, "logits/rejected": -6.1387481689453125, "logps/chosen": -482.7379455566406, "logps/rejected": -256.47576904296875, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": 5.199287414550781, "rewards/margins": 10.28647518157959, "rewards/rejected": -5.087187767028809, "step": 1469 }, { "epoch": 0.82, "learning_rate": 6.699062024270653e-07, "logits/chosen": -6.133069038391113, "logits/rejected": -5.996346473693848, "logps/chosen": -196.8043670654297, "logps/rejected": -193.03961181640625, "loss": 0.1736, "rewards/accuracies": 0.875, "rewards/chosen": 3.4519073963165283, "rewards/margins": 9.111331939697266, "rewards/rejected": -5.659425258636475, "step": 1470 }, { "epoch": 0.82, "learning_rate": 6.694831955099048e-07, "logits/chosen": -6.0321125984191895, "logits/rejected": -6.156271457672119, "logps/chosen": -144.6280975341797, "logps/rejected": -192.10687255859375, "loss": 0.1206, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7157232761383057, "rewards/margins": 8.255969047546387, "rewards/rejected": -6.540245532989502, "step": 1471 }, { "epoch": 0.82, "learning_rate": 6.690600514953535e-07, "logits/chosen": -6.088932514190674, "logits/rejected": -6.074586868286133, "logps/chosen": -198.60821533203125, "logps/rejected": -149.71969604492188, "loss": 0.136, "rewards/accuracies": 0.875, "rewards/chosen": 2.9986696243286133, "rewards/margins": 5.128293514251709, "rewards/rejected": -2.129624128341675, "step": 1472 }, { "epoch": 0.82, "learning_rate": 6.686367707256991e-07, "logits/chosen": -6.049459934234619, "logits/rejected": -6.035828590393066, "logps/chosen": -375.839599609375, "logps/rejected": -253.07582092285156, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": 3.4270949363708496, "rewards/margins": 10.41610336303711, "rewards/rejected": -6.989007949829102, "step": 1473 }, { "epoch": 0.82, "learning_rate": 6.682133535433393e-07, "logits/chosen": -6.050764083862305, "logits/rejected": -6.036583423614502, "logps/chosen": -239.5872802734375, "logps/rejected": -164.75473022460938, "loss": 0.1804, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1709675788879395, "rewards/margins": 7.245018005371094, "rewards/rejected": -4.074050426483154, "step": 1474 }, { "epoch": 0.82, "learning_rate": 6.677898002907823e-07, "logits/chosen": -6.048431396484375, "logits/rejected": -5.971869945526123, "logps/chosen": -370.1640625, "logps/rejected": -442.60546875, "loss": 0.1261, "rewards/accuracies": 0.875, "rewards/chosen": 2.3146142959594727, "rewards/margins": 7.257044792175293, "rewards/rejected": -4.94243049621582, "step": 1475 }, { "epoch": 0.82, "learning_rate": 6.673661113106465e-07, "logits/chosen": -6.079365253448486, "logits/rejected": -6.054747581481934, "logps/chosen": -163.7370147705078, "logps/rejected": -247.7954864501953, "loss": 0.1719, "rewards/accuracies": 1.0, "rewards/chosen": 1.1867938041687012, "rewards/margins": 10.377145767211914, "rewards/rejected": -9.190353393554688, "step": 1476 }, { "epoch": 0.82, "learning_rate": 6.669422869456601e-07, "logits/chosen": -6.1464643478393555, "logits/rejected": -6.017050743103027, "logps/chosen": -254.98779296875, "logps/rejected": -272.50885009765625, "loss": 0.0963, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1468982696533203, "rewards/margins": 6.069151878356934, "rewards/rejected": -3.9222536087036133, "step": 1477 }, { "epoch": 0.82, "learning_rate": 6.665183275386605e-07, "logits/chosen": -6.019639015197754, "logits/rejected": -6.017031192779541, "logps/chosen": -229.76785278320312, "logps/rejected": -213.37869262695312, "loss": 0.1109, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5435714721679688, "rewards/margins": 7.712712287902832, "rewards/rejected": -4.1691412925720215, "step": 1478 }, { "epoch": 0.82, "learning_rate": 6.660942334325949e-07, "logits/chosen": -5.999509334564209, "logits/rejected": -6.1875810623168945, "logps/chosen": -212.84242248535156, "logps/rejected": -199.08053588867188, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": 4.274172782897949, "rewards/margins": 9.802881240844727, "rewards/rejected": -5.528709411621094, "step": 1479 }, { "epoch": 0.82, "learning_rate": 6.656700049705185e-07, "logits/chosen": -6.031126976013184, "logits/rejected": -5.983010292053223, "logps/chosen": -348.4028015136719, "logps/rejected": -364.0274353027344, "loss": 0.1678, "rewards/accuracies": 0.875, "rewards/chosen": 1.3733344078063965, "rewards/margins": 8.505704879760742, "rewards/rejected": -7.132370471954346, "step": 1480 }, { "epoch": 0.82, "learning_rate": 6.652456424955963e-07, "logits/chosen": -6.0654296875, "logits/rejected": -6.087596893310547, "logps/chosen": -274.8836669921875, "logps/rejected": -224.65957641601562, "loss": 0.1973, "rewards/accuracies": 0.8125, "rewards/chosen": 5.105591297149658, "rewards/margins": 7.5897417068481445, "rewards/rejected": -2.484149932861328, "step": 1481 }, { "epoch": 0.82, "learning_rate": 6.648211463511011e-07, "logits/chosen": -6.02240514755249, "logits/rejected": -6.003151893615723, "logps/chosen": -247.96609497070312, "logps/rejected": -180.516845703125, "loss": 0.0959, "rewards/accuracies": 1.0, "rewards/chosen": 3.9012649059295654, "rewards/margins": 9.428915977478027, "rewards/rejected": -5.527651309967041, "step": 1482 }, { "epoch": 0.82, "learning_rate": 6.643965168804139e-07, "logits/chosen": -6.031816005706787, "logits/rejected": -6.178317070007324, "logps/chosen": -173.96502685546875, "logps/rejected": -265.8345947265625, "loss": 0.0708, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4066572189331055, "rewards/margins": 7.960937976837158, "rewards/rejected": -6.554281234741211, "step": 1483 }, { "epoch": 0.82, "learning_rate": 6.639717544270234e-07, "logits/chosen": -6.076478004455566, "logits/rejected": -6.0373640060424805, "logps/chosen": -187.6679229736328, "logps/rejected": -183.00970458984375, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": 3.886981964111328, "rewards/margins": 9.747179985046387, "rewards/rejected": -5.860198497772217, "step": 1484 }, { "epoch": 0.82, "learning_rate": 6.635468593345265e-07, "logits/chosen": -6.0751423835754395, "logits/rejected": -6.001895904541016, "logps/chosen": -441.370361328125, "logps/rejected": -335.5556945800781, "loss": 0.1207, "rewards/accuracies": 0.9375, "rewards/chosen": 4.724020004272461, "rewards/margins": 10.331440925598145, "rewards/rejected": -5.607420444488525, "step": 1485 }, { "epoch": 0.82, "learning_rate": 6.631218319466263e-07, "logits/chosen": -6.061740875244141, "logits/rejected": -5.957640647888184, "logps/chosen": -270.77569580078125, "logps/rejected": -107.0867691040039, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": 3.981630802154541, "rewards/margins": 8.394826889038086, "rewards/rejected": -4.413196086883545, "step": 1486 }, { "epoch": 0.83, "learning_rate": 6.626966726071342e-07, "logits/chosen": -6.152543544769287, "logits/rejected": -6.038361072540283, "logps/chosen": -249.37525939941406, "logps/rejected": -251.13914489746094, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": 3.1670141220092773, "rewards/margins": 10.742419242858887, "rewards/rejected": -7.575405120849609, "step": 1487 }, { "epoch": 0.83, "learning_rate": 6.622713816599673e-07, "logits/chosen": -6.0412917137146, "logits/rejected": -6.055008888244629, "logps/chosen": -255.441650390625, "logps/rejected": -194.39944458007812, "loss": 0.1782, "rewards/accuracies": 1.0, "rewards/chosen": 3.221275806427002, "rewards/margins": 8.120697975158691, "rewards/rejected": -4.899421691894531, "step": 1488 }, { "epoch": 0.83, "learning_rate": 6.618459594491495e-07, "logits/chosen": -6.012195587158203, "logits/rejected": -6.0641326904296875, "logps/chosen": -319.96246337890625, "logps/rejected": -190.9318389892578, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": 6.645084381103516, "rewards/margins": 9.797301292419434, "rewards/rejected": -3.152216672897339, "step": 1489 }, { "epoch": 0.83, "learning_rate": 6.614204063188114e-07, "logits/chosen": -6.089256763458252, "logits/rejected": -6.088281154632568, "logps/chosen": -368.1231994628906, "logps/rejected": -251.91021728515625, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": 1.5372778177261353, "rewards/margins": 9.805821418762207, "rewards/rejected": -8.268543243408203, "step": 1490 }, { "epoch": 0.83, "learning_rate": 6.609947226131886e-07, "logits/chosen": -6.111526966094971, "logits/rejected": -6.039122581481934, "logps/chosen": -237.298828125, "logps/rejected": -146.17066955566406, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": 2.5563857555389404, "rewards/margins": 8.63990592956543, "rewards/rejected": -6.08351993560791, "step": 1491 }, { "epoch": 0.83, "learning_rate": 6.605689086766228e-07, "logits/chosen": -6.0566277503967285, "logits/rejected": -6.029140949249268, "logps/chosen": -302.3438720703125, "logps/rejected": -218.9851837158203, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": 4.197715759277344, "rewards/margins": 10.570226669311523, "rewards/rejected": -6.37251091003418, "step": 1492 }, { "epoch": 0.83, "learning_rate": 6.601429648535612e-07, "logits/chosen": -6.034142971038818, "logits/rejected": -6.0317583084106445, "logps/chosen": -247.66632080078125, "logps/rejected": -168.04318237304688, "loss": 0.1083, "rewards/accuracies": 0.9375, "rewards/chosen": 3.723799228668213, "rewards/margins": 7.753540992736816, "rewards/rejected": -4.029741287231445, "step": 1493 }, { "epoch": 0.83, "learning_rate": 6.597168914885557e-07, "logits/chosen": -6.078152656555176, "logits/rejected": -6.0400495529174805, "logps/chosen": -266.41522216796875, "logps/rejected": -162.90313720703125, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": 5.66048526763916, "rewards/margins": 9.01348876953125, "rewards/rejected": -3.353003740310669, "step": 1494 }, { "epoch": 0.83, "learning_rate": 6.592906889262631e-07, "logits/chosen": -6.165025234222412, "logits/rejected": -6.052868366241455, "logps/chosen": -220.1788330078125, "logps/rejected": -84.975830078125, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": 3.315195083618164, "rewards/margins": 8.364167213439941, "rewards/rejected": -5.048972129821777, "step": 1495 }, { "epoch": 0.83, "learning_rate": 6.588643575114452e-07, "logits/chosen": -6.019942283630371, "logits/rejected": -6.101696968078613, "logps/chosen": -258.0700378417969, "logps/rejected": -229.6990966796875, "loss": 0.0651, "rewards/accuracies": 0.875, "rewards/chosen": 3.961568832397461, "rewards/margins": 9.1167573928833, "rewards/rejected": -5.155189037322998, "step": 1496 }, { "epoch": 0.83, "learning_rate": 6.584378975889671e-07, "logits/chosen": -6.160703659057617, "logits/rejected": -6.054957389831543, "logps/chosen": -284.7214660644531, "logps/rejected": -163.1097412109375, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": 3.7605252265930176, "rewards/margins": 8.585071563720703, "rewards/rejected": -4.824546813964844, "step": 1497 }, { "epoch": 0.83, "learning_rate": 6.580113095037988e-07, "logits/chosen": -6.072506427764893, "logits/rejected": -5.935673713684082, "logps/chosen": -264.23480224609375, "logps/rejected": -172.68856811523438, "loss": 0.1541, "rewards/accuracies": 1.0, "rewards/chosen": 3.734403371810913, "rewards/margins": 8.53876781463623, "rewards/rejected": -4.8043646812438965, "step": 1498 }, { "epoch": 0.83, "learning_rate": 6.57584593601013e-07, "logits/chosen": -5.980068683624268, "logits/rejected": -5.982108116149902, "logps/chosen": -426.399169921875, "logps/rejected": -211.71568298339844, "loss": 0.0416, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8362672328948975, "rewards/margins": 9.273807525634766, "rewards/rejected": -5.437540054321289, "step": 1499 }, { "epoch": 0.83, "learning_rate": 6.571577502257868e-07, "logits/chosen": -6.0733160972595215, "logits/rejected": -6.1206769943237305, "logps/chosen": -285.8538818359375, "logps/rejected": -198.59625244140625, "loss": 0.1772, "rewards/accuracies": 0.9375, "rewards/chosen": 3.068192481994629, "rewards/margins": 8.373435974121094, "rewards/rejected": -5.305243492126465, "step": 1500 }, { "epoch": 0.83, "learning_rate": 6.567307797233996e-07, "logits/chosen": -6.009754657745361, "logits/rejected": -6.047876834869385, "logps/chosen": -155.22549438476562, "logps/rejected": -214.78366088867188, "loss": 0.1048, "rewards/accuracies": 1.0, "rewards/chosen": 1.6588099002838135, "rewards/margins": 9.528677940368652, "rewards/rejected": -7.869868755340576, "step": 1501 }, { "epoch": 0.83, "learning_rate": 6.563036824392344e-07, "logits/chosen": -6.0781707763671875, "logits/rejected": -6.000606060028076, "logps/chosen": -282.7364501953125, "logps/rejected": -207.93417358398438, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": 3.638075828552246, "rewards/margins": 7.778914928436279, "rewards/rejected": -4.140839576721191, "step": 1502 }, { "epoch": 0.83, "learning_rate": 6.558764587187757e-07, "logits/chosen": -6.010304927825928, "logits/rejected": -6.055992126464844, "logps/chosen": -264.73358154296875, "logps/rejected": -248.3172607421875, "loss": 0.1171, "rewards/accuracies": 0.9375, "rewards/chosen": 4.877448081970215, "rewards/margins": 9.787357330322266, "rewards/rejected": -4.909909248352051, "step": 1503 }, { "epoch": 0.83, "learning_rate": 6.554491089076115e-07, "logits/chosen": -5.961630821228027, "logits/rejected": -6.027688980102539, "logps/chosen": -195.45396423339844, "logps/rejected": -203.13922119140625, "loss": 0.0736, "rewards/accuracies": 0.875, "rewards/chosen": 2.116954803466797, "rewards/margins": 6.763010501861572, "rewards/rejected": -4.646055698394775, "step": 1504 }, { "epoch": 0.84, "learning_rate": 6.55021633351431e-07, "logits/chosen": -5.9885663986206055, "logits/rejected": -6.113959312438965, "logps/chosen": -264.9067077636719, "logps/rejected": -213.6064910888672, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": 4.300991058349609, "rewards/margins": 9.4136381149292, "rewards/rejected": -5.11264705657959, "step": 1505 }, { "epoch": 0.84, "learning_rate": 6.545940323960252e-07, "logits/chosen": -6.012739181518555, "logits/rejected": -6.031932830810547, "logps/chosen": -362.15667724609375, "logps/rejected": -341.4839782714844, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": 3.1901092529296875, "rewards/margins": 9.962516784667969, "rewards/rejected": -6.7724080085754395, "step": 1506 }, { "epoch": 0.84, "learning_rate": 6.541663063872865e-07, "logits/chosen": -5.912851333618164, "logits/rejected": -5.994185447692871, "logps/chosen": -581.7152709960938, "logps/rejected": -466.0202941894531, "loss": 0.1096, "rewards/accuracies": 1.0, "rewards/chosen": 8.059507369995117, "rewards/margins": 13.451876640319824, "rewards/rejected": -5.392368316650391, "step": 1507 }, { "epoch": 0.84, "learning_rate": 6.537384556712091e-07, "logits/chosen": -6.107147693634033, "logits/rejected": -6.093385696411133, "logps/chosen": -206.99224853515625, "logps/rejected": -281.73028564453125, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 3.761775016784668, "rewards/margins": 9.875458717346191, "rewards/rejected": -6.11368465423584, "step": 1508 }, { "epoch": 0.84, "learning_rate": 6.533104805938873e-07, "logits/chosen": -6.107189655303955, "logits/rejected": -6.059619903564453, "logps/chosen": -223.06524658203125, "logps/rejected": -174.7859649658203, "loss": 0.1791, "rewards/accuracies": 0.9375, "rewards/chosen": 4.646453857421875, "rewards/margins": 9.642837524414062, "rewards/rejected": -4.996383190155029, "step": 1509 }, { "epoch": 0.84, "learning_rate": 6.528823815015161e-07, "logits/chosen": -6.079587936401367, "logits/rejected": -6.064438343048096, "logps/chosen": -326.9176940917969, "logps/rejected": -231.8112335205078, "loss": 0.0875, "rewards/accuracies": 1.0, "rewards/chosen": 4.938834190368652, "rewards/margins": 10.277746200561523, "rewards/rejected": -5.338911533355713, "step": 1510 }, { "epoch": 0.84, "learning_rate": 6.524541587403913e-07, "logits/chosen": -6.045315265655518, "logits/rejected": -6.064666748046875, "logps/chosen": -295.633056640625, "logps/rejected": -218.20455932617188, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": 6.5309295654296875, "rewards/margins": 9.430135726928711, "rewards/rejected": -2.8992056846618652, "step": 1511 }, { "epoch": 0.84, "learning_rate": 6.520258126569085e-07, "logits/chosen": -6.0268731117248535, "logits/rejected": -6.085370063781738, "logps/chosen": -206.82005310058594, "logps/rejected": -151.923583984375, "loss": 0.1005, "rewards/accuracies": 0.9375, "rewards/chosen": 6.144710540771484, "rewards/margins": 9.349162101745605, "rewards/rejected": -3.204451560974121, "step": 1512 }, { "epoch": 0.84, "learning_rate": 6.515973435975627e-07, "logits/chosen": -5.9786553382873535, "logits/rejected": -5.984989166259766, "logps/chosen": -230.40760803222656, "logps/rejected": -437.4208068847656, "loss": 0.1441, "rewards/accuracies": 0.875, "rewards/chosen": 3.8241376876831055, "rewards/margins": 7.0693135261535645, "rewards/rejected": -3.245176315307617, "step": 1513 }, { "epoch": 0.84, "learning_rate": 6.511687519089488e-07, "logits/chosen": -5.969646453857422, "logits/rejected": -6.095038890838623, "logps/chosen": -247.21487426757812, "logps/rejected": -218.79827880859375, "loss": 0.1774, "rewards/accuracies": 0.875, "rewards/chosen": 5.555938720703125, "rewards/margins": 10.867562294006348, "rewards/rejected": -5.311623573303223, "step": 1514 }, { "epoch": 0.84, "learning_rate": 6.507400379377608e-07, "logits/chosen": -5.962715148925781, "logits/rejected": -5.9349365234375, "logps/chosen": -232.77459716796875, "logps/rejected": -220.08827209472656, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": 3.252127170562744, "rewards/margins": 8.342917442321777, "rewards/rejected": -5.090789794921875, "step": 1515 }, { "epoch": 0.84, "learning_rate": 6.503112020307916e-07, "logits/chosen": -6.0490803718566895, "logits/rejected": -6.092459678649902, "logps/chosen": -268.6518249511719, "logps/rejected": -265.962158203125, "loss": 0.0586, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0213003158569336, "rewards/margins": 8.60166072845459, "rewards/rejected": -5.580360412597656, "step": 1516 }, { "epoch": 0.84, "learning_rate": 6.498822445349326e-07, "logits/chosen": -6.04590368270874, "logits/rejected": -6.079697608947754, "logps/chosen": -293.30230712890625, "logps/rejected": -207.64404296875, "loss": 0.087, "rewards/accuracies": 0.9375, "rewards/chosen": 6.941742897033691, "rewards/margins": 9.99316692352295, "rewards/rejected": -3.0514235496520996, "step": 1517 }, { "epoch": 0.84, "learning_rate": 6.49453165797174e-07, "logits/chosen": -6.051844120025635, "logits/rejected": -6.123490810394287, "logps/chosen": -177.1420135498047, "logps/rejected": -156.5010528564453, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": 4.021813869476318, "rewards/margins": 8.02657699584961, "rewards/rejected": -4.004763126373291, "step": 1518 }, { "epoch": 0.84, "learning_rate": 6.490239661646034e-07, "logits/chosen": -5.997028350830078, "logits/rejected": -6.019591808319092, "logps/chosen": -252.76181030273438, "logps/rejected": -181.79441833496094, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": 3.4510135650634766, "rewards/margins": 7.724188804626465, "rewards/rejected": -4.273175239562988, "step": 1519 }, { "epoch": 0.84, "learning_rate": 6.485946459844066e-07, "logits/chosen": -6.1198811531066895, "logits/rejected": -6.072324275970459, "logps/chosen": -222.13272094726562, "logps/rejected": -194.37680053710938, "loss": 0.0687, "rewards/accuracies": 1.0, "rewards/chosen": 2.492597818374634, "rewards/margins": 8.012527465820312, "rewards/rejected": -5.519929885864258, "step": 1520 }, { "epoch": 0.84, "learning_rate": 6.481652056038671e-07, "logits/chosen": -6.025328636169434, "logits/rejected": -6.1905741691589355, "logps/chosen": -282.0143127441406, "logps/rejected": -258.8031311035156, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": 4.902243137359619, "rewards/margins": 9.191075325012207, "rewards/rejected": -4.288832664489746, "step": 1521 }, { "epoch": 0.84, "learning_rate": 6.47735645370365e-07, "logits/chosen": -6.059779644012451, "logits/rejected": -6.00537633895874, "logps/chosen": -239.95272827148438, "logps/rejected": -117.53172302246094, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": 3.3107194900512695, "rewards/margins": 10.327556610107422, "rewards/rejected": -7.016837120056152, "step": 1522 }, { "epoch": 0.85, "learning_rate": 6.473059656313782e-07, "logits/chosen": -6.122950077056885, "logits/rejected": -6.084155082702637, "logps/chosen": -224.3162841796875, "logps/rejected": -157.5320587158203, "loss": 0.1148, "rewards/accuracies": 1.0, "rewards/chosen": 3.2769901752471924, "rewards/margins": 9.102947235107422, "rewards/rejected": -5.825957298278809, "step": 1523 }, { "epoch": 0.85, "learning_rate": 6.468761667344804e-07, "logits/chosen": -6.061455249786377, "logits/rejected": -6.024293899536133, "logps/chosen": -321.89453125, "logps/rejected": -234.50857543945312, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 4.580606460571289, "rewards/margins": 10.58520221710205, "rewards/rejected": -6.004595756530762, "step": 1524 }, { "epoch": 0.85, "learning_rate": 6.464462490273425e-07, "logits/chosen": -6.024209976196289, "logits/rejected": -6.152169227600098, "logps/chosen": -188.84844970703125, "logps/rejected": -373.57305908203125, "loss": 0.1767, "rewards/accuracies": 0.875, "rewards/chosen": 2.5305137634277344, "rewards/margins": 9.856021881103516, "rewards/rejected": -7.325508117675781, "step": 1525 }, { "epoch": 0.85, "learning_rate": 6.460162128577306e-07, "logits/chosen": -6.08634614944458, "logits/rejected": -6.153275489807129, "logps/chosen": -186.43894958496094, "logps/rejected": -136.40496826171875, "loss": 0.1011, "rewards/accuracies": 1.0, "rewards/chosen": 1.7607314586639404, "rewards/margins": 7.624366760253906, "rewards/rejected": -5.863636016845703, "step": 1526 }, { "epoch": 0.85, "learning_rate": 6.455860585735075e-07, "logits/chosen": -6.04850959777832, "logits/rejected": -6.014963150024414, "logps/chosen": -285.8932189941406, "logps/rejected": -145.84214782714844, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": 4.159332752227783, "rewards/margins": 8.467098236083984, "rewards/rejected": -4.307765007019043, "step": 1527 }, { "epoch": 0.85, "learning_rate": 6.451557865226312e-07, "logits/chosen": -5.955102920532227, "logits/rejected": -5.938246726989746, "logps/chosen": -290.38189697265625, "logps/rejected": -284.46490478515625, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": 4.154396057128906, "rewards/margins": 9.470549583435059, "rewards/rejected": -5.3161540031433105, "step": 1528 }, { "epoch": 0.85, "learning_rate": 6.447253970531548e-07, "logits/chosen": -5.997064590454102, "logits/rejected": -5.961553573608398, "logps/chosen": -249.8273468017578, "logps/rejected": -91.36607360839844, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": 3.2848103046417236, "rewards/margins": 8.784551620483398, "rewards/rejected": -5.499741077423096, "step": 1529 }, { "epoch": 0.85, "learning_rate": 6.442948905132266e-07, "logits/chosen": -6.04508113861084, "logits/rejected": -6.0933518409729, "logps/chosen": -326.8143615722656, "logps/rejected": -437.25115966796875, "loss": 0.1109, "rewards/accuracies": 0.875, "rewards/chosen": 2.756941318511963, "rewards/margins": 8.458830833435059, "rewards/rejected": -5.701889514923096, "step": 1530 }, { "epoch": 0.85, "learning_rate": 6.438642672510893e-07, "logits/chosen": -6.02292013168335, "logits/rejected": -6.0656418800354, "logps/chosen": -247.14959716796875, "logps/rejected": -229.37168884277344, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": 3.233776807785034, "rewards/margins": 10.408937454223633, "rewards/rejected": -7.175159931182861, "step": 1531 }, { "epoch": 0.85, "learning_rate": 6.434335276150806e-07, "logits/chosen": -6.128911018371582, "logits/rejected": -6.127171516418457, "logps/chosen": -286.97357177734375, "logps/rejected": -194.70970153808594, "loss": 0.1673, "rewards/accuracies": 0.875, "rewards/chosen": 4.893794059753418, "rewards/margins": 8.010163307189941, "rewards/rejected": -3.1163692474365234, "step": 1532 }, { "epoch": 0.85, "learning_rate": 6.430026719536317e-07, "logits/chosen": -5.981942176818848, "logits/rejected": -6.1224260330200195, "logps/chosen": -325.8704833984375, "logps/rejected": -264.0738525390625, "loss": 0.1785, "rewards/accuracies": 1.0, "rewards/chosen": 4.648290157318115, "rewards/margins": 10.809797286987305, "rewards/rejected": -6.161507606506348, "step": 1533 }, { "epoch": 0.85, "learning_rate": 6.425717006152682e-07, "logits/chosen": -5.98784065246582, "logits/rejected": -5.971792697906494, "logps/chosen": -260.17596435546875, "logps/rejected": -215.7284393310547, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": 5.305717468261719, "rewards/margins": 8.951008796691895, "rewards/rejected": -3.6452908515930176, "step": 1534 }, { "epoch": 0.85, "learning_rate": 6.421406139486085e-07, "logits/chosen": -6.038982391357422, "logits/rejected": -6.065860748291016, "logps/chosen": -235.33087158203125, "logps/rejected": -195.76947021484375, "loss": 0.0897, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6187243461608887, "rewards/margins": 9.809900283813477, "rewards/rejected": -7.191175937652588, "step": 1535 }, { "epoch": 0.85, "learning_rate": 6.417094123023653e-07, "logits/chosen": -6.179136276245117, "logits/rejected": -6.115667343139648, "logps/chosen": -269.2964172363281, "logps/rejected": -148.14620971679688, "loss": 0.0959, "rewards/accuracies": 0.9375, "rewards/chosen": 4.510981559753418, "rewards/margins": 9.573383331298828, "rewards/rejected": -5.06240177154541, "step": 1536 }, { "epoch": 0.85, "learning_rate": 6.412780960253435e-07, "logits/chosen": -6.004170894622803, "logits/rejected": -6.126778602600098, "logps/chosen": -292.09417724609375, "logps/rejected": -329.09722900390625, "loss": 0.1348, "rewards/accuracies": 0.9375, "rewards/chosen": 3.106339693069458, "rewards/margins": 10.125008583068848, "rewards/rejected": -7.018669128417969, "step": 1537 }, { "epoch": 0.85, "learning_rate": 6.408466654664415e-07, "logits/chosen": -6.064284324645996, "logits/rejected": -6.001248359680176, "logps/chosen": -326.7357177734375, "logps/rejected": -145.67568969726562, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": 4.517518043518066, "rewards/margins": 9.204078674316406, "rewards/rejected": -4.686561584472656, "step": 1538 }, { "epoch": 0.85, "learning_rate": 6.40415120974649e-07, "logits/chosen": -6.062586784362793, "logits/rejected": -6.0497870445251465, "logps/chosen": -347.80194091796875, "logps/rejected": -251.15802001953125, "loss": 0.1889, "rewards/accuracies": 0.8125, "rewards/chosen": 1.872363567352295, "rewards/margins": 5.529762268066406, "rewards/rejected": -3.6573987007141113, "step": 1539 }, { "epoch": 0.85, "learning_rate": 6.399834628990491e-07, "logits/chosen": -6.206107139587402, "logits/rejected": -6.043509483337402, "logps/chosen": -327.65240478515625, "logps/rejected": -165.11007690429688, "loss": 0.1401, "rewards/accuracies": 1.0, "rewards/chosen": 4.359947204589844, "rewards/margins": 10.10993480682373, "rewards/rejected": -5.7499871253967285, "step": 1540 }, { "epoch": 0.86, "learning_rate": 6.395516915888158e-07, "logits/chosen": -6.0172271728515625, "logits/rejected": -6.0335373878479, "logps/chosen": -145.28672790527344, "logps/rejected": -394.504638671875, "loss": 0.1932, "rewards/accuracies": 0.875, "rewards/chosen": 1.1613267660140991, "rewards/margins": 8.556397438049316, "rewards/rejected": -7.395070552825928, "step": 1541 }, { "epoch": 0.86, "learning_rate": 6.391198073932154e-07, "logits/chosen": -6.136104106903076, "logits/rejected": -6.032974720001221, "logps/chosen": -320.7725524902344, "logps/rejected": -201.81126403808594, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": 4.751264572143555, "rewards/margins": 8.955544471740723, "rewards/rejected": -4.204279899597168, "step": 1542 }, { "epoch": 0.86, "learning_rate": 6.386878106616049e-07, "logits/chosen": -6.092422962188721, "logits/rejected": -6.071534156799316, "logps/chosen": -262.4831237792969, "logps/rejected": -210.07272338867188, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": 3.649991989135742, "rewards/margins": 8.891105651855469, "rewards/rejected": -5.241113662719727, "step": 1543 }, { "epoch": 0.86, "learning_rate": 6.382557017434331e-07, "logits/chosen": -5.98707389831543, "logits/rejected": -6.002582550048828, "logps/chosen": -296.519287109375, "logps/rejected": -229.4639434814453, "loss": 0.3435, "rewards/accuracies": 0.875, "rewards/chosen": 3.500556230545044, "rewards/margins": 8.813047409057617, "rewards/rejected": -5.312489986419678, "step": 1544 }, { "epoch": 0.86, "learning_rate": 6.37823480988239e-07, "logits/chosen": -5.984830856323242, "logits/rejected": -6.065682411193848, "logps/chosen": -273.384033203125, "logps/rejected": -175.19082641601562, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": 4.989239692687988, "rewards/margins": 9.436053276062012, "rewards/rejected": -4.446813106536865, "step": 1545 }, { "epoch": 0.86, "learning_rate": 6.373911487456518e-07, "logits/chosen": -6.002178192138672, "logits/rejected": -6.09694766998291, "logps/chosen": -225.46202087402344, "logps/rejected": -223.5124053955078, "loss": 0.1081, "rewards/accuracies": 0.9375, "rewards/chosen": 5.750392913818359, "rewards/margins": 10.21452808380127, "rewards/rejected": -4.46413516998291, "step": 1546 }, { "epoch": 0.86, "learning_rate": 6.369587053653916e-07, "logits/chosen": -6.00137186050415, "logits/rejected": -6.011900901794434, "logps/chosen": -234.06491088867188, "logps/rejected": -247.56619262695312, "loss": 0.1725, "rewards/accuracies": 1.0, "rewards/chosen": 3.4039435386657715, "rewards/margins": 7.989649772644043, "rewards/rejected": -4.5857062339782715, "step": 1547 }, { "epoch": 0.86, "learning_rate": 6.365261511972681e-07, "logits/chosen": -6.0477294921875, "logits/rejected": -6.016139030456543, "logps/chosen": -286.07208251953125, "logps/rejected": -153.6287384033203, "loss": 0.3619, "rewards/accuracies": 1.0, "rewards/chosen": 4.561643123626709, "rewards/margins": 8.517780303955078, "rewards/rejected": -3.9561378955841064, "step": 1548 }, { "epoch": 0.86, "learning_rate": 6.360934865911804e-07, "logits/chosen": -6.053130149841309, "logits/rejected": -5.975343227386475, "logps/chosen": -194.338134765625, "logps/rejected": -255.15139770507812, "loss": 0.0818, "rewards/accuracies": 0.875, "rewards/chosen": 0.3310614228248596, "rewards/margins": 8.372060775756836, "rewards/rejected": -8.040999412536621, "step": 1549 }, { "epoch": 0.86, "learning_rate": 6.356607118971172e-07, "logits/chosen": -5.9168500900268555, "logits/rejected": -6.025561809539795, "logps/chosen": -397.751220703125, "logps/rejected": -569.8141479492188, "loss": 0.09, "rewards/accuracies": 0.875, "rewards/chosen": 3.3987338542938232, "rewards/margins": 7.176258563995361, "rewards/rejected": -3.777524709701538, "step": 1550 }, { "epoch": 0.86, "learning_rate": 6.352278274651561e-07, "logits/chosen": -5.979213714599609, "logits/rejected": -6.009153366088867, "logps/chosen": -471.18359375, "logps/rejected": -316.000244140625, "loss": 0.0796, "rewards/accuracies": 0.9375, "rewards/chosen": 2.655750274658203, "rewards/margins": 8.628314971923828, "rewards/rejected": -5.972565650939941, "step": 1551 }, { "epoch": 0.86, "learning_rate": 6.347948336454636e-07, "logits/chosen": -6.068974494934082, "logits/rejected": -6.065965175628662, "logps/chosen": -205.3529815673828, "logps/rejected": -176.92367553710938, "loss": 0.118, "rewards/accuracies": 1.0, "rewards/chosen": 4.999876022338867, "rewards/margins": 10.026641845703125, "rewards/rejected": -5.026766777038574, "step": 1552 }, { "epoch": 0.86, "learning_rate": 6.343617307882946e-07, "logits/chosen": -6.004965305328369, "logits/rejected": -6.020050525665283, "logps/chosen": -197.9374237060547, "logps/rejected": -124.93611145019531, "loss": 0.1448, "rewards/accuracies": 1.0, "rewards/chosen": 2.751539707183838, "rewards/margins": 7.499063968658447, "rewards/rejected": -4.747524261474609, "step": 1553 }, { "epoch": 0.86, "learning_rate": 6.339285192439922e-07, "logits/chosen": -5.960280418395996, "logits/rejected": -6.061750411987305, "logps/chosen": -319.9427490234375, "logps/rejected": -277.14849853515625, "loss": 0.199, "rewards/accuracies": 0.8125, "rewards/chosen": 4.937018394470215, "rewards/margins": 10.118391990661621, "rewards/rejected": -5.181374549865723, "step": 1554 }, { "epoch": 0.86, "learning_rate": 6.334951993629873e-07, "logits/chosen": -5.973063945770264, "logits/rejected": -5.965819835662842, "logps/chosen": -283.764892578125, "logps/rejected": -149.3102569580078, "loss": 0.124, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7832188606262207, "rewards/margins": 8.154903411865234, "rewards/rejected": -6.371684551239014, "step": 1555 }, { "epoch": 0.86, "learning_rate": 6.330617714957988e-07, "logits/chosen": -6.013232707977295, "logits/rejected": -6.071284770965576, "logps/chosen": -206.70184326171875, "logps/rejected": -240.93252563476562, "loss": 0.0706, "rewards/accuracies": 0.9375, "rewards/chosen": 3.48183012008667, "rewards/margins": 8.723468780517578, "rewards/rejected": -5.24163818359375, "step": 1556 }, { "epoch": 0.86, "learning_rate": 6.326282359930329e-07, "logits/chosen": -6.010014533996582, "logits/rejected": -6.0707807540893555, "logps/chosen": -250.85995483398438, "logps/rejected": -191.19610595703125, "loss": 0.1376, "rewards/accuracies": 1.0, "rewards/chosen": 4.763997554779053, "rewards/margins": 8.918511390686035, "rewards/rejected": -4.154513359069824, "step": 1557 }, { "epoch": 0.86, "learning_rate": 6.321945932053821e-07, "logits/chosen": -6.08875036239624, "logits/rejected": -6.086699485778809, "logps/chosen": -253.40621948242188, "logps/rejected": -196.63616943359375, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": 4.122265338897705, "rewards/margins": 10.288337707519531, "rewards/rejected": -6.166072845458984, "step": 1558 }, { "epoch": 0.87, "learning_rate": 6.317608434836269e-07, "logits/chosen": -5.901261329650879, "logits/rejected": -6.079282760620117, "logps/chosen": -273.4316101074219, "logps/rejected": -196.25074768066406, "loss": 0.1096, "rewards/accuracies": 0.9375, "rewards/chosen": 3.54884672164917, "rewards/margins": 8.871281623840332, "rewards/rejected": -5.32243537902832, "step": 1559 }, { "epoch": 0.87, "learning_rate": 6.313269871786333e-07, "logits/chosen": -6.032444953918457, "logits/rejected": -6.01957893371582, "logps/chosen": -373.0577697753906, "logps/rejected": -277.5589904785156, "loss": 0.1496, "rewards/accuracies": 0.875, "rewards/chosen": 1.683506727218628, "rewards/margins": 6.8682403564453125, "rewards/rejected": -5.1847333908081055, "step": 1560 }, { "epoch": 0.87, "learning_rate": 6.308930246413539e-07, "logits/chosen": -6.060894012451172, "logits/rejected": -6.109151840209961, "logps/chosen": -120.60383605957031, "logps/rejected": -243.50296020507812, "loss": 0.1529, "rewards/accuracies": 0.875, "rewards/chosen": 0.5975800156593323, "rewards/margins": 8.686995506286621, "rewards/rejected": -8.089415550231934, "step": 1561 }, { "epoch": 0.87, "learning_rate": 6.304589562228274e-07, "logits/chosen": -6.08255672454834, "logits/rejected": -5.972866058349609, "logps/chosen": -212.03384399414062, "logps/rejected": -161.5650634765625, "loss": 0.1754, "rewards/accuracies": 1.0, "rewards/chosen": 2.3021321296691895, "rewards/margins": 7.601987838745117, "rewards/rejected": -5.299855709075928, "step": 1562 }, { "epoch": 0.87, "learning_rate": 6.30024782274178e-07, "logits/chosen": -6.047173500061035, "logits/rejected": -6.177470684051514, "logps/chosen": -246.3357696533203, "logps/rejected": -407.22332763671875, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": 2.3515381813049316, "rewards/margins": 13.513887405395508, "rewards/rejected": -11.162348747253418, "step": 1563 }, { "epoch": 0.87, "learning_rate": 6.295905031466149e-07, "logits/chosen": -6.055995464324951, "logits/rejected": -6.017308712005615, "logps/chosen": -291.23626708984375, "logps/rejected": -253.708251953125, "loss": 0.163, "rewards/accuracies": 1.0, "rewards/chosen": 4.247689723968506, "rewards/margins": 10.158323287963867, "rewards/rejected": -5.910633563995361, "step": 1564 }, { "epoch": 0.87, "learning_rate": 6.291561191914332e-07, "logits/chosen": -6.051196575164795, "logits/rejected": -6.088999271392822, "logps/chosen": -357.74371337890625, "logps/rejected": -215.20126342773438, "loss": 0.0671, "rewards/accuracies": 0.9375, "rewards/chosen": 3.589533805847168, "rewards/margins": 7.931283950805664, "rewards/rejected": -4.341750621795654, "step": 1565 }, { "epoch": 0.87, "learning_rate": 6.28721630760012e-07, "logits/chosen": -6.013383865356445, "logits/rejected": -6.019855499267578, "logps/chosen": -283.69561767578125, "logps/rejected": -167.05299377441406, "loss": 0.1157, "rewards/accuracies": 1.0, "rewards/chosen": 5.971200942993164, "rewards/margins": 8.460598945617676, "rewards/rejected": -2.489398241043091, "step": 1566 }, { "epoch": 0.87, "learning_rate": 6.282870382038153e-07, "logits/chosen": -6.0243940353393555, "logits/rejected": -6.058873176574707, "logps/chosen": -384.5630798339844, "logps/rejected": -347.2423400878906, "loss": 0.2348, "rewards/accuracies": 0.9375, "rewards/chosen": 4.926438331604004, "rewards/margins": 8.740817070007324, "rewards/rejected": -3.8143787384033203, "step": 1567 }, { "epoch": 0.87, "learning_rate": 6.278523418743911e-07, "logits/chosen": -5.995765686035156, "logits/rejected": -6.01341438293457, "logps/chosen": -284.8578796386719, "logps/rejected": -175.67529296875, "loss": 0.1304, "rewards/accuracies": 0.9375, "rewards/chosen": 5.592702388763428, "rewards/margins": 10.397024154663086, "rewards/rejected": -4.804322242736816, "step": 1568 }, { "epoch": 0.87, "learning_rate": 6.27417542123372e-07, "logits/chosen": -5.983226299285889, "logits/rejected": -6.0141801834106445, "logps/chosen": -276.4656066894531, "logps/rejected": -272.04150390625, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": 4.7871294021606445, "rewards/margins": 9.075657844543457, "rewards/rejected": -4.288527965545654, "step": 1569 }, { "epoch": 0.87, "learning_rate": 6.269826393024733e-07, "logits/chosen": -5.981426239013672, "logits/rejected": -5.987794399261475, "logps/chosen": -284.2666320800781, "logps/rejected": -240.8155517578125, "loss": 0.0816, "rewards/accuracies": 0.9375, "rewards/chosen": 4.239241123199463, "rewards/margins": 9.43250846862793, "rewards/rejected": -5.193266868591309, "step": 1570 }, { "epoch": 0.87, "learning_rate": 6.265476337634942e-07, "logits/chosen": -6.0205278396606445, "logits/rejected": -6.149717807769775, "logps/chosen": -204.74798583984375, "logps/rejected": -267.1357421875, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": 4.057836532592773, "rewards/margins": 11.835511207580566, "rewards/rejected": -7.777674674987793, "step": 1571 }, { "epoch": 0.87, "learning_rate": 6.261125258583171e-07, "logits/chosen": -6.125675201416016, "logits/rejected": -6.067366123199463, "logps/chosen": -259.5830078125, "logps/rejected": -198.3575439453125, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": 3.3249621391296387, "rewards/margins": 9.200708389282227, "rewards/rejected": -5.875746726989746, "step": 1572 }, { "epoch": 0.87, "learning_rate": 6.25677315938907e-07, "logits/chosen": -6.099063396453857, "logits/rejected": -6.110627174377441, "logps/chosen": -243.74395751953125, "logps/rejected": -285.3859558105469, "loss": 0.0806, "rewards/accuracies": 0.9375, "rewards/chosen": 4.556891441345215, "rewards/margins": 10.717342376708984, "rewards/rejected": -6.160451889038086, "step": 1573 }, { "epoch": 0.87, "learning_rate": 6.252420043573112e-07, "logits/chosen": -5.97601318359375, "logits/rejected": -6.014868259429932, "logps/chosen": -284.7549133300781, "logps/rejected": -262.31365966796875, "loss": 0.2363, "rewards/accuracies": 1.0, "rewards/chosen": 4.432206630706787, "rewards/margins": 8.367952346801758, "rewards/rejected": -3.935746192932129, "step": 1574 }, { "epoch": 0.87, "learning_rate": 6.248065914656598e-07, "logits/chosen": -6.028090476989746, "logits/rejected": -6.012065887451172, "logps/chosen": -200.5252227783203, "logps/rejected": -220.95648193359375, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": 1.9378859996795654, "rewards/margins": 9.212809562683105, "rewards/rejected": -7.274923801422119, "step": 1575 }, { "epoch": 0.87, "learning_rate": 6.243710776161644e-07, "logits/chosen": -6.1136250495910645, "logits/rejected": -6.111564636230469, "logps/chosen": -197.63059997558594, "logps/rejected": -231.11972045898438, "loss": 0.1149, "rewards/accuracies": 0.9375, "rewards/chosen": 0.9823468923568726, "rewards/margins": 8.564823150634766, "rewards/rejected": -7.582476615905762, "step": 1576 }, { "epoch": 0.88, "learning_rate": 6.239354631611182e-07, "logits/chosen": -6.040706634521484, "logits/rejected": -6.003392696380615, "logps/chosen": -262.1278381347656, "logps/rejected": -172.72784423828125, "loss": 0.1992, "rewards/accuracies": 0.875, "rewards/chosen": 1.7138876914978027, "rewards/margins": 7.532416343688965, "rewards/rejected": -5.818528652191162, "step": 1577 }, { "epoch": 0.88, "learning_rate": 6.234997484528964e-07, "logits/chosen": -6.1126837730407715, "logits/rejected": -6.050934314727783, "logps/chosen": -216.34088134765625, "logps/rejected": -141.0587921142578, "loss": 0.2181, "rewards/accuracies": 0.875, "rewards/chosen": 2.820971965789795, "rewards/margins": 7.2255353927612305, "rewards/rejected": -4.4045634269714355, "step": 1578 }, { "epoch": 0.88, "learning_rate": 6.230639338439549e-07, "logits/chosen": -6.119752883911133, "logits/rejected": -6.071784019470215, "logps/chosen": -304.8153076171875, "logps/rejected": -216.68666076660156, "loss": 0.1075, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3761560916900635, "rewards/margins": 9.090608596801758, "rewards/rejected": -5.714452743530273, "step": 1579 }, { "epoch": 0.88, "learning_rate": 6.2262801968683e-07, "logits/chosen": -6.240898609161377, "logits/rejected": -6.1555938720703125, "logps/chosen": -331.58636474609375, "logps/rejected": -198.53575134277344, "loss": 0.1503, "rewards/accuracies": 1.0, "rewards/chosen": 4.587867259979248, "rewards/margins": 11.527914047241211, "rewards/rejected": -6.940046310424805, "step": 1580 }, { "epoch": 0.88, "learning_rate": 6.221920063341389e-07, "logits/chosen": -6.066771030426025, "logits/rejected": -6.078385353088379, "logps/chosen": -196.9260711669922, "logps/rejected": -198.60049438476562, "loss": 0.0976, "rewards/accuracies": 0.9375, "rewards/chosen": 2.441727638244629, "rewards/margins": 8.387102127075195, "rewards/rejected": -5.945374488830566, "step": 1581 }, { "epoch": 0.88, "learning_rate": 6.217558941385796e-07, "logits/chosen": -5.973296642303467, "logits/rejected": -5.99074125289917, "logps/chosen": -284.7915344238281, "logps/rejected": -192.51072692871094, "loss": 0.0828, "rewards/accuracies": 1.0, "rewards/chosen": 4.780998229980469, "rewards/margins": 8.858169555664062, "rewards/rejected": -4.077171325683594, "step": 1582 }, { "epoch": 0.88, "learning_rate": 6.21319683452929e-07, "logits/chosen": -6.077666759490967, "logits/rejected": -6.171769618988037, "logps/chosen": -252.00613403320312, "logps/rejected": -344.53851318359375, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": 2.3261559009552, "rewards/margins": 10.450654983520508, "rewards/rejected": -8.12449836730957, "step": 1583 }, { "epoch": 0.88, "learning_rate": 6.208833746300445e-07, "logits/chosen": -6.190008163452148, "logits/rejected": -6.0175042152404785, "logps/chosen": -299.26727294921875, "logps/rejected": -242.75509643554688, "loss": 0.115, "rewards/accuracies": 0.9375, "rewards/chosen": 4.364312171936035, "rewards/margins": 10.677509307861328, "rewards/rejected": -6.313196182250977, "step": 1584 }, { "epoch": 0.88, "learning_rate": 6.204469680228623e-07, "logits/chosen": -5.9903974533081055, "logits/rejected": -5.971234321594238, "logps/chosen": -366.62152099609375, "logps/rejected": -182.26686096191406, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 5.514803886413574, "rewards/margins": 9.876321792602539, "rewards/rejected": -4.361516952514648, "step": 1585 }, { "epoch": 0.88, "learning_rate": 6.200104639843983e-07, "logits/chosen": -6.056526184082031, "logits/rejected": -6.155455589294434, "logps/chosen": -166.16644287109375, "logps/rejected": -207.77301025390625, "loss": 0.1118, "rewards/accuracies": 1.0, "rewards/chosen": 2.733886241912842, "rewards/margins": 10.08726978302002, "rewards/rejected": -7.353383541107178, "step": 1586 }, { "epoch": 0.88, "learning_rate": 6.195738628677466e-07, "logits/chosen": -5.939488887786865, "logits/rejected": -6.113559246063232, "logps/chosen": -265.79119873046875, "logps/rejected": -235.15760803222656, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": 5.89518928527832, "rewards/margins": 9.511821746826172, "rewards/rejected": -3.6166317462921143, "step": 1587 }, { "epoch": 0.88, "learning_rate": 6.191371650260803e-07, "logits/chosen": -6.019706726074219, "logits/rejected": -6.010505199432373, "logps/chosen": -232.84024047851562, "logps/rejected": -188.3590087890625, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": 3.3112173080444336, "rewards/margins": 9.335816383361816, "rewards/rejected": -6.024599552154541, "step": 1588 }, { "epoch": 0.88, "learning_rate": 6.187003708126504e-07, "logits/chosen": -6.107633113861084, "logits/rejected": -6.013517379760742, "logps/chosen": -418.4058837890625, "logps/rejected": -222.45245361328125, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": 4.374371528625488, "rewards/margins": 8.695125579833984, "rewards/rejected": -4.320754051208496, "step": 1589 }, { "epoch": 0.88, "learning_rate": 6.18263480580786e-07, "logits/chosen": -6.064288139343262, "logits/rejected": -6.015161037445068, "logps/chosen": -247.60064697265625, "logps/rejected": -357.397216796875, "loss": 0.1561, "rewards/accuracies": 1.0, "rewards/chosen": 4.848325252532959, "rewards/margins": 12.091970443725586, "rewards/rejected": -7.243644714355469, "step": 1590 }, { "epoch": 0.88, "learning_rate": 6.178264946838941e-07, "logits/chosen": -6.005642414093018, "logits/rejected": -5.958504676818848, "logps/chosen": -380.40936279296875, "logps/rejected": -259.028564453125, "loss": 0.1183, "rewards/accuracies": 0.9375, "rewards/chosen": 4.211583137512207, "rewards/margins": 8.856236457824707, "rewards/rejected": -4.644654273986816, "step": 1591 }, { "epoch": 0.88, "learning_rate": 6.173894134754587e-07, "logits/chosen": -6.055545806884766, "logits/rejected": -5.987666130065918, "logps/chosen": -271.00189208984375, "logps/rejected": -248.7504425048828, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": 5.057931423187256, "rewards/margins": 11.021064758300781, "rewards/rejected": -5.963132381439209, "step": 1592 }, { "epoch": 0.88, "learning_rate": 6.169522373090412e-07, "logits/chosen": -6.0154266357421875, "logits/rejected": -6.043294906616211, "logps/chosen": -263.5159912109375, "logps/rejected": -212.57164001464844, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": 4.054960250854492, "rewards/margins": 9.149494171142578, "rewards/rejected": -5.094533443450928, "step": 1593 }, { "epoch": 0.88, "learning_rate": 6.165149665382794e-07, "logits/chosen": -6.111642837524414, "logits/rejected": -6.024404525756836, "logps/chosen": -320.2715759277344, "logps/rejected": -222.80615234375, "loss": 0.0929, "rewards/accuracies": 0.9375, "rewards/chosen": 4.725929260253906, "rewards/margins": 9.520512580871582, "rewards/rejected": -4.794583320617676, "step": 1594 }, { "epoch": 0.89, "learning_rate": 6.160776015168882e-07, "logits/chosen": -6.1476287841796875, "logits/rejected": -6.146442413330078, "logps/chosen": -321.476806640625, "logps/rejected": -176.91757202148438, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": 4.80144739151001, "rewards/margins": 11.896947860717773, "rewards/rejected": -7.095500946044922, "step": 1595 }, { "epoch": 0.89, "learning_rate": 6.156401425986581e-07, "logits/chosen": -6.092324256896973, "logits/rejected": -6.082326412200928, "logps/chosen": -436.78826904296875, "logps/rejected": -313.94415283203125, "loss": 0.0973, "rewards/accuracies": 1.0, "rewards/chosen": 4.678506374359131, "rewards/margins": 8.120420455932617, "rewards/rejected": -3.441913604736328, "step": 1596 }, { "epoch": 0.89, "learning_rate": 6.152025901374564e-07, "logits/chosen": -5.968473434448242, "logits/rejected": -5.879940986633301, "logps/chosen": -227.7308349609375, "logps/rejected": -170.058837890625, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 1.7285454273223877, "rewards/margins": 8.137310981750488, "rewards/rejected": -6.408764839172363, "step": 1597 }, { "epoch": 0.89, "learning_rate": 6.147649444872251e-07, "logits/chosen": -6.07185697555542, "logits/rejected": -6.033268451690674, "logps/chosen": -283.682373046875, "logps/rejected": -141.59521484375, "loss": 0.0719, "rewards/accuracies": 0.9375, "rewards/chosen": 5.431815147399902, "rewards/margins": 8.766358375549316, "rewards/rejected": -3.334542751312256, "step": 1598 }, { "epoch": 0.89, "learning_rate": 6.143272060019825e-07, "logits/chosen": -6.059990882873535, "logits/rejected": -6.046867370605469, "logps/chosen": -464.9678039550781, "logps/rejected": -366.86968994140625, "loss": 0.0854, "rewards/accuracies": 1.0, "rewards/chosen": 4.116926193237305, "rewards/margins": 8.936237335205078, "rewards/rejected": -4.819311141967773, "step": 1599 }, { "epoch": 0.89, "learning_rate": 6.138893750358212e-07, "logits/chosen": -6.023461818695068, "logits/rejected": -6.09202766418457, "logps/chosen": -230.0322265625, "logps/rejected": -348.1483154296875, "loss": 0.1283, "rewards/accuracies": 1.0, "rewards/chosen": 2.8508732318878174, "rewards/margins": 12.070137023925781, "rewards/rejected": -9.21926498413086, "step": 1600 }, { "epoch": 0.89, "learning_rate": 6.134514519429089e-07, "logits/chosen": -6.111840724945068, "logits/rejected": -6.073615074157715, "logps/chosen": -268.7021789550781, "logps/rejected": -268.137451171875, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": 3.090149402618408, "rewards/margins": 10.661005020141602, "rewards/rejected": -7.570855140686035, "step": 1601 }, { "epoch": 0.89, "learning_rate": 6.130134370774881e-07, "logits/chosen": -6.042485237121582, "logits/rejected": -6.073906421661377, "logps/chosen": -218.1932373046875, "logps/rejected": -160.50013732910156, "loss": 0.1632, "rewards/accuracies": 0.9375, "rewards/chosen": 5.21904993057251, "rewards/margins": 8.19244384765625, "rewards/rejected": -2.973393440246582, "step": 1602 }, { "epoch": 0.89, "learning_rate": 6.125753307938753e-07, "logits/chosen": -5.975161552429199, "logits/rejected": -6.082486152648926, "logps/chosen": -337.9960632324219, "logps/rejected": -491.2858581542969, "loss": 0.077, "rewards/accuracies": 0.875, "rewards/chosen": 2.6007204055786133, "rewards/margins": 9.032004356384277, "rewards/rejected": -6.431283950805664, "step": 1603 }, { "epoch": 0.89, "learning_rate": 6.121371334464609e-07, "logits/chosen": -6.055604457855225, "logits/rejected": -5.942366600036621, "logps/chosen": -263.89013671875, "logps/rejected": -330.2244873046875, "loss": 0.1997, "rewards/accuracies": 1.0, "rewards/chosen": 2.801701545715332, "rewards/margins": 11.037227630615234, "rewards/rejected": -8.235525131225586, "step": 1604 }, { "epoch": 0.89, "learning_rate": 6.116988453897089e-07, "logits/chosen": -6.037120819091797, "logits/rejected": -6.068561553955078, "logps/chosen": -343.61065673828125, "logps/rejected": -296.2835693359375, "loss": 0.0524, "rewards/accuracies": 0.9375, "rewards/chosen": 2.9230141639709473, "rewards/margins": 7.629105567932129, "rewards/rejected": -4.70609188079834, "step": 1605 }, { "epoch": 0.89, "learning_rate": 6.112604669781572e-07, "logits/chosen": -5.972808361053467, "logits/rejected": -6.015952110290527, "logps/chosen": -398.26953125, "logps/rejected": -342.5775146484375, "loss": 0.1486, "rewards/accuracies": 0.8125, "rewards/chosen": 3.0364255905151367, "rewards/margins": 8.178760528564453, "rewards/rejected": -5.142334938049316, "step": 1606 }, { "epoch": 0.89, "learning_rate": 6.10821998566416e-07, "logits/chosen": -6.034297943115234, "logits/rejected": -6.077381610870361, "logps/chosen": -178.36383056640625, "logps/rejected": -206.99319458007812, "loss": 0.1356, "rewards/accuracies": 0.9375, "rewards/chosen": 4.1765947341918945, "rewards/margins": 10.91254997253418, "rewards/rejected": -6.735956192016602, "step": 1607 }, { "epoch": 0.89, "learning_rate": 6.103834405091687e-07, "logits/chosen": -6.073627471923828, "logits/rejected": -5.9763264656066895, "logps/chosen": -198.65975952148438, "logps/rejected": -196.73388671875, "loss": 0.1625, "rewards/accuracies": 1.0, "rewards/chosen": 3.2910025119781494, "rewards/margins": 9.72872543334961, "rewards/rejected": -6.437723636627197, "step": 1608 }, { "epoch": 0.89, "learning_rate": 6.099447931611716e-07, "logits/chosen": -6.1006622314453125, "logits/rejected": -6.019248962402344, "logps/chosen": -272.0364685058594, "logps/rejected": -228.73544311523438, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": 4.6215362548828125, "rewards/margins": 9.888105392456055, "rewards/rejected": -5.266568183898926, "step": 1609 }, { "epoch": 0.89, "learning_rate": 6.095060568772523e-07, "logits/chosen": -5.9687418937683105, "logits/rejected": -6.059020042419434, "logps/chosen": -215.60928344726562, "logps/rejected": -431.5743103027344, "loss": 0.1306, "rewards/accuracies": 1.0, "rewards/chosen": 3.33670973777771, "rewards/margins": 9.8176851272583, "rewards/rejected": -6.480975151062012, "step": 1610 }, { "epoch": 0.89, "learning_rate": 6.090672320123113e-07, "logits/chosen": -6.085768699645996, "logits/rejected": -6.015437126159668, "logps/chosen": -249.66848754882812, "logps/rejected": -197.0887451171875, "loss": 0.2794, "rewards/accuracies": 0.875, "rewards/chosen": 2.87576961517334, "rewards/margins": 7.986615180969238, "rewards/rejected": -5.110846519470215, "step": 1611 }, { "epoch": 0.89, "learning_rate": 6.086283189213202e-07, "logits/chosen": -6.142833709716797, "logits/rejected": -6.158761024475098, "logps/chosen": -327.9089660644531, "logps/rejected": -213.78756713867188, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": 6.70697021484375, "rewards/margins": 12.172812461853027, "rewards/rejected": -5.465841293334961, "step": 1612 }, { "epoch": 0.9, "learning_rate": 6.08189317959322e-07, "logits/chosen": -6.073903560638428, "logits/rejected": -5.978496074676514, "logps/chosen": -471.262451171875, "logps/rejected": -305.10821533203125, "loss": 0.0916, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8883132934570312, "rewards/margins": 9.285351753234863, "rewards/rejected": -5.397038459777832, "step": 1613 }, { "epoch": 0.9, "learning_rate": 6.077502294814311e-07, "logits/chosen": -6.074234962463379, "logits/rejected": -6.065969467163086, "logps/chosen": -263.7818908691406, "logps/rejected": -215.27810668945312, "loss": 0.0994, "rewards/accuracies": 0.9375, "rewards/chosen": 4.871700763702393, "rewards/margins": 9.275404930114746, "rewards/rejected": -4.403703689575195, "step": 1614 }, { "epoch": 0.9, "learning_rate": 6.073110538428322e-07, "logits/chosen": -6.127803325653076, "logits/rejected": -6.070075035095215, "logps/chosen": -235.57232666015625, "logps/rejected": -227.0072784423828, "loss": 0.1391, "rewards/accuracies": 0.9375, "rewards/chosen": 4.013041019439697, "rewards/margins": 9.492513656616211, "rewards/rejected": -5.479473114013672, "step": 1615 }, { "epoch": 0.9, "learning_rate": 6.06871791398781e-07, "logits/chosen": -6.029587745666504, "logits/rejected": -5.958592891693115, "logps/chosen": -493.381103515625, "logps/rejected": -382.1161804199219, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": 2.2593960762023926, "rewards/margins": 9.24366569519043, "rewards/rejected": -6.984270095825195, "step": 1616 }, { "epoch": 0.9, "learning_rate": 6.064324425046029e-07, "logits/chosen": -6.042539596557617, "logits/rejected": -5.987517833709717, "logps/chosen": -488.30059814453125, "logps/rejected": -396.47625732421875, "loss": 0.096, "rewards/accuracies": 0.875, "rewards/chosen": 3.3180785179138184, "rewards/margins": 8.014863014221191, "rewards/rejected": -4.696784019470215, "step": 1617 }, { "epoch": 0.9, "learning_rate": 6.05993007515694e-07, "logits/chosen": -6.011741638183594, "logits/rejected": -5.980724334716797, "logps/chosen": -428.1706237792969, "logps/rejected": -287.49127197265625, "loss": 0.0686, "rewards/accuracies": 1.0, "rewards/chosen": 6.733669281005859, "rewards/margins": 11.839519500732422, "rewards/rejected": -5.1058502197265625, "step": 1618 }, { "epoch": 0.9, "learning_rate": 6.055534867875193e-07, "logits/chosen": -6.1363067626953125, "logits/rejected": -6.114630222320557, "logps/chosen": -495.82037353515625, "logps/rejected": -287.06097412109375, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 4.290609359741211, "rewards/margins": 10.329893112182617, "rewards/rejected": -6.039283275604248, "step": 1619 }, { "epoch": 0.9, "learning_rate": 6.051138806756135e-07, "logits/chosen": -6.0335516929626465, "logits/rejected": -6.0314130783081055, "logps/chosen": -120.2205810546875, "logps/rejected": -177.537841796875, "loss": 0.046, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3896267414093018, "rewards/margins": 7.616544723510742, "rewards/rejected": -5.2269182205200195, "step": 1620 }, { "epoch": 0.9, "learning_rate": 6.046741895355802e-07, "logits/chosen": -6.065083980560303, "logits/rejected": -6.046645641326904, "logps/chosen": -234.73280334472656, "logps/rejected": -190.07357788085938, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": 2.0291521549224854, "rewards/margins": 8.642096519470215, "rewards/rejected": -6.612943649291992, "step": 1621 }, { "epoch": 0.9, "learning_rate": 6.042344137230918e-07, "logits/chosen": -5.89671516418457, "logits/rejected": -5.946967124938965, "logps/chosen": -351.25811767578125, "logps/rejected": -258.0903625488281, "loss": 0.1331, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8127212524414062, "rewards/margins": 5.4955339431762695, "rewards/rejected": -2.682812452316284, "step": 1622 }, { "epoch": 0.9, "learning_rate": 6.037945535938895e-07, "logits/chosen": -6.01467752456665, "logits/rejected": -6.081526756286621, "logps/chosen": -222.2176971435547, "logps/rejected": -191.98956298828125, "loss": 0.2028, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6933095455169678, "rewards/margins": 9.116694450378418, "rewards/rejected": -5.423384189605713, "step": 1623 }, { "epoch": 0.9, "learning_rate": 6.033546095037824e-07, "logits/chosen": -5.940022945404053, "logits/rejected": -6.065454483032227, "logps/chosen": -183.17874145507812, "logps/rejected": -265.6029052734375, "loss": 0.2257, "rewards/accuracies": 1.0, "rewards/chosen": 3.478768825531006, "rewards/margins": 12.61890983581543, "rewards/rejected": -9.140141487121582, "step": 1624 }, { "epoch": 0.9, "learning_rate": 6.029145818086474e-07, "logits/chosen": -6.084329605102539, "logits/rejected": -6.0705246925354, "logps/chosen": -274.2096862792969, "logps/rejected": -253.0350341796875, "loss": 0.1483, "rewards/accuracies": 1.0, "rewards/chosen": 3.016253709793091, "rewards/margins": 11.207036972045898, "rewards/rejected": -8.190784454345703, "step": 1625 }, { "epoch": 0.9, "learning_rate": 6.024744708644297e-07, "logits/chosen": -6.2086687088012695, "logits/rejected": -6.233136177062988, "logps/chosen": -275.8097229003906, "logps/rejected": -204.7491912841797, "loss": 0.0859, "rewards/accuracies": 1.0, "rewards/chosen": 3.799752712249756, "rewards/margins": 9.599245071411133, "rewards/rejected": -5.799491882324219, "step": 1626 }, { "epoch": 0.9, "learning_rate": 6.020342770271408e-07, "logits/chosen": -6.057321548461914, "logits/rejected": -6.091474533081055, "logps/chosen": -234.22378540039062, "logps/rejected": -269.1127014160156, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": 3.6939053535461426, "rewards/margins": 8.041869163513184, "rewards/rejected": -4.347964286804199, "step": 1627 }, { "epoch": 0.9, "learning_rate": 6.015940006528601e-07, "logits/chosen": -6.063793182373047, "logits/rejected": -6.115207195281982, "logps/chosen": -160.39950561523438, "logps/rejected": -116.2717056274414, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": 1.767885446548462, "rewards/margins": 6.83184289932251, "rewards/rejected": -5.063958168029785, "step": 1628 }, { "epoch": 0.9, "learning_rate": 6.011536420977336e-07, "logits/chosen": -6.028460502624512, "logits/rejected": -6.106487274169922, "logps/chosen": -217.99232482910156, "logps/rejected": -278.57470703125, "loss": 0.111, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8945047855377197, "rewards/margins": 9.748961448669434, "rewards/rejected": -6.854457855224609, "step": 1629 }, { "epoch": 0.9, "learning_rate": 6.007132017179732e-07, "logits/chosen": -6.023352146148682, "logits/rejected": -6.044873237609863, "logps/chosen": -334.9658508300781, "logps/rejected": -137.51332092285156, "loss": 0.0804, "rewards/accuracies": 0.9375, "rewards/chosen": 7.211991786956787, "rewards/margins": 9.292257308959961, "rewards/rejected": -2.0802652835845947, "step": 1630 }, { "epoch": 0.91, "learning_rate": 6.002726798698579e-07, "logits/chosen": -6.0746002197265625, "logits/rejected": -6.059895038604736, "logps/chosen": -258.83477783203125, "logps/rejected": -161.15859985351562, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 4.54239559173584, "rewards/margins": 8.691537857055664, "rewards/rejected": -4.149142265319824, "step": 1631 }, { "epoch": 0.91, "learning_rate": 5.998320769097321e-07, "logits/chosen": -5.967121601104736, "logits/rejected": -6.022951126098633, "logps/chosen": -216.6639862060547, "logps/rejected": -285.82562255859375, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": 3.2947282791137695, "rewards/margins": 10.178252220153809, "rewards/rejected": -6.883523941040039, "step": 1632 }, { "epoch": 0.91, "learning_rate": 5.993913931940057e-07, "logits/chosen": -6.012048721313477, "logits/rejected": -5.997523307800293, "logps/chosen": -236.0823974609375, "logps/rejected": -236.4833526611328, "loss": 0.1071, "rewards/accuracies": 0.9375, "rewards/chosen": 4.207207202911377, "rewards/margins": 8.86679744720459, "rewards/rejected": -4.659590721130371, "step": 1633 }, { "epoch": 0.91, "learning_rate": 5.989506290791541e-07, "logits/chosen": -6.020510196685791, "logits/rejected": -6.048640727996826, "logps/chosen": -555.1685180664062, "logps/rejected": -374.5869140625, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": 5.521838665008545, "rewards/margins": 9.631673812866211, "rewards/rejected": -4.109835147857666, "step": 1634 }, { "epoch": 0.91, "learning_rate": 5.985097849217178e-07, "logits/chosen": -6.055747032165527, "logits/rejected": -6.161941051483154, "logps/chosen": -238.6415557861328, "logps/rejected": -212.042236328125, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": 2.9888925552368164, "rewards/margins": 9.149168968200684, "rewards/rejected": -6.160276412963867, "step": 1635 }, { "epoch": 0.91, "learning_rate": 5.980688610783019e-07, "logits/chosen": -6.075878143310547, "logits/rejected": -6.010372161865234, "logps/chosen": -149.08538818359375, "logps/rejected": -179.68212890625, "loss": 0.1643, "rewards/accuracies": 1.0, "rewards/chosen": 1.1617798805236816, "rewards/margins": 7.105556011199951, "rewards/rejected": -5.9437761306762695, "step": 1636 }, { "epoch": 0.91, "learning_rate": 5.976278579055761e-07, "logits/chosen": -6.094882965087891, "logits/rejected": -6.16562032699585, "logps/chosen": -238.01556396484375, "logps/rejected": -283.71490478515625, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": 4.010027885437012, "rewards/margins": 11.21702766418457, "rewards/rejected": -7.206998825073242, "step": 1637 }, { "epoch": 0.91, "learning_rate": 5.971867757602741e-07, "logits/chosen": -5.996158599853516, "logits/rejected": -6.10614538192749, "logps/chosen": -275.79278564453125, "logps/rejected": -187.58639526367188, "loss": 0.1331, "rewards/accuracies": 1.0, "rewards/chosen": 3.6472434997558594, "rewards/margins": 9.233357429504395, "rewards/rejected": -5.586113452911377, "step": 1638 }, { "epoch": 0.91, "learning_rate": 5.967456149991938e-07, "logits/chosen": -6.134365558624268, "logits/rejected": -6.046101093292236, "logps/chosen": -241.95089721679688, "logps/rejected": -225.4021453857422, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": 3.530853509902954, "rewards/margins": 10.726637840270996, "rewards/rejected": -7.195784568786621, "step": 1639 }, { "epoch": 0.91, "learning_rate": 5.963043759791961e-07, "logits/chosen": -6.004218578338623, "logits/rejected": -6.0791335105896, "logps/chosen": -270.5992736816406, "logps/rejected": -195.163818359375, "loss": 0.0667, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6860318183898926, "rewards/margins": 8.342131614685059, "rewards/rejected": -4.656099319458008, "step": 1640 }, { "epoch": 0.91, "learning_rate": 5.95863059057206e-07, "logits/chosen": -6.022615909576416, "logits/rejected": -5.932972431182861, "logps/chosen": -280.1654357910156, "logps/rejected": -372.62750244140625, "loss": 0.1363, "rewards/accuracies": 0.9375, "rewards/chosen": 2.132091522216797, "rewards/margins": 6.959777355194092, "rewards/rejected": -4.827686309814453, "step": 1641 }, { "epoch": 0.91, "learning_rate": 5.954216645902108e-07, "logits/chosen": -6.078455924987793, "logits/rejected": -6.0672454833984375, "logps/chosen": -301.0235900878906, "logps/rejected": -148.93423461914062, "loss": 0.1176, "rewards/accuracies": 0.9375, "rewards/chosen": 5.460176467895508, "rewards/margins": 10.216410636901855, "rewards/rejected": -4.756234169006348, "step": 1642 }, { "epoch": 0.91, "learning_rate": 5.949801929352609e-07, "logits/chosen": -6.193999290466309, "logits/rejected": -6.050579071044922, "logps/chosen": -339.7799072265625, "logps/rejected": -219.99070739746094, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": 4.0853166580200195, "rewards/margins": 10.113027572631836, "rewards/rejected": -6.027710914611816, "step": 1643 }, { "epoch": 0.91, "learning_rate": 5.945386444494691e-07, "logits/chosen": -5.981635570526123, "logits/rejected": -6.059920310974121, "logps/chosen": -311.61798095703125, "logps/rejected": -219.57330322265625, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 5.559095859527588, "rewards/margins": 9.233779907226562, "rewards/rejected": -3.674685001373291, "step": 1644 }, { "epoch": 0.91, "learning_rate": 5.940970194900103e-07, "logits/chosen": -6.061506748199463, "logits/rejected": -6.025824546813965, "logps/chosen": -324.6025085449219, "logps/rejected": -294.1129150390625, "loss": 0.1632, "rewards/accuracies": 0.8125, "rewards/chosen": 3.855998992919922, "rewards/margins": 8.98115348815918, "rewards/rejected": -5.1251540184021, "step": 1645 }, { "epoch": 0.91, "learning_rate": 5.936553184141213e-07, "logits/chosen": -6.2022600173950195, "logits/rejected": -6.093095779418945, "logps/chosen": -256.3349609375, "logps/rejected": -159.40357971191406, "loss": 0.1239, "rewards/accuracies": 1.0, "rewards/chosen": 3.0930168628692627, "rewards/margins": 8.87504768371582, "rewards/rejected": -5.782031059265137, "step": 1646 }, { "epoch": 0.91, "learning_rate": 5.932135415791003e-07, "logits/chosen": -6.067758083343506, "logits/rejected": -5.983253002166748, "logps/chosen": -197.15855407714844, "logps/rejected": -200.95245361328125, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 2.0170936584472656, "rewards/margins": 8.701469421386719, "rewards/rejected": -6.684374809265137, "step": 1647 }, { "epoch": 0.91, "learning_rate": 5.92771689342307e-07, "logits/chosen": -5.971281051635742, "logits/rejected": -6.0630364418029785, "logps/chosen": -208.57843017578125, "logps/rejected": -216.93145751953125, "loss": 0.1224, "rewards/accuracies": 1.0, "rewards/chosen": 3.203789710998535, "rewards/margins": 8.222496032714844, "rewards/rejected": -5.018706321716309, "step": 1648 }, { "epoch": 0.92, "learning_rate": 5.923297620611622e-07, "logits/chosen": -6.057746887207031, "logits/rejected": -6.091047763824463, "logps/chosen": -328.387939453125, "logps/rejected": -261.85736083984375, "loss": 0.1157, "rewards/accuracies": 0.875, "rewards/chosen": 3.4850666522979736, "rewards/margins": 8.6096773147583, "rewards/rejected": -5.1246113777160645, "step": 1649 }, { "epoch": 0.92, "learning_rate": 5.91887760093147e-07, "logits/chosen": -6.060610771179199, "logits/rejected": -6.105068683624268, "logps/chosen": -287.94317626953125, "logps/rejected": -233.1158905029297, "loss": 0.0992, "rewards/accuracies": 0.9375, "rewards/chosen": 5.931866645812988, "rewards/margins": 11.031133651733398, "rewards/rejected": -5.099267482757568, "step": 1650 }, { "epoch": 0.92, "learning_rate": 5.914456837958032e-07, "logits/chosen": -5.869077205657959, "logits/rejected": -6.008951663970947, "logps/chosen": -329.1220397949219, "logps/rejected": -275.0701904296875, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 4.158830642700195, "rewards/margins": 9.101585388183594, "rewards/rejected": -4.942754745483398, "step": 1651 }, { "epoch": 0.92, "learning_rate": 5.910035335267326e-07, "logits/chosen": -5.925406455993652, "logits/rejected": -6.008334159851074, "logps/chosen": -367.3049621582031, "logps/rejected": -298.9161071777344, "loss": 0.1353, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3028454780578613, "rewards/margins": 8.268807411193848, "rewards/rejected": -4.965961933135986, "step": 1652 }, { "epoch": 0.92, "learning_rate": 5.905613096435971e-07, "logits/chosen": -6.020551681518555, "logits/rejected": -6.12918758392334, "logps/chosen": -288.51971435546875, "logps/rejected": -232.07142639160156, "loss": 0.0495, "rewards/accuracies": 0.9375, "rewards/chosen": 4.05113410949707, "rewards/margins": 7.991847991943359, "rewards/rejected": -3.9407143592834473, "step": 1653 }, { "epoch": 0.92, "learning_rate": 5.901190125041178e-07, "logits/chosen": -6.088929176330566, "logits/rejected": -6.075222969055176, "logps/chosen": -214.83291625976562, "logps/rejected": -151.6226806640625, "loss": 0.1149, "rewards/accuracies": 1.0, "rewards/chosen": 3.032721996307373, "rewards/margins": 8.28334903717041, "rewards/rejected": -5.250627517700195, "step": 1654 }, { "epoch": 0.92, "learning_rate": 5.896766424660751e-07, "logits/chosen": -5.931238174438477, "logits/rejected": -6.022449493408203, "logps/chosen": -227.34625244140625, "logps/rejected": -121.14425659179688, "loss": 0.0921, "rewards/accuracies": 1.0, "rewards/chosen": 3.4976446628570557, "rewards/margins": 10.24337387084961, "rewards/rejected": -6.745728969573975, "step": 1655 }, { "epoch": 0.92, "learning_rate": 5.892341998873088e-07, "logits/chosen": -6.099121570587158, "logits/rejected": -6.05796480178833, "logps/chosen": -472.5213928222656, "logps/rejected": -376.2942199707031, "loss": 0.1056, "rewards/accuracies": 0.875, "rewards/chosen": 2.4506258964538574, "rewards/margins": 8.963090896606445, "rewards/rejected": -6.512465476989746, "step": 1656 }, { "epoch": 0.92, "learning_rate": 5.887916851257166e-07, "logits/chosen": -6.025374889373779, "logits/rejected": -6.0087080001831055, "logps/chosen": -206.59649658203125, "logps/rejected": -188.548828125, "loss": 0.0502, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4953083992004395, "rewards/margins": 7.6685285568237305, "rewards/rejected": -4.173219680786133, "step": 1657 }, { "epoch": 0.92, "learning_rate": 5.883490985392556e-07, "logits/chosen": -6.033034324645996, "logits/rejected": -6.112205505371094, "logps/chosen": -234.26513671875, "logps/rejected": -272.91015625, "loss": 0.0854, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1062934398651123, "rewards/margins": 10.016412734985352, "rewards/rejected": -6.910120010375977, "step": 1658 }, { "epoch": 0.92, "learning_rate": 5.879064404859399e-07, "logits/chosen": -5.976876735687256, "logits/rejected": -6.0763959884643555, "logps/chosen": -189.74343872070312, "logps/rejected": -313.7462158203125, "loss": 0.1052, "rewards/accuracies": 0.75, "rewards/chosen": 1.6823885440826416, "rewards/margins": 9.751112937927246, "rewards/rejected": -8.068723678588867, "step": 1659 }, { "epoch": 0.92, "learning_rate": 5.874637113238426e-07, "logits/chosen": -6.004329681396484, "logits/rejected": -6.10116720199585, "logps/chosen": -231.22364807128906, "logps/rejected": -237.12628173828125, "loss": 0.1842, "rewards/accuracies": 1.0, "rewards/chosen": 3.7086966037750244, "rewards/margins": 10.150426864624023, "rewards/rejected": -6.44173002243042, "step": 1660 }, { "epoch": 0.92, "learning_rate": 5.870209114110929e-07, "logits/chosen": -6.2073588371276855, "logits/rejected": -6.036334037780762, "logps/chosen": -249.74461364746094, "logps/rejected": -128.74774169921875, "loss": 0.174, "rewards/accuracies": 0.9375, "rewards/chosen": 4.118016719818115, "rewards/margins": 9.922861099243164, "rewards/rejected": -5.804844856262207, "step": 1661 }, { "epoch": 0.92, "learning_rate": 5.865780411058785e-07, "logits/chosen": -6.084383487701416, "logits/rejected": -6.057465553283691, "logps/chosen": -231.67796325683594, "logps/rejected": -194.52362060546875, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": 3.3569703102111816, "rewards/margins": 9.410107612609863, "rewards/rejected": -6.05313777923584, "step": 1662 }, { "epoch": 0.92, "learning_rate": 5.861351007664433e-07, "logits/chosen": -6.035946369171143, "logits/rejected": -6.051331996917725, "logps/chosen": -328.6910400390625, "logps/rejected": -228.83966064453125, "loss": 0.198, "rewards/accuracies": 0.9375, "rewards/chosen": 3.133678674697876, "rewards/margins": 9.727907180786133, "rewards/rejected": -6.594228267669678, "step": 1663 }, { "epoch": 0.92, "learning_rate": 5.856920907510885e-07, "logits/chosen": -6.104483604431152, "logits/rejected": -6.183267116546631, "logps/chosen": -212.4864959716797, "logps/rejected": -286.6375732421875, "loss": 0.0628, "rewards/accuracies": 0.9375, "rewards/chosen": 3.366835355758667, "rewards/margins": 9.879018783569336, "rewards/rejected": -6.51218318939209, "step": 1664 }, { "epoch": 0.92, "learning_rate": 5.852490114181709e-07, "logits/chosen": -5.895264625549316, "logits/rejected": -6.006694793701172, "logps/chosen": -245.44886779785156, "logps/rejected": -213.74847412109375, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": 3.950211524963379, "rewards/margins": 8.616202354431152, "rewards/rejected": -4.665990352630615, "step": 1665 }, { "epoch": 0.92, "learning_rate": 5.848058631261038e-07, "logits/chosen": -6.158617973327637, "logits/rejected": -6.149113178253174, "logps/chosen": -262.5713195800781, "logps/rejected": -241.07415771484375, "loss": 0.1072, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7070162296295166, "rewards/margins": 10.619071960449219, "rewards/rejected": -6.912055492401123, "step": 1666 }, { "epoch": 0.93, "learning_rate": 5.843626462333563e-07, "logits/chosen": -6.029292106628418, "logits/rejected": -5.979700565338135, "logps/chosen": -390.9122314453125, "logps/rejected": -326.727783203125, "loss": 0.1567, "rewards/accuracies": 0.875, "rewards/chosen": 3.799527168273926, "rewards/margins": 8.970231056213379, "rewards/rejected": -5.170703411102295, "step": 1667 }, { "epoch": 0.93, "learning_rate": 5.839193610984529e-07, "logits/chosen": -6.062492847442627, "logits/rejected": -5.9984636306762695, "logps/chosen": -244.764892578125, "logps/rejected": -166.06802368164062, "loss": 0.13, "rewards/accuracies": 1.0, "rewards/chosen": 2.411555528640747, "rewards/margins": 8.516819953918457, "rewards/rejected": -6.105264663696289, "step": 1668 }, { "epoch": 0.93, "learning_rate": 5.834760080799734e-07, "logits/chosen": -6.103978157043457, "logits/rejected": -5.988402366638184, "logps/chosen": -229.5499267578125, "logps/rejected": -117.00796508789062, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": 3.6762351989746094, "rewards/margins": 9.743535995483398, "rewards/rejected": -6.0673017501831055, "step": 1669 }, { "epoch": 0.93, "learning_rate": 5.83032587536552e-07, "logits/chosen": -6.053295612335205, "logits/rejected": -5.982060432434082, "logps/chosen": -268.34613037109375, "logps/rejected": -216.6476593017578, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": 2.8542513847351074, "rewards/margins": 11.025167465209961, "rewards/rejected": -8.170916557312012, "step": 1670 }, { "epoch": 0.93, "learning_rate": 5.825890998268783e-07, "logits/chosen": -6.001538276672363, "logits/rejected": -6.013491630554199, "logps/chosen": -366.33062744140625, "logps/rejected": -268.1955261230469, "loss": 0.0703, "rewards/accuracies": 0.9375, "rewards/chosen": 5.483433246612549, "rewards/margins": 11.683982849121094, "rewards/rejected": -6.2005486488342285, "step": 1671 }, { "epoch": 0.93, "learning_rate": 5.821455453096959e-07, "logits/chosen": -6.066564083099365, "logits/rejected": -6.094306468963623, "logps/chosen": -215.66876220703125, "logps/rejected": -188.31185913085938, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": 4.559203147888184, "rewards/margins": 12.48423957824707, "rewards/rejected": -7.9250359535217285, "step": 1672 }, { "epoch": 0.93, "learning_rate": 5.817019243438024e-07, "logits/chosen": -5.925924301147461, "logits/rejected": -5.996199607849121, "logps/chosen": -223.0858612060547, "logps/rejected": -261.86285400390625, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": 3.190077781677246, "rewards/margins": 9.126471519470215, "rewards/rejected": -5.936394214630127, "step": 1673 }, { "epoch": 0.93, "learning_rate": 5.81258237288049e-07, "logits/chosen": -6.114505290985107, "logits/rejected": -6.126010417938232, "logps/chosen": -264.5419006347656, "logps/rejected": -265.7200622558594, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": 2.4740843772888184, "rewards/margins": 11.49656867980957, "rewards/rejected": -9.022483825683594, "step": 1674 }, { "epoch": 0.93, "learning_rate": 5.808144845013407e-07, "logits/chosen": -6.084721565246582, "logits/rejected": -6.1787285804748535, "logps/chosen": -282.2773132324219, "logps/rejected": -295.3731689453125, "loss": 0.067, "rewards/accuracies": 1.0, "rewards/chosen": 3.6453847885131836, "rewards/margins": 9.424337387084961, "rewards/rejected": -5.778952598571777, "step": 1675 }, { "epoch": 0.93, "learning_rate": 5.803706663426354e-07, "logits/chosen": -6.04716682434082, "logits/rejected": -6.0968017578125, "logps/chosen": -275.8276672363281, "logps/rejected": -232.69100952148438, "loss": 0.0912, "rewards/accuracies": 0.875, "rewards/chosen": 5.1754069328308105, "rewards/margins": 10.37460708618164, "rewards/rejected": -5.19920015335083, "step": 1676 }, { "epoch": 0.93, "learning_rate": 5.799267831709442e-07, "logits/chosen": -6.0403733253479, "logits/rejected": -6.1029157638549805, "logps/chosen": -216.3463592529297, "logps/rejected": -230.60069274902344, "loss": 0.0884, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2855989933013916, "rewards/margins": 8.978994369506836, "rewards/rejected": -5.693395614624023, "step": 1677 }, { "epoch": 0.93, "learning_rate": 5.7948283534533e-07, "logits/chosen": -6.019827842712402, "logits/rejected": -6.107773780822754, "logps/chosen": -372.298095703125, "logps/rejected": -296.79095458984375, "loss": 0.1792, "rewards/accuracies": 1.0, "rewards/chosen": 5.941802024841309, "rewards/margins": 10.605664253234863, "rewards/rejected": -4.663862228393555, "step": 1678 }, { "epoch": 0.93, "learning_rate": 5.790388232249093e-07, "logits/chosen": -6.107993125915527, "logits/rejected": -6.112421035766602, "logps/chosen": -301.4957275390625, "logps/rejected": -302.63665771484375, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": 4.264251232147217, "rewards/margins": 11.632864952087402, "rewards/rejected": -7.3686137199401855, "step": 1679 }, { "epoch": 0.93, "learning_rate": 5.785947471688495e-07, "logits/chosen": -5.9881439208984375, "logits/rejected": -6.00907039642334, "logps/chosen": -313.40423583984375, "logps/rejected": -406.77850341796875, "loss": 0.1372, "rewards/accuracies": 0.8125, "rewards/chosen": 2.910884141921997, "rewards/margins": 7.313832759857178, "rewards/rejected": -4.40294885635376, "step": 1680 }, { "epoch": 0.93, "learning_rate": 5.781506075363702e-07, "logits/chosen": -6.045585632324219, "logits/rejected": -6.045320510864258, "logps/chosen": -299.12335205078125, "logps/rejected": -223.4322052001953, "loss": 0.0517, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6742660999298096, "rewards/margins": 10.722770690917969, "rewards/rejected": -7.04850435256958, "step": 1681 }, { "epoch": 0.93, "learning_rate": 5.77706404686742e-07, "logits/chosen": -6.127384662628174, "logits/rejected": -6.007124900817871, "logps/chosen": -269.5698547363281, "logps/rejected": -150.7200164794922, "loss": 0.1037, "rewards/accuracies": 0.875, "rewards/chosen": 6.553863048553467, "rewards/margins": 10.566854476928711, "rewards/rejected": -4.012990474700928, "step": 1682 }, { "epoch": 0.93, "learning_rate": 5.772621389792874e-07, "logits/chosen": -6.033136367797852, "logits/rejected": -6.010811805725098, "logps/chosen": -351.0094299316406, "logps/rejected": -169.36257934570312, "loss": 0.1459, "rewards/accuracies": 0.9375, "rewards/chosen": 6.1614484786987305, "rewards/margins": 10.550751686096191, "rewards/rejected": -4.389302730560303, "step": 1683 }, { "epoch": 0.93, "learning_rate": 5.76817810773379e-07, "logits/chosen": -6.188190460205078, "logits/rejected": -6.135143280029297, "logps/chosen": -315.7386474609375, "logps/rejected": -191.27687072753906, "loss": 0.0607, "rewards/accuracies": 0.9375, "rewards/chosen": 4.779800891876221, "rewards/margins": 9.996560096740723, "rewards/rejected": -5.21675968170166, "step": 1684 }, { "epoch": 0.94, "learning_rate": 5.763734204284406e-07, "logits/chosen": -5.983306884765625, "logits/rejected": -6.032714366912842, "logps/chosen": -292.3626708984375, "logps/rejected": -230.3086700439453, "loss": 0.112, "rewards/accuracies": 0.9375, "rewards/chosen": 2.039560556411743, "rewards/margins": 9.374382972717285, "rewards/rejected": -7.334822654724121, "step": 1685 }, { "epoch": 0.94, "learning_rate": 5.759289683039458e-07, "logits/chosen": -6.00457239151001, "logits/rejected": -6.106083869934082, "logps/chosen": -264.59149169921875, "logps/rejected": -250.4671630859375, "loss": 0.1364, "rewards/accuracies": 1.0, "rewards/chosen": 3.9344682693481445, "rewards/margins": 12.711753845214844, "rewards/rejected": -8.7772855758667, "step": 1686 }, { "epoch": 0.94, "learning_rate": 5.754844547594181e-07, "logits/chosen": -5.988086700439453, "logits/rejected": -6.091413974761963, "logps/chosen": -247.50717163085938, "logps/rejected": -269.55084228515625, "loss": 0.1784, "rewards/accuracies": 0.875, "rewards/chosen": 3.9273130893707275, "rewards/margins": 9.449560165405273, "rewards/rejected": -5.522247314453125, "step": 1687 }, { "epoch": 0.94, "learning_rate": 5.75039880154431e-07, "logits/chosen": -5.948085784912109, "logits/rejected": -6.01035737991333, "logps/chosen": -213.41136169433594, "logps/rejected": -245.03778076171875, "loss": 0.0897, "rewards/accuracies": 0.9375, "rewards/chosen": 3.765791177749634, "rewards/margins": 9.150490760803223, "rewards/rejected": -5.384699821472168, "step": 1688 }, { "epoch": 0.94, "learning_rate": 5.745952448486073e-07, "logits/chosen": -6.027675628662109, "logits/rejected": -6.056613922119141, "logps/chosen": -299.7242431640625, "logps/rejected": -251.724609375, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": 4.050775051116943, "rewards/margins": 8.59959602355957, "rewards/rejected": -4.548820972442627, "step": 1689 }, { "epoch": 0.94, "learning_rate": 5.741505492016191e-07, "logits/chosen": -6.068517208099365, "logits/rejected": -5.978466033935547, "logps/chosen": -275.3399658203125, "logps/rejected": -137.3173828125, "loss": 0.1384, "rewards/accuracies": 1.0, "rewards/chosen": 4.242453575134277, "rewards/margins": 9.378767013549805, "rewards/rejected": -5.136313438415527, "step": 1690 }, { "epoch": 0.94, "learning_rate": 5.737057935731867e-07, "logits/chosen": -5.933292388916016, "logits/rejected": -6.015089988708496, "logps/chosen": -244.66958618164062, "logps/rejected": -307.3807373046875, "loss": 0.1797, "rewards/accuracies": 0.9375, "rewards/chosen": 2.230458974838257, "rewards/margins": 6.73511266708374, "rewards/rejected": -4.504653453826904, "step": 1691 }, { "epoch": 0.94, "learning_rate": 5.732609783230796e-07, "logits/chosen": -6.020136833190918, "logits/rejected": -5.9978766441345215, "logps/chosen": -190.59173583984375, "logps/rejected": -192.9050750732422, "loss": 0.0728, "rewards/accuracies": 0.875, "rewards/chosen": 1.9744515419006348, "rewards/margins": 8.589170455932617, "rewards/rejected": -6.614718437194824, "step": 1692 }, { "epoch": 0.94, "learning_rate": 5.72816103811115e-07, "logits/chosen": -6.20889139175415, "logits/rejected": -6.129967212677002, "logps/chosen": -278.4263000488281, "logps/rejected": -226.95803833007812, "loss": 0.1199, "rewards/accuracies": 1.0, "rewards/chosen": 4.271392822265625, "rewards/margins": 10.851140975952148, "rewards/rejected": -6.579748153686523, "step": 1693 }, { "epoch": 0.94, "learning_rate": 5.723711703971588e-07, "logits/chosen": -6.105234146118164, "logits/rejected": -6.088257789611816, "logps/chosen": -206.6703643798828, "logps/rejected": -146.031494140625, "loss": 0.1028, "rewards/accuracies": 0.9375, "rewards/chosen": 4.285841941833496, "rewards/margins": 7.564861297607422, "rewards/rejected": -3.2790191173553467, "step": 1694 }, { "epoch": 0.94, "learning_rate": 5.719261784411232e-07, "logits/chosen": -6.16725492477417, "logits/rejected": -6.020571708679199, "logps/chosen": -358.14373779296875, "logps/rejected": -138.42141723632812, "loss": 0.0504, "rewards/accuracies": 0.9375, "rewards/chosen": 6.429331302642822, "rewards/margins": 10.822843551635742, "rewards/rejected": -4.39351224899292, "step": 1695 }, { "epoch": 0.94, "learning_rate": 5.714811283029694e-07, "logits/chosen": -5.997410297393799, "logits/rejected": -5.949276924133301, "logps/chosen": -195.60508728027344, "logps/rejected": -240.2404022216797, "loss": 0.0479, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4760560989379883, "rewards/margins": 9.150813102722168, "rewards/rejected": -6.67475700378418, "step": 1696 }, { "epoch": 0.94, "learning_rate": 5.710360203427043e-07, "logits/chosen": -6.103306770324707, "logits/rejected": -6.105691909790039, "logps/chosen": -248.89801025390625, "logps/rejected": -225.83343505859375, "loss": 0.1192, "rewards/accuracies": 0.9375, "rewards/chosen": 3.466752052307129, "rewards/margins": 10.280374526977539, "rewards/rejected": -6.813622951507568, "step": 1697 }, { "epoch": 0.94, "learning_rate": 5.705908549203822e-07, "logits/chosen": -6.05153751373291, "logits/rejected": -6.087894916534424, "logps/chosen": -223.52183532714844, "logps/rejected": -268.2416687011719, "loss": 0.1021, "rewards/accuracies": 0.9375, "rewards/chosen": 2.131145477294922, "rewards/margins": 7.828208923339844, "rewards/rejected": -5.697063446044922, "step": 1698 }, { "epoch": 0.94, "learning_rate": 5.701456323961041e-07, "logits/chosen": -5.998207092285156, "logits/rejected": -6.037786960601807, "logps/chosen": -263.532470703125, "logps/rejected": -258.3464050292969, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 4.18204927444458, "rewards/margins": 9.48819351196289, "rewards/rejected": -5.306143283843994, "step": 1699 }, { "epoch": 0.94, "learning_rate": 5.697003531300167e-07, "logits/chosen": -6.024164199829102, "logits/rejected": -5.977136135101318, "logps/chosen": -178.07638549804688, "logps/rejected": -170.05552673339844, "loss": 0.1027, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7530224323272705, "rewards/margins": 8.313206672668457, "rewards/rejected": -4.560184478759766, "step": 1700 }, { "epoch": 0.94, "learning_rate": 5.692550174823128e-07, "logits/chosen": -6.075284957885742, "logits/rejected": -6.112494468688965, "logps/chosen": -270.1612854003906, "logps/rejected": -270.247802734375, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": 3.896007537841797, "rewards/margins": 10.17474365234375, "rewards/rejected": -6.278736114501953, "step": 1701 }, { "epoch": 0.94, "learning_rate": 5.688096258132309e-07, "logits/chosen": -6.004752159118652, "logits/rejected": -6.025112628936768, "logps/chosen": -303.37152099609375, "logps/rejected": -212.45809936523438, "loss": 0.1483, "rewards/accuracies": 1.0, "rewards/chosen": 6.0625081062316895, "rewards/margins": 10.830415725708008, "rewards/rejected": -4.767908096313477, "step": 1702 }, { "epoch": 0.95, "learning_rate": 5.683641784830544e-07, "logits/chosen": -5.983229637145996, "logits/rejected": -6.0156097412109375, "logps/chosen": -559.2811279296875, "logps/rejected": -259.67578125, "loss": 0.0513, "rewards/accuracies": 0.9375, "rewards/chosen": 4.900753021240234, "rewards/margins": 12.244871139526367, "rewards/rejected": -7.344118595123291, "step": 1703 }, { "epoch": 0.95, "learning_rate": 5.679186758521124e-07, "logits/chosen": -6.076374053955078, "logits/rejected": -6.0754241943359375, "logps/chosen": -318.7716064453125, "logps/rejected": -227.55563354492188, "loss": 0.0825, "rewards/accuracies": 0.9375, "rewards/chosen": 6.161821365356445, "rewards/margins": 12.475669860839844, "rewards/rejected": -6.313848495483398, "step": 1704 }, { "epoch": 0.95, "learning_rate": 5.67473118280778e-07, "logits/chosen": -6.018278121948242, "logits/rejected": -6.1088151931762695, "logps/chosen": -232.03890991210938, "logps/rejected": -240.87831115722656, "loss": 0.0942, "rewards/accuracies": 0.9375, "rewards/chosen": 3.7984120845794678, "rewards/margins": 9.803409576416016, "rewards/rejected": -6.0049967765808105, "step": 1705 }, { "epoch": 0.95, "learning_rate": 5.670275061294696e-07, "logits/chosen": -5.993927001953125, "logits/rejected": -6.068212509155273, "logps/chosen": -250.81671142578125, "logps/rejected": -197.90719604492188, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 6.616723537445068, "rewards/margins": 10.986177444458008, "rewards/rejected": -4.369453430175781, "step": 1706 }, { "epoch": 0.95, "learning_rate": 5.665818397586491e-07, "logits/chosen": -6.038577556610107, "logits/rejected": -6.017710208892822, "logps/chosen": -250.56527709960938, "logps/rejected": -238.33526611328125, "loss": 0.0549, "rewards/accuracies": 0.9375, "rewards/chosen": 3.084714889526367, "rewards/margins": 9.492712020874023, "rewards/rejected": -6.4079976081848145, "step": 1707 }, { "epoch": 0.95, "learning_rate": 5.661361195288222e-07, "logits/chosen": -6.052594184875488, "logits/rejected": -6.061879634857178, "logps/chosen": -244.0168914794922, "logps/rejected": -391.445068359375, "loss": 0.1273, "rewards/accuracies": 0.9375, "rewards/chosen": 3.049630880355835, "rewards/margins": 10.026989936828613, "rewards/rejected": -6.977359771728516, "step": 1708 }, { "epoch": 0.95, "learning_rate": 5.656903458005384e-07, "logits/chosen": -6.060481548309326, "logits/rejected": -6.037837028503418, "logps/chosen": -286.1758117675781, "logps/rejected": -240.4217529296875, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": 4.923750400543213, "rewards/margins": 10.941208839416504, "rewards/rejected": -6.017458915710449, "step": 1709 }, { "epoch": 0.95, "learning_rate": 5.652445189343908e-07, "logits/chosen": -6.060173034667969, "logits/rejected": -6.055638313293457, "logps/chosen": -214.8919677734375, "logps/rejected": -222.76364135742188, "loss": 0.0896, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1366214752197266, "rewards/margins": 9.474306106567383, "rewards/rejected": -7.337684631347656, "step": 1710 }, { "epoch": 0.95, "learning_rate": 5.647986392910149e-07, "logits/chosen": -6.065427780151367, "logits/rejected": -6.003887176513672, "logps/chosen": -377.794921875, "logps/rejected": -267.34893798828125, "loss": 0.1546, "rewards/accuracies": 1.0, "rewards/chosen": 2.991891384124756, "rewards/margins": 9.204052925109863, "rewards/rejected": -6.212161540985107, "step": 1711 }, { "epoch": 0.95, "learning_rate": 5.64352707231089e-07, "logits/chosen": -6.079336166381836, "logits/rejected": -6.113300800323486, "logps/chosen": -212.64820861816406, "logps/rejected": -230.4220733642578, "loss": 0.0966, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4550843238830566, "rewards/margins": 9.937385559082031, "rewards/rejected": -6.482301712036133, "step": 1712 }, { "epoch": 0.95, "learning_rate": 5.639067231153345e-07, "logits/chosen": -6.07692813873291, "logits/rejected": -5.988956928253174, "logps/chosen": -289.99652099609375, "logps/rejected": -155.18746948242188, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": 6.29909610748291, "rewards/margins": 9.552812576293945, "rewards/rejected": -3.253716230392456, "step": 1713 }, { "epoch": 0.95, "learning_rate": 5.63460687304514e-07, "logits/chosen": -6.060111045837402, "logits/rejected": -6.06573486328125, "logps/chosen": -203.68896484375, "logps/rejected": -251.35052490234375, "loss": 0.0813, "rewards/accuracies": 0.9375, "rewards/chosen": 2.0316240787506104, "rewards/margins": 9.575860023498535, "rewards/rejected": -7.544236183166504, "step": 1714 }, { "epoch": 0.95, "learning_rate": 5.630146001594321e-07, "logits/chosen": -6.0594868659973145, "logits/rejected": -5.968243598937988, "logps/chosen": -248.15789794921875, "logps/rejected": -108.33037567138672, "loss": 0.1131, "rewards/accuracies": 1.0, "rewards/chosen": 5.890284061431885, "rewards/margins": 9.787765502929688, "rewards/rejected": -3.8974807262420654, "step": 1715 }, { "epoch": 0.95, "learning_rate": 5.625684620409353e-07, "logits/chosen": -6.100727081298828, "logits/rejected": -6.031149387359619, "logps/chosen": -232.20535278320312, "logps/rejected": -186.85635375976562, "loss": 0.2176, "rewards/accuracies": 1.0, "rewards/chosen": 5.278368949890137, "rewards/margins": 8.774920463562012, "rewards/rejected": -3.496551036834717, "step": 1716 }, { "epoch": 0.95, "learning_rate": 5.621222733099111e-07, "logits/chosen": -5.979246139526367, "logits/rejected": -5.986502170562744, "logps/chosen": -181.61614990234375, "logps/rejected": -126.36569213867188, "loss": 0.1064, "rewards/accuracies": 1.0, "rewards/chosen": 3.948385238647461, "rewards/margins": 7.542776107788086, "rewards/rejected": -3.594390392303467, "step": 1717 }, { "epoch": 0.95, "learning_rate": 5.61676034327288e-07, "logits/chosen": -6.121935844421387, "logits/rejected": -5.968808650970459, "logps/chosen": -285.17437744140625, "logps/rejected": -217.25189208984375, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 3.6432108879089355, "rewards/margins": 11.300409317016602, "rewards/rejected": -7.657198905944824, "step": 1718 }, { "epoch": 0.95, "learning_rate": 5.612297454540351e-07, "logits/chosen": -6.021277904510498, "logits/rejected": -5.986725330352783, "logps/chosen": -263.98590087890625, "logps/rejected": -243.15145874023438, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 2.5184102058410645, "rewards/margins": 10.340777397155762, "rewards/rejected": -7.822366714477539, "step": 1719 }, { "epoch": 0.95, "learning_rate": 5.607834070511619e-07, "logits/chosen": -6.230771064758301, "logits/rejected": -6.1452507972717285, "logps/chosen": -235.73257446289062, "logps/rejected": -184.9095001220703, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": 3.789754867553711, "rewards/margins": 11.347652435302734, "rewards/rejected": -7.557898044586182, "step": 1720 }, { "epoch": 0.96, "learning_rate": 5.60337019479718e-07, "logits/chosen": -6.093498229980469, "logits/rejected": -6.050182342529297, "logps/chosen": -235.94192504882812, "logps/rejected": -243.06466674804688, "loss": 0.0576, "rewards/accuracies": 0.9375, "rewards/chosen": 3.772261619567871, "rewards/margins": 9.778432846069336, "rewards/rejected": -6.006170749664307, "step": 1721 }, { "epoch": 0.96, "learning_rate": 5.598905831007925e-07, "logits/chosen": -6.116461753845215, "logits/rejected": -6.0691680908203125, "logps/chosen": -209.3686981201172, "logps/rejected": -178.42250061035156, "loss": 0.1182, "rewards/accuracies": 1.0, "rewards/chosen": 2.8810012340545654, "rewards/margins": 9.460474014282227, "rewards/rejected": -6.579473495483398, "step": 1722 }, { "epoch": 0.96, "learning_rate": 5.594440982755145e-07, "logits/chosen": -5.999689102172852, "logits/rejected": -6.095966815948486, "logps/chosen": -257.3532409667969, "logps/rejected": -275.64581298828125, "loss": 0.2144, "rewards/accuracies": 1.0, "rewards/chosen": 5.335100173950195, "rewards/margins": 12.27045726776123, "rewards/rejected": -6.935357093811035, "step": 1723 }, { "epoch": 0.96, "learning_rate": 5.589975653650516e-07, "logits/chosen": -5.980070114135742, "logits/rejected": -5.9726386070251465, "logps/chosen": -318.661865234375, "logps/rejected": -172.4778289794922, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": 6.651203632354736, "rewards/margins": 9.698352813720703, "rewards/rejected": -3.047149658203125, "step": 1724 }, { "epoch": 0.96, "learning_rate": 5.585509847306111e-07, "logits/chosen": -6.066951751708984, "logits/rejected": -6.036312103271484, "logps/chosen": -386.8587951660156, "logps/rejected": -214.55093383789062, "loss": 0.1109, "rewards/accuracies": 0.875, "rewards/chosen": 2.414067029953003, "rewards/margins": 8.106562614440918, "rewards/rejected": -5.692495346069336, "step": 1725 }, { "epoch": 0.96, "learning_rate": 5.581043567334382e-07, "logits/chosen": -6.155275821685791, "logits/rejected": -6.026451110839844, "logps/chosen": -291.753173828125, "logps/rejected": -167.1681365966797, "loss": 0.0711, "rewards/accuracies": 1.0, "rewards/chosen": 5.618697166442871, "rewards/margins": 11.622634887695312, "rewards/rejected": -6.003937721252441, "step": 1726 }, { "epoch": 0.96, "learning_rate": 5.576576817348168e-07, "logits/chosen": -5.955081939697266, "logits/rejected": -6.065281391143799, "logps/chosen": -308.8512268066406, "logps/rejected": -282.62640380859375, "loss": 0.1154, "rewards/accuracies": 0.875, "rewards/chosen": 1.6788594722747803, "rewards/margins": 6.4368977546691895, "rewards/rejected": -4.758038520812988, "step": 1727 }, { "epoch": 0.96, "learning_rate": 5.572109600960689e-07, "logits/chosen": -5.937175273895264, "logits/rejected": -5.978873252868652, "logps/chosen": -166.73675537109375, "logps/rejected": -100.77214813232422, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": 2.46315860748291, "rewards/margins": 7.457557678222656, "rewards/rejected": -4.994398593902588, "step": 1728 }, { "epoch": 0.96, "learning_rate": 5.567641921785536e-07, "logits/chosen": -5.960906505584717, "logits/rejected": -6.011504173278809, "logps/chosen": -172.11474609375, "logps/rejected": -242.76968383789062, "loss": 0.1224, "rewards/accuracies": 0.9375, "rewards/chosen": 2.090827465057373, "rewards/margins": 10.278005599975586, "rewards/rejected": -8.187177658081055, "step": 1729 }, { "epoch": 0.96, "learning_rate": 5.563173783436683e-07, "logits/chosen": -6.118989944458008, "logits/rejected": -6.0743794441223145, "logps/chosen": -268.8447570800781, "logps/rejected": -246.7779998779297, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 5.085609436035156, "rewards/margins": 10.67564582824707, "rewards/rejected": -5.590036392211914, "step": 1730 }, { "epoch": 0.96, "learning_rate": 5.558705189528469e-07, "logits/chosen": -5.8825907707214355, "logits/rejected": -5.994557857513428, "logps/chosen": -398.38909912109375, "logps/rejected": -395.1163635253906, "loss": 0.0573, "rewards/accuracies": 0.9375, "rewards/chosen": 5.860487937927246, "rewards/margins": 8.575429916381836, "rewards/rejected": -2.714942216873169, "step": 1731 }, { "epoch": 0.96, "learning_rate": 5.554236143675604e-07, "logits/chosen": -6.059482574462891, "logits/rejected": -5.989377021789551, "logps/chosen": -238.25869750976562, "logps/rejected": -195.1131591796875, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": 3.7015292644500732, "rewards/margins": 9.535395622253418, "rewards/rejected": -5.833866596221924, "step": 1732 }, { "epoch": 0.96, "learning_rate": 5.549766649493165e-07, "logits/chosen": -6.020595550537109, "logits/rejected": -6.024684906005859, "logps/chosen": -303.675048828125, "logps/rejected": -139.88656616210938, "loss": 0.1327, "rewards/accuracies": 0.9375, "rewards/chosen": 5.305063247680664, "rewards/margins": 9.176942825317383, "rewards/rejected": -3.871880054473877, "step": 1733 }, { "epoch": 0.96, "learning_rate": 5.54529671059659e-07, "logits/chosen": -6.1357879638671875, "logits/rejected": -6.000717639923096, "logps/chosen": -357.8458251953125, "logps/rejected": -111.11077117919922, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": 6.3479323387146, "rewards/margins": 9.436260223388672, "rewards/rejected": -3.088327407836914, "step": 1734 }, { "epoch": 0.96, "learning_rate": 5.540826330601673e-07, "logits/chosen": -6.081478118896484, "logits/rejected": -6.045555114746094, "logps/chosen": -252.00772094726562, "logps/rejected": -218.2467803955078, "loss": 0.132, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1179816722869873, "rewards/margins": 6.616425514221191, "rewards/rejected": -4.498443603515625, "step": 1735 }, { "epoch": 0.96, "learning_rate": 5.536355513124573e-07, "logits/chosen": -6.045331001281738, "logits/rejected": -6.038725852966309, "logps/chosen": -299.6036376953125, "logps/rejected": -166.5810546875, "loss": 0.1055, "rewards/accuracies": 1.0, "rewards/chosen": 5.087450981140137, "rewards/margins": 11.170979499816895, "rewards/rejected": -6.083529472351074, "step": 1736 }, { "epoch": 0.96, "learning_rate": 5.531884261781796e-07, "logits/chosen": -5.936446189880371, "logits/rejected": -5.867033004760742, "logps/chosen": -329.31182861328125, "logps/rejected": -549.4730224609375, "loss": 0.2731, "rewards/accuracies": 0.875, "rewards/chosen": 2.3294646739959717, "rewards/margins": 6.5792765617370605, "rewards/rejected": -4.249812126159668, "step": 1737 }, { "epoch": 0.96, "learning_rate": 5.527412580190203e-07, "logits/chosen": -6.0780792236328125, "logits/rejected": -6.089186668395996, "logps/chosen": -435.2736511230469, "logps/rejected": -375.1732177734375, "loss": 0.1321, "rewards/accuracies": 0.875, "rewards/chosen": 4.820488452911377, "rewards/margins": 7.034019470214844, "rewards/rejected": -2.213531017303467, "step": 1738 }, { "epoch": 0.97, "learning_rate": 5.522940471966998e-07, "logits/chosen": -5.960508346557617, "logits/rejected": -5.921926021575928, "logps/chosen": -356.068603515625, "logps/rejected": -176.3830108642578, "loss": 0.1497, "rewards/accuracies": 1.0, "rewards/chosen": 4.862730979919434, "rewards/margins": 8.418228149414062, "rewards/rejected": -3.5554962158203125, "step": 1739 }, { "epoch": 0.97, "learning_rate": 5.518467940729739e-07, "logits/chosen": -6.033069610595703, "logits/rejected": -5.991218566894531, "logps/chosen": -310.73870849609375, "logps/rejected": -218.90850830078125, "loss": 0.1906, "rewards/accuracies": 1.0, "rewards/chosen": 2.600318670272827, "rewards/margins": 9.250263214111328, "rewards/rejected": -6.649944305419922, "step": 1740 }, { "epoch": 0.97, "learning_rate": 5.513994990096317e-07, "logits/chosen": -5.9801435470581055, "logits/rejected": -6.070993423461914, "logps/chosen": -226.63946533203125, "logps/rejected": -154.25357055664062, "loss": 0.2051, "rewards/accuracies": 0.9375, "rewards/chosen": 3.919178009033203, "rewards/margins": 8.264886856079102, "rewards/rejected": -4.345708847045898, "step": 1741 }, { "epoch": 0.97, "learning_rate": 5.509521623684967e-07, "logits/chosen": -6.070733547210693, "logits/rejected": -6.061666488647461, "logps/chosen": -230.67474365234375, "logps/rejected": -210.35537719726562, "loss": 0.0802, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4693965911865234, "rewards/margins": 9.869250297546387, "rewards/rejected": -7.399853706359863, "step": 1742 }, { "epoch": 0.97, "learning_rate": 5.505047845114257e-07, "logits/chosen": -6.134788990020752, "logits/rejected": -6.163646697998047, "logps/chosen": -329.51739501953125, "logps/rejected": -378.7964782714844, "loss": 0.099, "rewards/accuracies": 0.9375, "rewards/chosen": 5.019062519073486, "rewards/margins": 9.925071716308594, "rewards/rejected": -4.906009674072266, "step": 1743 }, { "epoch": 0.97, "learning_rate": 5.500573658003096e-07, "logits/chosen": -6.055116653442383, "logits/rejected": -5.9980549812316895, "logps/chosen": -233.5533447265625, "logps/rejected": -132.40023803710938, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": 5.414774417877197, "rewards/margins": 8.615896224975586, "rewards/rejected": -3.2011220455169678, "step": 1744 }, { "epoch": 0.97, "learning_rate": 5.496099065970713e-07, "logits/chosen": -6.091683387756348, "logits/rejected": -6.006340980529785, "logps/chosen": -323.0792236328125, "logps/rejected": -168.2088165283203, "loss": 0.1255, "rewards/accuracies": 0.9375, "rewards/chosen": 4.644017219543457, "rewards/margins": 10.073871612548828, "rewards/rejected": -5.4298553466796875, "step": 1745 }, { "epoch": 0.97, "learning_rate": 5.491624072636674e-07, "logits/chosen": -6.020832061767578, "logits/rejected": -5.966015338897705, "logps/chosen": -501.643310546875, "logps/rejected": -251.74331665039062, "loss": 0.0649, "rewards/accuracies": 0.9375, "rewards/chosen": 6.078819274902344, "rewards/margins": 9.429304122924805, "rewards/rejected": -3.350484848022461, "step": 1746 }, { "epoch": 0.97, "learning_rate": 5.487148681620861e-07, "logits/chosen": -6.053314208984375, "logits/rejected": -6.064682483673096, "logps/chosen": -387.930908203125, "logps/rejected": -369.8958740234375, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": 2.565434455871582, "rewards/margins": 8.70631217956543, "rewards/rejected": -6.1408772468566895, "step": 1747 }, { "epoch": 0.97, "learning_rate": 5.482672896543487e-07, "logits/chosen": -5.883838653564453, "logits/rejected": -5.952502250671387, "logps/chosen": -266.3194885253906, "logps/rejected": -352.6053161621094, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": 4.347149848937988, "rewards/margins": 8.855294227600098, "rewards/rejected": -4.508144378662109, "step": 1748 }, { "epoch": 0.97, "learning_rate": 5.478196721025073e-07, "logits/chosen": -6.076763153076172, "logits/rejected": -6.135603904724121, "logps/chosen": -232.51976013183594, "logps/rejected": -231.39987182617188, "loss": 0.1124, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9670166969299316, "rewards/margins": 10.33535385131836, "rewards/rejected": -6.3683366775512695, "step": 1749 }, { "epoch": 0.97, "learning_rate": 5.473720158686466e-07, "logits/chosen": -6.080636024475098, "logits/rejected": -5.994876861572266, "logps/chosen": -324.75927734375, "logps/rejected": -163.03707885742188, "loss": 0.0598, "rewards/accuracies": 0.9375, "rewards/chosen": 4.939268589019775, "rewards/margins": 9.360408782958984, "rewards/rejected": -4.421140670776367, "step": 1750 }, { "epoch": 0.97, "learning_rate": 5.469243213148821e-07, "logits/chosen": -6.053596496582031, "logits/rejected": -6.09262228012085, "logps/chosen": -226.68820190429688, "logps/rejected": -228.97470092773438, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": 2.600846767425537, "rewards/margins": 9.860485076904297, "rewards/rejected": -7.259636878967285, "step": 1751 }, { "epoch": 0.97, "learning_rate": 5.464765888033601e-07, "logits/chosen": -6.000123977661133, "logits/rejected": -6.103403091430664, "logps/chosen": -380.31146240234375, "logps/rejected": -226.42881774902344, "loss": 0.1277, "rewards/accuracies": 1.0, "rewards/chosen": 6.550181865692139, "rewards/margins": 11.011163711547852, "rewards/rejected": -4.460981369018555, "step": 1752 }, { "epoch": 0.97, "learning_rate": 5.460288186962583e-07, "logits/chosen": -6.0967512130737305, "logits/rejected": -6.035017490386963, "logps/chosen": -339.26690673828125, "logps/rejected": -138.21649169921875, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/chosen": 5.985968589782715, "rewards/margins": 11.046777725219727, "rewards/rejected": -5.06080961227417, "step": 1753 }, { "epoch": 0.97, "learning_rate": 5.455810113557839e-07, "logits/chosen": -6.107752799987793, "logits/rejected": -5.915765762329102, "logps/chosen": -324.0242919921875, "logps/rejected": -200.53903198242188, "loss": 0.1255, "rewards/accuracies": 1.0, "rewards/chosen": 5.7387895584106445, "rewards/margins": 13.479515075683594, "rewards/rejected": -7.740725517272949, "step": 1754 }, { "epoch": 0.97, "learning_rate": 5.45133167144175e-07, "logits/chosen": -6.006989002227783, "logits/rejected": -5.981245517730713, "logps/chosen": -279.8411865234375, "logps/rejected": -197.72512817382812, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": 4.845150947570801, "rewards/margins": 9.565816879272461, "rewards/rejected": -4.720666408538818, "step": 1755 }, { "epoch": 0.97, "learning_rate": 5.44685286423699e-07, "logits/chosen": -6.077478408813477, "logits/rejected": -5.9635210037231445, "logps/chosen": -264.5694580078125, "logps/rejected": -155.19436645507812, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 4.193822860717773, "rewards/margins": 9.272444725036621, "rewards/rejected": -5.078622341156006, "step": 1756 }, { "epoch": 0.98, "learning_rate": 5.442373695566534e-07, "logits/chosen": -6.085696697235107, "logits/rejected": -6.025036811828613, "logps/chosen": -176.32015991210938, "logps/rejected": -209.7735137939453, "loss": 0.1737, "rewards/accuracies": 0.875, "rewards/chosen": 2.4368138313293457, "rewards/margins": 7.548969268798828, "rewards/rejected": -5.112155437469482, "step": 1757 }, { "epoch": 0.98, "learning_rate": 5.437894169053641e-07, "logits/chosen": -6.1440229415893555, "logits/rejected": -6.119662284851074, "logps/chosen": -334.3770751953125, "logps/rejected": -268.3811340332031, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": 5.078064441680908, "rewards/margins": 9.959487915039062, "rewards/rejected": -4.881423473358154, "step": 1758 }, { "epoch": 0.98, "learning_rate": 5.433414288321867e-07, "logits/chosen": -6.040040969848633, "logits/rejected": -6.130155563354492, "logps/chosen": -267.6120300292969, "logps/rejected": -276.7823486328125, "loss": 0.3377, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5561954975128174, "rewards/margins": 8.344185829162598, "rewards/rejected": -4.787990093231201, "step": 1759 }, { "epoch": 0.98, "learning_rate": 5.428934056995051e-07, "logits/chosen": -5.993177890777588, "logits/rejected": -5.952477931976318, "logps/chosen": -581.8359375, "logps/rejected": -393.17108154296875, "loss": 0.1802, "rewards/accuracies": 0.9375, "rewards/chosen": 2.872494697570801, "rewards/margins": 8.69118595123291, "rewards/rejected": -5.818691253662109, "step": 1760 }, { "epoch": 0.98, "learning_rate": 5.424453478697321e-07, "logits/chosen": -6.0208587646484375, "logits/rejected": -5.934541702270508, "logps/chosen": -264.15399169921875, "logps/rejected": -193.76531982421875, "loss": 0.1289, "rewards/accuracies": 1.0, "rewards/chosen": 3.8061435222625732, "rewards/margins": 8.145981788635254, "rewards/rejected": -4.33983850479126, "step": 1761 }, { "epoch": 0.98, "learning_rate": 5.419972557053073e-07, "logits/chosen": -6.027193069458008, "logits/rejected": -6.105833530426025, "logps/chosen": -372.34442138671875, "logps/rejected": -328.0779113769531, "loss": 0.1749, "rewards/accuracies": 1.0, "rewards/chosen": 4.264946937561035, "rewards/margins": 9.451692581176758, "rewards/rejected": -5.186746120452881, "step": 1762 }, { "epoch": 0.98, "learning_rate": 5.415491295686994e-07, "logits/chosen": -5.971102714538574, "logits/rejected": -6.039644718170166, "logps/chosen": -353.03729248046875, "logps/rejected": -305.7369079589844, "loss": 0.1215, "rewards/accuracies": 0.9375, "rewards/chosen": 2.493971347808838, "rewards/margins": 8.344425201416016, "rewards/rejected": -5.850454330444336, "step": 1763 }, { "epoch": 0.98, "learning_rate": 5.411009698224041e-07, "logits/chosen": -5.975619316101074, "logits/rejected": -6.014265537261963, "logps/chosen": -554.0324096679688, "logps/rejected": -520.143798828125, "loss": 0.1059, "rewards/accuracies": 0.875, "rewards/chosen": 1.170736312866211, "rewards/margins": 9.212665557861328, "rewards/rejected": -8.041929244995117, "step": 1764 }, { "epoch": 0.98, "learning_rate": 5.406527768289441e-07, "logits/chosen": -6.0996174812316895, "logits/rejected": -6.152000904083252, "logps/chosen": -175.34046936035156, "logps/rejected": -232.63623046875, "loss": 0.1106, "rewards/accuracies": 0.8125, "rewards/chosen": 0.8092444539070129, "rewards/margins": 7.4260711669921875, "rewards/rejected": -6.616827487945557, "step": 1765 }, { "epoch": 0.98, "learning_rate": 5.402045509508691e-07, "logits/chosen": -6.075286865234375, "logits/rejected": -6.182805061340332, "logps/chosen": -346.1241760253906, "logps/rejected": -210.02279663085938, "loss": 0.1314, "rewards/accuracies": 1.0, "rewards/chosen": 4.291744232177734, "rewards/margins": 9.315752983093262, "rewards/rejected": -5.024008750915527, "step": 1766 }, { "epoch": 0.98, "learning_rate": 5.397562925507555e-07, "logits/chosen": -6.1344218254089355, "logits/rejected": -5.972559452056885, "logps/chosen": -330.64599609375, "logps/rejected": -241.87782287597656, "loss": 0.2293, "rewards/accuracies": 0.9375, "rewards/chosen": 3.625816822052002, "rewards/margins": 8.81809139251709, "rewards/rejected": -5.192275524139404, "step": 1767 }, { "epoch": 0.98, "learning_rate": 5.393080019912061e-07, "logits/chosen": -6.0352983474731445, "logits/rejected": -5.985045433044434, "logps/chosen": -205.23817443847656, "logps/rejected": -215.16607666015625, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": 3.4668054580688477, "rewards/margins": 8.330798149108887, "rewards/rejected": -4.863992691040039, "step": 1768 }, { "epoch": 0.98, "learning_rate": 5.388596796348494e-07, "logits/chosen": -5.9087653160095215, "logits/rejected": -6.11488151550293, "logps/chosen": -279.7240905761719, "logps/rejected": -269.5967102050781, "loss": 0.1488, "rewards/accuracies": 1.0, "rewards/chosen": 5.786014556884766, "rewards/margins": 11.21630573272705, "rewards/rejected": -5.430291175842285, "step": 1769 }, { "epoch": 0.98, "learning_rate": 5.384113258443398e-07, "logits/chosen": -6.0474066734313965, "logits/rejected": -5.955110549926758, "logps/chosen": -280.3716125488281, "logps/rejected": -153.76206970214844, "loss": 0.1964, "rewards/accuracies": 1.0, "rewards/chosen": 4.7572021484375, "rewards/margins": 9.651796340942383, "rewards/rejected": -4.894594669342041, "step": 1770 }, { "epoch": 0.98, "learning_rate": 5.379629409823571e-07, "logits/chosen": -5.960622310638428, "logits/rejected": -5.97377872467041, "logps/chosen": -227.83863830566406, "logps/rejected": -407.30718994140625, "loss": 0.1374, "rewards/accuracies": 1.0, "rewards/chosen": 2.5162644386291504, "rewards/margins": 8.22326946258545, "rewards/rejected": -5.707004547119141, "step": 1771 }, { "epoch": 0.98, "learning_rate": 5.375145254116065e-07, "logits/chosen": -6.089793682098389, "logits/rejected": -6.034579277038574, "logps/chosen": -230.10867309570312, "logps/rejected": -112.90684509277344, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": 3.572538375854492, "rewards/margins": 9.099968910217285, "rewards/rejected": -5.527430534362793, "step": 1772 }, { "epoch": 0.98, "learning_rate": 5.370660794948173e-07, "logits/chosen": -5.983413219451904, "logits/rejected": -6.0162553787231445, "logps/chosen": -188.06817626953125, "logps/rejected": -285.6835021972656, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": 3.81246018409729, "rewards/margins": 9.503846168518066, "rewards/rejected": -5.6913862228393555, "step": 1773 }, { "epoch": 0.98, "learning_rate": 5.366176035947444e-07, "logits/chosen": -6.14173698425293, "logits/rejected": -6.0814666748046875, "logps/chosen": -240.16981506347656, "logps/rejected": -178.03628540039062, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": 3.2566041946411133, "rewards/margins": 9.797295570373535, "rewards/rejected": -6.540690898895264, "step": 1774 }, { "epoch": 0.99, "learning_rate": 5.361690980741662e-07, "logits/chosen": -6.052424907684326, "logits/rejected": -6.015815734863281, "logps/chosen": -529.0465087890625, "logps/rejected": -304.69232177734375, "loss": 0.1071, "rewards/accuracies": 0.9375, "rewards/chosen": 3.694235324859619, "rewards/margins": 8.701213836669922, "rewards/rejected": -5.0069780349731445, "step": 1775 }, { "epoch": 0.99, "learning_rate": 5.357205632958851e-07, "logits/chosen": -6.065710067749023, "logits/rejected": -6.040406227111816, "logps/chosen": -160.8578643798828, "logps/rejected": -204.7766571044922, "loss": 0.0754, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0712980031967163, "rewards/margins": 9.256240844726562, "rewards/rejected": -8.184942245483398, "step": 1776 }, { "epoch": 0.99, "learning_rate": 5.352719996227272e-07, "logits/chosen": -6.160549640655518, "logits/rejected": -6.134790897369385, "logps/chosen": -200.45147705078125, "logps/rejected": -147.0968017578125, "loss": 0.1059, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5794897079467773, "rewards/margins": 8.538986206054688, "rewards/rejected": -4.959496021270752, "step": 1777 }, { "epoch": 0.99, "learning_rate": 5.348234074175427e-07, "logits/chosen": -6.0922441482543945, "logits/rejected": -6.099526882171631, "logps/chosen": -246.48023986816406, "logps/rejected": -176.42453002929688, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": 5.689099311828613, "rewards/margins": 9.099736213684082, "rewards/rejected": -3.4106369018554688, "step": 1778 }, { "epoch": 0.99, "learning_rate": 5.343747870432037e-07, "logits/chosen": -6.05610990524292, "logits/rejected": -6.034100532531738, "logps/chosen": -288.02154541015625, "logps/rejected": -141.57046508789062, "loss": 0.1664, "rewards/accuracies": 0.9375, "rewards/chosen": 4.8394293785095215, "rewards/margins": 9.106836318969727, "rewards/rejected": -4.267406463623047, "step": 1779 }, { "epoch": 0.99, "learning_rate": 5.33926138862606e-07, "logits/chosen": -6.0863847732543945, "logits/rejected": -6.074906349182129, "logps/chosen": -303.73748779296875, "logps/rejected": -165.09652709960938, "loss": 0.165, "rewards/accuracies": 0.9375, "rewards/chosen": 5.3330817222595215, "rewards/margins": 8.410788536071777, "rewards/rejected": -3.077707290649414, "step": 1780 }, { "epoch": 0.99, "learning_rate": 5.334774632386671e-07, "logits/chosen": -6.066429615020752, "logits/rejected": -6.088163375854492, "logps/chosen": -225.9710693359375, "logps/rejected": -250.40817260742188, "loss": 0.2093, "rewards/accuracies": 1.0, "rewards/chosen": 1.7199763059616089, "rewards/margins": 7.8949971199035645, "rewards/rejected": -6.175021171569824, "step": 1781 }, { "epoch": 0.99, "learning_rate": 5.330287605343279e-07, "logits/chosen": -6.154438018798828, "logits/rejected": -6.037128925323486, "logps/chosen": -323.70318603515625, "logps/rejected": -287.4939880371094, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": 4.009980201721191, "rewards/margins": 12.314352989196777, "rewards/rejected": -8.304372787475586, "step": 1782 }, { "epoch": 0.99, "learning_rate": 5.325800311125497e-07, "logits/chosen": -6.130349159240723, "logits/rejected": -6.029362678527832, "logps/chosen": -231.93177795410156, "logps/rejected": -164.32212829589844, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": 4.123553276062012, "rewards/margins": 11.765924453735352, "rewards/rejected": -7.64237117767334, "step": 1783 }, { "epoch": 0.99, "learning_rate": 5.321312753363167e-07, "logits/chosen": -6.140496253967285, "logits/rejected": -6.109273433685303, "logps/chosen": -167.45013427734375, "logps/rejected": -106.63610076904297, "loss": 0.1396, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7653002738952637, "rewards/margins": 7.042107582092285, "rewards/rejected": -5.276806831359863, "step": 1784 }, { "epoch": 0.99, "learning_rate": 5.316824935686335e-07, "logits/chosen": -6.095839500427246, "logits/rejected": -5.986178874969482, "logps/chosen": -389.1156921386719, "logps/rejected": -177.7682647705078, "loss": 0.0896, "rewards/accuracies": 0.9375, "rewards/chosen": 5.025352954864502, "rewards/margins": 9.261821746826172, "rewards/rejected": -4.236468315124512, "step": 1785 }, { "epoch": 0.99, "learning_rate": 5.312336861725265e-07, "logits/chosen": -6.031859874725342, "logits/rejected": -6.007116317749023, "logps/chosen": -246.8549041748047, "logps/rejected": -182.6326141357422, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": 1.6565583944320679, "rewards/margins": 8.611536979675293, "rewards/rejected": -6.954977989196777, "step": 1786 }, { "epoch": 0.99, "learning_rate": 5.307848535110422e-07, "logits/chosen": -6.033936500549316, "logits/rejected": -6.125017166137695, "logps/chosen": -270.4765319824219, "logps/rejected": -318.7375183105469, "loss": 0.0714, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9741249084472656, "rewards/margins": 7.595946311950684, "rewards/rejected": -3.621821880340576, "step": 1787 }, { "epoch": 0.99, "learning_rate": 5.303359959472479e-07, "logits/chosen": -6.012404918670654, "logits/rejected": -5.970884799957275, "logps/chosen": -428.156494140625, "logps/rejected": -247.81219482421875, "loss": 0.1843, "rewards/accuracies": 0.875, "rewards/chosen": 4.765674591064453, "rewards/margins": 9.307014465332031, "rewards/rejected": -4.54133939743042, "step": 1788 }, { "epoch": 0.99, "learning_rate": 5.298871138442307e-07, "logits/chosen": -6.106619834899902, "logits/rejected": -6.055088043212891, "logps/chosen": -214.39620971679688, "logps/rejected": -271.99420166015625, "loss": 0.0714, "rewards/accuracies": 1.0, "rewards/chosen": 1.6269638538360596, "rewards/margins": 9.667224884033203, "rewards/rejected": -8.040260314941406, "step": 1789 }, { "epoch": 0.99, "learning_rate": 5.294382075650981e-07, "logits/chosen": -6.042771339416504, "logits/rejected": -6.090173244476318, "logps/chosen": -219.26087951660156, "logps/rejected": -210.63934326171875, "loss": 0.0852, "rewards/accuracies": 0.875, "rewards/chosen": 2.1552205085754395, "rewards/margins": 8.054862976074219, "rewards/rejected": -5.899641990661621, "step": 1790 }, { "epoch": 0.99, "learning_rate": 5.289892774729766e-07, "logits/chosen": -6.189848899841309, "logits/rejected": -5.976567268371582, "logps/chosen": -277.1036376953125, "logps/rejected": -124.62348937988281, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": 3.8925890922546387, "rewards/margins": 9.819205284118652, "rewards/rejected": -5.9266157150268555, "step": 1791 }, { "epoch": 0.99, "learning_rate": 5.285403239310122e-07, "logits/chosen": -5.993503093719482, "logits/rejected": -5.973652362823486, "logps/chosen": -249.13058471679688, "logps/rejected": -206.55947875976562, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 3.4751181602478027, "rewards/margins": 8.765192985534668, "rewards/rejected": -5.290075302124023, "step": 1792 }, { "epoch": 1.0, "learning_rate": 5.280913473023701e-07, "logits/chosen": -6.05924654006958, "logits/rejected": -6.111875534057617, "logps/chosen": -269.559814453125, "logps/rejected": -216.08047485351562, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 5.336367130279541, "rewards/margins": 11.940631866455078, "rewards/rejected": -6.604264736175537, "step": 1793 }, { "epoch": 1.0, "learning_rate": 5.276423479502338e-07, "logits/chosen": -6.121520042419434, "logits/rejected": -6.064853668212891, "logps/chosen": -155.28224182128906, "logps/rejected": -168.59918212890625, "loss": 0.1215, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8153560161590576, "rewards/margins": 7.990162372589111, "rewards/rejected": -6.174806118011475, "step": 1794 }, { "epoch": 1.0, "learning_rate": 5.271933262378053e-07, "logits/chosen": -6.085386753082275, "logits/rejected": -5.931347370147705, "logps/chosen": -352.0317687988281, "logps/rejected": -112.78047943115234, "loss": 0.2349, "rewards/accuracies": 0.9375, "rewards/chosen": 5.266853332519531, "rewards/margins": 8.830245971679688, "rewards/rejected": -3.5633931159973145, "step": 1795 }, { "epoch": 1.0, "learning_rate": 5.267442825283047e-07, "logits/chosen": -6.004005432128906, "logits/rejected": -6.076939582824707, "logps/chosen": -251.55783081054688, "logps/rejected": -233.39430236816406, "loss": 0.1292, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6216049194335938, "rewards/margins": 7.414919376373291, "rewards/rejected": -3.7933146953582764, "step": 1796 }, { "epoch": 1.0, "learning_rate": 5.262952171849699e-07, "logits/chosen": -6.017520427703857, "logits/rejected": -6.102938652038574, "logps/chosen": -281.85791015625, "logps/rejected": -227.73953247070312, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": 6.452046871185303, "rewards/margins": 11.67090129852295, "rewards/rejected": -5.2188544273376465, "step": 1797 }, { "epoch": 1.0, "learning_rate": 5.258461305710563e-07, "logits/chosen": -5.927096366882324, "logits/rejected": -5.950714588165283, "logps/chosen": -206.12503051757812, "logps/rejected": -272.39892578125, "loss": 0.0669, "rewards/accuracies": 0.875, "rewards/chosen": 3.428823947906494, "rewards/margins": 9.773807525634766, "rewards/rejected": -6.344983100891113, "step": 1798 }, { "epoch": 1.0, "learning_rate": 5.253970230498366e-07, "logits/chosen": -5.847893238067627, "logits/rejected": -5.8980607986450195, "logps/chosen": -285.61309814453125, "logps/rejected": -190.28451538085938, "loss": 0.1032, "rewards/accuracies": 0.9375, "rewards/chosen": 6.472165107727051, "rewards/margins": 9.67319393157959, "rewards/rejected": -3.20102858543396, "step": 1799 }, { "epoch": 1.0, "learning_rate": 5.249478949846003e-07, "logits/chosen": -6.0698347091674805, "logits/rejected": -5.957569122314453, "logps/chosen": -180.30416870117188, "logps/rejected": -87.56045532226562, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": 2.2329752445220947, "rewards/margins": 7.6163763999938965, "rewards/rejected": -5.383400917053223, "step": 1800 }, { "epoch": 1.0, "learning_rate": 5.244987467386536e-07, "logits/chosen": -6.00157356262207, "logits/rejected": -6.046659469604492, "logps/chosen": -228.69573974609375, "logps/rejected": -171.19491577148438, "loss": 0.1195, "rewards/accuracies": 1.0, "rewards/chosen": 3.1584434509277344, "rewards/margins": 8.930736541748047, "rewards/rejected": -5.772292613983154, "step": 1801 }, { "epoch": 1.0, "learning_rate": 5.240495786753188e-07, "logits/chosen": -6.114706993103027, "logits/rejected": -6.040691375732422, "logps/chosen": -262.97320556640625, "logps/rejected": -204.85919189453125, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": 6.977927207946777, "rewards/margins": 11.654382705688477, "rewards/rejected": -4.676455974578857, "step": 1802 }, { "epoch": 1.0, "learning_rate": 5.236003911579344e-07, "logits/chosen": -6.115494728088379, "logits/rejected": -6.048371315002441, "logps/chosen": -258.90643310546875, "logps/rejected": -176.60098266601562, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": 3.2788126468658447, "rewards/margins": 8.909316062927246, "rewards/rejected": -5.630503177642822, "step": 1803 }, { "epoch": 1.0, "learning_rate": 5.231511845498547e-07, "logits/chosen": -6.088490962982178, "logits/rejected": -6.046998023986816, "logps/chosen": -341.0428771972656, "logps/rejected": -262.29620361328125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 4.6948347091674805, "rewards/margins": 10.459768295288086, "rewards/rejected": -5.7649335861206055, "step": 1804 }, { "epoch": 1.0, "learning_rate": 5.227019592144495e-07, "logits/chosen": -6.023583889007568, "logits/rejected": -6.036393642425537, "logps/chosen": -390.2452697753906, "logps/rejected": -340.47552490234375, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": 4.6213297843933105, "rewards/margins": 12.239441871643066, "rewards/rejected": -7.618112087249756, "step": 1805 }, { "epoch": 1.0, "learning_rate": 5.222527155151035e-07, "logits/chosen": -6.001046657562256, "logits/rejected": -6.104438781738281, "logps/chosen": -201.6888427734375, "logps/rejected": -283.7159729003906, "loss": 0.0989, "rewards/accuracies": 1.0, "rewards/chosen": 1.7009737491607666, "rewards/margins": 9.497265815734863, "rewards/rejected": -7.796291828155518, "step": 1806 }, { "epoch": 1.0, "learning_rate": 5.218034538152162e-07, "logits/chosen": -6.1251654624938965, "logits/rejected": -6.126156806945801, "logps/chosen": -271.482666015625, "logps/rejected": -224.35940551757812, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": 4.98295259475708, "rewards/margins": 10.349592208862305, "rewards/rejected": -5.366639137268066, "step": 1807 }, { "epoch": 1.0, "learning_rate": 5.213541744782022e-07, "logits/chosen": -5.932274341583252, "logits/rejected": -6.0623016357421875, "logps/chosen": -169.90396118164062, "logps/rejected": -227.5870361328125, "loss": 0.038, "rewards/accuracies": 0.875, "rewards/chosen": 3.731241464614868, "rewards/margins": 10.588634490966797, "rewards/rejected": -6.857392311096191, "step": 1808 }, { "epoch": 1.0, "learning_rate": 5.209048778674897e-07, "logits/chosen": -6.04867696762085, "logits/rejected": -6.007298946380615, "logps/chosen": -198.46453857421875, "logps/rejected": -286.49847412109375, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 2.8506126403808594, "rewards/margins": 9.786096572875977, "rewards/rejected": -6.935482978820801, "step": 1809 }, { "epoch": 1.0, "learning_rate": 5.204555643465215e-07, "logits/chosen": -6.012876987457275, "logits/rejected": -5.957106113433838, "logps/chosen": -180.30093383789062, "logps/rejected": -143.05987548828125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 3.123382091522217, "rewards/margins": 8.829038619995117, "rewards/rejected": -5.7056565284729, "step": 1810 }, { "epoch": 1.01, "learning_rate": 5.200062342787533e-07, "logits/chosen": -5.905455589294434, "logits/rejected": -6.017183780670166, "logps/chosen": -213.30044555664062, "logps/rejected": -262.6538391113281, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 3.545478343963623, "rewards/margins": 10.232410430908203, "rewards/rejected": -6.686932563781738, "step": 1811 }, { "epoch": 1.01, "learning_rate": 5.195568880276552e-07, "logits/chosen": -5.942913055419922, "logits/rejected": -5.938980579376221, "logps/chosen": -347.5810546875, "logps/rejected": -161.91859436035156, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 5.06414794921875, "rewards/margins": 10.575919151306152, "rewards/rejected": -5.511772155761719, "step": 1812 }, { "epoch": 1.01, "learning_rate": 5.191075259567092e-07, "logits/chosen": -5.989589691162109, "logits/rejected": -6.032357215881348, "logps/chosen": -532.195556640625, "logps/rejected": -480.96533203125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 5.448015213012695, "rewards/margins": 12.436272621154785, "rewards/rejected": -6.98825740814209, "step": 1813 }, { "epoch": 1.01, "learning_rate": 5.186581484294114e-07, "logits/chosen": -6.014554023742676, "logits/rejected": -6.153059959411621, "logps/chosen": -193.82180786132812, "logps/rejected": -294.7038879394531, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": 2.5915820598602295, "rewards/margins": 12.386700630187988, "rewards/rejected": -9.79511833190918, "step": 1814 }, { "epoch": 1.01, "learning_rate": 5.182087558092693e-07, "logits/chosen": -6.025635719299316, "logits/rejected": -6.059473514556885, "logps/chosen": -191.87608337402344, "logps/rejected": -274.44268798828125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 4.611990451812744, "rewards/margins": 10.738818168640137, "rewards/rejected": -6.126827239990234, "step": 1815 }, { "epoch": 1.01, "learning_rate": 5.177593484598033e-07, "logits/chosen": -5.951524257659912, "logits/rejected": -5.985499382019043, "logps/chosen": -363.41253662109375, "logps/rejected": -193.4256134033203, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 7.165279388427734, "rewards/margins": 10.720399856567383, "rewards/rejected": -3.5551204681396484, "step": 1816 }, { "epoch": 1.01, "learning_rate": 5.173099267445452e-07, "logits/chosen": -6.032737731933594, "logits/rejected": -6.107087135314941, "logps/chosen": -190.65786743164062, "logps/rejected": -233.75735473632812, "loss": 0.1782, "rewards/accuracies": 1.0, "rewards/chosen": 1.8556745052337646, "rewards/margins": 10.050419807434082, "rewards/rejected": -8.194745063781738, "step": 1817 }, { "epoch": 1.01, "learning_rate": 5.168604910270388e-07, "logits/chosen": -6.020101070404053, "logits/rejected": -6.0378031730651855, "logps/chosen": -223.0863494873047, "logps/rejected": -212.9246826171875, "loss": 0.014, "rewards/accuracies": 0.9375, "rewards/chosen": 2.675264358520508, "rewards/margins": 9.677352905273438, "rewards/rejected": -7.002089500427246, "step": 1818 }, { "epoch": 1.01, "learning_rate": 5.164110416708389e-07, "logits/chosen": -5.944554805755615, "logits/rejected": -6.103886604309082, "logps/chosen": -318.6248779296875, "logps/rejected": -182.2769317626953, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 7.063381195068359, "rewards/margins": 11.554950714111328, "rewards/rejected": -4.491570472717285, "step": 1819 }, { "epoch": 1.01, "learning_rate": 5.15961579039512e-07, "logits/chosen": -6.100459575653076, "logits/rejected": -6.081725120544434, "logps/chosen": -265.1867370605469, "logps/rejected": -411.538330078125, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 3.8189167976379395, "rewards/margins": 12.292264938354492, "rewards/rejected": -8.473349571228027, "step": 1820 }, { "epoch": 1.01, "learning_rate": 5.155121034966345e-07, "logits/chosen": -5.926143169403076, "logits/rejected": -5.982969284057617, "logps/chosen": -491.398193359375, "logps/rejected": -251.16476440429688, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 3.623948574066162, "rewards/margins": 7.512927532196045, "rewards/rejected": -3.888978958129883, "step": 1821 }, { "epoch": 1.01, "learning_rate": 5.150626154057939e-07, "logits/chosen": -5.988580226898193, "logits/rejected": -6.100577354431152, "logps/chosen": -207.91441345214844, "logps/rejected": -188.99417114257812, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 2.2661476135253906, "rewards/margins": 9.118423461914062, "rewards/rejected": -6.85227632522583, "step": 1822 }, { "epoch": 1.01, "learning_rate": 5.146131151305872e-07, "logits/chosen": -6.143149375915527, "logits/rejected": -6.057539939880371, "logps/chosen": -481.54180908203125, "logps/rejected": -343.95831298828125, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": 5.047091007232666, "rewards/margins": 12.30682373046875, "rewards/rejected": -7.259732723236084, "step": 1823 }, { "epoch": 1.01, "learning_rate": 5.14163603034622e-07, "logits/chosen": -5.982477188110352, "logits/rejected": -6.097934722900391, "logps/chosen": -315.621337890625, "logps/rejected": -301.994873046875, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 5.009387016296387, "rewards/margins": 10.505802154541016, "rewards/rejected": -5.4964141845703125, "step": 1824 }, { "epoch": 1.01, "learning_rate": 5.137140794815148e-07, "logits/chosen": -6.0729217529296875, "logits/rejected": -6.033689498901367, "logps/chosen": -280.2242126464844, "logps/rejected": -257.3727722167969, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 3.0694212913513184, "rewards/margins": 10.390327453613281, "rewards/rejected": -7.320905685424805, "step": 1825 }, { "epoch": 1.01, "learning_rate": 5.132645448348919e-07, "logits/chosen": -5.977911472320557, "logits/rejected": -6.027446746826172, "logps/chosen": -252.031982421875, "logps/rejected": -159.31488037109375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 4.374935150146484, "rewards/margins": 9.201324462890625, "rewards/rejected": -4.826389312744141, "step": 1826 }, { "epoch": 1.01, "learning_rate": 5.128149994583882e-07, "logits/chosen": -6.030625343322754, "logits/rejected": -6.046460151672363, "logps/chosen": -253.09808349609375, "logps/rejected": -221.19229125976562, "loss": 0.0318, "rewards/accuracies": 0.9375, "rewards/chosen": 4.2839202880859375, "rewards/margins": 8.912757873535156, "rewards/rejected": -4.6288371086120605, "step": 1827 }, { "epoch": 1.01, "learning_rate": 5.123654437156473e-07, "logits/chosen": -5.975937843322754, "logits/rejected": -6.057568073272705, "logps/chosen": -228.0985107421875, "logps/rejected": -279.3045654296875, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 4.03267240524292, "rewards/margins": 10.176295280456543, "rewards/rejected": -6.143622398376465, "step": 1828 }, { "epoch": 1.02, "learning_rate": 5.119158779703215e-07, "logits/chosen": -6.106114387512207, "logits/rejected": -6.064109802246094, "logps/chosen": -174.81980895996094, "logps/rejected": -257.0326843261719, "loss": 0.0228, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4569532871246338, "rewards/margins": 9.67894172668457, "rewards/rejected": -8.221988677978516, "step": 1829 }, { "epoch": 1.02, "learning_rate": 5.114663025860709e-07, "logits/chosen": -6.094972610473633, "logits/rejected": -6.022943019866943, "logps/chosen": -243.74278259277344, "logps/rejected": -191.2958984375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 1.2826440334320068, "rewards/margins": 10.407382011413574, "rewards/rejected": -9.124737739562988, "step": 1830 }, { "epoch": 1.02, "learning_rate": 5.110167179265636e-07, "logits/chosen": -5.982882499694824, "logits/rejected": -5.921555519104004, "logps/chosen": -361.90185546875, "logps/rejected": -300.5950927734375, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 4.530759811401367, "rewards/margins": 11.393819808959961, "rewards/rejected": -6.863059043884277, "step": 1831 }, { "epoch": 1.02, "learning_rate": 5.105671243554746e-07, "logits/chosen": -6.066009521484375, "logits/rejected": -6.030106544494629, "logps/chosen": -284.96954345703125, "logps/rejected": -200.30520629882812, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 3.1298325061798096, "rewards/margins": 9.589889526367188, "rewards/rejected": -6.460056781768799, "step": 1832 }, { "epoch": 1.02, "learning_rate": 5.101175222364873e-07, "logits/chosen": -6.005758762359619, "logits/rejected": -6.062605857849121, "logps/chosen": -371.8992919921875, "logps/rejected": -360.57330322265625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 9.038958549499512, "rewards/margins": 15.031340599060059, "rewards/rejected": -5.9923810958862305, "step": 1833 }, { "epoch": 1.02, "learning_rate": 5.096679119332908e-07, "logits/chosen": -6.123107433319092, "logits/rejected": -6.074812889099121, "logps/chosen": -279.0322265625, "logps/rejected": -233.85888671875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 3.362705707550049, "rewards/margins": 10.081302642822266, "rewards/rejected": -6.718596458435059, "step": 1834 }, { "epoch": 1.02, "learning_rate": 5.092182938095817e-07, "logits/chosen": -5.981299877166748, "logits/rejected": -6.081324577331543, "logps/chosen": -222.57608032226562, "logps/rejected": -210.74978637695312, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": 3.588977098464966, "rewards/margins": 9.294584274291992, "rewards/rejected": -5.7056074142456055, "step": 1835 }, { "epoch": 1.02, "learning_rate": 5.087686682290624e-07, "logits/chosen": -6.023966312408447, "logits/rejected": -6.026386737823486, "logps/chosen": -240.45120239257812, "logps/rejected": -209.01060485839844, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 3.6624648571014404, "rewards/margins": 11.31568717956543, "rewards/rejected": -7.653221130371094, "step": 1836 }, { "epoch": 1.02, "learning_rate": 5.083190355554413e-07, "logits/chosen": -5.978084087371826, "logits/rejected": -5.9418816566467285, "logps/chosen": -347.157470703125, "logps/rejected": -176.3663787841797, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 6.311686038970947, "rewards/margins": 9.550460815429688, "rewards/rejected": -3.238774299621582, "step": 1837 }, { "epoch": 1.02, "learning_rate": 5.078693961524329e-07, "logits/chosen": -6.078263759613037, "logits/rejected": -5.949637413024902, "logps/chosen": -389.06915283203125, "logps/rejected": -335.544921875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 6.870992183685303, "rewards/margins": 12.74270248413086, "rewards/rejected": -5.871710777282715, "step": 1838 }, { "epoch": 1.02, "learning_rate": 5.074197503837569e-07, "logits/chosen": -5.957345008850098, "logits/rejected": -6.068726062774658, "logps/chosen": -197.00265502929688, "logps/rejected": -250.1580810546875, "loss": 0.0193, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0973591804504395, "rewards/margins": 9.159050941467285, "rewards/rejected": -8.061692237854004, "step": 1839 }, { "epoch": 1.02, "learning_rate": 5.069700986131384e-07, "logits/chosen": -6.092434406280518, "logits/rejected": -6.048206329345703, "logps/chosen": -246.98318481445312, "logps/rejected": -171.43014526367188, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 4.270969867706299, "rewards/margins": 10.603689193725586, "rewards/rejected": -6.332718849182129, "step": 1840 }, { "epoch": 1.02, "learning_rate": 5.065204412043071e-07, "logits/chosen": -5.993409156799316, "logits/rejected": -6.027359485626221, "logps/chosen": -147.26718139648438, "logps/rejected": -226.6878662109375, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 2.3302173614501953, "rewards/margins": 10.415563583374023, "rewards/rejected": -8.085345268249512, "step": 1841 }, { "epoch": 1.02, "learning_rate": 5.060707785209971e-07, "logits/chosen": -6.0745530128479, "logits/rejected": -6.044883728027344, "logps/chosen": -247.28451538085938, "logps/rejected": -237.12338256835938, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 4.229165554046631, "rewards/margins": 10.136589050292969, "rewards/rejected": -5.907423496246338, "step": 1842 }, { "epoch": 1.02, "learning_rate": 5.056211109269473e-07, "logits/chosen": -5.94854736328125, "logits/rejected": -5.928465366363525, "logps/chosen": -344.979248046875, "logps/rejected": -203.6572265625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 5.435098171234131, "rewards/margins": 9.697216033935547, "rewards/rejected": -4.262117385864258, "step": 1843 }, { "epoch": 1.02, "learning_rate": 5.051714387859e-07, "logits/chosen": -5.9411821365356445, "logits/rejected": -5.894941806793213, "logps/chosen": -303.73773193359375, "logps/rejected": -206.341552734375, "loss": 0.2354, "rewards/accuracies": 1.0, "rewards/chosen": 4.217486381530762, "rewards/margins": 8.752604484558105, "rewards/rejected": -4.535118579864502, "step": 1844 }, { "epoch": 1.02, "learning_rate": 5.047217624616018e-07, "logits/chosen": -6.074535369873047, "logits/rejected": -6.211299419403076, "logps/chosen": -288.0648193359375, "logps/rejected": -373.98126220703125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 3.88018798828125, "rewards/margins": 11.43905258178711, "rewards/rejected": -7.558865547180176, "step": 1845 }, { "epoch": 1.02, "learning_rate": 5.042720823178021e-07, "logits/chosen": -6.221604824066162, "logits/rejected": -6.068987846374512, "logps/chosen": -207.4340057373047, "logps/rejected": -193.29470825195312, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 3.567835569381714, "rewards/margins": 11.41816520690918, "rewards/rejected": -7.850329875946045, "step": 1846 }, { "epoch": 1.03, "learning_rate": 5.038223987182536e-07, "logits/chosen": -5.933065414428711, "logits/rejected": -5.948248863220215, "logps/chosen": -256.23529052734375, "logps/rejected": -258.82086181640625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 6.15544319152832, "rewards/margins": 9.881441116333008, "rewards/rejected": -3.7259979248046875, "step": 1847 }, { "epoch": 1.03, "learning_rate": 5.03372712026712e-07, "logits/chosen": -6.156299591064453, "logits/rejected": -6.079863548278809, "logps/chosen": -257.8160705566406, "logps/rejected": -239.27249145507812, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 3.9084699153900146, "rewards/margins": 11.604880332946777, "rewards/rejected": -7.696410655975342, "step": 1848 }, { "epoch": 1.03, "learning_rate": 5.029230226069351e-07, "logits/chosen": -6.0119218826293945, "logits/rejected": -6.00432825088501, "logps/chosen": -280.91741943359375, "logps/rejected": -196.84716796875, "loss": 0.0387, "rewards/accuracies": 0.9375, "rewards/chosen": 3.809587001800537, "rewards/margins": 10.913276672363281, "rewards/rejected": -7.103690147399902, "step": 1849 }, { "epoch": 1.03, "learning_rate": 5.024733308226833e-07, "logits/chosen": -6.00695276260376, "logits/rejected": -6.028680324554443, "logps/chosen": -178.35227966308594, "logps/rejected": -199.35696411132812, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 2.6086833477020264, "rewards/margins": 10.597023010253906, "rewards/rejected": -7.988339424133301, "step": 1850 }, { "epoch": 1.03, "learning_rate": 5.020236370377186e-07, "logits/chosen": -6.012679576873779, "logits/rejected": -6.02616548538208, "logps/chosen": -244.01658630371094, "logps/rejected": -139.7169189453125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 2.800642967224121, "rewards/margins": 7.387931823730469, "rewards/rejected": -4.587288856506348, "step": 1851 }, { "epoch": 1.03, "learning_rate": 5.015739416158049e-07, "logits/chosen": -6.143317699432373, "logits/rejected": -6.076678276062012, "logps/chosen": -215.53286743164062, "logps/rejected": -155.325927734375, "loss": 0.1448, "rewards/accuracies": 1.0, "rewards/chosen": 2.310981035232544, "rewards/margins": 7.631192207336426, "rewards/rejected": -5.320211410522461, "step": 1852 }, { "epoch": 1.03, "learning_rate": 5.011242449207072e-07, "logits/chosen": -6.02565860748291, "logits/rejected": -6.016844749450684, "logps/chosen": -260.60113525390625, "logps/rejected": -179.8709259033203, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 2.436530113220215, "rewards/margins": 10.349895477294922, "rewards/rejected": -7.913365364074707, "step": 1853 }, { "epoch": 1.03, "learning_rate": 5.006745473161916e-07, "logits/chosen": -6.046818733215332, "logits/rejected": -6.028665542602539, "logps/chosen": -212.13946533203125, "logps/rejected": -183.74522399902344, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 2.15226149559021, "rewards/margins": 9.33090591430664, "rewards/rejected": -7.178645133972168, "step": 1854 }, { "epoch": 1.03, "learning_rate": 5.00224849166025e-07, "logits/chosen": -6.004281997680664, "logits/rejected": -6.150160312652588, "logps/chosen": -291.5535888671875, "logps/rejected": -272.7027587890625, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": 5.593328952789307, "rewards/margins": 13.263567924499512, "rewards/rejected": -7.670238494873047, "step": 1855 }, { "epoch": 1.03, "learning_rate": 4.997751508339749e-07, "logits/chosen": -6.042325019836426, "logits/rejected": -5.958584785461426, "logps/chosen": -267.2956848144531, "logps/rejected": -164.50804138183594, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 4.726014614105225, "rewards/margins": 10.587224960327148, "rewards/rejected": -5.861209869384766, "step": 1856 }, { "epoch": 1.03, "learning_rate": 4.993254526838082e-07, "logits/chosen": -6.03292179107666, "logits/rejected": -6.0266852378845215, "logps/chosen": -197.42552185058594, "logps/rejected": -180.12469482421875, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": 3.153914451599121, "rewards/margins": 9.49013900756836, "rewards/rejected": -6.336224555969238, "step": 1857 }, { "epoch": 1.03, "learning_rate": 4.988757550792928e-07, "logits/chosen": -6.080069541931152, "logits/rejected": -5.996646881103516, "logps/chosen": -324.9978942871094, "logps/rejected": -268.0149230957031, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 5.679217338562012, "rewards/margins": 14.101913452148438, "rewards/rejected": -8.42269515991211, "step": 1858 }, { "epoch": 1.03, "learning_rate": 4.984260583841952e-07, "logits/chosen": -6.131690979003906, "logits/rejected": -6.031868934631348, "logps/chosen": -323.26885986328125, "logps/rejected": -273.924560546875, "loss": 0.0214, "rewards/accuracies": 0.9375, "rewards/chosen": 5.870213508605957, "rewards/margins": 10.87600326538086, "rewards/rejected": -5.005788803100586, "step": 1859 }, { "epoch": 1.03, "learning_rate": 4.979763629622814e-07, "logits/chosen": -5.875388145446777, "logits/rejected": -5.893945693969727, "logps/chosen": -253.83026123046875, "logps/rejected": -151.2635498046875, "loss": 0.0231, "rewards/accuracies": 0.875, "rewards/chosen": 4.382236480712891, "rewards/margins": 9.511873245239258, "rewards/rejected": -5.129636764526367, "step": 1860 }, { "epoch": 1.03, "learning_rate": 4.975266691773168e-07, "logits/chosen": -6.030360221862793, "logits/rejected": -6.036195278167725, "logps/chosen": -250.2236785888672, "logps/rejected": -271.9579162597656, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 3.9520721435546875, "rewards/margins": 9.575251579284668, "rewards/rejected": -5.623178482055664, "step": 1861 }, { "epoch": 1.03, "learning_rate": 4.970769773930649e-07, "logits/chosen": -6.100361347198486, "logits/rejected": -5.989335060119629, "logps/chosen": -457.1402587890625, "logps/rejected": -326.7939758300781, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 4.644698143005371, "rewards/margins": 12.249584197998047, "rewards/rejected": -7.604886054992676, "step": 1862 }, { "epoch": 1.03, "learning_rate": 4.966272879732881e-07, "logits/chosen": -6.0021443367004395, "logits/rejected": -6.016513824462891, "logps/chosen": -206.05303955078125, "logps/rejected": -107.72305297851562, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": 2.319697141647339, "rewards/margins": 6.767287731170654, "rewards/rejected": -4.447590351104736, "step": 1863 }, { "epoch": 1.03, "learning_rate": 4.961776012817463e-07, "logits/chosen": -6.08133602142334, "logits/rejected": -6.007480621337891, "logps/chosen": -245.5545196533203, "logps/rejected": -144.92507934570312, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 5.7399444580078125, "rewards/margins": 11.82651424407959, "rewards/rejected": -6.086568832397461, "step": 1864 }, { "epoch": 1.04, "learning_rate": 4.957279176821979e-07, "logits/chosen": -6.022604465484619, "logits/rejected": -5.927023410797119, "logps/chosen": -472.3182678222656, "logps/rejected": -261.87158203125, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 5.398054122924805, "rewards/margins": 11.762296676635742, "rewards/rejected": -6.364242076873779, "step": 1865 }, { "epoch": 1.04, "learning_rate": 4.952782375383983e-07, "logits/chosen": -6.106681823730469, "logits/rejected": -5.974905490875244, "logps/chosen": -333.50640869140625, "logps/rejected": -163.0648956298828, "loss": 0.1253, "rewards/accuracies": 1.0, "rewards/chosen": 4.6915388107299805, "rewards/margins": 9.68687629699707, "rewards/rejected": -4.995336532592773, "step": 1866 }, { "epoch": 1.04, "learning_rate": 4.948285612140999e-07, "logits/chosen": -6.009525299072266, "logits/rejected": -6.0038652420043945, "logps/chosen": -264.98040771484375, "logps/rejected": -182.34747314453125, "loss": 0.0301, "rewards/accuracies": 0.9375, "rewards/chosen": 4.729205131530762, "rewards/margins": 9.069845199584961, "rewards/rejected": -4.340641021728516, "step": 1867 }, { "epoch": 1.04, "learning_rate": 4.943788890730528e-07, "logits/chosen": -6.0783514976501465, "logits/rejected": -5.979545593261719, "logps/chosen": -261.673583984375, "logps/rejected": -158.70123291015625, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": 4.393133640289307, "rewards/margins": 9.794353485107422, "rewards/rejected": -5.401219367980957, "step": 1868 }, { "epoch": 1.04, "learning_rate": 4.939292214790029e-07, "logits/chosen": -5.968932151794434, "logits/rejected": -5.898183822631836, "logps/chosen": -279.4676818847656, "logps/rejected": -159.1640625, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 4.957337379455566, "rewards/margins": 10.698403358459473, "rewards/rejected": -5.7410664558410645, "step": 1869 }, { "epoch": 1.04, "learning_rate": 4.93479558795693e-07, "logits/chosen": -6.009599208831787, "logits/rejected": -6.138462066650391, "logps/chosen": -270.84686279296875, "logps/rejected": -285.6448669433594, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 2.9147865772247314, "rewards/margins": 9.455622673034668, "rewards/rejected": -6.540835857391357, "step": 1870 }, { "epoch": 1.04, "learning_rate": 4.930299013868615e-07, "logits/chosen": -5.94548225402832, "logits/rejected": -6.066230297088623, "logps/chosen": -209.94020080566406, "logps/rejected": -322.4173278808594, "loss": 0.2185, "rewards/accuracies": 1.0, "rewards/chosen": 2.3788490295410156, "rewards/margins": 12.113120079040527, "rewards/rejected": -9.734272003173828, "step": 1871 }, { "epoch": 1.04, "learning_rate": 4.92580249616243e-07, "logits/chosen": -5.99629545211792, "logits/rejected": -6.028326511383057, "logps/chosen": -233.68942260742188, "logps/rejected": -261.9771728515625, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 3.9161758422851562, "rewards/margins": 11.942205429077148, "rewards/rejected": -8.026030540466309, "step": 1872 }, { "epoch": 1.04, "learning_rate": 4.921306038475671e-07, "logits/chosen": -6.013983726501465, "logits/rejected": -6.020716667175293, "logps/chosen": -200.5009765625, "logps/rejected": -157.69351196289062, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 4.6876373291015625, "rewards/margins": 8.44036865234375, "rewards/rejected": -3.752732038497925, "step": 1873 }, { "epoch": 1.04, "learning_rate": 4.916809644445586e-07, "logits/chosen": -5.899567604064941, "logits/rejected": -6.0603132247924805, "logps/chosen": -277.94329833984375, "logps/rejected": -450.11822509765625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 3.432631254196167, "rewards/margins": 15.906673431396484, "rewards/rejected": -12.474040985107422, "step": 1874 }, { "epoch": 1.04, "learning_rate": 4.912313317709378e-07, "logits/chosen": -6.1583733558654785, "logits/rejected": -5.97966194152832, "logps/chosen": -204.62416076660156, "logps/rejected": -116.36996459960938, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 4.468833923339844, "rewards/margins": 10.012650489807129, "rewards/rejected": -5.543816566467285, "step": 1875 }, { "epoch": 1.04, "learning_rate": 4.907817061904182e-07, "logits/chosen": -6.066568374633789, "logits/rejected": -6.0595245361328125, "logps/chosen": -218.5001678466797, "logps/rejected": -183.1620330810547, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": 3.692760467529297, "rewards/margins": 7.238266944885254, "rewards/rejected": -3.545506000518799, "step": 1876 }, { "epoch": 1.04, "learning_rate": 4.903320880667092e-07, "logits/chosen": -6.042264938354492, "logits/rejected": -6.049899578094482, "logps/chosen": -277.3656311035156, "logps/rejected": -193.2887725830078, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 6.315193176269531, "rewards/margins": 10.079948425292969, "rewards/rejected": -3.7647547721862793, "step": 1877 }, { "epoch": 1.04, "learning_rate": 4.898824777635126e-07, "logits/chosen": -6.071563720703125, "logits/rejected": -6.091313362121582, "logps/chosen": -300.6523132324219, "logps/rejected": -400.526123046875, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": 6.367997169494629, "rewards/margins": 11.391524314880371, "rewards/rejected": -5.023528099060059, "step": 1878 }, { "epoch": 1.04, "learning_rate": 4.894328756445253e-07, "logits/chosen": -6.042452812194824, "logits/rejected": -6.1017985343933105, "logps/chosen": -241.381103515625, "logps/rejected": -141.88414001464844, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 3.274001121520996, "rewards/margins": 8.164825439453125, "rewards/rejected": -4.890824317932129, "step": 1879 }, { "epoch": 1.04, "learning_rate": 4.889832820734366e-07, "logits/chosen": -6.077545642852783, "logits/rejected": -6.024738311767578, "logps/chosen": -226.95166015625, "logps/rejected": -200.27008056640625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 4.1786956787109375, "rewards/margins": 10.694580078125, "rewards/rejected": -6.515885353088379, "step": 1880 }, { "epoch": 1.04, "learning_rate": 4.885336974139291e-07, "logits/chosen": -6.086283206939697, "logits/rejected": -5.996415138244629, "logps/chosen": -293.5216064453125, "logps/rejected": -188.0725860595703, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 4.571725845336914, "rewards/margins": 13.3060302734375, "rewards/rejected": -8.734304428100586, "step": 1881 }, { "epoch": 1.04, "learning_rate": 4.880841220296786e-07, "logits/chosen": -6.0994391441345215, "logits/rejected": -6.109082221984863, "logps/chosen": -304.8642578125, "logps/rejected": -428.4677734375, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 4.33247184753418, "rewards/margins": 13.270088195800781, "rewards/rejected": -8.937616348266602, "step": 1882 }, { "epoch": 1.05, "learning_rate": 4.876345562843527e-07, "logits/chosen": -6.165115833282471, "logits/rejected": -6.162534236907959, "logps/chosen": -293.153564453125, "logps/rejected": -283.6568298339844, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 3.156400203704834, "rewards/margins": 11.419122695922852, "rewards/rejected": -8.26272201538086, "step": 1883 }, { "epoch": 1.05, "learning_rate": 4.871850005416119e-07, "logits/chosen": -6.051914215087891, "logits/rejected": -6.039128303527832, "logps/chosen": -227.9682159423828, "logps/rejected": -164.15731811523438, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 2.0331852436065674, "rewards/margins": 9.865620613098145, "rewards/rejected": -7.832435607910156, "step": 1884 }, { "epoch": 1.05, "learning_rate": 4.86735455165108e-07, "logits/chosen": -6.016336917877197, "logits/rejected": -6.075559139251709, "logps/chosen": -445.76470947265625, "logps/rejected": -256.3887939453125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 6.172435283660889, "rewards/margins": 10.993314743041992, "rewards/rejected": -4.82088041305542, "step": 1885 }, { "epoch": 1.05, "learning_rate": 4.862859205184852e-07, "logits/chosen": -6.079267978668213, "logits/rejected": -6.05358362197876, "logps/chosen": -261.91094970703125, "logps/rejected": -243.10147094726562, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": 3.323915481567383, "rewards/margins": 11.384714126586914, "rewards/rejected": -8.060798645019531, "step": 1886 }, { "epoch": 1.05, "learning_rate": 4.858363969653781e-07, "logits/chosen": -6.060550689697266, "logits/rejected": -5.9945807456970215, "logps/chosen": -282.9512634277344, "logps/rejected": -153.3715057373047, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 4.068159580230713, "rewards/margins": 9.364974975585938, "rewards/rejected": -5.296814918518066, "step": 1887 }, { "epoch": 1.05, "learning_rate": 4.853868848694127e-07, "logits/chosen": -6.026928901672363, "logits/rejected": -6.058923721313477, "logps/chosen": -249.73565673828125, "logps/rejected": -124.06092071533203, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": 6.613268852233887, "rewards/margins": 10.160130500793457, "rewards/rejected": -3.546860933303833, "step": 1888 }, { "epoch": 1.05, "learning_rate": 4.849373845942062e-07, "logits/chosen": -6.011430740356445, "logits/rejected": -6.040105819702148, "logps/chosen": -220.0915069580078, "logps/rejected": -243.7734375, "loss": 0.0189, "rewards/accuracies": 0.9375, "rewards/chosen": 4.269588947296143, "rewards/margins": 10.81051254272461, "rewards/rejected": -6.540923118591309, "step": 1889 }, { "epoch": 1.05, "learning_rate": 4.844878965033654e-07, "logits/chosen": -6.016816139221191, "logits/rejected": -5.9813642501831055, "logps/chosen": -404.8103332519531, "logps/rejected": -350.6636657714844, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": 3.1119227409362793, "rewards/margins": 12.21781063079834, "rewards/rejected": -9.105888366699219, "step": 1890 }, { "epoch": 1.05, "learning_rate": 4.84038420960488e-07, "logits/chosen": -5.953370571136475, "logits/rejected": -5.921365737915039, "logps/chosen": -351.66107177734375, "logps/rejected": -220.33819580078125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 5.24995231628418, "rewards/margins": 10.286346435546875, "rewards/rejected": -5.036394119262695, "step": 1891 }, { "epoch": 1.05, "learning_rate": 4.83588958329161e-07, "logits/chosen": -5.922070503234863, "logits/rejected": -6.015998840332031, "logps/chosen": -231.8256378173828, "logps/rejected": -141.2408447265625, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 5.151294231414795, "rewards/margins": 10.402179718017578, "rewards/rejected": -5.250885009765625, "step": 1892 }, { "epoch": 1.05, "learning_rate": 4.831395089729613e-07, "logits/chosen": -6.095033645629883, "logits/rejected": -6.02055549621582, "logps/chosen": -241.5682830810547, "logps/rejected": -113.35852813720703, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": 2.8784539699554443, "rewards/margins": 8.08183479309082, "rewards/rejected": -5.203381061553955, "step": 1893 }, { "epoch": 1.05, "learning_rate": 4.82690073255455e-07, "logits/chosen": -6.026623725891113, "logits/rejected": -6.107961654663086, "logps/chosen": -261.31292724609375, "logps/rejected": -317.07513427734375, "loss": 0.0331, "rewards/accuracies": 0.9375, "rewards/chosen": 4.931014060974121, "rewards/margins": 10.61815357208252, "rewards/rejected": -5.687139511108398, "step": 1894 }, { "epoch": 1.05, "learning_rate": 4.822406515401967e-07, "logits/chosen": -6.018436431884766, "logits/rejected": -6.082248687744141, "logps/chosen": -206.0643310546875, "logps/rejected": -338.70928955078125, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/chosen": 3.531083822250366, "rewards/margins": 10.491650581359863, "rewards/rejected": -6.960565567016602, "step": 1895 }, { "epoch": 1.05, "learning_rate": 4.817912441907307e-07, "logits/chosen": -5.973178863525391, "logits/rejected": -6.142834186553955, "logps/chosen": -185.54251098632812, "logps/rejected": -334.1771545410156, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 1.1084078550338745, "rewards/margins": 11.750961303710938, "rewards/rejected": -10.642555236816406, "step": 1896 }, { "epoch": 1.05, "learning_rate": 4.813418515705885e-07, "logits/chosen": -6.035396099090576, "logits/rejected": -5.954374313354492, "logps/chosen": -290.2207336425781, "logps/rejected": -239.22557067871094, "loss": 0.0275, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4845364093780518, "rewards/margins": 10.306879997253418, "rewards/rejected": -6.822342872619629, "step": 1897 }, { "epoch": 1.05, "learning_rate": 4.808924740432907e-07, "logits/chosen": -6.138979911804199, "logits/rejected": -6.046184062957764, "logps/chosen": -285.0171203613281, "logps/rejected": -177.3695068359375, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 3.5237960815429688, "rewards/margins": 11.103017807006836, "rewards/rejected": -7.579221725463867, "step": 1898 }, { "epoch": 1.05, "learning_rate": 4.804431119723448e-07, "logits/chosen": -6.078659534454346, "logits/rejected": -6.106680393218994, "logps/chosen": -272.50543212890625, "logps/rejected": -273.85797119140625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 4.896775245666504, "rewards/margins": 13.413597106933594, "rewards/rejected": -8.516822814941406, "step": 1899 }, { "epoch": 1.05, "learning_rate": 4.799937657212466e-07, "logits/chosen": -5.913263320922852, "logits/rejected": -5.980337619781494, "logps/chosen": -247.29298400878906, "logps/rejected": -208.90579223632812, "loss": 0.0544, "rewards/accuracies": 0.875, "rewards/chosen": 1.7622876167297363, "rewards/margins": 6.922271728515625, "rewards/rejected": -5.159984111785889, "step": 1900 }, { "epoch": 1.06, "learning_rate": 4.795444356534787e-07, "logits/chosen": -6.06416654586792, "logits/rejected": -5.970822811126709, "logps/chosen": -324.96087646484375, "logps/rejected": -270.060302734375, "loss": 0.0259, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2937912940979004, "rewards/margins": 11.016016006469727, "rewards/rejected": -8.722225189208984, "step": 1901 }, { "epoch": 1.06, "learning_rate": 4.790951221325102e-07, "logits/chosen": -6.083187580108643, "logits/rejected": -6.021004676818848, "logps/chosen": -260.17291259765625, "logps/rejected": -288.47198486328125, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 2.469050884246826, "rewards/margins": 11.774456977844238, "rewards/rejected": -9.30540657043457, "step": 1902 }, { "epoch": 1.06, "learning_rate": 4.786458255217979e-07, "logits/chosen": -6.0472612380981445, "logits/rejected": -6.008672714233398, "logps/chosen": -306.74090576171875, "logps/rejected": -192.56533813476562, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 6.467069625854492, "rewards/margins": 12.907750129699707, "rewards/rejected": -6.440679550170898, "step": 1903 }, { "epoch": 1.06, "learning_rate": 4.781965461847837e-07, "logits/chosen": -6.0959153175354, "logits/rejected": -6.1031999588012695, "logps/chosen": -182.71548461914062, "logps/rejected": -169.37139892578125, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": 2.7708897590637207, "rewards/margins": 8.05449104309082, "rewards/rejected": -5.283600807189941, "step": 1904 }, { "epoch": 1.06, "learning_rate": 4.777472844848966e-07, "logits/chosen": -6.025444507598877, "logits/rejected": -6.095890045166016, "logps/chosen": -207.23658752441406, "logps/rejected": -325.281982421875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 1.8034794330596924, "rewards/margins": 10.838083267211914, "rewards/rejected": -9.0346040725708, "step": 1905 }, { "epoch": 1.06, "learning_rate": 4.772980407855504e-07, "logits/chosen": -6.046475410461426, "logits/rejected": -6.146720886230469, "logps/chosen": -290.3211669921875, "logps/rejected": -230.36526489257812, "loss": 0.0167, "rewards/accuracies": 0.9375, "rewards/chosen": 3.742337226867676, "rewards/margins": 11.506937980651855, "rewards/rejected": -7.76460075378418, "step": 1906 }, { "epoch": 1.06, "learning_rate": 4.768488154501454e-07, "logits/chosen": -6.031718730926514, "logits/rejected": -5.9602885246276855, "logps/chosen": -257.0833740234375, "logps/rejected": -168.79383850097656, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": 4.663142204284668, "rewards/margins": 10.77215576171875, "rewards/rejected": -6.109013557434082, "step": 1907 }, { "epoch": 1.06, "learning_rate": 4.763996088420657e-07, "logits/chosen": -6.027499198913574, "logits/rejected": -6.125320911407471, "logps/chosen": -281.4068603515625, "logps/rejected": -323.06463623046875, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": 4.4927496910095215, "rewards/margins": 12.25295639038086, "rewards/rejected": -7.76020622253418, "step": 1908 }, { "epoch": 1.06, "learning_rate": 4.759504213246813e-07, "logits/chosen": -5.998739242553711, "logits/rejected": -6.073901176452637, "logps/chosen": -524.5200805664062, "logps/rejected": -367.7401428222656, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 6.245882511138916, "rewards/margins": 11.633719444274902, "rewards/rejected": -5.387836456298828, "step": 1909 }, { "epoch": 1.06, "learning_rate": 4.755012532613465e-07, "logits/chosen": -5.939165115356445, "logits/rejected": -6.049388885498047, "logps/chosen": -257.9983825683594, "logps/rejected": -331.6851501464844, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": 3.086479902267456, "rewards/margins": 11.661535263061523, "rewards/rejected": -8.575056076049805, "step": 1910 }, { "epoch": 1.06, "learning_rate": 4.750521050153996e-07, "logits/chosen": -6.040159225463867, "logits/rejected": -5.980966091156006, "logps/chosen": -254.6655731201172, "logps/rejected": -246.91543579101562, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": 5.379167079925537, "rewards/margins": 12.818623542785645, "rewards/rejected": -7.439455986022949, "step": 1911 }, { "epoch": 1.06, "learning_rate": 4.746029769501633e-07, "logits/chosen": -5.921230316162109, "logits/rejected": -5.977749347686768, "logps/chosen": -237.43536376953125, "logps/rejected": -255.53311157226562, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": 4.188033103942871, "rewards/margins": 10.314167976379395, "rewards/rejected": -6.126135349273682, "step": 1912 }, { "epoch": 1.06, "learning_rate": 4.741538694289436e-07, "logits/chosen": -6.051982879638672, "logits/rejected": -6.059843063354492, "logps/chosen": -294.02960205078125, "logps/rejected": -162.22213745117188, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": 6.2756171226501465, "rewards/margins": 9.648913383483887, "rewards/rejected": -3.3732964992523193, "step": 1913 }, { "epoch": 1.06, "learning_rate": 4.737047828150301e-07, "logits/chosen": -6.126262664794922, "logits/rejected": -5.9917497634887695, "logps/chosen": -298.7803649902344, "logps/rejected": -148.17733764648438, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 4.7904534339904785, "rewards/margins": 7.071943759918213, "rewards/rejected": -2.2814905643463135, "step": 1914 }, { "epoch": 1.06, "learning_rate": 4.732557174716954e-07, "logits/chosen": -6.04936408996582, "logits/rejected": -5.990326881408691, "logps/chosen": -191.1981658935547, "logps/rejected": -151.55322265625, "loss": 0.11, "rewards/accuracies": 1.0, "rewards/chosen": 2.7957310676574707, "rewards/margins": 8.52639389038086, "rewards/rejected": -5.730661869049072, "step": 1915 }, { "epoch": 1.06, "learning_rate": 4.728066737621946e-07, "logits/chosen": -6.00051212310791, "logits/rejected": -5.981884956359863, "logps/chosen": -265.35003662109375, "logps/rejected": -148.6277313232422, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": 5.265832901000977, "rewards/margins": 9.749616622924805, "rewards/rejected": -4.483783721923828, "step": 1916 }, { "epoch": 1.06, "learning_rate": 4.7235765204976624e-07, "logits/chosen": -5.993633270263672, "logits/rejected": -5.994665622711182, "logps/chosen": -331.48773193359375, "logps/rejected": -284.19818115234375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 5.474018573760986, "rewards/margins": 12.220595359802246, "rewards/rejected": -6.746577262878418, "step": 1917 }, { "epoch": 1.06, "learning_rate": 4.7190865269762976e-07, "logits/chosen": -6.012314796447754, "logits/rejected": -5.9357991218566895, "logps/chosen": -253.35768127441406, "logps/rejected": -161.90960693359375, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": 3.972564935684204, "rewards/margins": 9.792278289794922, "rewards/rejected": -5.819714546203613, "step": 1918 }, { "epoch": 1.07, "learning_rate": 4.714596760689877e-07, "logits/chosen": -6.08245325088501, "logits/rejected": -6.128499507904053, "logps/chosen": -472.22808837890625, "logps/rejected": -314.913818359375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 6.144374847412109, "rewards/margins": 13.783271789550781, "rewards/rejected": -7.6388983726501465, "step": 1919 }, { "epoch": 1.07, "learning_rate": 4.710107225270235e-07, "logits/chosen": -6.043799877166748, "logits/rejected": -6.071410655975342, "logps/chosen": -292.63458251953125, "logps/rejected": -241.61651611328125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 4.974261283874512, "rewards/margins": 9.11881160736084, "rewards/rejected": -4.144550800323486, "step": 1920 }, { "epoch": 1.07, "learning_rate": 4.7056179243490196e-07, "logits/chosen": -5.988397598266602, "logits/rejected": -6.03122091293335, "logps/chosen": -423.9935607910156, "logps/rejected": -282.01611328125, "loss": 0.0127, "rewards/accuracies": 0.9375, "rewards/chosen": 5.81303071975708, "rewards/margins": 13.230051040649414, "rewards/rejected": -7.417020320892334, "step": 1921 }, { "epoch": 1.07, "learning_rate": 4.701128861557693e-07, "logits/chosen": -6.102496147155762, "logits/rejected": -6.0300140380859375, "logps/chosen": -254.3193359375, "logps/rejected": -265.6123046875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 4.389340877532959, "rewards/margins": 9.824228286743164, "rewards/rejected": -5.434887409210205, "step": 1922 }, { "epoch": 1.07, "learning_rate": 4.696640040527522e-07, "logits/chosen": -6.02290153503418, "logits/rejected": -5.97606897354126, "logps/chosen": -287.6868591308594, "logps/rejected": -264.76422119140625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 4.465066432952881, "rewards/margins": 9.705671310424805, "rewards/rejected": -5.240604400634766, "step": 1923 }, { "epoch": 1.07, "learning_rate": 4.692151464889578e-07, "logits/chosen": -6.021668910980225, "logits/rejected": -5.95738410949707, "logps/chosen": -255.8255615234375, "logps/rejected": -144.16937255859375, "loss": 0.0364, "rewards/accuracies": 0.9375, "rewards/chosen": 6.080979347229004, "rewards/margins": 10.482882499694824, "rewards/rejected": -4.401904106140137, "step": 1924 }, { "epoch": 1.07, "learning_rate": 4.687663138274734e-07, "logits/chosen": -6.059621810913086, "logits/rejected": -6.094228267669678, "logps/chosen": -203.93551635742188, "logps/rejected": -205.46240234375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 2.0305700302124023, "rewards/margins": 9.425565719604492, "rewards/rejected": -7.394995212554932, "step": 1925 }, { "epoch": 1.07, "learning_rate": 4.683175064313665e-07, "logits/chosen": -6.142236232757568, "logits/rejected": -5.954691410064697, "logps/chosen": -239.51736450195312, "logps/rejected": -158.732177734375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 4.763214588165283, "rewards/margins": 10.742417335510254, "rewards/rejected": -5.9792022705078125, "step": 1926 }, { "epoch": 1.07, "learning_rate": 4.6786872466368345e-07, "logits/chosen": -6.025959491729736, "logits/rejected": -6.148527145385742, "logps/chosen": -214.55685424804688, "logps/rejected": -225.61314392089844, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 4.624087333679199, "rewards/margins": 11.232898712158203, "rewards/rejected": -6.608811378479004, "step": 1927 }, { "epoch": 1.07, "learning_rate": 4.6741996888745026e-07, "logits/chosen": -6.062747001647949, "logits/rejected": -6.022736549377441, "logps/chosen": -369.8567199707031, "logps/rejected": -178.80258178710938, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 4.321377754211426, "rewards/margins": 10.159486770629883, "rewards/rejected": -5.838108539581299, "step": 1928 }, { "epoch": 1.07, "learning_rate": 4.6697123946567224e-07, "logits/chosen": -6.018511772155762, "logits/rejected": -6.173917770385742, "logps/chosen": -172.16131591796875, "logps/rejected": -311.54254150390625, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 2.112292766571045, "rewards/margins": 12.35033893585205, "rewards/rejected": -10.238046646118164, "step": 1929 }, { "epoch": 1.07, "learning_rate": 4.6652253676133276e-07, "logits/chosen": -6.059083938598633, "logits/rejected": -6.151837348937988, "logps/chosen": -158.68051147460938, "logps/rejected": -210.62550354003906, "loss": 0.0414, "rewards/accuracies": 0.9375, "rewards/chosen": 1.43021821975708, "rewards/margins": 9.73530387878418, "rewards/rejected": -8.305086135864258, "step": 1930 }, { "epoch": 1.07, "learning_rate": 4.660738611373941e-07, "logits/chosen": -6.178500175476074, "logits/rejected": -5.981407165527344, "logps/chosen": -231.6632080078125, "logps/rejected": -100.73297119140625, "loss": 0.031, "rewards/accuracies": 0.9375, "rewards/chosen": 4.755801200866699, "rewards/margins": 10.156129837036133, "rewards/rejected": -5.400328636169434, "step": 1931 }, { "epoch": 1.07, "learning_rate": 4.6562521295679626e-07, "logits/chosen": -6.024205684661865, "logits/rejected": -5.997628211975098, "logps/chosen": -272.73138427734375, "logps/rejected": -183.29647827148438, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 4.021706581115723, "rewards/margins": 10.391359329223633, "rewards/rejected": -6.369651794433594, "step": 1932 }, { "epoch": 1.07, "learning_rate": 4.651765925824573e-07, "logits/chosen": -6.058446884155273, "logits/rejected": -6.038429260253906, "logps/chosen": -254.6513671875, "logps/rejected": -190.4105987548828, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 4.861032485961914, "rewards/margins": 11.75594711303711, "rewards/rejected": -6.894915580749512, "step": 1933 }, { "epoch": 1.07, "learning_rate": 4.6472800037727277e-07, "logits/chosen": -6.0837202072143555, "logits/rejected": -5.947319030761719, "logps/chosen": -243.31991577148438, "logps/rejected": -196.51785278320312, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 1.8719849586486816, "rewards/margins": 10.537160873413086, "rewards/rejected": -8.665176391601562, "step": 1934 }, { "epoch": 1.07, "learning_rate": 4.642794367041149e-07, "logits/chosen": -6.072412490844727, "logits/rejected": -5.997608661651611, "logps/chosen": -356.951904296875, "logps/rejected": -246.4039764404297, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 7.202654838562012, "rewards/margins": 12.272706985473633, "rewards/rejected": -5.070052623748779, "step": 1935 }, { "epoch": 1.07, "learning_rate": 4.6383090192583394e-07, "logits/chosen": -5.985522270202637, "logits/rejected": -6.099018573760986, "logps/chosen": -303.37744140625, "logps/rejected": -399.6278991699219, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 3.6240596771240234, "rewards/margins": 12.132078170776367, "rewards/rejected": -8.50801944732666, "step": 1936 }, { "epoch": 1.08, "learning_rate": 4.6338239640525547e-07, "logits/chosen": -6.0749006271362305, "logits/rejected": -5.968088150024414, "logps/chosen": -224.321533203125, "logps/rejected": -175.26239013671875, "loss": 0.0229, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4872190952301025, "rewards/margins": 9.176481246948242, "rewards/rejected": -6.689261436462402, "step": 1937 }, { "epoch": 1.08, "learning_rate": 4.6293392050518263e-07, "logits/chosen": -5.988156318664551, "logits/rejected": -6.007442951202393, "logps/chosen": -274.442626953125, "logps/rejected": -171.80108642578125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 5.4347310066223145, "rewards/margins": 11.916572570800781, "rewards/rejected": -6.481841087341309, "step": 1938 }, { "epoch": 1.08, "learning_rate": 4.624854745883936e-07, "logits/chosen": -6.002516269683838, "logits/rejected": -5.994278907775879, "logps/chosen": -249.19383239746094, "logps/rejected": -271.0928039550781, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": 3.9970903396606445, "rewards/margins": 10.947344779968262, "rewards/rejected": -6.950255393981934, "step": 1939 }, { "epoch": 1.08, "learning_rate": 4.6203705901764295e-07, "logits/chosen": -6.096403121948242, "logits/rejected": -6.044656753540039, "logps/chosen": -395.7075500488281, "logps/rejected": -212.25880432128906, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": 3.8505656719207764, "rewards/margins": 11.20595932006836, "rewards/rejected": -7.355393409729004, "step": 1940 }, { "epoch": 1.08, "learning_rate": 4.615886741556603e-07, "logits/chosen": -6.0479559898376465, "logits/rejected": -6.043234348297119, "logps/chosen": -298.5643615722656, "logps/rejected": -196.30523681640625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 5.023324012756348, "rewards/margins": 11.930937767028809, "rewards/rejected": -6.907613754272461, "step": 1941 }, { "epoch": 1.08, "learning_rate": 4.6114032036515067e-07, "logits/chosen": -6.067697525024414, "logits/rejected": -6.0089006423950195, "logps/chosen": -424.6168212890625, "logps/rejected": -386.349365234375, "loss": 0.0367, "rewards/accuracies": 0.9375, "rewards/chosen": 4.572612762451172, "rewards/margins": 11.399978637695312, "rewards/rejected": -6.827364921569824, "step": 1942 }, { "epoch": 1.08, "learning_rate": 4.60691998008794e-07, "logits/chosen": -6.041598796844482, "logits/rejected": -6.046303749084473, "logps/chosen": -218.19346618652344, "logps/rejected": -216.4139404296875, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 3.6313047409057617, "rewards/margins": 9.167243957519531, "rewards/rejected": -5.5359392166137695, "step": 1943 }, { "epoch": 1.08, "learning_rate": 4.6024370744924446e-07, "logits/chosen": -6.087817192077637, "logits/rejected": -6.0073652267456055, "logps/chosen": -226.6303253173828, "logps/rejected": -211.44549560546875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 1.267214059829712, "rewards/margins": 10.352611541748047, "rewards/rejected": -9.085397720336914, "step": 1944 }, { "epoch": 1.08, "learning_rate": 4.597954490491309e-07, "logits/chosen": -6.06420373916626, "logits/rejected": -5.954751014709473, "logps/chosen": -260.595703125, "logps/rejected": -95.92369079589844, "loss": 0.0274, "rewards/accuracies": 0.9375, "rewards/chosen": 5.111032485961914, "rewards/margins": 9.11734390258789, "rewards/rejected": -4.006310939788818, "step": 1945 }, { "epoch": 1.08, "learning_rate": 4.5934722317105585e-07, "logits/chosen": -5.97757625579834, "logits/rejected": -6.0990800857543945, "logps/chosen": -270.6358337402344, "logps/rejected": -276.8843078613281, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 4.613459587097168, "rewards/margins": 13.15401554107666, "rewards/rejected": -8.540555953979492, "step": 1946 }, { "epoch": 1.08, "learning_rate": 4.5889903017759596e-07, "logits/chosen": -6.042016506195068, "logits/rejected": -5.950843811035156, "logps/chosen": -494.1524353027344, "logps/rejected": -181.74856567382812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 3.9755587577819824, "rewards/margins": 9.91890811920166, "rewards/rejected": -5.9433488845825195, "step": 1947 }, { "epoch": 1.08, "learning_rate": 4.584508704313006e-07, "logits/chosen": -6.08103084564209, "logits/rejected": -6.0320353507995605, "logps/chosen": -314.69384765625, "logps/rejected": -211.02369689941406, "loss": 0.0129, "rewards/accuracies": 0.9375, "rewards/chosen": 4.561222076416016, "rewards/margins": 8.840066909790039, "rewards/rejected": -4.278844356536865, "step": 1948 }, { "epoch": 1.08, "learning_rate": 4.580027442946927e-07, "logits/chosen": -6.0434889793396, "logits/rejected": -6.091495037078857, "logps/chosen": -330.0324401855469, "logps/rejected": -240.2761993408203, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": 7.5484771728515625, "rewards/margins": 12.996004104614258, "rewards/rejected": -5.447525978088379, "step": 1949 }, { "epoch": 1.08, "learning_rate": 4.5755465213026806e-07, "logits/chosen": -5.9309401512146, "logits/rejected": -5.976186752319336, "logps/chosen": -471.7536315917969, "logps/rejected": -302.3275146484375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 6.941547870635986, "rewards/margins": 14.135414123535156, "rewards/rejected": -7.193866729736328, "step": 1950 }, { "epoch": 1.08, "learning_rate": 4.571065943004947e-07, "logits/chosen": -6.054040908813477, "logits/rejected": -6.079030990600586, "logps/chosen": -289.3255310058594, "logps/rejected": -148.38302612304688, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 6.620156288146973, "rewards/margins": 10.595417022705078, "rewards/rejected": -3.9752612113952637, "step": 1951 }, { "epoch": 1.08, "learning_rate": 4.5665857116781323e-07, "logits/chosen": -6.082910537719727, "logits/rejected": -6.008674144744873, "logps/chosen": -309.0670471191406, "logps/rejected": -131.04238891601562, "loss": 0.0366, "rewards/accuracies": 0.9375, "rewards/chosen": 3.930762767791748, "rewards/margins": 8.389612197875977, "rewards/rejected": -4.458849906921387, "step": 1952 }, { "epoch": 1.08, "learning_rate": 4.562105830946359e-07, "logits/chosen": -6.115083694458008, "logits/rejected": -6.093807220458984, "logps/chosen": -229.48419189453125, "logps/rejected": -256.09259033203125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 2.326232433319092, "rewards/margins": 11.996837615966797, "rewards/rejected": -9.670604705810547, "step": 1953 }, { "epoch": 1.08, "learning_rate": 4.557626304433467e-07, "logits/chosen": -5.995242595672607, "logits/rejected": -5.986537933349609, "logps/chosen": -247.69752502441406, "logps/rejected": -163.24105834960938, "loss": 0.0652, "rewards/accuracies": 0.9375, "rewards/chosen": 4.597681045532227, "rewards/margins": 9.017643928527832, "rewards/rejected": -4.419961452484131, "step": 1954 }, { "epoch": 1.09, "learning_rate": 4.5531471357630106e-07, "logits/chosen": -5.995850086212158, "logits/rejected": -6.122398853302002, "logps/chosen": -188.32073974609375, "logps/rejected": -310.38079833984375, "loss": 0.1469, "rewards/accuracies": 1.0, "rewards/chosen": 3.719351291656494, "rewards/margins": 12.120813369750977, "rewards/rejected": -8.401461601257324, "step": 1955 }, { "epoch": 1.09, "learning_rate": 4.5486683285582493e-07, "logits/chosen": -6.060155391693115, "logits/rejected": -6.010281085968018, "logps/chosen": -223.92022705078125, "logps/rejected": -318.6434631347656, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": 2.190155029296875, "rewards/margins": 11.823484420776367, "rewards/rejected": -9.633329391479492, "step": 1956 }, { "epoch": 1.09, "learning_rate": 4.544189886442162e-07, "logits/chosen": -6.067928314208984, "logits/rejected": -6.06927490234375, "logps/chosen": -250.8816680908203, "logps/rejected": -236.99539184570312, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 2.376028060913086, "rewards/margins": 10.704397201538086, "rewards/rejected": -8.328369140625, "step": 1957 }, { "epoch": 1.09, "learning_rate": 4.539711813037417e-07, "logits/chosen": -6.080174446105957, "logits/rejected": -6.040923595428467, "logps/chosen": -259.6265869140625, "logps/rejected": -210.95147705078125, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": 3.339311361312866, "rewards/margins": 10.488981246948242, "rewards/rejected": -7.149670600891113, "step": 1958 }, { "epoch": 1.09, "learning_rate": 4.535234111966399e-07, "logits/chosen": -5.9716315269470215, "logits/rejected": -5.950847625732422, "logps/chosen": -277.2032775878906, "logps/rejected": -234.45501708984375, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 4.672444820404053, "rewards/margins": 11.468427658081055, "rewards/rejected": -6.79598331451416, "step": 1959 }, { "epoch": 1.09, "learning_rate": 4.530756786851179e-07, "logits/chosen": -6.044960975646973, "logits/rejected": -6.116077899932861, "logps/chosen": -240.29725646972656, "logps/rejected": -186.39559936523438, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": 4.1270952224731445, "rewards/margins": 11.151318550109863, "rewards/rejected": -7.024222373962402, "step": 1960 }, { "epoch": 1.09, "learning_rate": 4.526279841313534e-07, "logits/chosen": -6.004721641540527, "logits/rejected": -6.037195682525635, "logps/chosen": -315.6829833984375, "logps/rejected": -290.6506042480469, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": 3.7952017784118652, "rewards/margins": 10.866455078125, "rewards/rejected": -7.071253299713135, "step": 1961 }, { "epoch": 1.09, "learning_rate": 4.521803278974927e-07, "logits/chosen": -6.126140594482422, "logits/rejected": -6.128844738006592, "logps/chosen": -230.04722595214844, "logps/rejected": -204.36590576171875, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": 4.126490592956543, "rewards/margins": 12.109212875366211, "rewards/rejected": -7.98272180557251, "step": 1962 }, { "epoch": 1.09, "learning_rate": 4.517327103456514e-07, "logits/chosen": -6.045319557189941, "logits/rejected": -6.0218658447265625, "logps/chosen": -278.53924560546875, "logps/rejected": -320.71856689453125, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 4.784902572631836, "rewards/margins": 12.665833473205566, "rewards/rejected": -7.880931377410889, "step": 1963 }, { "epoch": 1.09, "learning_rate": 4.512851318379138e-07, "logits/chosen": -6.013064861297607, "logits/rejected": -6.035854339599609, "logps/chosen": -380.68798828125, "logps/rejected": -321.00396728515625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 3.0975797176361084, "rewards/margins": 10.119903564453125, "rewards/rejected": -7.022324562072754, "step": 1964 }, { "epoch": 1.09, "learning_rate": 4.508375927363326e-07, "logits/chosen": -6.090761184692383, "logits/rejected": -6.057212829589844, "logps/chosen": -236.50457763671875, "logps/rejected": -202.25790405273438, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 2.0472426414489746, "rewards/margins": 8.602660179138184, "rewards/rejected": -6.555417537689209, "step": 1965 }, { "epoch": 1.09, "learning_rate": 4.5039009340292867e-07, "logits/chosen": -6.096175193786621, "logits/rejected": -6.06922721862793, "logps/chosen": -260.0218505859375, "logps/rejected": -260.1181335449219, "loss": 0.0518, "rewards/accuracies": 0.9375, "rewards/chosen": 4.929109573364258, "rewards/margins": 12.740299224853516, "rewards/rejected": -7.811190605163574, "step": 1966 }, { "epoch": 1.09, "learning_rate": 4.499426341996905e-07, "logits/chosen": -6.007576942443848, "logits/rejected": -6.075263500213623, "logps/chosen": -281.697265625, "logps/rejected": -290.08831787109375, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": 2.53389310836792, "rewards/margins": 11.42552375793457, "rewards/rejected": -8.891631126403809, "step": 1967 }, { "epoch": 1.09, "learning_rate": 4.494952154885742e-07, "logits/chosen": -6.023314476013184, "logits/rejected": -6.043367385864258, "logps/chosen": -510.085693359375, "logps/rejected": -272.94915771484375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 5.340670585632324, "rewards/margins": 14.24753189086914, "rewards/rejected": -8.906861305236816, "step": 1968 }, { "epoch": 1.09, "learning_rate": 4.4904783763150346e-07, "logits/chosen": -6.094202995300293, "logits/rejected": -6.028227806091309, "logps/chosen": -297.65771484375, "logps/rejected": -174.12734985351562, "loss": 0.0293, "rewards/accuracies": 0.9375, "rewards/chosen": 3.0803565979003906, "rewards/margins": 10.090396881103516, "rewards/rejected": -7.010040283203125, "step": 1969 }, { "epoch": 1.09, "learning_rate": 4.4860050099036833e-07, "logits/chosen": -6.212804794311523, "logits/rejected": -6.152372360229492, "logps/chosen": -351.0110168457031, "logps/rejected": -201.21607971191406, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 5.242564678192139, "rewards/margins": 11.154016494750977, "rewards/rejected": -5.91145133972168, "step": 1970 }, { "epoch": 1.09, "learning_rate": 4.4815320592702614e-07, "logits/chosen": -6.050727367401123, "logits/rejected": -5.9862589836120605, "logps/chosen": -231.1610107421875, "logps/rejected": -173.9521484375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.623732566833496, "rewards/margins": 11.633892059326172, "rewards/rejected": -8.010160446166992, "step": 1971 }, { "epoch": 1.09, "learning_rate": 4.477059528033e-07, "logits/chosen": -6.0577239990234375, "logits/rejected": -6.146202087402344, "logps/chosen": -244.15423583984375, "logps/rejected": -283.61126708984375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.9644191265106201, "rewards/margins": 11.277369499206543, "rewards/rejected": -10.312950134277344, "step": 1972 }, { "epoch": 1.1, "learning_rate": 4.472587419809798e-07, "logits/chosen": -5.95086145401001, "logits/rejected": -5.917092323303223, "logps/chosen": -341.845703125, "logps/rejected": -355.8064880371094, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": 3.5848209857940674, "rewards/margins": 13.50572395324707, "rewards/rejected": -9.920903205871582, "step": 1973 }, { "epoch": 1.1, "learning_rate": 4.4681157382182054e-07, "logits/chosen": -5.978577613830566, "logits/rejected": -5.966857433319092, "logps/chosen": -229.43667602539062, "logps/rejected": -231.34132385253906, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 1.8540210723876953, "rewards/margins": 9.113515853881836, "rewards/rejected": -7.259493827819824, "step": 1974 }, { "epoch": 1.1, "learning_rate": 4.463644486875427e-07, "logits/chosen": -6.043390274047852, "logits/rejected": -6.075811386108398, "logps/chosen": -253.6865997314453, "logps/rejected": -220.96290588378906, "loss": 0.0178, "rewards/accuracies": 0.9375, "rewards/chosen": 4.625949382781982, "rewards/margins": 11.101932525634766, "rewards/rejected": -6.475983142852783, "step": 1975 }, { "epoch": 1.1, "learning_rate": 4.459173669398328e-07, "logits/chosen": -6.000879764556885, "logits/rejected": -6.061623573303223, "logps/chosen": -382.0575256347656, "logps/rejected": -206.25714111328125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 4.08866024017334, "rewards/margins": 8.760580062866211, "rewards/rejected": -4.6719207763671875, "step": 1976 }, { "epoch": 1.1, "learning_rate": 4.4547032894034104e-07, "logits/chosen": -6.0701904296875, "logits/rejected": -6.010223388671875, "logps/chosen": -202.16189575195312, "logps/rejected": -137.25250244140625, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": 5.390490531921387, "rewards/margins": 11.877644538879395, "rewards/rejected": -6.487153053283691, "step": 1977 }, { "epoch": 1.1, "learning_rate": 4.450233350506835e-07, "logits/chosen": -6.072836875915527, "logits/rejected": -5.977201461791992, "logps/chosen": -241.59677124023438, "logps/rejected": -173.3630828857422, "loss": 0.015, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8214526176452637, "rewards/margins": 10.590309143066406, "rewards/rejected": -6.768857002258301, "step": 1978 }, { "epoch": 1.1, "learning_rate": 4.4457638563243947e-07, "logits/chosen": -5.924408912658691, "logits/rejected": -5.9441022872924805, "logps/chosen": -300.5652160644531, "logps/rejected": -234.86026000976562, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 5.088561534881592, "rewards/margins": 9.725373268127441, "rewards/rejected": -4.636811256408691, "step": 1979 }, { "epoch": 1.1, "learning_rate": 4.441294810471531e-07, "logits/chosen": -5.949326515197754, "logits/rejected": -6.16370153427124, "logps/chosen": -225.33645629882812, "logps/rejected": -371.59613037109375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 2.637359380722046, "rewards/margins": 12.347626686096191, "rewards/rejected": -9.710267066955566, "step": 1980 }, { "epoch": 1.1, "learning_rate": 4.436826216563318e-07, "logits/chosen": -5.929574012756348, "logits/rejected": -6.060392379760742, "logps/chosen": -242.0503387451172, "logps/rejected": -200.03970336914062, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 3.7222321033477783, "rewards/margins": 12.975643157958984, "rewards/rejected": -9.253412246704102, "step": 1981 }, { "epoch": 1.1, "learning_rate": 4.432358078214464e-07, "logits/chosen": -6.068131446838379, "logits/rejected": -5.953961372375488, "logps/chosen": -165.44215393066406, "logps/rejected": -110.60714721679688, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 2.265669584274292, "rewards/margins": 8.041644096374512, "rewards/rejected": -5.775974273681641, "step": 1982 }, { "epoch": 1.1, "learning_rate": 4.4278903990393124e-07, "logits/chosen": -6.091263771057129, "logits/rejected": -5.904436111450195, "logps/chosen": -268.2107238769531, "logps/rejected": -126.76972198486328, "loss": 0.0223, "rewards/accuracies": 0.9375, "rewards/chosen": 4.879786491394043, "rewards/margins": 9.45283317565918, "rewards/rejected": -4.573046684265137, "step": 1983 }, { "epoch": 1.1, "learning_rate": 4.423423182651831e-07, "logits/chosen": -6.016658782958984, "logits/rejected": -6.048752784729004, "logps/chosen": -298.93927001953125, "logps/rejected": -238.31744384765625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 4.208688259124756, "rewards/margins": 11.045614242553711, "rewards/rejected": -6.836925983428955, "step": 1984 }, { "epoch": 1.1, "learning_rate": 4.418956432665618e-07, "logits/chosen": -5.953249931335449, "logits/rejected": -5.970922946929932, "logps/chosen": -279.29998779296875, "logps/rejected": -179.7772674560547, "loss": 0.1877, "rewards/accuracies": 1.0, "rewards/chosen": 5.703009605407715, "rewards/margins": 9.883058547973633, "rewards/rejected": -4.180049896240234, "step": 1985 }, { "epoch": 1.1, "learning_rate": 4.414490152693888e-07, "logits/chosen": -6.078096866607666, "logits/rejected": -6.071434020996094, "logps/chosen": -263.70233154296875, "logps/rejected": -257.4645080566406, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 3.004648208618164, "rewards/margins": 11.969795227050781, "rewards/rejected": -8.965147018432617, "step": 1986 }, { "epoch": 1.1, "learning_rate": 4.4100243463494837e-07, "logits/chosen": -5.985021114349365, "logits/rejected": -6.015590667724609, "logps/chosen": -317.9515380859375, "logps/rejected": -249.070556640625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 4.218674182891846, "rewards/margins": 11.060633659362793, "rewards/rejected": -6.841959476470947, "step": 1987 }, { "epoch": 1.1, "learning_rate": 4.405559017244856e-07, "logits/chosen": -6.1741623878479, "logits/rejected": -5.966325759887695, "logps/chosen": -223.34042358398438, "logps/rejected": -96.24006652832031, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 5.407529354095459, "rewards/margins": 10.869564056396484, "rewards/rejected": -5.462033748626709, "step": 1988 }, { "epoch": 1.1, "learning_rate": 4.401094168992075e-07, "logits/chosen": -6.136321067810059, "logits/rejected": -6.095383644104004, "logps/chosen": -321.111083984375, "logps/rejected": -184.60496520996094, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": 5.117707252502441, "rewards/margins": 10.528228759765625, "rewards/rejected": -5.410521030426025, "step": 1989 }, { "epoch": 1.1, "learning_rate": 4.3966298052028206e-07, "logits/chosen": -5.961550712585449, "logits/rejected": -6.251178741455078, "logps/chosen": -228.5654296875, "logps/rejected": -245.86642456054688, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 5.785643100738525, "rewards/margins": 11.734521865844727, "rewards/rejected": -5.948879241943359, "step": 1990 }, { "epoch": 1.11, "learning_rate": 4.3921659294883806e-07, "logits/chosen": -5.92824649810791, "logits/rejected": -6.099187850952148, "logps/chosen": -327.36407470703125, "logps/rejected": -342.4981689453125, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": 5.633353233337402, "rewards/margins": 13.07844352722168, "rewards/rejected": -7.445090293884277, "step": 1991 }, { "epoch": 1.11, "learning_rate": 4.387702545459649e-07, "logits/chosen": -5.996705055236816, "logits/rejected": -5.993683815002441, "logps/chosen": -241.01840209960938, "logps/rejected": -224.63015747070312, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 3.3702502250671387, "rewards/margins": 10.334994316101074, "rewards/rejected": -6.964743614196777, "step": 1992 }, { "epoch": 1.11, "learning_rate": 4.383239656727119e-07, "logits/chosen": -6.023403644561768, "logits/rejected": -6.033566474914551, "logps/chosen": -246.241943359375, "logps/rejected": -192.1588897705078, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 3.1791467666625977, "rewards/margins": 9.94428825378418, "rewards/rejected": -6.76514196395874, "step": 1993 }, { "epoch": 1.11, "learning_rate": 4.3787772669008887e-07, "logits/chosen": -6.019256591796875, "logits/rejected": -5.9605841636657715, "logps/chosen": -277.6598205566406, "logps/rejected": -186.84567260742188, "loss": 0.0416, "rewards/accuracies": 0.9375, "rewards/chosen": 4.0241241455078125, "rewards/margins": 8.73667049407959, "rewards/rejected": -4.712546348571777, "step": 1994 }, { "epoch": 1.11, "learning_rate": 4.3743153795906484e-07, "logits/chosen": -6.020019054412842, "logits/rejected": -6.0138630867004395, "logps/chosen": -276.8045654296875, "logps/rejected": -175.0975341796875, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": 4.423821449279785, "rewards/margins": 9.569171905517578, "rewards/rejected": -5.145350933074951, "step": 1995 }, { "epoch": 1.11, "learning_rate": 4.3698539984056795e-07, "logits/chosen": -6.062546253204346, "logits/rejected": -5.954237937927246, "logps/chosen": -195.22523498535156, "logps/rejected": -149.30735778808594, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 3.231731653213501, "rewards/margins": 10.829689025878906, "rewards/rejected": -7.597958087921143, "step": 1996 }, { "epoch": 1.11, "learning_rate": 4.365393126954862e-07, "logits/chosen": -6.131132125854492, "logits/rejected": -6.102544784545898, "logps/chosen": -280.1304626464844, "logps/rejected": -272.94647216796875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 4.587507247924805, "rewards/margins": 12.4299955368042, "rewards/rejected": -7.842487812042236, "step": 1997 }, { "epoch": 1.11, "learning_rate": 4.360932768846654e-07, "logits/chosen": -6.04046106338501, "logits/rejected": -5.9525651931762695, "logps/chosen": -317.8210144042969, "logps/rejected": -191.3002166748047, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 5.583152770996094, "rewards/margins": 10.215570449829102, "rewards/rejected": -4.632417678833008, "step": 1998 }, { "epoch": 1.11, "learning_rate": 4.3564729276891087e-07, "logits/chosen": -6.00811243057251, "logits/rejected": -5.980976581573486, "logps/chosen": -359.52825927734375, "logps/rejected": -192.0741729736328, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 5.372524261474609, "rewards/margins": 11.511788368225098, "rewards/rejected": -6.139263153076172, "step": 1999 }, { "epoch": 1.11, "learning_rate": 4.3520136070898515e-07, "logits/chosen": -5.983983516693115, "logits/rejected": -5.97285795211792, "logps/chosen": -324.62762451171875, "logps/rejected": -220.10458374023438, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 6.432916164398193, "rewards/margins": 11.69717788696289, "rewards/rejected": -5.2642621994018555, "step": 2000 }, { "epoch": 1.11, "learning_rate": 4.347554810656093e-07, "logits/chosen": -6.103461265563965, "logits/rejected": -6.044883728027344, "logps/chosen": -237.41680908203125, "logps/rejected": -183.40867614746094, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 3.7783308029174805, "rewards/margins": 10.529134750366211, "rewards/rejected": -6.7508039474487305, "step": 2001 }, { "epoch": 1.11, "learning_rate": 4.3430965419946164e-07, "logits/chosen": -6.042598724365234, "logits/rejected": -6.014400005340576, "logps/chosen": -299.2994689941406, "logps/rejected": -171.672119140625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 4.682945251464844, "rewards/margins": 10.1018648147583, "rewards/rejected": -5.418919563293457, "step": 2002 }, { "epoch": 1.11, "learning_rate": 4.338638804711779e-07, "logits/chosen": -6.041474342346191, "logits/rejected": -6.085263729095459, "logps/chosen": -239.0155487060547, "logps/rejected": -286.31884765625, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 4.654528617858887, "rewards/margins": 13.120211601257324, "rewards/rejected": -8.465682983398438, "step": 2003 }, { "epoch": 1.11, "learning_rate": 4.3341816024135105e-07, "logits/chosen": -6.017666339874268, "logits/rejected": -6.0025835037231445, "logps/chosen": -204.54278564453125, "logps/rejected": -152.9380645751953, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": 3.1459968090057373, "rewards/margins": 10.121025085449219, "rewards/rejected": -6.975028038024902, "step": 2004 }, { "epoch": 1.11, "learning_rate": 4.329724938705303e-07, "logits/chosen": -6.06732177734375, "logits/rejected": -6.127131938934326, "logps/chosen": -358.27197265625, "logps/rejected": -245.32383728027344, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 5.762861728668213, "rewards/margins": 12.521610260009766, "rewards/rejected": -6.7587480545043945, "step": 2005 }, { "epoch": 1.11, "learning_rate": 4.3252688171922196e-07, "logits/chosen": -6.008769512176514, "logits/rejected": -5.886867523193359, "logps/chosen": -212.33273315429688, "logps/rejected": -154.54051208496094, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 1.6326277256011963, "rewards/margins": 10.789413452148438, "rewards/rejected": -9.15678596496582, "step": 2006 }, { "epoch": 1.11, "learning_rate": 4.320813241478876e-07, "logits/chosen": -6.152738571166992, "logits/rejected": -6.042823314666748, "logps/chosen": -285.77691650390625, "logps/rejected": -182.0032958984375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 3.1802549362182617, "rewards/margins": 11.356294631958008, "rewards/rejected": -8.176039695739746, "step": 2007 }, { "epoch": 1.11, "learning_rate": 4.316358215169457e-07, "logits/chosen": -6.00032901763916, "logits/rejected": -6.090102672576904, "logps/chosen": -285.8059387207031, "logps/rejected": -291.7701416015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 3.4707112312316895, "rewards/margins": 10.824790954589844, "rewards/rejected": -7.3540802001953125, "step": 2008 }, { "epoch": 1.12, "learning_rate": 4.311903741867693e-07, "logits/chosen": -6.055309295654297, "logits/rejected": -6.063157558441162, "logps/chosen": -302.4688720703125, "logps/rejected": -285.2602844238281, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": 5.699613571166992, "rewards/margins": 10.99528980255127, "rewards/rejected": -5.295676231384277, "step": 2009 }, { "epoch": 1.12, "learning_rate": 4.3074498251768713e-07, "logits/chosen": -6.071086883544922, "logits/rejected": -6.037929534912109, "logps/chosen": -209.42559814453125, "logps/rejected": -245.04893493652344, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 3.0666842460632324, "rewards/margins": 10.962956428527832, "rewards/rejected": -7.8962721824646, "step": 2010 }, { "epoch": 1.12, "learning_rate": 4.3029964686998327e-07, "logits/chosen": -6.101701736450195, "logits/rejected": -6.022207736968994, "logps/chosen": -255.1505889892578, "logps/rejected": -228.27285766601562, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 5.038585186004639, "rewards/margins": 12.127538681030273, "rewards/rejected": -7.088953018188477, "step": 2011 }, { "epoch": 1.12, "learning_rate": 4.2985436760389575e-07, "logits/chosen": -6.059013366699219, "logits/rejected": -6.036291122436523, "logps/chosen": -224.30783081054688, "logps/rejected": -198.31631469726562, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 3.499423027038574, "rewards/margins": 9.056032180786133, "rewards/rejected": -5.556609153747559, "step": 2012 }, { "epoch": 1.12, "learning_rate": 4.2940914507961766e-07, "logits/chosen": -6.042582988739014, "logits/rejected": -6.06395149230957, "logps/chosen": -311.53387451171875, "logps/rejected": -264.802001953125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 4.647558689117432, "rewards/margins": 10.58702278137207, "rewards/rejected": -5.939464569091797, "step": 2013 }, { "epoch": 1.12, "learning_rate": 4.289639796572957e-07, "logits/chosen": -6.029817581176758, "logits/rejected": -6.038044452667236, "logps/chosen": -314.39166259765625, "logps/rejected": -175.6176300048828, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 4.176847457885742, "rewards/margins": 9.829198837280273, "rewards/rejected": -5.652350425720215, "step": 2014 }, { "epoch": 1.12, "learning_rate": 4.285188716970307e-07, "logits/chosen": -5.990592956542969, "logits/rejected": -6.040985107421875, "logps/chosen": -230.08168029785156, "logps/rejected": -219.67929077148438, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": 4.1564812660217285, "rewards/margins": 12.371153831481934, "rewards/rejected": -8.214673042297363, "step": 2015 }, { "epoch": 1.12, "learning_rate": 4.280738215588768e-07, "logits/chosen": -5.985392093658447, "logits/rejected": -6.098077297210693, "logps/chosen": -204.49404907226562, "logps/rejected": -274.3758850097656, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 2.287497043609619, "rewards/margins": 12.256365776062012, "rewards/rejected": -9.96886920928955, "step": 2016 }, { "epoch": 1.12, "learning_rate": 4.276288296028413e-07, "logits/chosen": -6.204521179199219, "logits/rejected": -6.159053802490234, "logps/chosen": -278.74993896484375, "logps/rejected": -193.92514038085938, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 6.149545192718506, "rewards/margins": 10.911575317382812, "rewards/rejected": -4.762031078338623, "step": 2017 }, { "epoch": 1.12, "learning_rate": 4.27183896188885e-07, "logits/chosen": -6.070416450500488, "logits/rejected": -6.033722400665283, "logps/chosen": -190.31442260742188, "logps/rejected": -121.68008422851562, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 2.3849010467529297, "rewards/margins": 10.2301664352417, "rewards/rejected": -7.8452653884887695, "step": 2018 }, { "epoch": 1.12, "learning_rate": 4.267390216769204e-07, "logits/chosen": -5.964534282684326, "logits/rejected": -5.9794840812683105, "logps/chosen": -210.21627807617188, "logps/rejected": -261.7598876953125, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": 3.299388885498047, "rewards/margins": 11.371152877807617, "rewards/rejected": -8.07176399230957, "step": 2019 }, { "epoch": 1.12, "learning_rate": 4.2629420642681334e-07, "logits/chosen": -5.893372535705566, "logits/rejected": -5.9917683601379395, "logps/chosen": -263.4591064453125, "logps/rejected": -167.36216735839844, "loss": 0.0364, "rewards/accuracies": 0.9375, "rewards/chosen": 3.456045627593994, "rewards/margins": 8.220071792602539, "rewards/rejected": -4.764026165008545, "step": 2020 }, { "epoch": 1.12, "learning_rate": 4.258494507983809e-07, "logits/chosen": -5.944472789764404, "logits/rejected": -5.927775859832764, "logps/chosen": -548.4710083007812, "logps/rejected": -334.1579895019531, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": 3.1911027431488037, "rewards/margins": 8.431083679199219, "rewards/rejected": -5.239980697631836, "step": 2021 }, { "epoch": 1.12, "learning_rate": 4.254047551513926e-07, "logits/chosen": -6.066806316375732, "logits/rejected": -6.100927352905273, "logps/chosen": -227.56976318359375, "logps/rejected": -368.60205078125, "loss": 0.0805, "rewards/accuracies": 1.0, "rewards/chosen": 3.49080228805542, "rewards/margins": 13.024026870727539, "rewards/rejected": -9.533224105834961, "step": 2022 }, { "epoch": 1.12, "learning_rate": 4.249601198455691e-07, "logits/chosen": -6.119330406188965, "logits/rejected": -6.039960861206055, "logps/chosen": -275.0003662109375, "logps/rejected": -158.16201782226562, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 4.658514976501465, "rewards/margins": 10.805623054504395, "rewards/rejected": -6.14710807800293, "step": 2023 }, { "epoch": 1.12, "learning_rate": 4.245155452405819e-07, "logits/chosen": -6.02893590927124, "logits/rejected": -5.971807956695557, "logps/chosen": -260.9530029296875, "logps/rejected": -203.27284240722656, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 7.2471160888671875, "rewards/margins": 11.936620712280273, "rewards/rejected": -4.689504623413086, "step": 2024 }, { "epoch": 1.12, "learning_rate": 4.240710316960543e-07, "logits/chosen": -6.090381145477295, "logits/rejected": -6.004976272583008, "logps/chosen": -307.0580139160156, "logps/rejected": -128.31332397460938, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 5.699338912963867, "rewards/margins": 12.18565559387207, "rewards/rejected": -6.486316680908203, "step": 2025 }, { "epoch": 1.12, "learning_rate": 4.2362657957155927e-07, "logits/chosen": -6.035431861877441, "logits/rejected": -5.94258451461792, "logps/chosen": -324.33758544921875, "logps/rejected": -267.56201171875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 6.190048694610596, "rewards/margins": 14.765637397766113, "rewards/rejected": -8.57558822631836, "step": 2026 }, { "epoch": 1.13, "learning_rate": 4.2318218922662097e-07, "logits/chosen": -6.045996189117432, "logits/rejected": -5.971231937408447, "logps/chosen": -267.21099853515625, "logps/rejected": -301.4332580566406, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 3.4140377044677734, "rewards/margins": 11.173334121704102, "rewards/rejected": -7.759296894073486, "step": 2027 }, { "epoch": 1.13, "learning_rate": 4.227378610207127e-07, "logits/chosen": -6.125516891479492, "logits/rejected": -5.998752593994141, "logps/chosen": -229.6411590576172, "logps/rejected": -244.4157257080078, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": 2.5532987117767334, "rewards/margins": 10.277143478393555, "rewards/rejected": -7.723845481872559, "step": 2028 }, { "epoch": 1.13, "learning_rate": 4.2229359531325806e-07, "logits/chosen": -6.025758266448975, "logits/rejected": -6.069386959075928, "logps/chosen": -246.12020874023438, "logps/rejected": -199.78126525878906, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 2.2512807846069336, "rewards/margins": 8.091848373413086, "rewards/rejected": -5.8405680656433105, "step": 2029 }, { "epoch": 1.13, "learning_rate": 4.2184939246363005e-07, "logits/chosen": -6.0854811668396, "logits/rejected": -6.011920928955078, "logps/chosen": -271.8483581542969, "logps/rejected": -252.21812438964844, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 2.7042505741119385, "rewards/margins": 12.076672554016113, "rewards/rejected": -9.37242317199707, "step": 2030 }, { "epoch": 1.13, "learning_rate": 4.2140525283115054e-07, "logits/chosen": -5.919516563415527, "logits/rejected": -5.9343109130859375, "logps/chosen": -415.84478759765625, "logps/rejected": -136.69581604003906, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 3.6688547134399414, "rewards/margins": 8.908442497253418, "rewards/rejected": -5.239587306976318, "step": 2031 }, { "epoch": 1.13, "learning_rate": 4.2096117677509074e-07, "logits/chosen": -6.114212989807129, "logits/rejected": -6.087369441986084, "logps/chosen": -313.8937683105469, "logps/rejected": -196.23245239257812, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 4.28455924987793, "rewards/margins": 10.160086631774902, "rewards/rejected": -5.8755269050598145, "step": 2032 }, { "epoch": 1.13, "learning_rate": 4.2051716465466985e-07, "logits/chosen": -6.047153949737549, "logits/rejected": -6.054474353790283, "logps/chosen": -250.64080810546875, "logps/rejected": -159.9251708984375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 5.560901641845703, "rewards/margins": 9.804487228393555, "rewards/rejected": -4.243585586547852, "step": 2033 }, { "epoch": 1.13, "learning_rate": 4.2007321682905594e-07, "logits/chosen": -6.074378490447998, "logits/rejected": -5.930115699768066, "logps/chosen": -189.92184448242188, "logps/rejected": -217.4199676513672, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 2.1455562114715576, "rewards/margins": 11.434778213500977, "rewards/rejected": -9.289222717285156, "step": 2034 }, { "epoch": 1.13, "learning_rate": 4.1962933365736477e-07, "logits/chosen": -6.098061561584473, "logits/rejected": -6.148648262023926, "logps/chosen": -286.91796875, "logps/rejected": -337.2796325683594, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 5.174078464508057, "rewards/margins": 12.701963424682617, "rewards/rejected": -7.527884483337402, "step": 2035 }, { "epoch": 1.13, "learning_rate": 4.191855154986593e-07, "logits/chosen": -6.033214092254639, "logits/rejected": -6.085608959197998, "logps/chosen": -229.3629150390625, "logps/rejected": -271.34869384765625, "loss": 0.0438, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5188567638397217, "rewards/margins": 10.466476440429688, "rewards/rejected": -7.94761848449707, "step": 2036 }, { "epoch": 1.13, "learning_rate": 4.1874176271195103e-07, "logits/chosen": -6.008803844451904, "logits/rejected": -6.097605228424072, "logps/chosen": -239.84457397460938, "logps/rejected": -273.1845397949219, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 4.392939567565918, "rewards/margins": 11.30364990234375, "rewards/rejected": -6.910709857940674, "step": 2037 }, { "epoch": 1.13, "learning_rate": 4.182980756561976e-07, "logits/chosen": -6.170996189117432, "logits/rejected": -6.038454532623291, "logps/chosen": -243.7645263671875, "logps/rejected": -137.65972900390625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 4.972392559051514, "rewards/margins": 11.306708335876465, "rewards/rejected": -6.334315299987793, "step": 2038 }, { "epoch": 1.13, "learning_rate": 4.178544546903041e-07, "logits/chosen": -6.089956760406494, "logits/rejected": -6.052335262298584, "logps/chosen": -200.41065979003906, "logps/rejected": -229.99610900878906, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 4.592212677001953, "rewards/margins": 11.491279602050781, "rewards/rejected": -6.899066925048828, "step": 2039 }, { "epoch": 1.13, "learning_rate": 4.1741090017312164e-07, "logits/chosen": -6.055412292480469, "logits/rejected": -6.031351089477539, "logps/chosen": -299.7844543457031, "logps/rejected": -261.59832763671875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 2.5254383087158203, "rewards/margins": 10.559054374694824, "rewards/rejected": -8.033616065979004, "step": 2040 }, { "epoch": 1.13, "learning_rate": 4.169674124634481e-07, "logits/chosen": -6.141879558563232, "logits/rejected": -6.087100505828857, "logps/chosen": -284.4850769042969, "logps/rejected": -210.48365783691406, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 2.7213051319122314, "rewards/margins": 11.1030912399292, "rewards/rejected": -8.38178539276123, "step": 2041 }, { "epoch": 1.13, "learning_rate": 4.1652399192002686e-07, "logits/chosen": -6.033472061157227, "logits/rejected": -6.028627395629883, "logps/chosen": -284.1005859375, "logps/rejected": -192.98428344726562, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": 4.275796890258789, "rewards/margins": 10.364389419555664, "rewards/rejected": -6.088592529296875, "step": 2042 }, { "epoch": 1.13, "learning_rate": 4.160806389015471e-07, "logits/chosen": -5.982165336608887, "logits/rejected": -5.928549766540527, "logps/chosen": -292.4010009765625, "logps/rejected": -150.40965270996094, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 3.999448776245117, "rewards/margins": 9.75323486328125, "rewards/rejected": -5.753787040710449, "step": 2043 }, { "epoch": 1.13, "learning_rate": 4.1563735376664366e-07, "logits/chosen": -5.99672794342041, "logits/rejected": -5.970532417297363, "logps/chosen": -310.1698913574219, "logps/rejected": -177.8804931640625, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": 5.828517913818359, "rewards/margins": 10.877204895019531, "rewards/rejected": -5.048686504364014, "step": 2044 }, { "epoch": 1.14, "learning_rate": 4.151941368738961e-07, "logits/chosen": -5.991631984710693, "logits/rejected": -5.932295322418213, "logps/chosen": -253.966552734375, "logps/rejected": -317.4151611328125, "loss": 0.0335, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9570677280426025, "rewards/margins": 9.696828842163086, "rewards/rejected": -7.7397613525390625, "step": 2045 }, { "epoch": 1.14, "learning_rate": 4.1475098858182917e-07, "logits/chosen": -5.9948954582214355, "logits/rejected": -5.969392776489258, "logps/chosen": -405.08251953125, "logps/rejected": -280.8932800292969, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 1.7119115591049194, "rewards/margins": 8.941719055175781, "rewards/rejected": -7.229807376861572, "step": 2046 }, { "epoch": 1.14, "learning_rate": 4.143079092489114e-07, "logits/chosen": -6.038480758666992, "logits/rejected": -5.883029937744141, "logps/chosen": -279.6312255859375, "logps/rejected": -175.98599243164062, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 4.086835861206055, "rewards/margins": 12.039911270141602, "rewards/rejected": -7.953076362609863, "step": 2047 }, { "epoch": 1.14, "learning_rate": 4.138648992335566e-07, "logits/chosen": -6.061066627502441, "logits/rejected": -6.06434440612793, "logps/chosen": -339.742919921875, "logps/rejected": -206.82760620117188, "loss": 0.0237, "rewards/accuracies": 0.9375, "rewards/chosen": 6.53367280960083, "rewards/margins": 12.299005508422852, "rewards/rejected": -5.7653326988220215, "step": 2048 }, { "epoch": 1.14, "learning_rate": 4.1342195889412164e-07, "logits/chosen": -6.046993255615234, "logits/rejected": -5.963727951049805, "logps/chosen": -418.5528564453125, "logps/rejected": -213.4547882080078, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 5.242633819580078, "rewards/margins": 10.729639053344727, "rewards/rejected": -5.487005233764648, "step": 2049 }, { "epoch": 1.14, "learning_rate": 4.129790885889072e-07, "logits/chosen": -5.9244818687438965, "logits/rejected": -5.954813003540039, "logps/chosen": -180.8475341796875, "logps/rejected": -202.22006225585938, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 2.5861916542053223, "rewards/margins": 10.855627059936523, "rewards/rejected": -8.26943588256836, "step": 2050 }, { "epoch": 1.14, "learning_rate": 4.1253628867615765e-07, "logits/chosen": -6.040879249572754, "logits/rejected": -6.049322605133057, "logps/chosen": -312.24822998046875, "logps/rejected": -216.06930541992188, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 5.693780422210693, "rewards/margins": 12.706254959106445, "rewards/rejected": -7.01247501373291, "step": 2051 }, { "epoch": 1.14, "learning_rate": 4.1209355951406e-07, "logits/chosen": -6.023768424987793, "logits/rejected": -6.017282009124756, "logps/chosen": -240.32757568359375, "logps/rejected": -226.4123992919922, "loss": 0.0708, "rewards/accuracies": 0.9375, "rewards/chosen": 3.167832612991333, "rewards/margins": 11.36423397064209, "rewards/rejected": -8.196401596069336, "step": 2052 }, { "epoch": 1.14, "learning_rate": 4.116509014607444e-07, "logits/chosen": -6.0305891036987305, "logits/rejected": -5.907188415527344, "logps/chosen": -270.0105895996094, "logps/rejected": -216.22341918945312, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 5.249234199523926, "rewards/margins": 11.692131042480469, "rewards/rejected": -6.442896366119385, "step": 2053 }, { "epoch": 1.14, "learning_rate": 4.1120831487428323e-07, "logits/chosen": -6.036801815032959, "logits/rejected": -6.127948760986328, "logps/chosen": -205.0321807861328, "logps/rejected": -267.8775329589844, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 2.8711206912994385, "rewards/margins": 10.730396270751953, "rewards/rejected": -7.8592753410339355, "step": 2054 }, { "epoch": 1.14, "learning_rate": 4.1076580011269125e-07, "logits/chosen": -6.066061019897461, "logits/rejected": -6.083137512207031, "logps/chosen": -313.563232421875, "logps/rejected": -239.64906311035156, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 6.740576267242432, "rewards/margins": 13.16943645477295, "rewards/rejected": -6.428860664367676, "step": 2055 }, { "epoch": 1.14, "learning_rate": 4.10323357533925e-07, "logits/chosen": -6.147105693817139, "logits/rejected": -6.0543694496154785, "logps/chosen": -267.075439453125, "logps/rejected": -172.44000244140625, "loss": 0.1085, "rewards/accuracies": 1.0, "rewards/chosen": 6.055104732513428, "rewards/margins": 12.938070297241211, "rewards/rejected": -6.882965087890625, "step": 2056 }, { "epoch": 1.14, "learning_rate": 4.098809874958822e-07, "logits/chosen": -6.112576007843018, "logits/rejected": -6.008731842041016, "logps/chosen": -274.197509765625, "logps/rejected": -173.66000366210938, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": 3.6656885147094727, "rewards/margins": 11.632467269897461, "rewards/rejected": -7.966778755187988, "step": 2057 }, { "epoch": 1.14, "learning_rate": 4.09438690356403e-07, "logits/chosen": -6.007561683654785, "logits/rejected": -5.973101615905762, "logps/chosen": -182.46047973632812, "logps/rejected": -255.45191955566406, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 2.943829298019409, "rewards/margins": 11.431533813476562, "rewards/rejected": -8.487704277038574, "step": 2058 }, { "epoch": 1.14, "learning_rate": 4.0899646647326734e-07, "logits/chosen": -6.1134514808654785, "logits/rejected": -6.029565334320068, "logps/chosen": -262.4852294921875, "logps/rejected": -254.86697387695312, "loss": 0.0403, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1521503925323486, "rewards/margins": 10.19991683959961, "rewards/rejected": -8.04776668548584, "step": 2059 }, { "epoch": 1.14, "learning_rate": 4.085543162041969e-07, "logits/chosen": -5.953845500946045, "logits/rejected": -5.966691970825195, "logps/chosen": -330.2265319824219, "logps/rejected": -141.8565673828125, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": 4.222141265869141, "rewards/margins": 8.827554702758789, "rewards/rejected": -4.605413913726807, "step": 2060 }, { "epoch": 1.14, "learning_rate": 4.08112239906853e-07, "logits/chosen": -6.030209541320801, "logits/rejected": -5.928686618804932, "logps/chosen": -251.14373779296875, "logps/rejected": -158.30323791503906, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 2.414459228515625, "rewards/margins": 10.17712116241455, "rewards/rejected": -7.762661933898926, "step": 2061 }, { "epoch": 1.14, "learning_rate": 4.0767023793883783e-07, "logits/chosen": -5.97909688949585, "logits/rejected": -6.026049613952637, "logps/chosen": -218.78952026367188, "logps/rejected": -245.12078857421875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 2.7437782287597656, "rewards/margins": 10.032120704650879, "rewards/rejected": -7.288342475891113, "step": 2062 }, { "epoch": 1.15, "learning_rate": 4.0722831065769296e-07, "logits/chosen": -5.89067268371582, "logits/rejected": -5.923644065856934, "logps/chosen": -509.53021240234375, "logps/rejected": -298.56732177734375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 6.081277847290039, "rewards/margins": 11.798187255859375, "rewards/rejected": -5.716910362243652, "step": 2063 }, { "epoch": 1.15, "learning_rate": 4.0678645842089964e-07, "logits/chosen": -6.066727161407471, "logits/rejected": -6.061819553375244, "logps/chosen": -305.425537109375, "logps/rejected": -220.41928100585938, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 5.68057107925415, "rewards/margins": 12.328773498535156, "rewards/rejected": -6.648202419281006, "step": 2064 }, { "epoch": 1.15, "learning_rate": 4.063446815858788e-07, "logits/chosen": -5.958450794219971, "logits/rejected": -6.081603050231934, "logps/chosen": -289.33636474609375, "logps/rejected": -205.66323852539062, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 5.220637798309326, "rewards/margins": 9.80769157409668, "rewards/rejected": -4.587053298950195, "step": 2065 }, { "epoch": 1.15, "learning_rate": 4.059029805099896e-07, "logits/chosen": -6.056000709533691, "logits/rejected": -6.012218475341797, "logps/chosen": -272.5361328125, "logps/rejected": -209.93601989746094, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": 3.047886371612549, "rewards/margins": 10.71495532989502, "rewards/rejected": -7.667069435119629, "step": 2066 }, { "epoch": 1.15, "learning_rate": 4.0546135555053094e-07, "logits/chosen": -6.056440353393555, "logits/rejected": -5.945173740386963, "logps/chosen": -382.97198486328125, "logps/rejected": -226.975341796875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 5.494094371795654, "rewards/margins": 11.800613403320312, "rewards/rejected": -6.306519031524658, "step": 2067 }, { "epoch": 1.15, "learning_rate": 4.05019807064739e-07, "logits/chosen": -6.015727996826172, "logits/rejected": -6.172430038452148, "logps/chosen": -266.19818115234375, "logps/rejected": -260.5086669921875, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": 3.6451807022094727, "rewards/margins": 11.998016357421875, "rewards/rejected": -8.352836608886719, "step": 2068 }, { "epoch": 1.15, "learning_rate": 4.0457833540978923e-07, "logits/chosen": -6.07535457611084, "logits/rejected": -6.028608798980713, "logps/chosen": -215.548583984375, "logps/rejected": -189.10545349121094, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/chosen": 2.195411205291748, "rewards/margins": 8.532471656799316, "rewards/rejected": -6.337060928344727, "step": 2069 }, { "epoch": 1.15, "learning_rate": 4.0413694094279403e-07, "logits/chosen": -5.983251571655273, "logits/rejected": -6.045894622802734, "logps/chosen": -225.62998962402344, "logps/rejected": -305.0858154296875, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 0.9301243424415588, "rewards/margins": 10.805978775024414, "rewards/rejected": -9.875853538513184, "step": 2070 }, { "epoch": 1.15, "learning_rate": 4.036956240208038e-07, "logits/chosen": -5.993923187255859, "logits/rejected": -5.966887950897217, "logps/chosen": -304.8023986816406, "logps/rejected": -151.41213989257812, "loss": 0.0375, "rewards/accuracies": 0.9375, "rewards/chosen": 7.572530269622803, "rewards/margins": 11.136680603027344, "rewards/rejected": -3.564150810241699, "step": 2071 }, { "epoch": 1.15, "learning_rate": 4.0325438500080626e-07, "logits/chosen": -6.037890911102295, "logits/rejected": -5.978332042694092, "logps/chosen": -271.0095520019531, "logps/rejected": -154.25950622558594, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 6.681291103363037, "rewards/margins": 13.264412879943848, "rewards/rejected": -6.583121299743652, "step": 2072 }, { "epoch": 1.15, "learning_rate": 4.028132242397258e-07, "logits/chosen": -6.082786560058594, "logits/rejected": -5.96268892288208, "logps/chosen": -178.65225219726562, "logps/rejected": -122.4891357421875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 2.6421730518341064, "rewards/margins": 9.594867706298828, "rewards/rejected": -6.952694892883301, "step": 2073 }, { "epoch": 1.15, "learning_rate": 4.023721420944239e-07, "logits/chosen": -6.092837333679199, "logits/rejected": -6.024293899536133, "logps/chosen": -266.0078125, "logps/rejected": -127.36415100097656, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": 6.05551815032959, "rewards/margins": 11.391505241394043, "rewards/rejected": -5.335987091064453, "step": 2074 }, { "epoch": 1.15, "learning_rate": 4.01931138921698e-07, "logits/chosen": -6.121530532836914, "logits/rejected": -6.055596828460693, "logps/chosen": -250.1311798095703, "logps/rejected": -273.9803161621094, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": 2.8460371494293213, "rewards/margins": 12.935344696044922, "rewards/rejected": -10.089306831359863, "step": 2075 }, { "epoch": 1.15, "learning_rate": 4.0149021507828217e-07, "logits/chosen": -5.971124172210693, "logits/rejected": -5.936243057250977, "logps/chosen": -244.6016845703125, "logps/rejected": -175.1747589111328, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 3.986623764038086, "rewards/margins": 10.547805786132812, "rewards/rejected": -6.561182022094727, "step": 2076 }, { "epoch": 1.15, "learning_rate": 4.01049370920846e-07, "logits/chosen": -6.041568279266357, "logits/rejected": -5.890645980834961, "logps/chosen": -331.1547546386719, "logps/rejected": -123.53160095214844, "loss": 0.0484, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1638360023498535, "rewards/margins": 9.544233322143555, "rewards/rejected": -6.380397796630859, "step": 2077 }, { "epoch": 1.15, "learning_rate": 4.006086068059942e-07, "logits/chosen": -6.1226582527160645, "logits/rejected": -6.033687591552734, "logps/chosen": -197.2452850341797, "logps/rejected": -207.03494262695312, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 2.8314223289489746, "rewards/margins": 8.534809112548828, "rewards/rejected": -5.703387260437012, "step": 2078 }, { "epoch": 1.15, "learning_rate": 4.00167923090268e-07, "logits/chosen": -6.071294784545898, "logits/rejected": -6.092892646789551, "logps/chosen": -326.83197021484375, "logps/rejected": -260.464599609375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 3.860232353210449, "rewards/margins": 12.522607803344727, "rewards/rejected": -8.662374496459961, "step": 2079 }, { "epoch": 1.15, "learning_rate": 3.9972732013014197e-07, "logits/chosen": -5.986355781555176, "logits/rejected": -6.0840935707092285, "logps/chosen": -395.5187072753906, "logps/rejected": -327.07623291015625, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": 5.29862117767334, "rewards/margins": 11.89752197265625, "rewards/rejected": -6.59890079498291, "step": 2080 }, { "epoch": 1.16, "learning_rate": 3.9928679828202677e-07, "logits/chosen": -6.040380954742432, "logits/rejected": -6.036853790283203, "logps/chosen": -230.88365173339844, "logps/rejected": -261.0192565917969, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": 2.240626096725464, "rewards/margins": 10.418328285217285, "rewards/rejected": -8.177701950073242, "step": 2081 }, { "epoch": 1.16, "learning_rate": 3.9884635790226656e-07, "logits/chosen": -5.9692463874816895, "logits/rejected": -6.09443998336792, "logps/chosen": -329.2596130371094, "logps/rejected": -302.78961181640625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 5.0312042236328125, "rewards/margins": 12.640283584594727, "rewards/rejected": -7.609081268310547, "step": 2082 }, { "epoch": 1.16, "learning_rate": 3.9840599934713985e-07, "logits/chosen": -6.020245552062988, "logits/rejected": -5.934515476226807, "logps/chosen": -383.1236572265625, "logps/rejected": -335.6385192871094, "loss": 0.0966, "rewards/accuracies": 0.875, "rewards/chosen": 5.368155479431152, "rewards/margins": 10.067987442016602, "rewards/rejected": -4.699831962585449, "step": 2083 }, { "epoch": 1.16, "learning_rate": 3.979657229728592e-07, "logits/chosen": -5.949087619781494, "logits/rejected": -5.9799299240112305, "logps/chosen": -447.13153076171875, "logps/rejected": -269.6925354003906, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 6.216032028198242, "rewards/margins": 13.809871673583984, "rewards/rejected": -7.5938401222229, "step": 2084 }, { "epoch": 1.16, "learning_rate": 3.975255291355703e-07, "logits/chosen": -6.1535234451293945, "logits/rejected": -5.974637031555176, "logps/chosen": -221.18179321289062, "logps/rejected": -133.10757446289062, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 3.0331859588623047, "rewards/margins": 9.33405590057373, "rewards/rejected": -6.300869941711426, "step": 2085 }, { "epoch": 1.16, "learning_rate": 3.9708541819135254e-07, "logits/chosen": -5.982646942138672, "logits/rejected": -6.09426212310791, "logps/chosen": -210.9851531982422, "logps/rejected": -180.91622924804688, "loss": 0.0239, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4405202865600586, "rewards/margins": 10.70834732055664, "rewards/rejected": -7.26782751083374, "step": 2086 }, { "epoch": 1.16, "learning_rate": 3.966453904962175e-07, "logits/chosen": -6.029088497161865, "logits/rejected": -6.018563270568848, "logps/chosen": -237.32008361816406, "logps/rejected": -202.34283447265625, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": 4.088081359863281, "rewards/margins": 12.370930671691895, "rewards/rejected": -8.28285026550293, "step": 2087 }, { "epoch": 1.16, "learning_rate": 3.9620544640611055e-07, "logits/chosen": -6.063943386077881, "logits/rejected": -6.024956703186035, "logps/chosen": -316.806884765625, "logps/rejected": -146.32009887695312, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 5.003572940826416, "rewards/margins": 10.76974868774414, "rewards/rejected": -5.766175270080566, "step": 2088 }, { "epoch": 1.16, "learning_rate": 3.957655862769083e-07, "logits/chosen": -6.000597953796387, "logits/rejected": -6.023066997528076, "logps/chosen": -231.7674102783203, "logps/rejected": -151.26828002929688, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 4.589715480804443, "rewards/margins": 11.579895973205566, "rewards/rejected": -6.990180015563965, "step": 2089 }, { "epoch": 1.16, "learning_rate": 3.9532581046441993e-07, "logits/chosen": -6.078149318695068, "logits/rejected": -6.102599143981934, "logps/chosen": -287.11102294921875, "logps/rejected": -308.103759765625, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": 2.9926788806915283, "rewards/margins": 10.673810958862305, "rewards/rejected": -7.681131362915039, "step": 2090 }, { "epoch": 1.16, "learning_rate": 3.9488611932438663e-07, "logits/chosen": -6.010808944702148, "logits/rejected": -6.038384437561035, "logps/chosen": -283.6156005859375, "logps/rejected": -264.74383544921875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 6.010136127471924, "rewards/margins": 12.715678215026855, "rewards/rejected": -6.705542087554932, "step": 2091 }, { "epoch": 1.16, "learning_rate": 3.944465132124807e-07, "logits/chosen": -6.079892158508301, "logits/rejected": -6.246397495269775, "logps/chosen": -298.9991760253906, "logps/rejected": -268.6226806640625, "loss": 0.0506, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8461897373199463, "rewards/margins": 10.80710220336914, "rewards/rejected": -7.960911750793457, "step": 2092 }, { "epoch": 1.16, "learning_rate": 3.94006992484306e-07, "logits/chosen": -5.97189998626709, "logits/rejected": -6.03511381149292, "logps/chosen": -218.32130432128906, "logps/rejected": -190.2774658203125, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": 1.9576842784881592, "rewards/margins": 9.26318359375, "rewards/rejected": -7.305500030517578, "step": 2093 }, { "epoch": 1.16, "learning_rate": 3.9356755749539694e-07, "logits/chosen": -6.057982921600342, "logits/rejected": -5.968610763549805, "logps/chosen": -228.9237060546875, "logps/rejected": -179.15512084960938, "loss": 0.0296, "rewards/accuracies": 0.9375, "rewards/chosen": 2.16829776763916, "rewards/margins": 10.164047241210938, "rewards/rejected": -7.995749473571777, "step": 2094 }, { "epoch": 1.16, "learning_rate": 3.931282086012191e-07, "logits/chosen": -5.994572639465332, "logits/rejected": -6.149031639099121, "logps/chosen": -273.94403076171875, "logps/rejected": -348.58734130859375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 4.925235748291016, "rewards/margins": 14.5109281539917, "rewards/rejected": -9.585691452026367, "step": 2095 }, { "epoch": 1.16, "learning_rate": 3.92688946157168e-07, "logits/chosen": -6.164400577545166, "logits/rejected": -6.1668782234191895, "logps/chosen": -350.11553955078125, "logps/rejected": -314.304443359375, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": 5.363129615783691, "rewards/margins": 15.225841522216797, "rewards/rejected": -9.862711906433105, "step": 2096 }, { "epoch": 1.16, "learning_rate": 3.92249770518569e-07, "logits/chosen": -6.022764682769775, "logits/rejected": -6.105811595916748, "logps/chosen": -163.961181640625, "logps/rejected": -249.37118530273438, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 1.8729504346847534, "rewards/margins": 10.436362266540527, "rewards/rejected": -8.563411712646484, "step": 2097 }, { "epoch": 1.16, "learning_rate": 3.918106820406781e-07, "logits/chosen": -5.939395427703857, "logits/rejected": -6.026051044464111, "logps/chosen": -166.0118865966797, "logps/rejected": -359.3738708496094, "loss": 0.4319, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3671047687530518, "rewards/margins": 6.675649642944336, "rewards/rejected": -4.308544635772705, "step": 2098 }, { "epoch": 1.17, "learning_rate": 3.913716810786799e-07, "logits/chosen": -5.907708644866943, "logits/rejected": -6.002495765686035, "logps/chosen": -179.7222900390625, "logps/rejected": -302.96795654296875, "loss": 0.0551, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7668280601501465, "rewards/margins": 12.42633056640625, "rewards/rejected": -9.659502029418945, "step": 2099 }, { "epoch": 1.17, "learning_rate": 3.9093276798768875e-07, "logits/chosen": -6.224584579467773, "logits/rejected": -6.073891639709473, "logps/chosen": -274.0213928222656, "logps/rejected": -221.62184143066406, "loss": 0.018, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6684281826019287, "rewards/margins": 13.283510208129883, "rewards/rejected": -9.615081787109375, "step": 2100 }, { "epoch": 1.17, "learning_rate": 3.9049394312274763e-07, "logits/chosen": -6.023200035095215, "logits/rejected": -6.089901924133301, "logps/chosen": -236.6409454345703, "logps/rejected": -249.85633850097656, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 2.8698654174804688, "rewards/margins": 10.47277545928955, "rewards/rejected": -7.602910041809082, "step": 2101 }, { "epoch": 1.17, "learning_rate": 3.900552068388285e-07, "logits/chosen": -6.0250678062438965, "logits/rejected": -5.958938121795654, "logps/chosen": -277.26513671875, "logps/rejected": -214.4188995361328, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": 4.15516996383667, "rewards/margins": 10.354037284851074, "rewards/rejected": -6.198867321014404, "step": 2102 }, { "epoch": 1.17, "learning_rate": 3.8961655949083127e-07, "logits/chosen": -5.9609479904174805, "logits/rejected": -5.980653285980225, "logps/chosen": -246.21688842773438, "logps/rejected": -189.89793395996094, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 3.8289988040924072, "rewards/margins": 10.9459228515625, "rewards/rejected": -7.116924285888672, "step": 2103 }, { "epoch": 1.17, "learning_rate": 3.89178001433584e-07, "logits/chosen": -6.176412582397461, "logits/rejected": -5.9506964683532715, "logps/chosen": -355.45977783203125, "logps/rejected": -178.76113891601562, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 4.95888614654541, "rewards/margins": 12.70368766784668, "rewards/rejected": -7.744802474975586, "step": 2104 }, { "epoch": 1.17, "learning_rate": 3.8873953302184283e-07, "logits/chosen": -5.90530252456665, "logits/rejected": -6.0296173095703125, "logps/chosen": -174.26663208007812, "logps/rejected": -309.13800048828125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 1.2945120334625244, "rewards/margins": 10.300224304199219, "rewards/rejected": -9.005712509155273, "step": 2105 }, { "epoch": 1.17, "learning_rate": 3.883011546102909e-07, "logits/chosen": -6.116114616394043, "logits/rejected": -6.061368942260742, "logps/chosen": -439.6474914550781, "logps/rejected": -234.09701538085938, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 5.220036506652832, "rewards/margins": 12.868148803710938, "rewards/rejected": -7.648113250732422, "step": 2106 }, { "epoch": 1.17, "learning_rate": 3.8786286655353916e-07, "logits/chosen": -5.942208290100098, "logits/rejected": -5.927914619445801, "logps/chosen": -237.23110961914062, "logps/rejected": -228.64825439453125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 3.489598274230957, "rewards/margins": 10.940126419067383, "rewards/rejected": -7.450527667999268, "step": 2107 }, { "epoch": 1.17, "learning_rate": 3.874246692061245e-07, "logits/chosen": -5.933018207550049, "logits/rejected": -5.978981018066406, "logps/chosen": -203.8055419921875, "logps/rejected": -154.35113525390625, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 3.6488451957702637, "rewards/margins": 9.516234397888184, "rewards/rejected": -5.867389678955078, "step": 2108 }, { "epoch": 1.17, "learning_rate": 3.8698656292251186e-07, "logits/chosen": -6.112795352935791, "logits/rejected": -6.057438850402832, "logps/chosen": -306.797119140625, "logps/rejected": -237.86138916015625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 4.178532123565674, "rewards/margins": 11.648040771484375, "rewards/rejected": -7.469508647918701, "step": 2109 }, { "epoch": 1.17, "learning_rate": 3.8654854805709114e-07, "logits/chosen": -5.952667236328125, "logits/rejected": -5.936728000640869, "logps/chosen": -262.6867370605469, "logps/rejected": -143.95993041992188, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 4.778227806091309, "rewards/margins": 9.8955717086792, "rewards/rejected": -5.117344379425049, "step": 2110 }, { "epoch": 1.17, "learning_rate": 3.861106249641789e-07, "logits/chosen": -5.9893693923950195, "logits/rejected": -5.932865142822266, "logps/chosen": -331.0829162597656, "logps/rejected": -238.9612274169922, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 4.617718696594238, "rewards/margins": 11.661709785461426, "rewards/rejected": -7.043991565704346, "step": 2111 }, { "epoch": 1.17, "learning_rate": 3.8567279399801753e-07, "logits/chosen": -6.014883041381836, "logits/rejected": -6.0363688468933105, "logps/chosen": -205.90908813476562, "logps/rejected": -164.88681030273438, "loss": 0.0206, "rewards/accuracies": 0.9375, "rewards/chosen": 3.800830841064453, "rewards/margins": 9.355023384094238, "rewards/rejected": -5.554192543029785, "step": 2112 }, { "epoch": 1.17, "learning_rate": 3.852350555127747e-07, "logits/chosen": -6.092146396636963, "logits/rejected": -5.960020065307617, "logps/chosen": -226.5075225830078, "logps/rejected": -131.48922729492188, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 2.1303653717041016, "rewards/margins": 10.073992729187012, "rewards/rejected": -7.943627834320068, "step": 2113 }, { "epoch": 1.17, "learning_rate": 3.8479740986254363e-07, "logits/chosen": -6.0842766761779785, "logits/rejected": -6.022128105163574, "logps/chosen": -185.2234649658203, "logps/rejected": -229.2371368408203, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 1.4278862476348877, "rewards/margins": 8.91536808013916, "rewards/rejected": -7.487481117248535, "step": 2114 }, { "epoch": 1.17, "learning_rate": 3.843598574013417e-07, "logits/chosen": -5.9652299880981445, "logits/rejected": -6.046535968780518, "logps/chosen": -288.2265319824219, "logps/rejected": -352.7848205566406, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 3.068387508392334, "rewards/margins": 14.022563934326172, "rewards/rejected": -10.95417594909668, "step": 2115 }, { "epoch": 1.17, "learning_rate": 3.839223984831119e-07, "logits/chosen": -6.002036094665527, "logits/rejected": -6.005465984344482, "logps/chosen": -327.0464782714844, "logps/rejected": -214.90228271484375, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 5.372214317321777, "rewards/margins": 12.682441711425781, "rewards/rejected": -7.310227394104004, "step": 2116 }, { "epoch": 1.18, "learning_rate": 3.834850334617207e-07, "logits/chosen": -5.988431453704834, "logits/rejected": -6.088332176208496, "logps/chosen": -289.420166015625, "logps/rejected": -172.49452209472656, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 5.008116245269775, "rewards/margins": 11.47415828704834, "rewards/rejected": -6.466042518615723, "step": 2117 }, { "epoch": 1.18, "learning_rate": 3.8304776269095883e-07, "logits/chosen": -6.0193190574646, "logits/rejected": -6.077665328979492, "logps/chosen": -299.35003662109375, "logps/rejected": -228.01522827148438, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 3.143798351287842, "rewards/margins": 11.577930450439453, "rewards/rejected": -8.434131622314453, "step": 2118 }, { "epoch": 1.18, "learning_rate": 3.8261058652454137e-07, "logits/chosen": -6.011880397796631, "logits/rejected": -6.054649353027344, "logps/chosen": -268.1106872558594, "logps/rejected": -234.6591796875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 6.428981781005859, "rewards/margins": 10.588048934936523, "rewards/rejected": -4.159066200256348, "step": 2119 }, { "epoch": 1.18, "learning_rate": 3.821735053161059e-07, "logits/chosen": -6.03709602355957, "logits/rejected": -6.015842914581299, "logps/chosen": -296.6615295410156, "logps/rejected": -217.19235229492188, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": 4.435799598693848, "rewards/margins": 8.876873016357422, "rewards/rejected": -4.441073417663574, "step": 2120 }, { "epoch": 1.18, "learning_rate": 3.8173651941921397e-07, "logits/chosen": -5.8471360206604, "logits/rejected": -6.0207295417785645, "logps/chosen": -513.2993774414062, "logps/rejected": -481.87432861328125, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 7.580201148986816, "rewards/margins": 16.111284255981445, "rewards/rejected": -8.531083106994629, "step": 2121 }, { "epoch": 1.18, "learning_rate": 3.812996291873496e-07, "logits/chosen": -6.071333885192871, "logits/rejected": -6.015387058258057, "logps/chosen": -271.8248291015625, "logps/rejected": -181.8795928955078, "loss": 0.0321, "rewards/accuracies": 0.9375, "rewards/chosen": 5.238445281982422, "rewards/margins": 11.203836441040039, "rewards/rejected": -5.965391635894775, "step": 2122 }, { "epoch": 1.18, "learning_rate": 3.8086283497391975e-07, "logits/chosen": -5.943763732910156, "logits/rejected": -5.922417163848877, "logps/chosen": -251.7279815673828, "logps/rejected": -231.1309051513672, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 3.459888458251953, "rewards/margins": 11.15106201171875, "rewards/rejected": -7.691174507141113, "step": 2123 }, { "epoch": 1.18, "learning_rate": 3.8042613713225344e-07, "logits/chosen": -5.920239448547363, "logits/rejected": -6.0936784744262695, "logps/chosen": -346.9718017578125, "logps/rejected": -353.3809814453125, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 4.5266618728637695, "rewards/margins": 12.357841491699219, "rewards/rejected": -7.831180095672607, "step": 2124 }, { "epoch": 1.18, "learning_rate": 3.7998953601560167e-07, "logits/chosen": -5.917881011962891, "logits/rejected": -6.033193588256836, "logps/chosen": -169.45925903320312, "logps/rejected": -298.2037658691406, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 3.9205551147460938, "rewards/margins": 13.78617000579834, "rewards/rejected": -9.86561393737793, "step": 2125 }, { "epoch": 1.18, "learning_rate": 3.795530319771377e-07, "logits/chosen": -6.088479518890381, "logits/rejected": -5.972578048706055, "logps/chosen": -264.4351501464844, "logps/rejected": -245.12518310546875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 5.378475189208984, "rewards/margins": 12.26482105255127, "rewards/rejected": -6.886344909667969, "step": 2126 }, { "epoch": 1.18, "learning_rate": 3.7911662536995547e-07, "logits/chosen": -6.0349907875061035, "logits/rejected": -5.927371501922607, "logps/chosen": -335.4752197265625, "logps/rejected": -183.5862274169922, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 3.2388391494750977, "rewards/margins": 12.579296112060547, "rewards/rejected": -9.340456008911133, "step": 2127 }, { "epoch": 1.18, "learning_rate": 3.78680316547071e-07, "logits/chosen": -6.022331714630127, "logits/rejected": -5.981701850891113, "logps/chosen": -556.6703491210938, "logps/rejected": -293.90216064453125, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 6.0884246826171875, "rewards/margins": 13.23353385925293, "rewards/rejected": -7.145109176635742, "step": 2128 }, { "epoch": 1.18, "learning_rate": 3.782441058614203e-07, "logits/chosen": -5.979020118713379, "logits/rejected": -5.990787506103516, "logps/chosen": -261.169921875, "logps/rejected": -173.05535888671875, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 2.3289597034454346, "rewards/margins": 7.657330513000488, "rewards/rejected": -5.328370094299316, "step": 2129 }, { "epoch": 1.18, "learning_rate": 3.77807993665861e-07, "logits/chosen": -6.041449546813965, "logits/rejected": -6.027647972106934, "logps/chosen": -287.163818359375, "logps/rejected": -235.6146240234375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": 5.643082141876221, "rewards/margins": 12.87699031829834, "rewards/rejected": -7.233907699584961, "step": 2130 }, { "epoch": 1.18, "learning_rate": 3.773719803131702e-07, "logits/chosen": -5.9782328605651855, "logits/rejected": -6.045768737792969, "logps/chosen": -221.64776611328125, "logps/rejected": -261.3091125488281, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 2.5704474449157715, "rewards/margins": 11.478413581848145, "rewards/rejected": -8.907966613769531, "step": 2131 }, { "epoch": 1.18, "learning_rate": 3.7693606615604524e-07, "logits/chosen": -5.951251983642578, "logits/rejected": -5.926849365234375, "logps/chosen": -198.46197509765625, "logps/rejected": -146.26077270507812, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 2.0010294914245605, "rewards/margins": 9.305964469909668, "rewards/rejected": -7.304934978485107, "step": 2132 }, { "epoch": 1.18, "learning_rate": 3.7650025154710353e-07, "logits/chosen": -6.081243515014648, "logits/rejected": -5.950530052185059, "logps/chosen": -179.8111114501953, "logps/rejected": -134.24974060058594, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": 2.614410400390625, "rewards/margins": 10.038537979125977, "rewards/rejected": -7.424126625061035, "step": 2133 }, { "epoch": 1.18, "learning_rate": 3.7606453683888167e-07, "logits/chosen": -5.943870544433594, "logits/rejected": -6.0802764892578125, "logps/chosen": -248.0695343017578, "logps/rejected": -245.63131713867188, "loss": 0.1177, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7373491525650024, "rewards/margins": 9.927593231201172, "rewards/rejected": -8.1902437210083, "step": 2134 }, { "epoch": 1.19, "learning_rate": 3.756289223838357e-07, "logits/chosen": -5.990396976470947, "logits/rejected": -6.076320171356201, "logps/chosen": -258.9862060546875, "logps/rejected": -222.28919982910156, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 5.610735893249512, "rewards/margins": 11.028312683105469, "rewards/rejected": -5.417577266693115, "step": 2135 }, { "epoch": 1.19, "learning_rate": 3.751934085343404e-07, "logits/chosen": -6.189992904663086, "logits/rejected": -6.002874374389648, "logps/chosen": -357.0701599121094, "logps/rejected": -198.6336669921875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 5.03900146484375, "rewards/margins": 14.388778686523438, "rewards/rejected": -9.349777221679688, "step": 2136 }, { "epoch": 1.19, "learning_rate": 3.7475799564268877e-07, "logits/chosen": -6.092938423156738, "logits/rejected": -5.936051845550537, "logps/chosen": -354.8009948730469, "logps/rejected": -202.82241821289062, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 4.4988274574279785, "rewards/margins": 12.050047874450684, "rewards/rejected": -7.551220417022705, "step": 2137 }, { "epoch": 1.19, "learning_rate": 3.7432268406109323e-07, "logits/chosen": -6.139179706573486, "logits/rejected": -6.046348571777344, "logps/chosen": -365.1632385253906, "logps/rejected": -210.79397583007812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 5.128464221954346, "rewards/margins": 11.549829483032227, "rewards/rejected": -6.421365737915039, "step": 2138 }, { "epoch": 1.19, "learning_rate": 3.738874741416829e-07, "logits/chosen": -5.926730632781982, "logits/rejected": -5.898819446563721, "logps/chosen": -532.8753051757812, "logps/rejected": -184.55653381347656, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 3.730651378631592, "rewards/margins": 10.686203002929688, "rewards/rejected": -6.9555511474609375, "step": 2139 }, { "epoch": 1.19, "learning_rate": 3.7345236623650584e-07, "logits/chosen": -6.052023887634277, "logits/rejected": -6.013278007507324, "logps/chosen": -323.0547790527344, "logps/rejected": -164.5134735107422, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 5.428584098815918, "rewards/margins": 10.823670387268066, "rewards/rejected": -5.395086765289307, "step": 2140 }, { "epoch": 1.19, "learning_rate": 3.7301736069752675e-07, "logits/chosen": -6.092200756072998, "logits/rejected": -6.023123264312744, "logps/chosen": -253.02871704101562, "logps/rejected": -213.84925842285156, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 3.642775058746338, "rewards/margins": 11.881628036499023, "rewards/rejected": -8.238853454589844, "step": 2141 }, { "epoch": 1.19, "learning_rate": 3.72582457876628e-07, "logits/chosen": -6.065927505493164, "logits/rejected": -6.036616802215576, "logps/chosen": -292.30828857421875, "logps/rejected": -159.43780517578125, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": 4.452220439910889, "rewards/margins": 12.1831693649292, "rewards/rejected": -7.730949401855469, "step": 2142 }, { "epoch": 1.19, "learning_rate": 3.7214765812560883e-07, "logits/chosen": -5.979813575744629, "logits/rejected": -6.070225238800049, "logps/chosen": -228.93655395507812, "logps/rejected": -396.114013671875, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": 2.8988397121429443, "rewards/margins": 13.767059326171875, "rewards/rejected": -10.868219375610352, "step": 2143 }, { "epoch": 1.19, "learning_rate": 3.7171296179618474e-07, "logits/chosen": -6.017569065093994, "logits/rejected": -6.089857578277588, "logps/chosen": -389.37823486328125, "logps/rejected": -356.25408935546875, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": 3.8538124561309814, "rewards/margins": 11.094797134399414, "rewards/rejected": -7.240984916687012, "step": 2144 }, { "epoch": 1.19, "learning_rate": 3.712783692399881e-07, "logits/chosen": -6.077382564544678, "logits/rejected": -5.959808349609375, "logps/chosen": -355.68060302734375, "logps/rejected": -339.1357421875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 5.9560041427612305, "rewards/margins": 12.441729545593262, "rewards/rejected": -6.485726356506348, "step": 2145 }, { "epoch": 1.19, "learning_rate": 3.7084388080856674e-07, "logits/chosen": -6.026546955108643, "logits/rejected": -5.998648166656494, "logps/chosen": -444.1613464355469, "logps/rejected": -190.9917755126953, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 4.070362567901611, "rewards/margins": 11.879179954528809, "rewards/rejected": -7.808816432952881, "step": 2146 }, { "epoch": 1.19, "learning_rate": 3.7040949685338513e-07, "logits/chosen": -6.041611194610596, "logits/rejected": -6.075855255126953, "logps/chosen": -274.1055603027344, "logps/rejected": -249.59884643554688, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 3.4474892616271973, "rewards/margins": 11.46080207824707, "rewards/rejected": -8.013312339782715, "step": 2147 }, { "epoch": 1.19, "learning_rate": 3.6997521772582195e-07, "logits/chosen": -5.99467658996582, "logits/rejected": -5.995199680328369, "logps/chosen": -420.7314453125, "logps/rejected": -345.43646240234375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 5.371244430541992, "rewards/margins": 12.58681583404541, "rewards/rejected": -7.21557092666626, "step": 2148 }, { "epoch": 1.19, "learning_rate": 3.6954104377717267e-07, "logits/chosen": -6.074515342712402, "logits/rejected": -6.039737224578857, "logps/chosen": -184.01934814453125, "logps/rejected": -219.5859375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 3.1431195735931396, "rewards/margins": 9.530338287353516, "rewards/rejected": -6.387218952178955, "step": 2149 }, { "epoch": 1.19, "learning_rate": 3.691069753586462e-07, "logits/chosen": -6.012041091918945, "logits/rejected": -6.030083179473877, "logps/chosen": -297.899658203125, "logps/rejected": -351.3660583496094, "loss": 0.0948, "rewards/accuracies": 1.0, "rewards/chosen": 3.7936644554138184, "rewards/margins": 15.190435409545898, "rewards/rejected": -11.396770477294922, "step": 2150 }, { "epoch": 1.19, "learning_rate": 3.6867301282136674e-07, "logits/chosen": -5.882019996643066, "logits/rejected": -5.982522010803223, "logps/chosen": -235.88897705078125, "logps/rejected": -288.354736328125, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": 2.4862046241760254, "rewards/margins": 10.267722129821777, "rewards/rejected": -7.78151798248291, "step": 2151 }, { "epoch": 1.19, "learning_rate": 3.682391565163732e-07, "logits/chosen": -6.062137603759766, "logits/rejected": -6.017359256744385, "logps/chosen": -169.72320556640625, "logps/rejected": -122.6934814453125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 2.723217725753784, "rewards/margins": 9.218303680419922, "rewards/rejected": -6.4950852394104, "step": 2152 }, { "epoch": 1.2, "learning_rate": 3.678054067946178e-07, "logits/chosen": -6.0573577880859375, "logits/rejected": -6.0178022384643555, "logps/chosen": -305.4921875, "logps/rejected": -266.1901550292969, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 4.254820823669434, "rewards/margins": 12.028167724609375, "rewards/rejected": -7.773346900939941, "step": 2153 }, { "epoch": 1.2, "learning_rate": 3.673717640069672e-07, "logits/chosen": -6.155869007110596, "logits/rejected": -5.954139709472656, "logps/chosen": -297.9353332519531, "logps/rejected": -134.93948364257812, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 4.529033184051514, "rewards/margins": 11.155856132507324, "rewards/rejected": -6.626822471618652, "step": 2154 }, { "epoch": 1.2, "learning_rate": 3.6693822850420103e-07, "logits/chosen": -5.977699279785156, "logits/rejected": -6.136055946350098, "logps/chosen": -205.0050811767578, "logps/rejected": -338.51116943359375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 2.2284247875213623, "rewards/margins": 12.982089042663574, "rewards/rejected": -10.753664016723633, "step": 2155 }, { "epoch": 1.2, "learning_rate": 3.665048006370126e-07, "logits/chosen": -5.972402095794678, "logits/rejected": -6.038353443145752, "logps/chosen": -341.3072509765625, "logps/rejected": -265.8182067871094, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 3.674245595932007, "rewards/margins": 11.577766418457031, "rewards/rejected": -7.903521537780762, "step": 2156 }, { "epoch": 1.2, "learning_rate": 3.66071480756008e-07, "logits/chosen": -6.087700843811035, "logits/rejected": -6.085653305053711, "logps/chosen": -339.4696044921875, "logps/rejected": -252.80702209472656, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 1.4413063526153564, "rewards/margins": 11.054134368896484, "rewards/rejected": -9.61282730102539, "step": 2157 }, { "epoch": 1.2, "learning_rate": 3.656382692117054e-07, "logits/chosen": -6.109331130981445, "logits/rejected": -6.066789627075195, "logps/chosen": -168.4695281982422, "logps/rejected": -165.06300354003906, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": 1.3218227624893188, "rewards/margins": 8.704771995544434, "rewards/rejected": -7.382948875427246, "step": 2158 }, { "epoch": 1.2, "learning_rate": 3.6520516635453656e-07, "logits/chosen": -5.981453895568848, "logits/rejected": -5.863733768463135, "logps/chosen": -331.6478271484375, "logps/rejected": -263.3958435058594, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 4.556159973144531, "rewards/margins": 11.572408676147461, "rewards/rejected": -7.0162482261657715, "step": 2159 }, { "epoch": 1.2, "learning_rate": 3.647721725348439e-07, "logits/chosen": -6.031795978546143, "logits/rejected": -6.032443523406982, "logps/chosen": -217.5689239501953, "logps/rejected": -227.91827392578125, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": 4.181755065917969, "rewards/margins": 12.007575988769531, "rewards/rejected": -7.825819969177246, "step": 2160 }, { "epoch": 1.2, "learning_rate": 3.643392881028829e-07, "logits/chosen": -6.066811561584473, "logits/rejected": -6.078805446624756, "logps/chosen": -219.02713012695312, "logps/rejected": -167.5829315185547, "loss": 0.0222, "rewards/accuracies": 0.9375, "rewards/chosen": 2.317164659500122, "rewards/margins": 10.27730941772461, "rewards/rejected": -7.960144996643066, "step": 2161 }, { "epoch": 1.2, "learning_rate": 3.639065134088196e-07, "logits/chosen": -6.036445617675781, "logits/rejected": -5.905074596405029, "logps/chosen": -463.7830810546875, "logps/rejected": -290.62286376953125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 2.618422031402588, "rewards/margins": 12.502778053283691, "rewards/rejected": -9.884355545043945, "step": 2162 }, { "epoch": 1.2, "learning_rate": 3.634738488027319e-07, "logits/chosen": -6.0677266120910645, "logits/rejected": -6.0976386070251465, "logps/chosen": -231.07659912109375, "logps/rejected": -210.1973876953125, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 2.95043683052063, "rewards/margins": 10.926919937133789, "rewards/rejected": -7.97648286819458, "step": 2163 }, { "epoch": 1.2, "learning_rate": 3.630412946346084e-07, "logits/chosen": -5.991546630859375, "logits/rejected": -6.053244113922119, "logps/chosen": -173.33543395996094, "logps/rejected": -238.07809448242188, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 3.859405994415283, "rewards/margins": 13.13271713256836, "rewards/rejected": -9.273313522338867, "step": 2164 }, { "epoch": 1.2, "learning_rate": 3.6260885125434817e-07, "logits/chosen": -5.9894022941589355, "logits/rejected": -6.073168754577637, "logps/chosen": -284.033935546875, "logps/rejected": -345.3709411621094, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 3.8022899627685547, "rewards/margins": 16.906160354614258, "rewards/rejected": -13.10387134552002, "step": 2165 }, { "epoch": 1.2, "learning_rate": 3.621765190117612e-07, "logits/chosen": -6.1023945808410645, "logits/rejected": -5.937046527862549, "logps/chosen": -267.9584655761719, "logps/rejected": -121.27342987060547, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": 4.81996488571167, "rewards/margins": 9.645308494567871, "rewards/rejected": -4.825343608856201, "step": 2166 }, { "epoch": 1.2, "learning_rate": 3.6174429825656684e-07, "logits/chosen": -5.959743022918701, "logits/rejected": -5.953767776489258, "logps/chosen": -192.87379455566406, "logps/rejected": -172.11962890625, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 2.9437255859375, "rewards/margins": 11.01257610321045, "rewards/rejected": -8.06885051727295, "step": 2167 }, { "epoch": 1.2, "learning_rate": 3.613121893383951e-07, "logits/chosen": -6.034115314483643, "logits/rejected": -6.042903900146484, "logps/chosen": -231.25796508789062, "logps/rejected": -183.07554626464844, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": 4.275875091552734, "rewards/margins": 10.800922393798828, "rewards/rejected": -6.525045871734619, "step": 2168 }, { "epoch": 1.2, "learning_rate": 3.608801926067846e-07, "logits/chosen": -6.0541181564331055, "logits/rejected": -6.122720718383789, "logps/chosen": -531.6107177734375, "logps/rejected": -354.33453369140625, "loss": 0.0474, "rewards/accuracies": 0.9375, "rewards/chosen": 4.323030948638916, "rewards/margins": 10.476247787475586, "rewards/rejected": -6.153217315673828, "step": 2169 }, { "epoch": 1.2, "learning_rate": 3.604483084111843e-07, "logits/chosen": -6.051597595214844, "logits/rejected": -5.967946529388428, "logps/chosen": -257.26763916015625, "logps/rejected": -236.77911376953125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 4.281414031982422, "rewards/margins": 14.926902770996094, "rewards/rejected": -10.645488739013672, "step": 2170 }, { "epoch": 1.21, "learning_rate": 3.600165371009511e-07, "logits/chosen": -6.008535385131836, "logits/rejected": -6.023561477661133, "logps/chosen": -181.55599975585938, "logps/rejected": -191.405517578125, "loss": 0.0483, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4325480461120605, "rewards/margins": 10.076141357421875, "rewards/rejected": -6.643593788146973, "step": 2171 }, { "epoch": 1.21, "learning_rate": 3.59584879025351e-07, "logits/chosen": -6.0816144943237305, "logits/rejected": -6.078678607940674, "logps/chosen": -440.4065856933594, "logps/rejected": -211.23104858398438, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 3.342564582824707, "rewards/margins": 10.608942985534668, "rewards/rejected": -7.266378402709961, "step": 2172 }, { "epoch": 1.21, "learning_rate": 3.5915333453355866e-07, "logits/chosen": -5.984996795654297, "logits/rejected": -6.009987831115723, "logps/chosen": -270.7489013671875, "logps/rejected": -225.87371826171875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 3.7752442359924316, "rewards/margins": 12.004011154174805, "rewards/rejected": -8.228766441345215, "step": 2173 }, { "epoch": 1.21, "learning_rate": 3.5872190397465635e-07, "logits/chosen": -6.001547813415527, "logits/rejected": -5.986220836639404, "logps/chosen": -410.67596435546875, "logps/rejected": -306.8314514160156, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 3.041569471359253, "rewards/margins": 10.523002624511719, "rewards/rejected": -7.481432914733887, "step": 2174 }, { "epoch": 1.21, "learning_rate": 3.5829058769763473e-07, "logits/chosen": -6.009856224060059, "logits/rejected": -5.9851555824279785, "logps/chosen": -254.72079467773438, "logps/rejected": -233.0342559814453, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": 2.6936538219451904, "rewards/margins": 12.255229949951172, "rewards/rejected": -9.561574935913086, "step": 2175 }, { "epoch": 1.21, "learning_rate": 3.5785938605139145e-07, "logits/chosen": -6.105144500732422, "logits/rejected": -6.048525810241699, "logps/chosen": -181.8139190673828, "logps/rejected": -222.07135009765625, "loss": 0.0161, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6365322470664978, "rewards/margins": 10.035103797912598, "rewards/rejected": -9.398571014404297, "step": 2176 }, { "epoch": 1.21, "learning_rate": 3.5742829938473196e-07, "logits/chosen": -5.900888442993164, "logits/rejected": -5.973162651062012, "logps/chosen": -241.7764434814453, "logps/rejected": -262.1161804199219, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": 3.544520378112793, "rewards/margins": 10.585918426513672, "rewards/rejected": -7.041397571563721, "step": 2177 }, { "epoch": 1.21, "learning_rate": 3.569973280463684e-07, "logits/chosen": -5.933357238769531, "logits/rejected": -5.944730758666992, "logps/chosen": -342.9530944824219, "logps/rejected": -210.12791442871094, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 2.601330518722534, "rewards/margins": 11.175804138183594, "rewards/rejected": -8.57447338104248, "step": 2178 }, { "epoch": 1.21, "learning_rate": 3.5656647238491944e-07, "logits/chosen": -6.059909343719482, "logits/rejected": -5.99796199798584, "logps/chosen": -283.502197265625, "logps/rejected": -210.75115966796875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 6.391751289367676, "rewards/margins": 12.364119529724121, "rewards/rejected": -5.972367763519287, "step": 2179 }, { "epoch": 1.21, "learning_rate": 3.5613573274891073e-07, "logits/chosen": -5.959878444671631, "logits/rejected": -5.952846050262451, "logps/chosen": -242.1795196533203, "logps/rejected": -251.9462432861328, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 2.904346227645874, "rewards/margins": 12.470457077026367, "rewards/rejected": -9.566109657287598, "step": 2180 }, { "epoch": 1.21, "learning_rate": 3.5570510948677347e-07, "logits/chosen": -6.092194557189941, "logits/rejected": -6.0248212814331055, "logps/chosen": -344.6768798828125, "logps/rejected": -215.4803924560547, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 6.324705123901367, "rewards/margins": 13.27536392211914, "rewards/rejected": -6.950659275054932, "step": 2181 }, { "epoch": 1.21, "learning_rate": 3.552746029468452e-07, "logits/chosen": -6.024700164794922, "logits/rejected": -5.864335060119629, "logps/chosen": -262.6950378417969, "logps/rejected": -159.4449920654297, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": 3.05790114402771, "rewards/margins": 10.700597763061523, "rewards/rejected": -7.642696380615234, "step": 2182 }, { "epoch": 1.21, "learning_rate": 3.548442134773687e-07, "logits/chosen": -6.085309982299805, "logits/rejected": -5.924007892608643, "logps/chosen": -274.7127380371094, "logps/rejected": -174.90492248535156, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 6.280056953430176, "rewards/margins": 10.366842269897461, "rewards/rejected": -4.086785316467285, "step": 2183 }, { "epoch": 1.21, "learning_rate": 3.544139414264924e-07, "logits/chosen": -6.176103591918945, "logits/rejected": -6.02294921875, "logps/chosen": -272.91021728515625, "logps/rejected": -175.84408569335938, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 3.6860156059265137, "rewards/margins": 11.807516098022461, "rewards/rejected": -8.121500015258789, "step": 2184 }, { "epoch": 1.21, "learning_rate": 3.5398378714226937e-07, "logits/chosen": -6.05672550201416, "logits/rejected": -6.085795879364014, "logps/chosen": -258.0662841796875, "logps/rejected": -207.8891143798828, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 6.749724864959717, "rewards/margins": 14.679312705993652, "rewards/rejected": -7.929588794708252, "step": 2185 }, { "epoch": 1.21, "learning_rate": 3.535537509726575e-07, "logits/chosen": -5.973959922790527, "logits/rejected": -5.904354095458984, "logps/chosen": -287.9137878417969, "logps/rejected": -141.12799072265625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 6.791418075561523, "rewards/margins": 10.623013496398926, "rewards/rejected": -3.831594467163086, "step": 2186 }, { "epoch": 1.21, "learning_rate": 3.5312383326551964e-07, "logits/chosen": -6.0731730461120605, "logits/rejected": -6.076137542724609, "logps/chosen": -262.54632568359375, "logps/rejected": -207.94190979003906, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": 5.77264404296875, "rewards/margins": 10.868244171142578, "rewards/rejected": -5.095600605010986, "step": 2187 }, { "epoch": 1.21, "learning_rate": 3.5269403436862174e-07, "logits/chosen": -6.199896335601807, "logits/rejected": -5.956387996673584, "logps/chosen": -419.57861328125, "logps/rejected": -192.986572265625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 4.3548407554626465, "rewards/margins": 13.446642875671387, "rewards/rejected": -9.091802597045898, "step": 2188 }, { "epoch": 1.22, "learning_rate": 3.52264354629635e-07, "logits/chosen": -6.032748222351074, "logits/rejected": -6.05328893661499, "logps/chosen": -307.2540588378906, "logps/rejected": -145.7016143798828, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": 4.4694647789001465, "rewards/margins": 9.525517463684082, "rewards/rejected": -5.0560526847839355, "step": 2189 }, { "epoch": 1.22, "learning_rate": 3.518347943961331e-07, "logits/chosen": -6.0927581787109375, "logits/rejected": -6.0493268966674805, "logps/chosen": -282.94390869140625, "logps/rejected": -240.15167236328125, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 6.535819053649902, "rewards/margins": 13.43632698059082, "rewards/rejected": -6.900507926940918, "step": 2190 }, { "epoch": 1.22, "learning_rate": 3.514053540155933e-07, "logits/chosen": -6.087756156921387, "logits/rejected": -6.082070350646973, "logps/chosen": -279.4628601074219, "logps/rejected": -168.02230834960938, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 4.633811950683594, "rewards/margins": 12.327079772949219, "rewards/rejected": -7.693267822265625, "step": 2191 }, { "epoch": 1.22, "learning_rate": 3.509760338353967e-07, "logits/chosen": -6.019723892211914, "logits/rejected": -5.970667839050293, "logps/chosen": -405.5321960449219, "logps/rejected": -284.7217102050781, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 4.87257719039917, "rewards/margins": 11.40634536743164, "rewards/rejected": -6.533768177032471, "step": 2192 }, { "epoch": 1.22, "learning_rate": 3.5054683420282604e-07, "logits/chosen": -6.003713130950928, "logits/rejected": -6.1569743156433105, "logps/chosen": -269.23370361328125, "logps/rejected": -365.8025207519531, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 4.653672218322754, "rewards/margins": 14.300053596496582, "rewards/rejected": -9.646381378173828, "step": 2193 }, { "epoch": 1.22, "learning_rate": 3.5011775546506733e-07, "logits/chosen": -5.989640235900879, "logits/rejected": -5.9049530029296875, "logps/chosen": -353.9905090332031, "logps/rejected": -137.8941650390625, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": 6.5896687507629395, "rewards/margins": 11.299455642700195, "rewards/rejected": -4.709786891937256, "step": 2194 }, { "epoch": 1.22, "learning_rate": 3.496887979692084e-07, "logits/chosen": -6.120600700378418, "logits/rejected": -5.910799980163574, "logps/chosen": -323.5918884277344, "logps/rejected": -184.2523193359375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 6.632693767547607, "rewards/margins": 11.550849914550781, "rewards/rejected": -4.918156623840332, "step": 2195 }, { "epoch": 1.22, "learning_rate": 3.492599620622392e-07, "logits/chosen": -6.011776924133301, "logits/rejected": -6.030875205993652, "logps/chosen": -186.47390747070312, "logps/rejected": -230.17425537109375, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 3.415097713470459, "rewards/margins": 11.019669532775879, "rewards/rejected": -7.604572296142578, "step": 2196 }, { "epoch": 1.22, "learning_rate": 3.4883124809105134e-07, "logits/chosen": -6.024844169616699, "logits/rejected": -5.910401344299316, "logps/chosen": -329.4203186035156, "logps/rejected": -140.71963500976562, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 5.661991119384766, "rewards/margins": 10.443187713623047, "rewards/rejected": -4.781198024749756, "step": 2197 }, { "epoch": 1.22, "learning_rate": 3.484026564024373e-07, "logits/chosen": -6.0795698165893555, "logits/rejected": -6.141546249389648, "logps/chosen": -285.23016357421875, "logps/rejected": -323.7020568847656, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": 4.687984466552734, "rewards/margins": 11.156834602355957, "rewards/rejected": -6.468849182128906, "step": 2198 }, { "epoch": 1.22, "learning_rate": 3.4797418734309165e-07, "logits/chosen": -5.935372352600098, "logits/rejected": -6.002300262451172, "logps/chosen": -184.6094512939453, "logps/rejected": -245.8180694580078, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 3.1313960552215576, "rewards/margins": 10.55142593383789, "rewards/rejected": -7.42003059387207, "step": 2199 }, { "epoch": 1.22, "learning_rate": 3.475458412596086e-07, "logits/chosen": -6.019465446472168, "logits/rejected": -6.057075023651123, "logps/chosen": -300.4042663574219, "logps/rejected": -191.87628173828125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 2.5139784812927246, "rewards/margins": 10.173956871032715, "rewards/rejected": -7.659977912902832, "step": 2200 }, { "epoch": 1.22, "learning_rate": 3.4711761849848385e-07, "logits/chosen": -5.981142520904541, "logits/rejected": -5.914841651916504, "logps/chosen": -420.79913330078125, "logps/rejected": -365.1252136230469, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 3.8551487922668457, "rewards/margins": 9.221738815307617, "rewards/rejected": -5.3665900230407715, "step": 2201 }, { "epoch": 1.22, "learning_rate": 3.4668951940611274e-07, "logits/chosen": -5.989768028259277, "logits/rejected": -6.022624969482422, "logps/chosen": -277.9732666015625, "logps/rejected": -270.378173828125, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 5.378348350524902, "rewards/margins": 14.748363494873047, "rewards/rejected": -9.370015144348145, "step": 2202 }, { "epoch": 1.22, "learning_rate": 3.462615443287909e-07, "logits/chosen": -5.948150634765625, "logits/rejected": -6.067149639129639, "logps/chosen": -230.791259765625, "logps/rejected": -391.965087890625, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": 2.420522689819336, "rewards/margins": 13.875080108642578, "rewards/rejected": -11.454558372497559, "step": 2203 }, { "epoch": 1.22, "learning_rate": 3.4583369361271343e-07, "logits/chosen": -5.988370895385742, "logits/rejected": -6.026382923126221, "logps/chosen": -275.17449951171875, "logps/rejected": -286.8697204589844, "loss": 0.0747, "rewards/accuracies": 0.9375, "rewards/chosen": 4.3518171310424805, "rewards/margins": 14.485601425170898, "rewards/rejected": -10.133785247802734, "step": 2204 }, { "epoch": 1.22, "learning_rate": 3.4540596760397483e-07, "logits/chosen": -6.040920257568359, "logits/rejected": -5.979387283325195, "logps/chosen": -286.9959716796875, "logps/rejected": -245.50506591796875, "loss": 0.0126, "rewards/accuracies": 0.9375, "rewards/chosen": 5.644781589508057, "rewards/margins": 12.59176254272461, "rewards/rejected": -6.946980953216553, "step": 2205 }, { "epoch": 1.22, "learning_rate": 3.4497836664856906e-07, "logits/chosen": -6.07645845413208, "logits/rejected": -6.00739860534668, "logps/chosen": -209.52276611328125, "logps/rejected": -150.91656494140625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 1.567055106163025, "rewards/margins": 11.39522647857666, "rewards/rejected": -9.828170776367188, "step": 2206 }, { "epoch": 1.23, "learning_rate": 3.445508910923883e-07, "logits/chosen": -5.996717929840088, "logits/rejected": -6.148115634918213, "logps/chosen": -211.5477294921875, "logps/rejected": -326.53741455078125, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 3.7052464485168457, "rewards/margins": 12.773865699768066, "rewards/rejected": -9.068620681762695, "step": 2207 }, { "epoch": 1.23, "learning_rate": 3.441235412812242e-07, "logits/chosen": -6.037749290466309, "logits/rejected": -6.033299922943115, "logps/chosen": -261.0731201171875, "logps/rejected": -162.4339141845703, "loss": 0.0875, "rewards/accuracies": 0.9375, "rewards/chosen": 5.33279275894165, "rewards/margins": 9.208327293395996, "rewards/rejected": -3.8755340576171875, "step": 2208 }, { "epoch": 1.23, "learning_rate": 3.436963175607656e-07, "logits/chosen": -5.847963333129883, "logits/rejected": -5.892668724060059, "logps/chosen": -231.6630401611328, "logps/rejected": -205.89117431640625, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 2.860945224761963, "rewards/margins": 11.622557640075684, "rewards/rejected": -8.761611938476562, "step": 2209 }, { "epoch": 1.23, "learning_rate": 3.432692202766003e-07, "logits/chosen": -6.0546555519104, "logits/rejected": -6.069189071655273, "logps/chosen": -243.6806640625, "logps/rejected": -257.95306396484375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 3.2772014141082764, "rewards/margins": 13.648402214050293, "rewards/rejected": -10.371200561523438, "step": 2210 }, { "epoch": 1.23, "learning_rate": 3.428422497742133e-07, "logits/chosen": -5.978381633758545, "logits/rejected": -6.008026599884033, "logps/chosen": -270.4603576660156, "logps/rejected": -332.870849609375, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": 3.8038392066955566, "rewards/margins": 11.87739086151123, "rewards/rejected": -8.073551177978516, "step": 2211 }, { "epoch": 1.23, "learning_rate": 3.4241540639898696e-07, "logits/chosen": -5.958189487457275, "logits/rejected": -6.059482097625732, "logps/chosen": -202.92352294921875, "logps/rejected": -158.0264129638672, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 4.7467522621154785, "rewards/margins": 11.2622652053833, "rewards/rejected": -6.515512466430664, "step": 2212 }, { "epoch": 1.23, "learning_rate": 3.419886904962014e-07, "logits/chosen": -6.065496444702148, "logits/rejected": -5.922168731689453, "logps/chosen": -381.4829406738281, "logps/rejected": -328.81622314453125, "loss": 0.0252, "rewards/accuracies": 0.9375, "rewards/chosen": 6.2682037353515625, "rewards/margins": 14.121180534362793, "rewards/rejected": -7.852976322174072, "step": 2213 }, { "epoch": 1.23, "learning_rate": 3.4156210241103285e-07, "logits/chosen": -5.9249467849731445, "logits/rejected": -6.0813164710998535, "logps/chosen": -235.9288330078125, "logps/rejected": -350.27838134765625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 3.600416421890259, "rewards/margins": 14.366010665893555, "rewards/rejected": -10.765594482421875, "step": 2214 }, { "epoch": 1.23, "learning_rate": 3.4113564248855486e-07, "logits/chosen": -6.044134140014648, "logits/rejected": -5.904597282409668, "logps/chosen": -290.7218933105469, "logps/rejected": -189.03598022460938, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 3.9996304512023926, "rewards/margins": 9.326271057128906, "rewards/rejected": -5.3266401290893555, "step": 2215 }, { "epoch": 1.23, "learning_rate": 3.4070931107373673e-07, "logits/chosen": -5.914367198944092, "logits/rejected": -5.926413059234619, "logps/chosen": -200.72903442382812, "logps/rejected": -208.55979919433594, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 4.5572428703308105, "rewards/margins": 11.94587230682373, "rewards/rejected": -7.388629913330078, "step": 2216 }, { "epoch": 1.23, "learning_rate": 3.402831085114444e-07, "logits/chosen": -6.043334007263184, "logits/rejected": -6.069281101226807, "logps/chosen": -260.7772216796875, "logps/rejected": -218.85369873046875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 4.208593368530273, "rewards/margins": 12.888383865356445, "rewards/rejected": -8.679790496826172, "step": 2217 }, { "epoch": 1.23, "learning_rate": 3.3985703514643904e-07, "logits/chosen": -6.032451152801514, "logits/rejected": -6.1334943771362305, "logps/chosen": -295.17742919921875, "logps/rejected": -216.09657287597656, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 5.533249855041504, "rewards/margins": 11.982526779174805, "rewards/rejected": -6.449276924133301, "step": 2218 }, { "epoch": 1.23, "learning_rate": 3.3943109132337733e-07, "logits/chosen": -6.058385848999023, "logits/rejected": -6.0269317626953125, "logps/chosen": -310.92138671875, "logps/rejected": -169.9984588623047, "loss": 0.0589, "rewards/accuracies": 0.875, "rewards/chosen": 6.2611613273620605, "rewards/margins": 10.79533576965332, "rewards/rejected": -4.534175395965576, "step": 2219 }, { "epoch": 1.23, "learning_rate": 3.390052773868116e-07, "logits/chosen": -5.994178771972656, "logits/rejected": -6.013818264007568, "logps/chosen": -219.1494140625, "logps/rejected": -157.92471313476562, "loss": 0.0157, "rewards/accuracies": 0.9375, "rewards/chosen": 4.012739181518555, "rewards/margins": 9.140436172485352, "rewards/rejected": -5.127696990966797, "step": 2220 }, { "epoch": 1.23, "learning_rate": 3.385795936811887e-07, "logits/chosen": -6.080808639526367, "logits/rejected": -6.017148017883301, "logps/chosen": -248.55262756347656, "logps/rejected": -198.62750244140625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 4.242490291595459, "rewards/margins": 11.21962833404541, "rewards/rejected": -6.977138519287109, "step": 2221 }, { "epoch": 1.23, "learning_rate": 3.3815404055085044e-07, "logits/chosen": -5.984261989593506, "logits/rejected": -6.049404144287109, "logps/chosen": -241.3507843017578, "logps/rejected": -231.84585571289062, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 5.612478733062744, "rewards/margins": 12.304768562316895, "rewards/rejected": -6.692291259765625, "step": 2222 }, { "epoch": 1.23, "learning_rate": 3.3772861834003276e-07, "logits/chosen": -6.089982032775879, "logits/rejected": -5.979287147521973, "logps/chosen": -239.32398986816406, "logps/rejected": -164.36248779296875, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": 4.6610283851623535, "rewards/margins": 10.688525199890137, "rewards/rejected": -6.027496814727783, "step": 2223 }, { "epoch": 1.23, "learning_rate": 3.3730332739286583e-07, "logits/chosen": -5.992130279541016, "logits/rejected": -6.028103351593018, "logps/chosen": -200.30938720703125, "logps/rejected": -226.27276611328125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 2.7990896701812744, "rewards/margins": 10.959708213806152, "rewards/rejected": -8.160618782043457, "step": 2224 }, { "epoch": 1.24, "learning_rate": 3.3687816805337367e-07, "logits/chosen": -5.966530799865723, "logits/rejected": -5.968653678894043, "logps/chosen": -319.46649169921875, "logps/rejected": -295.8363342285156, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 6.840421676635742, "rewards/margins": 12.115060806274414, "rewards/rejected": -5.274639129638672, "step": 2225 }, { "epoch": 1.24, "learning_rate": 3.364531406654736e-07, "logits/chosen": -6.139559268951416, "logits/rejected": -6.041268348693848, "logps/chosen": -232.9525909423828, "logps/rejected": -191.35385131835938, "loss": 0.0503, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8127708435058594, "rewards/margins": 9.626893997192383, "rewards/rejected": -6.81412410736084, "step": 2226 }, { "epoch": 1.24, "learning_rate": 3.360282455729766e-07, "logits/chosen": -6.060303688049316, "logits/rejected": -6.140000343322754, "logps/chosen": -313.6977844238281, "logps/rejected": -299.382568359375, "loss": 0.106, "rewards/accuracies": 1.0, "rewards/chosen": 7.394830226898193, "rewards/margins": 14.971765518188477, "rewards/rejected": -7.576935768127441, "step": 2227 }, { "epoch": 1.24, "learning_rate": 3.3560348311958607e-07, "logits/chosen": -6.007757663726807, "logits/rejected": -6.000129699707031, "logps/chosen": -228.29991149902344, "logps/rejected": -169.70541381835938, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": 3.8655056953430176, "rewards/margins": 10.671478271484375, "rewards/rejected": -6.805971622467041, "step": 2228 }, { "epoch": 1.24, "learning_rate": 3.3517885364889895e-07, "logits/chosen": -6.080385208129883, "logits/rejected": -6.071739196777344, "logps/chosen": -312.16571044921875, "logps/rejected": -265.3199768066406, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": 3.4895288944244385, "rewards/margins": 13.630191802978516, "rewards/rejected": -10.140663146972656, "step": 2229 }, { "epoch": 1.24, "learning_rate": 3.347543575044035e-07, "logits/chosen": -6.143063068389893, "logits/rejected": -6.010542392730713, "logps/chosen": -256.5155029296875, "logps/rejected": -213.12362670898438, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 2.311777114868164, "rewards/margins": 10.937896728515625, "rewards/rejected": -8.626118659973145, "step": 2230 }, { "epoch": 1.24, "learning_rate": 3.343299950294815e-07, "logits/chosen": -6.001489639282227, "logits/rejected": -6.008856773376465, "logps/chosen": -243.02243041992188, "logps/rejected": -219.689453125, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 4.944821357727051, "rewards/margins": 11.627910614013672, "rewards/rejected": -6.6830902099609375, "step": 2231 }, { "epoch": 1.24, "learning_rate": 3.3390576656740535e-07, "logits/chosen": -6.068883895874023, "logits/rejected": -5.975887298583984, "logps/chosen": -239.0500030517578, "logps/rejected": -230.01968383789062, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 2.8729212284088135, "rewards/margins": 10.117903709411621, "rewards/rejected": -7.24498176574707, "step": 2232 }, { "epoch": 1.24, "learning_rate": 3.3348167246133943e-07, "logits/chosen": -6.152531623840332, "logits/rejected": -5.982811450958252, "logps/chosen": -330.1235046386719, "logps/rejected": -184.83421325683594, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 5.121949195861816, "rewards/margins": 13.52321720123291, "rewards/rejected": -8.40126895904541, "step": 2233 }, { "epoch": 1.24, "learning_rate": 3.3305771305433987e-07, "logits/chosen": -6.086719512939453, "logits/rejected": -6.04872989654541, "logps/chosen": -220.8989715576172, "logps/rejected": -183.1656036376953, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": 3.6614010334014893, "rewards/margins": 11.118840217590332, "rewards/rejected": -7.457439422607422, "step": 2234 }, { "epoch": 1.24, "learning_rate": 3.3263388868935337e-07, "logits/chosen": -5.916825771331787, "logits/rejected": -6.036949634552002, "logps/chosen": -208.2411346435547, "logps/rejected": -214.65728759765625, "loss": 0.0181, "rewards/accuracies": 0.9375, "rewards/chosen": 3.728426218032837, "rewards/margins": 11.325756072998047, "rewards/rejected": -7.597330093383789, "step": 2235 }, { "epoch": 1.24, "learning_rate": 3.322101997092176e-07, "logits/chosen": -5.992469787597656, "logits/rejected": -6.028905391693115, "logps/chosen": -242.736083984375, "logps/rejected": -229.7825469970703, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 3.2402687072753906, "rewards/margins": 11.90699577331543, "rewards/rejected": -8.666727066040039, "step": 2236 }, { "epoch": 1.24, "learning_rate": 3.3178664645666065e-07, "logits/chosen": -5.932543754577637, "logits/rejected": -5.939016342163086, "logps/chosen": -280.1851806640625, "logps/rejected": -195.3406982421875, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": 2.510249137878418, "rewards/margins": 11.33556842803955, "rewards/rejected": -8.825319290161133, "step": 2237 }, { "epoch": 1.24, "learning_rate": 3.313632292743008e-07, "logits/chosen": -6.103855609893799, "logits/rejected": -6.038689613342285, "logps/chosen": -257.393798828125, "logps/rejected": -269.79669189453125, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 2.986452102661133, "rewards/margins": 12.805675506591797, "rewards/rejected": -9.819223403930664, "step": 2238 }, { "epoch": 1.24, "learning_rate": 3.3093994850464655e-07, "logits/chosen": -6.111170291900635, "logits/rejected": -6.049247741699219, "logps/chosen": -237.89920043945312, "logps/rejected": -141.50509643554688, "loss": 0.0215, "rewards/accuracies": 0.9375, "rewards/chosen": 3.8814451694488525, "rewards/margins": 10.497572898864746, "rewards/rejected": -6.6161274909973145, "step": 2239 }, { "epoch": 1.24, "learning_rate": 3.3051680449009535e-07, "logits/chosen": -6.168972015380859, "logits/rejected": -5.995662689208984, "logps/chosen": -283.7601318359375, "logps/rejected": -225.6195831298828, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 3.9386775493621826, "rewards/margins": 13.240943908691406, "rewards/rejected": -9.302267074584961, "step": 2240 }, { "epoch": 1.24, "learning_rate": 3.3009379757293464e-07, "logits/chosen": -6.098535537719727, "logits/rejected": -5.971846580505371, "logps/chosen": -295.4337158203125, "logps/rejected": -160.92019653320312, "loss": 0.0309, "rewards/accuracies": 0.9375, "rewards/chosen": 8.825800895690918, "rewards/margins": 12.32624626159668, "rewards/rejected": -3.50044584274292, "step": 2241 }, { "epoch": 1.24, "learning_rate": 3.296709280953408e-07, "logits/chosen": -5.863680839538574, "logits/rejected": -5.959981918334961, "logps/chosen": -247.324951171875, "logps/rejected": -185.74354553222656, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 3.3769917488098145, "rewards/margins": 8.119976043701172, "rewards/rejected": -4.742984294891357, "step": 2242 }, { "epoch": 1.25, "learning_rate": 3.292481963993792e-07, "logits/chosen": -6.036923408508301, "logits/rejected": -5.935266971588135, "logps/chosen": -316.0061340332031, "logps/rejected": -228.0316162109375, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 4.523752212524414, "rewards/margins": 13.456201553344727, "rewards/rejected": -8.932449340820312, "step": 2243 }, { "epoch": 1.25, "learning_rate": 3.288256028270033e-07, "logits/chosen": -5.909862995147705, "logits/rejected": -5.949867248535156, "logps/chosen": -366.5180358886719, "logps/rejected": -401.5788879394531, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 6.087734222412109, "rewards/margins": 12.822906494140625, "rewards/rejected": -6.735172271728516, "step": 2244 }, { "epoch": 1.25, "learning_rate": 3.284031477200553e-07, "logits/chosen": -6.0112690925598145, "logits/rejected": -5.9622626304626465, "logps/chosen": -278.65753173828125, "logps/rejected": -172.94943237304688, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 4.63688325881958, "rewards/margins": 7.9292497634887695, "rewards/rejected": -3.2923660278320312, "step": 2245 }, { "epoch": 1.25, "learning_rate": 3.2798083142026514e-07, "logits/chosen": -6.013212203979492, "logits/rejected": -5.931667327880859, "logps/chosen": -338.2451171875, "logps/rejected": -218.15380859375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 6.863763809204102, "rewards/margins": 13.534076690673828, "rewards/rejected": -6.67031192779541, "step": 2246 }, { "epoch": 1.25, "learning_rate": 3.275586542692504e-07, "logits/chosen": -6.171058177947998, "logits/rejected": -6.065699577331543, "logps/chosen": -252.36370849609375, "logps/rejected": -166.39979553222656, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 4.306223392486572, "rewards/margins": 13.305055618286133, "rewards/rejected": -8.998832702636719, "step": 2247 }, { "epoch": 1.25, "learning_rate": 3.271366166085166e-07, "logits/chosen": -5.978797912597656, "logits/rejected": -6.087658882141113, "logps/chosen": -324.11273193359375, "logps/rejected": -331.00250244140625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 5.967864990234375, "rewards/margins": 16.415254592895508, "rewards/rejected": -10.447391510009766, "step": 2248 }, { "epoch": 1.25, "learning_rate": 3.2671471877945555e-07, "logits/chosen": -5.900234222412109, "logits/rejected": -5.946764945983887, "logps/chosen": -240.08172607421875, "logps/rejected": -163.2977294921875, "loss": 0.1254, "rewards/accuracies": 1.0, "rewards/chosen": 4.936413764953613, "rewards/margins": 11.107810020446777, "rewards/rejected": -6.171395301818848, "step": 2249 }, { "epoch": 1.25, "learning_rate": 3.2629296112334704e-07, "logits/chosen": -6.056756973266602, "logits/rejected": -5.93759822845459, "logps/chosen": -259.258544921875, "logps/rejected": -192.17495727539062, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": 4.64754581451416, "rewards/margins": 10.912644386291504, "rewards/rejected": -6.26509952545166, "step": 2250 }, { "epoch": 1.25, "learning_rate": 3.2587134398135654e-07, "logits/chosen": -5.936157703399658, "logits/rejected": -5.931404113769531, "logps/chosen": -291.4293212890625, "logps/rejected": -257.07586669921875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 4.9232378005981445, "rewards/margins": 13.427482604980469, "rewards/rejected": -8.50424575805664, "step": 2251 }, { "epoch": 1.25, "learning_rate": 3.2544986769453614e-07, "logits/chosen": -5.907023906707764, "logits/rejected": -5.997394561767578, "logps/chosen": -327.1443176269531, "logps/rejected": -325.0244140625, "loss": 0.0122, "rewards/accuracies": 0.9375, "rewards/chosen": 4.082762241363525, "rewards/margins": 9.377336502075195, "rewards/rejected": -5.29457426071167, "step": 2252 }, { "epoch": 1.25, "learning_rate": 3.2502853260382434e-07, "logits/chosen": -6.087357521057129, "logits/rejected": -6.075188159942627, "logps/chosen": -213.978759765625, "logps/rejected": -142.2908477783203, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 3.257337808609009, "rewards/margins": 9.576361656188965, "rewards/rejected": -6.319024085998535, "step": 2253 }, { "epoch": 1.25, "learning_rate": 3.2460733905004485e-07, "logits/chosen": -5.951781272888184, "logits/rejected": -5.986225128173828, "logps/chosen": -272.317138671875, "logps/rejected": -217.9612274169922, "loss": 0.0913, "rewards/accuracies": 0.9375, "rewards/chosen": 2.1523122787475586, "rewards/margins": 8.537389755249023, "rewards/rejected": -6.385077953338623, "step": 2254 }, { "epoch": 1.25, "learning_rate": 3.2418628737390747e-07, "logits/chosen": -5.947014808654785, "logits/rejected": -6.053666114807129, "logps/chosen": -257.3855285644531, "logps/rejected": -260.043212890625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 5.515714168548584, "rewards/margins": 10.713688850402832, "rewards/rejected": -5.197973728179932, "step": 2255 }, { "epoch": 1.25, "learning_rate": 3.237653779160067e-07, "logits/chosen": -6.022737503051758, "logits/rejected": -6.022777557373047, "logps/chosen": -264.94683837890625, "logps/rejected": -188.2183837890625, "loss": 0.0851, "rewards/accuracies": 1.0, "rewards/chosen": 4.86511754989624, "rewards/margins": 11.902847290039062, "rewards/rejected": -7.037729263305664, "step": 2256 }, { "epoch": 1.25, "learning_rate": 3.233446110168224e-07, "logits/chosen": -5.968899726867676, "logits/rejected": -5.919304370880127, "logps/chosen": -244.94674682617188, "logps/rejected": -190.49314880371094, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 6.341326713562012, "rewards/margins": 12.0546236038208, "rewards/rejected": -5.713297367095947, "step": 2257 }, { "epoch": 1.25, "learning_rate": 3.229239870167191e-07, "logits/chosen": -6.025756359100342, "logits/rejected": -6.016737937927246, "logps/chosen": -285.8648986816406, "logps/rejected": -153.8723602294922, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 5.090956211090088, "rewards/margins": 10.3023681640625, "rewards/rejected": -5.211411952972412, "step": 2258 }, { "epoch": 1.25, "learning_rate": 3.225035062559452e-07, "logits/chosen": -6.127427101135254, "logits/rejected": -6.1193132400512695, "logps/chosen": -203.95266723632812, "logps/rejected": -161.03677368164062, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 3.765889883041382, "rewards/margins": 10.099747657775879, "rewards/rejected": -6.333858489990234, "step": 2259 }, { "epoch": 1.25, "learning_rate": 3.220831690746339e-07, "logits/chosen": -5.920053958892822, "logits/rejected": -5.9819746017456055, "logps/chosen": -249.38726806640625, "logps/rejected": -256.8517150878906, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": 2.929021120071411, "rewards/margins": 10.694659233093262, "rewards/rejected": -7.765637397766113, "step": 2260 }, { "epoch": 1.26, "learning_rate": 3.216629758128018e-07, "logits/chosen": -6.103593826293945, "logits/rejected": -6.0030622482299805, "logps/chosen": -306.21319580078125, "logps/rejected": -161.19764709472656, "loss": 0.0346, "rewards/accuracies": 0.9375, "rewards/chosen": 5.614226341247559, "rewards/margins": 11.32861614227295, "rewards/rejected": -5.714390277862549, "step": 2261 }, { "epoch": 1.26, "learning_rate": 3.212429268103495e-07, "logits/chosen": -5.915380001068115, "logits/rejected": -5.977517604827881, "logps/chosen": -234.45172119140625, "logps/rejected": -221.95936584472656, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 4.977819442749023, "rewards/margins": 12.492634773254395, "rewards/rejected": -7.514815330505371, "step": 2262 }, { "epoch": 1.26, "learning_rate": 3.2082302240706046e-07, "logits/chosen": -6.044513702392578, "logits/rejected": -6.107656478881836, "logps/chosen": -243.3467559814453, "logps/rejected": -306.712646484375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 5.249654769897461, "rewards/margins": 14.588027000427246, "rewards/rejected": -9.338371276855469, "step": 2263 }, { "epoch": 1.26, "learning_rate": 3.2040326294260157e-07, "logits/chosen": -6.122509956359863, "logits/rejected": -6.027152061462402, "logps/chosen": -301.0761413574219, "logps/rejected": -172.14036560058594, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 6.167818069458008, "rewards/margins": 10.602992057800293, "rewards/rejected": -4.435173034667969, "step": 2264 }, { "epoch": 1.26, "learning_rate": 3.1998364875652217e-07, "logits/chosen": -6.060027122497559, "logits/rejected": -6.101469039916992, "logps/chosen": -239.90574645996094, "logps/rejected": -306.5310363769531, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 1.9791663885116577, "rewards/margins": 10.867120742797852, "rewards/rejected": -8.887954711914062, "step": 2265 }, { "epoch": 1.26, "learning_rate": 3.1956418018825403e-07, "logits/chosen": -6.079892635345459, "logits/rejected": -6.001169204711914, "logps/chosen": -298.0222473144531, "logps/rejected": -302.88861083984375, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 2.609837770462036, "rewards/margins": 11.418334007263184, "rewards/rejected": -8.808496475219727, "step": 2266 }, { "epoch": 1.26, "learning_rate": 3.1914485757711184e-07, "logits/chosen": -5.983346939086914, "logits/rejected": -6.0734686851501465, "logps/chosen": -186.7416229248047, "logps/rejected": -161.80906677246094, "loss": 0.0159, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6195998191833496, "rewards/margins": 7.981833457946777, "rewards/rejected": -5.3622331619262695, "step": 2267 }, { "epoch": 1.26, "learning_rate": 3.1872568126229095e-07, "logits/chosen": -6.189042568206787, "logits/rejected": -6.0135321617126465, "logps/chosen": -271.4981689453125, "logps/rejected": -226.12928771972656, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": 5.110143661499023, "rewards/margins": 11.954694747924805, "rewards/rejected": -6.844552040100098, "step": 2268 }, { "epoch": 1.26, "learning_rate": 3.1830665158286976e-07, "logits/chosen": -5.98447847366333, "logits/rejected": -5.942537307739258, "logps/chosen": -220.16519165039062, "logps/rejected": -246.8513641357422, "loss": 0.0629, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3924293518066406, "rewards/margins": 9.675708770751953, "rewards/rejected": -7.2832794189453125, "step": 2269 }, { "epoch": 1.26, "learning_rate": 3.178877688778068e-07, "logits/chosen": -6.093862533569336, "logits/rejected": -6.044132232666016, "logps/chosen": -267.7285461425781, "logps/rejected": -239.73695373535156, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": 2.395045042037964, "rewards/margins": 10.756832122802734, "rewards/rejected": -8.361786842346191, "step": 2270 }, { "epoch": 1.26, "learning_rate": 3.17469033485943e-07, "logits/chosen": -5.997392177581787, "logits/rejected": -6.00528621673584, "logps/chosen": -236.81613159179688, "logps/rejected": -172.0525360107422, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 4.215239524841309, "rewards/margins": 12.312320709228516, "rewards/rejected": -8.097082138061523, "step": 2271 }, { "epoch": 1.26, "learning_rate": 3.170504457459989e-07, "logits/chosen": -5.984018325805664, "logits/rejected": -5.977464199066162, "logps/chosen": -248.787353515625, "logps/rejected": -209.45999145507812, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 3.752310276031494, "rewards/margins": 11.988922119140625, "rewards/rejected": -8.236612319946289, "step": 2272 }, { "epoch": 1.26, "learning_rate": 3.1663200599657616e-07, "logits/chosen": -5.908556938171387, "logits/rejected": -5.949486255645752, "logps/chosen": -194.29428100585938, "logps/rejected": -142.33595275878906, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.7885947227478027, "rewards/margins": 11.081169128417969, "rewards/rejected": -7.292574882507324, "step": 2273 }, { "epoch": 1.26, "learning_rate": 3.1621371457615697e-07, "logits/chosen": -6.19091272354126, "logits/rejected": -5.942721366882324, "logps/chosen": -266.17138671875, "logps/rejected": -186.23716735839844, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": 5.135397911071777, "rewards/margins": 10.917085647583008, "rewards/rejected": -5.781686782836914, "step": 2274 }, { "epoch": 1.26, "learning_rate": 3.15795571823103e-07, "logits/chosen": -6.000606060028076, "logits/rejected": -5.976632595062256, "logps/chosen": -472.452392578125, "logps/rejected": -439.8952941894531, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 3.643610715866089, "rewards/margins": 12.378664016723633, "rewards/rejected": -8.735053062438965, "step": 2275 }, { "epoch": 1.26, "learning_rate": 3.1537757807565615e-07, "logits/chosen": -6.052730083465576, "logits/rejected": -6.030242919921875, "logps/chosen": -231.95455932617188, "logps/rejected": -197.5889434814453, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 3.588181495666504, "rewards/margins": 10.691126823425293, "rewards/rejected": -7.102945804595947, "step": 2276 }, { "epoch": 1.26, "learning_rate": 3.149597336719373e-07, "logits/chosen": -6.143755912780762, "logits/rejected": -6.261805534362793, "logps/chosen": -226.3404083251953, "logps/rejected": -225.28433227539062, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 3.9513659477233887, "rewards/margins": 13.132905960083008, "rewards/rejected": -9.181540489196777, "step": 2277 }, { "epoch": 1.26, "learning_rate": 3.1454203894994707e-07, "logits/chosen": -5.942129135131836, "logits/rejected": -6.040492057800293, "logps/chosen": -253.07818603515625, "logps/rejected": -234.455810546875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 6.48801326751709, "rewards/margins": 12.077600479125977, "rewards/rejected": -5.58958625793457, "step": 2278 }, { "epoch": 1.27, "learning_rate": 3.141244942475647e-07, "logits/chosen": -5.976454257965088, "logits/rejected": -5.864012241363525, "logps/chosen": -311.3356018066406, "logps/rejected": -161.5417022705078, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 6.412549018859863, "rewards/margins": 11.69835090637207, "rewards/rejected": -5.285801887512207, "step": 2279 }, { "epoch": 1.27, "learning_rate": 3.1370709990254784e-07, "logits/chosen": -6.114336013793945, "logits/rejected": -6.028956413269043, "logps/chosen": -302.3358154296875, "logps/rejected": -236.42593383789062, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 3.396369457244873, "rewards/margins": 10.420791625976562, "rewards/rejected": -7.0244221687316895, "step": 2280 }, { "epoch": 1.27, "learning_rate": 3.13289856252533e-07, "logits/chosen": -6.087893009185791, "logits/rejected": -6.166195869445801, "logps/chosen": -275.31842041015625, "logps/rejected": -278.55072021484375, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 5.226800918579102, "rewards/margins": 14.37844181060791, "rewards/rejected": -9.151640892028809, "step": 2281 }, { "epoch": 1.27, "learning_rate": 3.128727636350344e-07, "logits/chosen": -5.939764976501465, "logits/rejected": -5.949954509735107, "logps/chosen": -541.9190673828125, "logps/rejected": -376.5848388671875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 6.842098236083984, "rewards/margins": 16.086933135986328, "rewards/rejected": -9.244834899902344, "step": 2282 }, { "epoch": 1.27, "learning_rate": 3.124558223874444e-07, "logits/chosen": -6.006401062011719, "logits/rejected": -5.927967548370361, "logps/chosen": -520.0072631835938, "logps/rejected": -186.04190063476562, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": 5.937927722930908, "rewards/margins": 14.086769104003906, "rewards/rejected": -8.14884090423584, "step": 2283 }, { "epoch": 1.27, "learning_rate": 3.120390328470326e-07, "logits/chosen": -6.0915446281433105, "logits/rejected": -5.907059192657471, "logps/chosen": -385.5290832519531, "logps/rejected": -364.98089599609375, "loss": 0.0302, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9238439798355103, "rewards/margins": 9.00586223602295, "rewards/rejected": -7.082018852233887, "step": 2284 }, { "epoch": 1.27, "learning_rate": 3.116223953509463e-07, "logits/chosen": -6.062019348144531, "logits/rejected": -5.868758201599121, "logps/chosen": -275.2102966308594, "logps/rejected": -199.90182495117188, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": 4.320333957672119, "rewards/margins": 9.767766952514648, "rewards/rejected": -5.447432994842529, "step": 2285 }, { "epoch": 1.27, "learning_rate": 3.1120591023620925e-07, "logits/chosen": -6.106131076812744, "logits/rejected": -5.980440139770508, "logps/chosen": -256.83563232421875, "logps/rejected": -167.13946533203125, "loss": 0.0223, "rewards/accuracies": 0.9375, "rewards/chosen": 3.842822313308716, "rewards/margins": 11.013957977294922, "rewards/rejected": -7.171135902404785, "step": 2286 }, { "epoch": 1.27, "learning_rate": 3.1078957783972236e-07, "logits/chosen": -6.023602485656738, "logits/rejected": -6.090132236480713, "logps/chosen": -247.17079162597656, "logps/rejected": -274.34564208984375, "loss": 0.0961, "rewards/accuracies": 1.0, "rewards/chosen": 3.2715423107147217, "rewards/margins": 11.276573181152344, "rewards/rejected": -8.005030632019043, "step": 2287 }, { "epoch": 1.27, "learning_rate": 3.103733984982631e-07, "logits/chosen": -6.039963722229004, "logits/rejected": -5.949822425842285, "logps/chosen": -280.0352478027344, "logps/rejected": -171.1864776611328, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 4.0996599197387695, "rewards/margins": 10.590787887573242, "rewards/rejected": -6.4911274909973145, "step": 2288 }, { "epoch": 1.27, "learning_rate": 3.0995737254848443e-07, "logits/chosen": -6.016569137573242, "logits/rejected": -5.988250255584717, "logps/chosen": -261.6293640136719, "logps/rejected": -209.73582458496094, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 3.6644420623779297, "rewards/margins": 10.461658477783203, "rewards/rejected": -6.797216415405273, "step": 2289 }, { "epoch": 1.27, "learning_rate": 3.095415003269163e-07, "logits/chosen": -6.068857192993164, "logits/rejected": -6.0408196449279785, "logps/chosen": -216.75914001464844, "logps/rejected": -233.51364135742188, "loss": 0.0306, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9708783626556396, "rewards/margins": 11.927034378051758, "rewards/rejected": -7.956154823303223, "step": 2290 }, { "epoch": 1.27, "learning_rate": 3.091257821699631e-07, "logits/chosen": -6.038810729980469, "logits/rejected": -6.0815606117248535, "logps/chosen": -247.91709899902344, "logps/rejected": -199.50753784179688, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 4.976468563079834, "rewards/margins": 11.459545135498047, "rewards/rejected": -6.483077049255371, "step": 2291 }, { "epoch": 1.27, "learning_rate": 3.087102184139059e-07, "logits/chosen": -6.086742401123047, "logits/rejected": -6.059727668762207, "logps/chosen": -185.5904541015625, "logps/rejected": -179.2090301513672, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": 3.8478055000305176, "rewards/margins": 10.292318344116211, "rewards/rejected": -6.444512367248535, "step": 2292 }, { "epoch": 1.27, "learning_rate": 3.0829480939489967e-07, "logits/chosen": -6.107442855834961, "logits/rejected": -6.0662312507629395, "logps/chosen": -209.48594665527344, "logps/rejected": -138.49050903320312, "loss": 0.0485, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1066951751708984, "rewards/margins": 9.474756240844727, "rewards/rejected": -6.368061542510986, "step": 2293 }, { "epoch": 1.27, "learning_rate": 3.078795554489748e-07, "logits/chosen": -6.0160980224609375, "logits/rejected": -5.946385383605957, "logps/chosen": -201.21383666992188, "logps/rejected": -133.9981689453125, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 2.887265920639038, "rewards/margins": 9.19332218170166, "rewards/rejected": -6.306056499481201, "step": 2294 }, { "epoch": 1.27, "learning_rate": 3.0746445691203637e-07, "logits/chosen": -5.893644332885742, "logits/rejected": -5.957416534423828, "logps/chosen": -540.8975219726562, "logps/rejected": -547.14453125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 6.847466945648193, "rewards/margins": 18.948156356811523, "rewards/rejected": -12.100689888000488, "step": 2295 }, { "epoch": 1.27, "learning_rate": 3.0704951411986334e-07, "logits/chosen": -6.044565200805664, "logits/rejected": -6.027858257293701, "logps/chosen": -235.50302124023438, "logps/rejected": -176.65237426757812, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 4.47662353515625, "rewards/margins": 12.163799285888672, "rewards/rejected": -7.687175750732422, "step": 2296 }, { "epoch": 1.28, "learning_rate": 3.0663472740810903e-07, "logits/chosen": -6.043672561645508, "logits/rejected": -5.937671184539795, "logps/chosen": -181.73666381835938, "logps/rejected": -167.5074462890625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 4.216267108917236, "rewards/margins": 12.020923614501953, "rewards/rejected": -7.804656982421875, "step": 2297 }, { "epoch": 1.28, "learning_rate": 3.0622009711230035e-07, "logits/chosen": -6.010104179382324, "logits/rejected": -6.016018390655518, "logps/chosen": -218.7241973876953, "logps/rejected": -188.2919921875, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 4.466512203216553, "rewards/margins": 13.445743560791016, "rewards/rejected": -8.979230880737305, "step": 2298 }, { "epoch": 1.28, "learning_rate": 3.0580562356783777e-07, "logits/chosen": -6.032464504241943, "logits/rejected": -6.012444496154785, "logps/chosen": -280.2689208984375, "logps/rejected": -233.86441040039062, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/chosen": 4.708468437194824, "rewards/margins": 12.408349990844727, "rewards/rejected": -7.699882507324219, "step": 2299 }, { "epoch": 1.28, "learning_rate": 3.0539130710999473e-07, "logits/chosen": -6.087648391723633, "logits/rejected": -5.992462635040283, "logps/chosen": -228.11695861816406, "logps/rejected": -195.48193359375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 3.4837450981140137, "rewards/margins": 10.329363822937012, "rewards/rejected": -6.84561824798584, "step": 2300 }, { "epoch": 1.28, "learning_rate": 3.049771480739177e-07, "logits/chosen": -5.969643592834473, "logits/rejected": -5.936608791351318, "logps/chosen": -278.98687744140625, "logps/rejected": -264.68841552734375, "loss": 0.0693, "rewards/accuracies": 0.875, "rewards/chosen": 2.468466281890869, "rewards/margins": 8.894600868225098, "rewards/rejected": -6.426135063171387, "step": 2301 }, { "epoch": 1.28, "learning_rate": 3.045631467946262e-07, "logits/chosen": -6.004966735839844, "logits/rejected": -5.997018814086914, "logps/chosen": -261.2623596191406, "logps/rejected": -164.3196563720703, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 4.50650691986084, "rewards/margins": 10.078064918518066, "rewards/rejected": -5.571557998657227, "step": 2302 }, { "epoch": 1.28, "learning_rate": 3.041493036070115e-07, "logits/chosen": -6.0701398849487305, "logits/rejected": -6.00887393951416, "logps/chosen": -244.59718322753906, "logps/rejected": -163.26004028320312, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 3.745520830154419, "rewards/margins": 9.761496543884277, "rewards/rejected": -6.015975475311279, "step": 2303 }, { "epoch": 1.28, "learning_rate": 3.0373561884583744e-07, "logits/chosen": -6.005340576171875, "logits/rejected": -5.969516754150391, "logps/chosen": -284.5514221191406, "logps/rejected": -187.76046752929688, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 4.376543998718262, "rewards/margins": 10.558552742004395, "rewards/rejected": -6.182007789611816, "step": 2304 }, { "epoch": 1.28, "learning_rate": 3.0332209284573954e-07, "logits/chosen": -5.9479522705078125, "logits/rejected": -6.074127197265625, "logps/chosen": -178.17694091796875, "logps/rejected": -260.267333984375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 1.6253654956817627, "rewards/margins": 10.014781951904297, "rewards/rejected": -8.389416694641113, "step": 2305 }, { "epoch": 1.28, "learning_rate": 3.0290872594122485e-07, "logits/chosen": -6.062763690948486, "logits/rejected": -5.917664051055908, "logps/chosen": -203.7792205810547, "logps/rejected": -160.31410217285156, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 2.3722634315490723, "rewards/margins": 11.929037094116211, "rewards/rejected": -9.556774139404297, "step": 2306 }, { "epoch": 1.28, "learning_rate": 3.02495518466672e-07, "logits/chosen": -5.926652431488037, "logits/rejected": -5.997398376464844, "logps/chosen": -225.86297607421875, "logps/rejected": -236.68267822265625, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": 2.555713176727295, "rewards/margins": 11.701678276062012, "rewards/rejected": -9.145964622497559, "step": 2307 }, { "epoch": 1.28, "learning_rate": 3.020824707563301e-07, "logits/chosen": -5.969752788543701, "logits/rejected": -6.004875183105469, "logps/chosen": -129.631103515625, "logps/rejected": -172.79623413085938, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": 2.338841438293457, "rewards/margins": 10.08521556854248, "rewards/rejected": -7.746373653411865, "step": 2308 }, { "epoch": 1.28, "learning_rate": 3.0166958314431985e-07, "logits/chosen": -5.919133186340332, "logits/rejected": -5.969163417816162, "logps/chosen": -222.9934539794922, "logps/rejected": -180.82162475585938, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": 4.585212707519531, "rewards/margins": 10.096101760864258, "rewards/rejected": -5.510889053344727, "step": 2309 }, { "epoch": 1.28, "learning_rate": 3.0125685596463136e-07, "logits/chosen": -5.9511003494262695, "logits/rejected": -5.985077857971191, "logps/chosen": -253.28933715820312, "logps/rejected": -266.07958984375, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 3.755829095840454, "rewards/margins": 11.699254989624023, "rewards/rejected": -7.94342565536499, "step": 2310 }, { "epoch": 1.28, "learning_rate": 3.0084428955112626e-07, "logits/chosen": -6.064139366149902, "logits/rejected": -6.01872444152832, "logps/chosen": -292.3132019042969, "logps/rejected": -321.68017578125, "loss": 0.0372, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0709813833236694, "rewards/margins": 11.968997955322266, "rewards/rejected": -10.898017883300781, "step": 2311 }, { "epoch": 1.28, "learning_rate": 3.0043188423753493e-07, "logits/chosen": -6.009072303771973, "logits/rejected": -6.041866302490234, "logps/chosen": -135.28582763671875, "logps/rejected": -229.40403747558594, "loss": 0.0175, "rewards/accuracies": 0.9375, "rewards/chosen": 2.527743101119995, "rewards/margins": 9.767891883850098, "rewards/rejected": -7.240148544311523, "step": 2312 }, { "epoch": 1.28, "learning_rate": 3.000196403574581e-07, "logits/chosen": -6.017492294311523, "logits/rejected": -6.07942008972168, "logps/chosen": -304.9125671386719, "logps/rejected": -396.6733093261719, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 4.357198715209961, "rewards/margins": 16.545963287353516, "rewards/rejected": -12.188764572143555, "step": 2313 }, { "epoch": 1.28, "learning_rate": 2.996075582443658e-07, "logits/chosen": -6.018593788146973, "logits/rejected": -5.942388534545898, "logps/chosen": -222.152099609375, "logps/rejected": -243.8101043701172, "loss": 0.0425, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7487421035766602, "rewards/margins": 10.746671676635742, "rewards/rejected": -8.997928619384766, "step": 2314 }, { "epoch": 1.29, "learning_rate": 2.991956382315971e-07, "logits/chosen": -5.998175144195557, "logits/rejected": -5.915539741516113, "logps/chosen": -411.78509521484375, "logps/rejected": -208.0288543701172, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 4.615116119384766, "rewards/margins": 11.470611572265625, "rewards/rejected": -6.855495452880859, "step": 2315 }, { "epoch": 1.29, "learning_rate": 2.9878388065236013e-07, "logits/chosen": -6.13807487487793, "logits/rejected": -6.064520835876465, "logps/chosen": -300.405517578125, "logps/rejected": -270.050537109375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 3.166159152984619, "rewards/margins": 13.159337997436523, "rewards/rejected": -9.99317741394043, "step": 2316 }, { "epoch": 1.29, "learning_rate": 2.983722858397314e-07, "logits/chosen": -6.13188362121582, "logits/rejected": -6.05776834487915, "logps/chosen": -199.8199920654297, "logps/rejected": -166.40911865234375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 2.7548229694366455, "rewards/margins": 10.764382362365723, "rewards/rejected": -8.009559631347656, "step": 2317 }, { "epoch": 1.29, "learning_rate": 2.9796085412665597e-07, "logits/chosen": -5.935791969299316, "logits/rejected": -5.972326755523682, "logps/chosen": -422.14642333984375, "logps/rejected": -382.56317138671875, "loss": 0.1342, "rewards/accuracies": 1.0, "rewards/chosen": 4.003332138061523, "rewards/margins": 11.448984146118164, "rewards/rejected": -7.445652484893799, "step": 2318 }, { "epoch": 1.29, "learning_rate": 2.9754958584594694e-07, "logits/chosen": -6.149610996246338, "logits/rejected": -6.072335243225098, "logps/chosen": -345.31787109375, "logps/rejected": -229.1063995361328, "loss": 0.0779, "rewards/accuracies": 1.0, "rewards/chosen": 4.659195899963379, "rewards/margins": 14.354915618896484, "rewards/rejected": -9.695718765258789, "step": 2319 }, { "epoch": 1.29, "learning_rate": 2.971384813302847e-07, "logits/chosen": -6.025716781616211, "logits/rejected": -5.961935520172119, "logps/chosen": -231.55010986328125, "logps/rejected": -237.22061157226562, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": 4.479290008544922, "rewards/margins": 11.828725814819336, "rewards/rejected": -7.349435329437256, "step": 2320 }, { "epoch": 1.29, "learning_rate": 2.96727540912218e-07, "logits/chosen": -6.011725902557373, "logits/rejected": -6.074343681335449, "logps/chosen": -308.23419189453125, "logps/rejected": -372.6004943847656, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 8.179576873779297, "rewards/margins": 17.372772216796875, "rewards/rejected": -9.193194389343262, "step": 2321 }, { "epoch": 1.29, "learning_rate": 2.9631676492416224e-07, "logits/chosen": -5.931802272796631, "logits/rejected": -5.99833345413208, "logps/chosen": -235.98648071289062, "logps/rejected": -264.38580322265625, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 3.3274457454681396, "rewards/margins": 13.885438919067383, "rewards/rejected": -10.557992935180664, "step": 2322 }, { "epoch": 1.29, "learning_rate": 2.9590615369840024e-07, "logits/chosen": -6.053328990936279, "logits/rejected": -6.019549369812012, "logps/chosen": -254.97042846679688, "logps/rejected": -162.3144073486328, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 5.2141032218933105, "rewards/margins": 13.108772277832031, "rewards/rejected": -7.894667625427246, "step": 2323 }, { "epoch": 1.29, "learning_rate": 2.9549570756708105e-07, "logits/chosen": -6.072831153869629, "logits/rejected": -5.985373497009277, "logps/chosen": -171.1398468017578, "logps/rejected": -114.66203308105469, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": 3.702291250228882, "rewards/margins": 10.382268905639648, "rewards/rejected": -6.6799774169921875, "step": 2324 }, { "epoch": 1.29, "learning_rate": 2.950854268622207e-07, "logits/chosen": -6.091464042663574, "logits/rejected": -5.983369827270508, "logps/chosen": -220.01220703125, "logps/rejected": -180.73193359375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 3.4062440395355225, "rewards/margins": 10.58775806427002, "rewards/rejected": -7.181514263153076, "step": 2325 }, { "epoch": 1.29, "learning_rate": 2.9467531191570093e-07, "logits/chosen": -5.984318256378174, "logits/rejected": -6.075167655944824, "logps/chosen": -245.2593536376953, "logps/rejected": -250.75790405273438, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 2.580632448196411, "rewards/margins": 11.083465576171875, "rewards/rejected": -8.502832412719727, "step": 2326 }, { "epoch": 1.29, "learning_rate": 2.942653630592696e-07, "logits/chosen": -6.009891510009766, "logits/rejected": -6.016462802886963, "logps/chosen": -216.84182739257812, "logps/rejected": -213.84524536132812, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/chosen": 3.4370574951171875, "rewards/margins": 11.647055625915527, "rewards/rejected": -8.20999813079834, "step": 2327 }, { "epoch": 1.29, "learning_rate": 2.938555806245406e-07, "logits/chosen": -6.019664764404297, "logits/rejected": -6.0652899742126465, "logps/chosen": -255.6765899658203, "logps/rejected": -156.81781005859375, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": 3.082598924636841, "rewards/margins": 10.728434562683105, "rewards/rejected": -7.645835876464844, "step": 2328 }, { "epoch": 1.29, "learning_rate": 2.934459649429922e-07, "logits/chosen": -6.008298873901367, "logits/rejected": -5.953972816467285, "logps/chosen": -231.63926696777344, "logps/rejected": -178.2015838623047, "loss": 0.2491, "rewards/accuracies": 1.0, "rewards/chosen": 3.829685688018799, "rewards/margins": 11.152828216552734, "rewards/rejected": -7.323143005371094, "step": 2329 }, { "epoch": 1.29, "learning_rate": 2.9303651634596907e-07, "logits/chosen": -6.007289409637451, "logits/rejected": -5.999009609222412, "logps/chosen": -278.91204833984375, "logps/rejected": -221.58387756347656, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 4.285077095031738, "rewards/margins": 13.084216117858887, "rewards/rejected": -8.799139022827148, "step": 2330 }, { "epoch": 1.29, "learning_rate": 2.926272351646794e-07, "logits/chosen": -6.011284351348877, "logits/rejected": -6.039461135864258, "logps/chosen": -341.52423095703125, "logps/rejected": -311.3385314941406, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": 5.784041404724121, "rewards/margins": 12.87531852722168, "rewards/rejected": -7.091278076171875, "step": 2331 }, { "epoch": 1.29, "learning_rate": 2.922181217301972e-07, "logits/chosen": -6.068609237670898, "logits/rejected": -6.0743818283081055, "logps/chosen": -335.626708984375, "logps/rejected": -169.19882202148438, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 6.186347961425781, "rewards/margins": 11.16938304901123, "rewards/rejected": -4.983035087585449, "step": 2332 }, { "epoch": 1.3, "learning_rate": 2.9180917637345994e-07, "logits/chosen": -5.931430816650391, "logits/rejected": -5.962825775146484, "logps/chosen": -303.2214050292969, "logps/rejected": -493.2407531738281, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 7.9134063720703125, "rewards/margins": 15.459136962890625, "rewards/rejected": -7.5457305908203125, "step": 2333 }, { "epoch": 1.3, "learning_rate": 2.914003994252693e-07, "logits/chosen": -6.010650157928467, "logits/rejected": -6.0170369148254395, "logps/chosen": -180.8641357421875, "logps/rejected": -158.33843994140625, "loss": 0.0236, "rewards/accuracies": 0.9375, "rewards/chosen": 2.451838970184326, "rewards/margins": 9.094306945800781, "rewards/rejected": -6.642467498779297, "step": 2334 }, { "epoch": 1.3, "learning_rate": 2.9099179121629116e-07, "logits/chosen": -5.968501091003418, "logits/rejected": -5.997028350830078, "logps/chosen": -231.48184204101562, "logps/rejected": -253.54075622558594, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 4.388858318328857, "rewards/margins": 12.080901145935059, "rewards/rejected": -7.692043304443359, "step": 2335 }, { "epoch": 1.3, "learning_rate": 2.9058335207705404e-07, "logits/chosen": -6.084155559539795, "logits/rejected": -6.055165767669678, "logps/chosen": -230.0330810546875, "logps/rejected": -277.0082702636719, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 3.0681490898132324, "rewards/margins": 13.190150260925293, "rewards/rejected": -10.122000694274902, "step": 2336 }, { "epoch": 1.3, "learning_rate": 2.9017508233795055e-07, "logits/chosen": -5.949781894683838, "logits/rejected": -6.054832458496094, "logps/chosen": -275.58209228515625, "logps/rejected": -203.36117553710938, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 6.905038833618164, "rewards/margins": 13.523566246032715, "rewards/rejected": -6.618527412414551, "step": 2337 }, { "epoch": 1.3, "learning_rate": 2.8976698232923593e-07, "logits/chosen": -6.01677942276001, "logits/rejected": -5.9569268226623535, "logps/chosen": -677.7967529296875, "logps/rejected": -372.66363525390625, "loss": 0.0834, "rewards/accuracies": 0.9375, "rewards/chosen": 5.846036911010742, "rewards/margins": 14.266268730163574, "rewards/rejected": -8.420230865478516, "step": 2338 }, { "epoch": 1.3, "learning_rate": 2.893590523810283e-07, "logits/chosen": -6.029797077178955, "logits/rejected": -5.892638206481934, "logps/chosen": -379.6832275390625, "logps/rejected": -376.85833740234375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 1.4560391902923584, "rewards/margins": 8.791444778442383, "rewards/rejected": -7.335405349731445, "step": 2339 }, { "epoch": 1.3, "learning_rate": 2.8895129282330774e-07, "logits/chosen": -5.933444023132324, "logits/rejected": -5.927021503448486, "logps/chosen": -280.10137939453125, "logps/rejected": -143.65505981445312, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 4.144457817077637, "rewards/margins": 9.777953147888184, "rewards/rejected": -5.6334943771362305, "step": 2340 }, { "epoch": 1.3, "learning_rate": 2.885437039859169e-07, "logits/chosen": -5.977455139160156, "logits/rejected": -6.021002769470215, "logps/chosen": -239.35928344726562, "logps/rejected": -268.3104248046875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 5.112725257873535, "rewards/margins": 14.319546699523926, "rewards/rejected": -9.20682144165039, "step": 2341 }, { "epoch": 1.3, "learning_rate": 2.881362861985606e-07, "logits/chosen": -5.947387218475342, "logits/rejected": -5.870352745056152, "logps/chosen": -402.6593322753906, "logps/rejected": -187.17807006835938, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 5.858060359954834, "rewards/margins": 12.494209289550781, "rewards/rejected": -6.636147975921631, "step": 2342 }, { "epoch": 1.3, "learning_rate": 2.8772903979080426e-07, "logits/chosen": -6.095277786254883, "logits/rejected": -5.977279186248779, "logps/chosen": -215.98843383789062, "logps/rejected": -169.47088623046875, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": 3.1554861068725586, "rewards/margins": 10.628035545349121, "rewards/rejected": -7.4725494384765625, "step": 2343 }, { "epoch": 1.3, "learning_rate": 2.8732196509207596e-07, "logits/chosen": -6.031471252441406, "logits/rejected": -5.939507484436035, "logps/chosen": -214.5731201171875, "logps/rejected": -186.9007110595703, "loss": 0.0188, "rewards/accuracies": 0.9375, "rewards/chosen": 2.66213059425354, "rewards/margins": 10.063251495361328, "rewards/rejected": -7.401120185852051, "step": 2344 }, { "epoch": 1.3, "learning_rate": 2.869150624316636e-07, "logits/chosen": -6.0835795402526855, "logits/rejected": -6.013944625854492, "logps/chosen": -329.9295654296875, "logps/rejected": -319.4842529296875, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 4.395545482635498, "rewards/margins": 12.006978034973145, "rewards/rejected": -7.611433029174805, "step": 2345 }, { "epoch": 1.3, "learning_rate": 2.865083321387175e-07, "logits/chosen": -6.058014869689941, "logits/rejected": -5.997493743896484, "logps/chosen": -276.68450927734375, "logps/rejected": -252.9375457763672, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 5.7804412841796875, "rewards/margins": 12.77973747253418, "rewards/rejected": -6.999296188354492, "step": 2346 }, { "epoch": 1.3, "learning_rate": 2.861017745422472e-07, "logits/chosen": -5.955658435821533, "logits/rejected": -5.910489559173584, "logps/chosen": -198.16452026367188, "logps/rejected": -114.10167694091797, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 3.539030075073242, "rewards/margins": 9.489142417907715, "rewards/rejected": -5.950112819671631, "step": 2347 }, { "epoch": 1.3, "learning_rate": 2.8569538997112256e-07, "logits/chosen": -5.958967685699463, "logits/rejected": -5.973474979400635, "logps/chosen": -272.047119140625, "logps/rejected": -230.61468505859375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 6.466505527496338, "rewards/margins": 10.437443733215332, "rewards/rejected": -3.970938205718994, "step": 2348 }, { "epoch": 1.3, "learning_rate": 2.8528917875407433e-07, "logits/chosen": -6.039499759674072, "logits/rejected": -5.972617149353027, "logps/chosen": -429.0350341796875, "logps/rejected": -339.8009033203125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 3.38592791557312, "rewards/margins": 11.563338279724121, "rewards/rejected": -8.177410125732422, "step": 2349 }, { "epoch": 1.3, "learning_rate": 2.848831412196925e-07, "logits/chosen": -6.0390305519104, "logits/rejected": -6.088890552520752, "logps/chosen": -390.7691650390625, "logps/rejected": -272.1302795410156, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 1.9026519060134888, "rewards/margins": 9.862046241760254, "rewards/rejected": -7.959394931793213, "step": 2350 }, { "epoch": 1.31, "learning_rate": 2.8447727769642693e-07, "logits/chosen": -6.049563884735107, "logits/rejected": -5.887932777404785, "logps/chosen": -296.1823425292969, "logps/rejected": -215.70846557617188, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 6.167177200317383, "rewards/margins": 12.225232124328613, "rewards/rejected": -6.058055877685547, "step": 2351 }, { "epoch": 1.31, "learning_rate": 2.840715885125859e-07, "logits/chosen": -6.092733383178711, "logits/rejected": -6.000514030456543, "logps/chosen": -321.7739562988281, "logps/rejected": -217.94775390625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 5.0976715087890625, "rewards/margins": 12.71888256072998, "rewards/rejected": -7.621211528778076, "step": 2352 }, { "epoch": 1.31, "learning_rate": 2.836660739963377e-07, "logits/chosen": -5.9531965255737305, "logits/rejected": -5.988106727600098, "logps/chosen": -306.47113037109375, "logps/rejected": -125.93856811523438, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 4.0194902420043945, "rewards/margins": 9.82999038696289, "rewards/rejected": -5.810500144958496, "step": 2353 }, { "epoch": 1.31, "learning_rate": 2.8326073447570876e-07, "logits/chosen": -6.0261430740356445, "logits/rejected": -6.002286911010742, "logps/chosen": -231.21633911132812, "logps/rejected": -211.9345245361328, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 3.525139808654785, "rewards/margins": 10.39821720123291, "rewards/rejected": -6.873077869415283, "step": 2354 }, { "epoch": 1.31, "learning_rate": 2.8285557027858374e-07, "logits/chosen": -5.980740547180176, "logits/rejected": -6.006600856781006, "logps/chosen": -266.7535095214844, "logps/rejected": -248.1187744140625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 2.692767858505249, "rewards/margins": 9.907032012939453, "rewards/rejected": -7.214264392852783, "step": 2355 }, { "epoch": 1.31, "learning_rate": 2.824505817327062e-07, "logits/chosen": -6.017612457275391, "logits/rejected": -5.997525215148926, "logps/chosen": -257.9459533691406, "logps/rejected": -199.41769409179688, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 4.276620388031006, "rewards/margins": 12.35460376739502, "rewards/rejected": -8.077982902526855, "step": 2356 }, { "epoch": 1.31, "learning_rate": 2.820457691656768e-07, "logits/chosen": -5.850709915161133, "logits/rejected": -5.995916843414307, "logps/chosen": -191.40939331054688, "logps/rejected": -219.17959594726562, "loss": 0.0241, "rewards/accuracies": 0.9375, "rewards/chosen": 2.6198229789733887, "rewards/margins": 10.694449424743652, "rewards/rejected": -8.074626922607422, "step": 2357 }, { "epoch": 1.31, "learning_rate": 2.816411329049544e-07, "logits/chosen": -5.913640022277832, "logits/rejected": -5.817754745483398, "logps/chosen": -403.1468505859375, "logps/rejected": -366.2749938964844, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": 2.3788270950317383, "rewards/margins": 9.445260047912598, "rewards/rejected": -7.066432952880859, "step": 2358 }, { "epoch": 1.31, "learning_rate": 2.812366732778553e-07, "logits/chosen": -6.0526628494262695, "logits/rejected": -6.134152889251709, "logps/chosen": -261.43133544921875, "logps/rejected": -257.9267578125, "loss": 0.1211, "rewards/accuracies": 1.0, "rewards/chosen": 3.0463757514953613, "rewards/margins": 12.678810119628906, "rewards/rejected": -9.632434844970703, "step": 2359 }, { "epoch": 1.31, "learning_rate": 2.808323906115523e-07, "logits/chosen": -6.040823459625244, "logits/rejected": -6.118876934051514, "logps/chosen": -231.01304626464844, "logps/rejected": -248.35089111328125, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 4.748124122619629, "rewards/margins": 13.882678985595703, "rewards/rejected": -9.13455581665039, "step": 2360 }, { "epoch": 1.31, "learning_rate": 2.804282852330756e-07, "logits/chosen": -6.100799083709717, "logits/rejected": -5.931714057922363, "logps/chosen": -217.3105010986328, "logps/rejected": -83.78839111328125, "loss": 0.0634, "rewards/accuracies": 0.9375, "rewards/chosen": 3.185687780380249, "rewards/margins": 9.868122100830078, "rewards/rejected": -6.68243408203125, "step": 2361 }, { "epoch": 1.31, "learning_rate": 2.800243574693119e-07, "logits/chosen": -5.946054935455322, "logits/rejected": -6.068731307983398, "logps/chosen": -229.87103271484375, "logps/rejected": -256.24365234375, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": 3.560128927230835, "rewards/margins": 13.452346801757812, "rewards/rejected": -9.892217636108398, "step": 2362 }, { "epoch": 1.31, "learning_rate": 2.7962060764700436e-07, "logits/chosen": -5.989453315734863, "logits/rejected": -5.982301712036133, "logps/chosen": -314.43988037109375, "logps/rejected": -279.4677734375, "loss": 0.0271, "rewards/accuracies": 0.9375, "rewards/chosen": 5.447225570678711, "rewards/margins": 11.40855598449707, "rewards/rejected": -5.961331367492676, "step": 2363 }, { "epoch": 1.31, "learning_rate": 2.7921703609275167e-07, "logits/chosen": -6.017815589904785, "logits/rejected": -5.920984268188477, "logps/chosen": -309.89129638671875, "logps/rejected": -188.54177856445312, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 3.6083829402923584, "rewards/margins": 12.398273468017578, "rewards/rejected": -8.78989028930664, "step": 2364 }, { "epoch": 1.31, "learning_rate": 2.788136431330089e-07, "logits/chosen": -5.927883625030518, "logits/rejected": -6.083087921142578, "logps/chosen": -282.1346130371094, "logps/rejected": -565.2540893554688, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 3.500211238861084, "rewards/margins": 15.64726448059082, "rewards/rejected": -12.147053718566895, "step": 2365 }, { "epoch": 1.31, "learning_rate": 2.784104290940862e-07, "logits/chosen": -6.096134662628174, "logits/rejected": -6.029979705810547, "logps/chosen": -241.8394012451172, "logps/rejected": -200.13067626953125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 4.031450271606445, "rewards/margins": 12.648773193359375, "rewards/rejected": -8.61732292175293, "step": 2366 }, { "epoch": 1.31, "learning_rate": 2.7800739430214926e-07, "logits/chosen": -5.979488849639893, "logits/rejected": -6.0282087326049805, "logps/chosen": -234.03359985351562, "logps/rejected": -166.5821990966797, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": 5.269256114959717, "rewards/margins": 12.190484046936035, "rewards/rejected": -6.921228408813477, "step": 2367 }, { "epoch": 1.31, "learning_rate": 2.776045390832189e-07, "logits/chosen": -6.141307353973389, "logits/rejected": -6.086292266845703, "logps/chosen": -207.9996337890625, "logps/rejected": -292.0419616699219, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 2.403841495513916, "rewards/margins": 15.116377830505371, "rewards/rejected": -12.712535858154297, "step": 2368 }, { "epoch": 1.32, "learning_rate": 2.7720186376317023e-07, "logits/chosen": -5.960885047912598, "logits/rejected": -5.983213424682617, "logps/chosen": -315.60040283203125, "logps/rejected": -258.61322021484375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 5.9503583908081055, "rewards/margins": 13.308290481567383, "rewards/rejected": -7.357931613922119, "step": 2369 }, { "epoch": 1.32, "learning_rate": 2.7679936866773315e-07, "logits/chosen": -6.009253978729248, "logits/rejected": -6.015429973602295, "logps/chosen": -227.6686553955078, "logps/rejected": -199.15589904785156, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 4.245411396026611, "rewards/margins": 12.56773567199707, "rewards/rejected": -8.322324752807617, "step": 2370 }, { "epoch": 1.32, "learning_rate": 2.7639705412249184e-07, "logits/chosen": -5.9365668296813965, "logits/rejected": -5.988585472106934, "logps/chosen": -383.49627685546875, "logps/rejected": -475.33233642578125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 5.254347801208496, "rewards/margins": 16.537364959716797, "rewards/rejected": -11.283016204833984, "step": 2371 }, { "epoch": 1.32, "learning_rate": 2.7599492045288436e-07, "logits/chosen": -6.081114292144775, "logits/rejected": -5.990527153015137, "logps/chosen": -241.4212646484375, "logps/rejected": -170.52574157714844, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": 5.83199405670166, "rewards/margins": 13.559199333190918, "rewards/rejected": -7.727204322814941, "step": 2372 }, { "epoch": 1.32, "learning_rate": 2.755929679842023e-07, "logits/chosen": -6.058785438537598, "logits/rejected": -5.961758136749268, "logps/chosen": -273.6675109863281, "logps/rejected": -168.08729553222656, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 5.110248565673828, "rewards/margins": 10.077665328979492, "rewards/rejected": -4.967417240142822, "step": 2373 }, { "epoch": 1.32, "learning_rate": 2.751911970415905e-07, "logits/chosen": -6.032627582550049, "logits/rejected": -6.054938793182373, "logps/chosen": -139.98147583007812, "logps/rejected": -238.68661499023438, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 1.0395008325576782, "rewards/margins": 9.98556900024414, "rewards/rejected": -8.946067810058594, "step": 2374 }, { "epoch": 1.32, "learning_rate": 2.7478960795004787e-07, "logits/chosen": -6.052794933319092, "logits/rejected": -5.9964399337768555, "logps/chosen": -237.1635284423828, "logps/rejected": -150.32192993164062, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 2.9550976753234863, "rewards/margins": 8.273530960083008, "rewards/rejected": -5.31843376159668, "step": 2375 }, { "epoch": 1.32, "learning_rate": 2.7438820103442506e-07, "logits/chosen": -6.016436576843262, "logits/rejected": -6.065893173217773, "logps/chosen": -224.39605712890625, "logps/rejected": -262.2621765136719, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": 4.834671974182129, "rewards/margins": 12.83120346069336, "rewards/rejected": -7.9965314865112305, "step": 2376 }, { "epoch": 1.32, "learning_rate": 2.7398697661942627e-07, "logits/chosen": -6.0240888595581055, "logits/rejected": -6.021200656890869, "logps/chosen": -228.06332397460938, "logps/rejected": -234.5519561767578, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 4.922686576843262, "rewards/margins": 11.597405433654785, "rewards/rejected": -6.67471981048584, "step": 2377 }, { "epoch": 1.32, "learning_rate": 2.7358593502960727e-07, "logits/chosen": -6.049701690673828, "logits/rejected": -6.027373790740967, "logps/chosen": -292.71124267578125, "logps/rejected": -256.6053771972656, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 5.0333051681518555, "rewards/margins": 11.994182586669922, "rewards/rejected": -6.960877418518066, "step": 2378 }, { "epoch": 1.32, "learning_rate": 2.731850765893766e-07, "logits/chosen": -6.075674057006836, "logits/rejected": -5.933302879333496, "logps/chosen": -344.5204162597656, "logps/rejected": -185.8482666015625, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": 5.676426887512207, "rewards/margins": 10.745828628540039, "rewards/rejected": -5.069401741027832, "step": 2379 }, { "epoch": 1.32, "learning_rate": 2.7278440162299465e-07, "logits/chosen": -5.971502304077148, "logits/rejected": -6.028569221496582, "logps/chosen": -386.1290283203125, "logps/rejected": -279.0733642578125, "loss": 0.1065, "rewards/accuracies": 0.9375, "rewards/chosen": 5.1176838874816895, "rewards/margins": 13.398433685302734, "rewards/rejected": -8.280750274658203, "step": 2380 }, { "epoch": 1.32, "learning_rate": 2.7238391045457273e-07, "logits/chosen": -6.0165181159973145, "logits/rejected": -6.001373291015625, "logps/chosen": -402.49163818359375, "logps/rejected": -342.3865661621094, "loss": 0.0568, "rewards/accuracies": 0.875, "rewards/chosen": 5.3714070320129395, "rewards/margins": 10.827147483825684, "rewards/rejected": -5.455740451812744, "step": 2381 }, { "epoch": 1.32, "learning_rate": 2.7198360340807403e-07, "logits/chosen": -6.039082050323486, "logits/rejected": -6.057740211486816, "logps/chosen": -288.3912353515625, "logps/rejected": -208.24981689453125, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 5.189970970153809, "rewards/margins": 12.087037086486816, "rewards/rejected": -6.897067070007324, "step": 2382 }, { "epoch": 1.32, "learning_rate": 2.715834808073127e-07, "logits/chosen": -6.012584209442139, "logits/rejected": -5.981104373931885, "logps/chosen": -146.38145446777344, "logps/rejected": -186.07376098632812, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 1.9499119520187378, "rewards/margins": 11.866888046264648, "rewards/rejected": -9.916976928710938, "step": 2383 }, { "epoch": 1.32, "learning_rate": 2.711835429759539e-07, "logits/chosen": -6.0002875328063965, "logits/rejected": -6.06125020980835, "logps/chosen": -448.02972412109375, "logps/rejected": -417.2580871582031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 6.252192497253418, "rewards/margins": 17.249675750732422, "rewards/rejected": -10.99748420715332, "step": 2384 }, { "epoch": 1.32, "learning_rate": 2.707837902375126e-07, "logits/chosen": -6.137562274932861, "logits/rejected": -6.025083541870117, "logps/chosen": -284.737060546875, "logps/rejected": -196.96588134765625, "loss": 0.0178, "rewards/accuracies": 0.9375, "rewards/chosen": 6.041902542114258, "rewards/margins": 14.05344009399414, "rewards/rejected": -8.011537551879883, "step": 2385 }, { "epoch": 1.32, "learning_rate": 2.7038422291535503e-07, "logits/chosen": -6.068052768707275, "logits/rejected": -6.0384202003479, "logps/chosen": -238.46511840820312, "logps/rejected": -212.83969116210938, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 3.390990972518921, "rewards/margins": 10.821929931640625, "rewards/rejected": -7.430939674377441, "step": 2386 }, { "epoch": 1.33, "learning_rate": 2.699848413326965e-07, "logits/chosen": -6.064791202545166, "logits/rejected": -5.906497478485107, "logps/chosen": -259.544189453125, "logps/rejected": -119.86782836914062, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": 4.62100887298584, "rewards/margins": 10.934648513793945, "rewards/rejected": -6.313640594482422, "step": 2387 }, { "epoch": 1.33, "learning_rate": 2.6958564581260264e-07, "logits/chosen": -6.022066593170166, "logits/rejected": -5.906090259552002, "logps/chosen": -407.87261962890625, "logps/rejected": -183.35711669921875, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 6.187507629394531, "rewards/margins": 11.313238143920898, "rewards/rejected": -5.125730991363525, "step": 2388 }, { "epoch": 1.33, "learning_rate": 2.691866366779887e-07, "logits/chosen": -6.060244560241699, "logits/rejected": -5.926141262054443, "logps/chosen": -283.6443786621094, "logps/rejected": -135.79759216308594, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": 7.202863693237305, "rewards/margins": 13.40083122253418, "rewards/rejected": -6.197967529296875, "step": 2389 }, { "epoch": 1.33, "learning_rate": 2.6878781425161857e-07, "logits/chosen": -6.090882301330566, "logits/rejected": -6.059062480926514, "logps/chosen": -256.4751281738281, "logps/rejected": -269.9779357910156, "loss": 0.0688, "rewards/accuracies": 1.0, "rewards/chosen": 2.6025705337524414, "rewards/margins": 11.180971145629883, "rewards/rejected": -8.578401565551758, "step": 2390 }, { "epoch": 1.33, "learning_rate": 2.683891788561055e-07, "logits/chosen": -6.031289577484131, "logits/rejected": -6.047286033630371, "logps/chosen": -446.8968505859375, "logps/rejected": -359.0592041015625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 3.9436354637145996, "rewards/margins": 11.244184494018555, "rewards/rejected": -7.300548553466797, "step": 2391 }, { "epoch": 1.33, "learning_rate": 2.679907308139114e-07, "logits/chosen": -6.095222473144531, "logits/rejected": -6.129111289978027, "logps/chosen": -201.26019287109375, "logps/rejected": -230.9331817626953, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 2.6581614017486572, "rewards/margins": 11.829804420471191, "rewards/rejected": -9.171642303466797, "step": 2392 }, { "epoch": 1.33, "learning_rate": 2.675924704473469e-07, "logits/chosen": -6.158815383911133, "logits/rejected": -6.049388408660889, "logps/chosen": -451.0550842285156, "logps/rejected": -154.60311889648438, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 7.844182014465332, "rewards/margins": 13.794665336608887, "rewards/rejected": -5.950483322143555, "step": 2393 }, { "epoch": 1.33, "learning_rate": 2.671943980785703e-07, "logits/chosen": -5.846923828125, "logits/rejected": -5.996524810791016, "logps/chosen": -241.49295043945312, "logps/rejected": -440.823486328125, "loss": 0.013, "rewards/accuracies": 0.9375, "rewards/chosen": 3.851593494415283, "rewards/margins": 11.515266418457031, "rewards/rejected": -7.663673400878906, "step": 2394 }, { "epoch": 1.33, "learning_rate": 2.6679651402958764e-07, "logits/chosen": -5.929903030395508, "logits/rejected": -5.89527702331543, "logps/chosen": -146.843017578125, "logps/rejected": -156.41529846191406, "loss": 0.0689, "rewards/accuracies": 1.0, "rewards/chosen": 0.6262696981430054, "rewards/margins": 7.643410682678223, "rewards/rejected": -7.017141342163086, "step": 2395 }, { "epoch": 1.33, "learning_rate": 2.663988186222538e-07, "logits/chosen": -5.991744518280029, "logits/rejected": -6.00352144241333, "logps/chosen": -203.2237091064453, "logps/rejected": -273.54705810546875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 3.455596446990967, "rewards/margins": 13.881343841552734, "rewards/rejected": -10.425747871398926, "step": 2396 }, { "epoch": 1.33, "learning_rate": 2.660013121782698e-07, "logits/chosen": -5.954671859741211, "logits/rejected": -6.065997123718262, "logps/chosen": -250.565185546875, "logps/rejected": -290.7129821777344, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 4.889400959014893, "rewards/margins": 11.324056625366211, "rewards/rejected": -6.434656143188477, "step": 2397 }, { "epoch": 1.33, "learning_rate": 2.6560399501918464e-07, "logits/chosen": -5.8714799880981445, "logits/rejected": -6.012543678283691, "logps/chosen": -328.25189208984375, "logps/rejected": -418.5286865234375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 5.374491214752197, "rewards/margins": 15.176779747009277, "rewards/rejected": -9.802288055419922, "step": 2398 }, { "epoch": 1.33, "learning_rate": 2.6520686746639354e-07, "logits/chosen": -6.145804405212402, "logits/rejected": -5.93585205078125, "logps/chosen": -387.8832092285156, "logps/rejected": -108.65596008300781, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": 5.4339494705200195, "rewards/margins": 10.821539878845215, "rewards/rejected": -5.387590408325195, "step": 2399 }, { "epoch": 1.33, "learning_rate": 2.6480992984113877e-07, "logits/chosen": -5.936415672302246, "logits/rejected": -6.006277084350586, "logps/chosen": -264.86151123046875, "logps/rejected": -330.0614929199219, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 3.5570383071899414, "rewards/margins": 14.162952423095703, "rewards/rejected": -10.605913162231445, "step": 2400 }, { "epoch": 1.33, "learning_rate": 2.644131824645092e-07, "logits/chosen": -6.057956695556641, "logits/rejected": -6.074459075927734, "logps/chosen": -253.52195739746094, "logps/rejected": -221.23196411132812, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 3.066253185272217, "rewards/margins": 9.875152587890625, "rewards/rejected": -6.808899402618408, "step": 2401 }, { "epoch": 1.33, "learning_rate": 2.6401662565743885e-07, "logits/chosen": -6.140383720397949, "logits/rejected": -6.009072303771973, "logps/chosen": -334.5899963378906, "logps/rejected": -141.32098388671875, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 5.690517425537109, "rewards/margins": 9.854970932006836, "rewards/rejected": -4.164454460144043, "step": 2402 }, { "epoch": 1.33, "learning_rate": 2.63620259740709e-07, "logits/chosen": -6.106635093688965, "logits/rejected": -6.056024551391602, "logps/chosen": -217.10391235351562, "logps/rejected": -187.3903350830078, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 3.383786201477051, "rewards/margins": 13.958768844604492, "rewards/rejected": -10.574982643127441, "step": 2403 }, { "epoch": 1.33, "learning_rate": 2.632240850349448e-07, "logits/chosen": -5.988376617431641, "logits/rejected": -6.011858940124512, "logps/chosen": -299.86907958984375, "logps/rejected": -330.75555419921875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 4.652440071105957, "rewards/margins": 15.253022193908691, "rewards/rejected": -10.600582122802734, "step": 2404 }, { "epoch": 1.34, "learning_rate": 2.628281018606186e-07, "logits/chosen": -6.081586837768555, "logits/rejected": -6.086433410644531, "logps/chosen": -268.6580810546875, "logps/rejected": -229.8616943359375, "loss": 0.0227, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5986499786376953, "rewards/margins": 11.068370819091797, "rewards/rejected": -7.469719886779785, "step": 2405 }, { "epoch": 1.34, "learning_rate": 2.6243231053804625e-07, "logits/chosen": -6.04334020614624, "logits/rejected": -5.897017478942871, "logps/chosen": -249.39312744140625, "logps/rejected": -151.2741241455078, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 3.460359573364258, "rewards/margins": 11.185396194458008, "rewards/rejected": -7.72503662109375, "step": 2406 }, { "epoch": 1.34, "learning_rate": 2.620367113873894e-07, "logits/chosen": -6.01474666595459, "logits/rejected": -6.020662307739258, "logps/chosen": -321.56964111328125, "logps/rejected": -251.43197631835938, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 5.0839643478393555, "rewards/margins": 13.579487800598145, "rewards/rejected": -8.495524406433105, "step": 2407 }, { "epoch": 1.34, "learning_rate": 2.616413047286536e-07, "logits/chosen": -6.022913455963135, "logits/rejected": -6.096138000488281, "logps/chosen": -224.6216583251953, "logps/rejected": -295.73480224609375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 3.2890148162841797, "rewards/margins": 14.982717514038086, "rewards/rejected": -11.693702697753906, "step": 2408 }, { "epoch": 1.34, "learning_rate": 2.6124609088168903e-07, "logits/chosen": -6.026452541351318, "logits/rejected": -6.0601606369018555, "logps/chosen": -253.05648803710938, "logps/rejected": -152.96792602539062, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": 4.960844993591309, "rewards/margins": 10.157280921936035, "rewards/rejected": -5.196435451507568, "step": 2409 }, { "epoch": 1.34, "learning_rate": 2.608510701661901e-07, "logits/chosen": -5.964314937591553, "logits/rejected": -5.974457740783691, "logps/chosen": -244.9525146484375, "logps/rejected": -243.29037475585938, "loss": 0.0147, "rewards/accuracies": 0.9375, "rewards/chosen": 4.530874729156494, "rewards/margins": 14.546055793762207, "rewards/rejected": -10.015181541442871, "step": 2410 }, { "epoch": 1.34, "learning_rate": 2.604562429016944e-07, "logits/chosen": -6.050661087036133, "logits/rejected": -6.022424221038818, "logps/chosen": -211.45680236816406, "logps/rejected": -144.86270141601562, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 2.7300689220428467, "rewards/margins": 11.391962051391602, "rewards/rejected": -8.661892890930176, "step": 2411 }, { "epoch": 1.34, "learning_rate": 2.600616094075835e-07, "logits/chosen": -5.983444690704346, "logits/rejected": -5.968264579772949, "logps/chosen": -388.53814697265625, "logps/rejected": -269.56890869140625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 4.937291622161865, "rewards/margins": 11.825467109680176, "rewards/rejected": -6.888175964355469, "step": 2412 }, { "epoch": 1.34, "learning_rate": 2.5966717000308225e-07, "logits/chosen": -5.971097946166992, "logits/rejected": -5.981206893920898, "logps/chosen": -231.96640014648438, "logps/rejected": -184.6160888671875, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": 4.903783798217773, "rewards/margins": 11.226200103759766, "rewards/rejected": -6.322417259216309, "step": 2413 }, { "epoch": 1.34, "learning_rate": 2.5927292500725805e-07, "logits/chosen": -5.874332427978516, "logits/rejected": -5.951513290405273, "logps/chosen": -213.5603790283203, "logps/rejected": -321.13031005859375, "loss": 0.0558, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7734122276306152, "rewards/margins": 10.9143705368042, "rewards/rejected": -9.140958786010742, "step": 2414 }, { "epoch": 1.34, "learning_rate": 2.5887887473902184e-07, "logits/chosen": -6.0603437423706055, "logits/rejected": -5.992434024810791, "logps/chosen": -213.97885131835938, "logps/rejected": -145.14207458496094, "loss": 0.0679, "rewards/accuracies": 1.0, "rewards/chosen": 2.371520519256592, "rewards/margins": 9.036362648010254, "rewards/rejected": -6.664841651916504, "step": 2415 }, { "epoch": 1.34, "learning_rate": 2.5848501951712586e-07, "logits/chosen": -6.04728889465332, "logits/rejected": -5.976201057434082, "logps/chosen": -436.1409912109375, "logps/rejected": -296.3759765625, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 8.713632583618164, "rewards/margins": 16.863513946533203, "rewards/rejected": -8.149882316589355, "step": 2416 }, { "epoch": 1.34, "learning_rate": 2.580913596601656e-07, "logits/chosen": -6.059576988220215, "logits/rejected": -5.972738265991211, "logps/chosen": -306.33282470703125, "logps/rejected": -206.3212432861328, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 3.8432369232177734, "rewards/margins": 11.993799209594727, "rewards/rejected": -8.150562286376953, "step": 2417 }, { "epoch": 1.34, "learning_rate": 2.5769789548657826e-07, "logits/chosen": -6.053862571716309, "logits/rejected": -6.161704063415527, "logps/chosen": -316.53094482421875, "logps/rejected": -298.56524658203125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 5.241616725921631, "rewards/margins": 14.07105827331543, "rewards/rejected": -8.829442024230957, "step": 2418 }, { "epoch": 1.34, "learning_rate": 2.573046273146427e-07, "logits/chosen": -6.075712203979492, "logits/rejected": -5.967021942138672, "logps/chosen": -356.42578125, "logps/rejected": -181.45010375976562, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": 4.821987152099609, "rewards/margins": 11.35609245300293, "rewards/rejected": -6.53410530090332, "step": 2419 }, { "epoch": 1.34, "learning_rate": 2.569115554624789e-07, "logits/chosen": -5.932361602783203, "logits/rejected": -6.03513240814209, "logps/chosen": -239.52247619628906, "logps/rejected": -231.92559814453125, "loss": 0.0171, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3756916522979736, "rewards/margins": 9.833518981933594, "rewards/rejected": -6.457827091217041, "step": 2420 }, { "epoch": 1.34, "learning_rate": 2.5651868024804846e-07, "logits/chosen": -5.940278053283691, "logits/rejected": -6.125178337097168, "logps/chosen": -241.6448974609375, "logps/rejected": -354.65087890625, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 2.6288671493530273, "rewards/margins": 13.559794425964355, "rewards/rejected": -10.930928230285645, "step": 2421 }, { "epoch": 1.34, "learning_rate": 2.56126001989154e-07, "logits/chosen": -5.957253932952881, "logits/rejected": -5.955377101898193, "logps/chosen": -289.19140625, "logps/rejected": -196.8339080810547, "loss": 0.0154, "rewards/accuracies": 0.9375, "rewards/chosen": 4.286261558532715, "rewards/margins": 9.633959770202637, "rewards/rejected": -5.347698211669922, "step": 2422 }, { "epoch": 1.35, "learning_rate": 2.5573352100343825e-07, "logits/chosen": -5.90920877456665, "logits/rejected": -6.045840740203857, "logps/chosen": -197.29238891601562, "logps/rejected": -264.39093017578125, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": 3.040677547454834, "rewards/margins": 12.561137199401855, "rewards/rejected": -9.52046012878418, "step": 2423 }, { "epoch": 1.35, "learning_rate": 2.553412376083851e-07, "logits/chosen": -6.002394199371338, "logits/rejected": -5.959193229675293, "logps/chosen": -231.09866333007812, "logps/rejected": -156.44363403320312, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 2.4419870376586914, "rewards/margins": 10.361984252929688, "rewards/rejected": -7.919997215270996, "step": 2424 }, { "epoch": 1.35, "learning_rate": 2.549491521213176e-07, "logits/chosen": -6.089354515075684, "logits/rejected": -6.124168395996094, "logps/chosen": -158.59059143066406, "logps/rejected": -208.66461181640625, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 0.37123626470565796, "rewards/margins": 9.306346893310547, "rewards/rejected": -8.935110092163086, "step": 2425 }, { "epoch": 1.35, "learning_rate": 2.545572648594001e-07, "logits/chosen": -5.99063777923584, "logits/rejected": -5.988126754760742, "logps/chosen": -395.2220458984375, "logps/rejected": -286.742919921875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 2.5836057662963867, "rewards/margins": 10.141809463500977, "rewards/rejected": -7.55820369720459, "step": 2426 }, { "epoch": 1.35, "learning_rate": 2.5416557613963544e-07, "logits/chosen": -5.96472692489624, "logits/rejected": -6.102055549621582, "logps/chosen": -196.50711059570312, "logps/rejected": -301.57440185546875, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": 1.362152338027954, "rewards/margins": 9.243643760681152, "rewards/rejected": -7.881491184234619, "step": 2427 }, { "epoch": 1.35, "learning_rate": 2.5377408627886614e-07, "logits/chosen": -6.040610313415527, "logits/rejected": -6.169589996337891, "logps/chosen": -220.95877075195312, "logps/rejected": -334.34686279296875, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": 2.6010444164276123, "rewards/margins": 11.879637718200684, "rewards/rejected": -9.278594017028809, "step": 2428 }, { "epoch": 1.35, "learning_rate": 2.5338279559377405e-07, "logits/chosen": -5.9786152839660645, "logits/rejected": -6.055740833282471, "logps/chosen": -228.70645141601562, "logps/rejected": -260.2898864746094, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 3.7484841346740723, "rewards/margins": 10.809617042541504, "rewards/rejected": -7.06113338470459, "step": 2429 }, { "epoch": 1.35, "learning_rate": 2.529917044008799e-07, "logits/chosen": -6.058680534362793, "logits/rejected": -5.9138970375061035, "logps/chosen": -432.1139831542969, "logps/rejected": -188.55950927734375, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": 2.779815912246704, "rewards/margins": 10.059671401977539, "rewards/rejected": -7.279855728149414, "step": 2430 }, { "epoch": 1.35, "learning_rate": 2.526008130165432e-07, "logits/chosen": -6.059854030609131, "logits/rejected": -5.977879047393799, "logps/chosen": -261.37103271484375, "logps/rejected": -222.9483184814453, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 5.07399845123291, "rewards/margins": 12.112825393676758, "rewards/rejected": -7.038827419281006, "step": 2431 }, { "epoch": 1.35, "learning_rate": 2.5221012175696113e-07, "logits/chosen": -6.032728672027588, "logits/rejected": -5.924400329589844, "logps/chosen": -432.6305236816406, "logps/rejected": -429.056640625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 4.562004089355469, "rewards/margins": 13.552178382873535, "rewards/rejected": -8.990174293518066, "step": 2432 }, { "epoch": 1.35, "learning_rate": 2.518196309381696e-07, "logits/chosen": -6.051273822784424, "logits/rejected": -5.974239349365234, "logps/chosen": -378.3869323730469, "logps/rejected": -342.3431396484375, "loss": 0.0243, "rewards/accuracies": 0.9375, "rewards/chosen": 4.508539199829102, "rewards/margins": 11.444936752319336, "rewards/rejected": -6.936398506164551, "step": 2433 }, { "epoch": 1.35, "learning_rate": 2.5142934087604257e-07, "logits/chosen": -5.938191890716553, "logits/rejected": -6.084506988525391, "logps/chosen": -174.74525451660156, "logps/rejected": -268.5876159667969, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 3.409527063369751, "rewards/margins": 13.056368827819824, "rewards/rejected": -9.646842002868652, "step": 2434 }, { "epoch": 1.35, "learning_rate": 2.5103925188629085e-07, "logits/chosen": -6.058104038238525, "logits/rejected": -6.017327308654785, "logps/chosen": -186.97662353515625, "logps/rejected": -258.0440673828125, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 2.916198253631592, "rewards/margins": 11.219179153442383, "rewards/rejected": -8.30298137664795, "step": 2435 }, { "epoch": 1.35, "learning_rate": 2.5064936428446337e-07, "logits/chosen": -5.894774436950684, "logits/rejected": -6.178088188171387, "logps/chosen": -302.92767333984375, "logps/rejected": -354.82037353515625, "loss": 0.0262, "rewards/accuracies": 0.9375, "rewards/chosen": 5.730827808380127, "rewards/margins": 12.900705337524414, "rewards/rejected": -7.169878005981445, "step": 2436 }, { "epoch": 1.35, "learning_rate": 2.502596783859455e-07, "logits/chosen": -6.117931365966797, "logits/rejected": -6.0167555809021, "logps/chosen": -224.35751342773438, "logps/rejected": -214.5661163330078, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 3.4196791648864746, "rewards/margins": 12.270483016967773, "rewards/rejected": -8.85080337524414, "step": 2437 }, { "epoch": 1.35, "learning_rate": 2.4987019450595985e-07, "logits/chosen": -5.974888324737549, "logits/rejected": -5.95579719543457, "logps/chosen": -275.20501708984375, "logps/rejected": -356.4578857421875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 4.928802967071533, "rewards/margins": 12.387774467468262, "rewards/rejected": -7.458971977233887, "step": 2438 }, { "epoch": 1.35, "learning_rate": 2.494809129595656e-07, "logits/chosen": -6.0133233070373535, "logits/rejected": -5.9590067863464355, "logps/chosen": -316.82501220703125, "logps/rejected": -149.25494384765625, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": 5.875299453735352, "rewards/margins": 10.730157852172852, "rewards/rejected": -4.8548583984375, "step": 2439 }, { "epoch": 1.35, "learning_rate": 2.4909183406165833e-07, "logits/chosen": -5.908512592315674, "logits/rejected": -6.025338172912598, "logps/chosen": -232.37745666503906, "logps/rejected": -293.37750244140625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 4.742608070373535, "rewards/margins": 12.308072090148926, "rewards/rejected": -7.565464496612549, "step": 2440 }, { "epoch": 1.36, "learning_rate": 2.487029581269692e-07, "logits/chosen": -5.9892168045043945, "logits/rejected": -6.023041725158691, "logps/chosen": -223.03195190429688, "logps/rejected": -187.086181640625, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 3.9033641815185547, "rewards/margins": 11.48148250579834, "rewards/rejected": -7.578117370605469, "step": 2441 }, { "epoch": 1.36, "learning_rate": 2.4831428547006576e-07, "logits/chosen": -5.986387252807617, "logits/rejected": -5.9531707763671875, "logps/chosen": -259.329833984375, "logps/rejected": -216.91986083984375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.745126724243164, "rewards/margins": 12.179369926452637, "rewards/rejected": -8.434243202209473, "step": 2442 }, { "epoch": 1.36, "learning_rate": 2.4792581640535105e-07, "logits/chosen": -5.981703758239746, "logits/rejected": -5.9733757972717285, "logps/chosen": -272.5546875, "logps/rejected": -195.12435913085938, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 5.835007667541504, "rewards/margins": 12.633258819580078, "rewards/rejected": -6.798251152038574, "step": 2443 }, { "epoch": 1.36, "learning_rate": 2.4753755124706285e-07, "logits/chosen": -6.0044264793396, "logits/rejected": -6.01611328125, "logps/chosen": -200.0368194580078, "logps/rejected": -248.800537109375, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 2.5874078273773193, "rewards/margins": 12.922310829162598, "rewards/rejected": -10.334903717041016, "step": 2444 }, { "epoch": 1.36, "learning_rate": 2.4714949030927485e-07, "logits/chosen": -6.0273518562316895, "logits/rejected": -5.966620445251465, "logps/chosen": -271.5258483886719, "logps/rejected": -143.00086975097656, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": 4.425530433654785, "rewards/margins": 9.917160034179688, "rewards/rejected": -5.491629600524902, "step": 2445 }, { "epoch": 1.36, "learning_rate": 2.467616339058945e-07, "logits/chosen": -6.052104473114014, "logits/rejected": -6.039775848388672, "logps/chosen": -269.23712158203125, "logps/rejected": -249.70211791992188, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 6.0440826416015625, "rewards/margins": 14.140742301940918, "rewards/rejected": -8.096658706665039, "step": 2446 }, { "epoch": 1.36, "learning_rate": 2.4637398235066523e-07, "logits/chosen": -6.0015974044799805, "logits/rejected": -6.067493915557861, "logps/chosen": -222.05703735351562, "logps/rejected": -273.4723815917969, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": 3.3347954750061035, "rewards/margins": 12.722265243530273, "rewards/rejected": -9.387470245361328, "step": 2447 }, { "epoch": 1.36, "learning_rate": 2.459865359571636e-07, "logits/chosen": -5.954771041870117, "logits/rejected": -6.080338954925537, "logps/chosen": -255.701171875, "logps/rejected": -232.93736267089844, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 5.536412239074707, "rewards/margins": 13.43863296508789, "rewards/rejected": -7.902221202850342, "step": 2448 }, { "epoch": 1.36, "learning_rate": 2.4559929503880026e-07, "logits/chosen": -6.014968395233154, "logits/rejected": -5.953942775726318, "logps/chosen": -242.89732360839844, "logps/rejected": -200.2346649169922, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 1.499592661857605, "rewards/margins": 8.648031234741211, "rewards/rejected": -7.148438930511475, "step": 2449 }, { "epoch": 1.36, "learning_rate": 2.452122599088203e-07, "logits/chosen": -5.850827217102051, "logits/rejected": -6.041988372802734, "logps/chosen": -224.48353576660156, "logps/rejected": -268.40240478515625, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 4.260334014892578, "rewards/margins": 12.269547462463379, "rewards/rejected": -8.0092134475708, "step": 2450 }, { "epoch": 1.36, "learning_rate": 2.4482543088030186e-07, "logits/chosen": -5.9789323806762695, "logits/rejected": -5.958249092102051, "logps/chosen": -193.144775390625, "logps/rejected": -179.80020141601562, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 2.0553479194641113, "rewards/margins": 8.087648391723633, "rewards/rejected": -6.03230094909668, "step": 2451 }, { "epoch": 1.36, "learning_rate": 2.4443880826615665e-07, "logits/chosen": -5.887761116027832, "logits/rejected": -5.84102725982666, "logps/chosen": -201.13650512695312, "logps/rejected": -144.12120056152344, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": 3.7720792293548584, "rewards/margins": 11.119940757751465, "rewards/rejected": -7.3478617668151855, "step": 2452 }, { "epoch": 1.36, "learning_rate": 2.44052392379129e-07, "logits/chosen": -6.149271011352539, "logits/rejected": -6.009925842285156, "logps/chosen": -368.4791564941406, "logps/rejected": -213.63540649414062, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 4.196213722229004, "rewards/margins": 12.887262344360352, "rewards/rejected": -8.691048622131348, "step": 2453 }, { "epoch": 1.36, "learning_rate": 2.4366618353179644e-07, "logits/chosen": -6.029051780700684, "logits/rejected": -5.918294429779053, "logps/chosen": -339.6546325683594, "logps/rejected": -204.02662658691406, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 6.301857948303223, "rewards/margins": 12.869818687438965, "rewards/rejected": -6.567960739135742, "step": 2454 }, { "epoch": 1.36, "learning_rate": 2.4328018203656897e-07, "logits/chosen": -6.022098064422607, "logits/rejected": -6.060107231140137, "logps/chosen": -275.9141540527344, "logps/rejected": -275.51702880859375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 4.356867790222168, "rewards/margins": 13.38962459564209, "rewards/rejected": -9.032756805419922, "step": 2455 }, { "epoch": 1.36, "learning_rate": 2.428943882056884e-07, "logits/chosen": -6.031108379364014, "logits/rejected": -6.026770114898682, "logps/chosen": -272.1036376953125, "logps/rejected": -245.10546875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 5.066742897033691, "rewards/margins": 11.792686462402344, "rewards/rejected": -6.725943088531494, "step": 2456 }, { "epoch": 1.36, "learning_rate": 2.4250880235122927e-07, "logits/chosen": -5.988160610198975, "logits/rejected": -5.998551368713379, "logps/chosen": -225.08628845214844, "logps/rejected": -274.6347351074219, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 3.1468911170959473, "rewards/margins": 10.999387741088867, "rewards/rejected": -7.852497100830078, "step": 2457 }, { "epoch": 1.36, "learning_rate": 2.421234247850972e-07, "logits/chosen": -5.963118076324463, "logits/rejected": -5.997945785522461, "logps/chosen": -282.06976318359375, "logps/rejected": -328.594482421875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 3.6843185424804688, "rewards/margins": 13.218514442443848, "rewards/rejected": -9.534196853637695, "step": 2458 }, { "epoch": 1.37, "learning_rate": 2.4173825581902974e-07, "logits/chosen": -6.043313503265381, "logits/rejected": -5.953929901123047, "logps/chosen": -226.52737426757812, "logps/rejected": -121.86913299560547, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": 4.0561957359313965, "rewards/margins": 10.573366165161133, "rewards/rejected": -6.517170429229736, "step": 2459 }, { "epoch": 1.37, "learning_rate": 2.4135329576459564e-07, "logits/chosen": -5.925291061401367, "logits/rejected": -5.839075088500977, "logps/chosen": -287.94488525390625, "logps/rejected": -179.55300903320312, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 6.931821823120117, "rewards/margins": 12.482854843139648, "rewards/rejected": -5.551032066345215, "step": 2460 }, { "epoch": 1.37, "learning_rate": 2.4096854493319476e-07, "logits/chosen": -5.923050880432129, "logits/rejected": -5.9788031578063965, "logps/chosen": -300.63909912109375, "logps/rejected": -260.76116943359375, "loss": 0.0473, "rewards/accuracies": 0.9375, "rewards/chosen": 5.0392303466796875, "rewards/margins": 11.643689155578613, "rewards/rejected": -6.604458808898926, "step": 2461 }, { "epoch": 1.37, "learning_rate": 2.405840036360572e-07, "logits/chosen": -6.045295715332031, "logits/rejected": -5.906418800354004, "logps/chosen": -313.74163818359375, "logps/rejected": -209.0500946044922, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": 6.504228591918945, "rewards/margins": 11.70899486541748, "rewards/rejected": -5.204765319824219, "step": 2462 }, { "epoch": 1.37, "learning_rate": 2.4019967218424425e-07, "logits/chosen": -6.036847114562988, "logits/rejected": -6.068711757659912, "logps/chosen": -318.2848815917969, "logps/rejected": -295.8402099609375, "loss": 0.0367, "rewards/accuracies": 0.9375, "rewards/chosen": 4.890444755554199, "rewards/margins": 13.130805969238281, "rewards/rejected": -8.240361213684082, "step": 2463 }, { "epoch": 1.37, "learning_rate": 2.398155508886472e-07, "logits/chosen": -6.039822578430176, "logits/rejected": -5.992742538452148, "logps/chosen": -327.7769470214844, "logps/rejected": -293.39422607421875, "loss": 0.0227, "rewards/accuracies": 0.875, "rewards/chosen": 4.42464542388916, "rewards/margins": 10.604372024536133, "rewards/rejected": -6.179727077484131, "step": 2464 }, { "epoch": 1.37, "learning_rate": 2.3943164005998697e-07, "logits/chosen": -6.01889705657959, "logits/rejected": -6.0046067237854, "logps/chosen": -221.84861755371094, "logps/rejected": -125.96250915527344, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": 6.576045036315918, "rewards/margins": 9.748971939086914, "rewards/rejected": -3.1729276180267334, "step": 2465 }, { "epoch": 1.37, "learning_rate": 2.3904794000881487e-07, "logits/chosen": -6.088532447814941, "logits/rejected": -5.88570499420166, "logps/chosen": -415.61676025390625, "logps/rejected": -294.6895446777344, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": 4.007394790649414, "rewards/margins": 10.229902267456055, "rewards/rejected": -6.222506999969482, "step": 2466 }, { "epoch": 1.37, "learning_rate": 2.3866445104551097e-07, "logits/chosen": -6.137975215911865, "logits/rejected": -6.149299621582031, "logps/chosen": -222.3904571533203, "logps/rejected": -286.2838439941406, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 2.705254077911377, "rewards/margins": 14.90870475769043, "rewards/rejected": -12.203450202941895, "step": 2467 }, { "epoch": 1.37, "learning_rate": 2.3828117348028526e-07, "logits/chosen": -6.043090343475342, "logits/rejected": -6.101902484893799, "logps/chosen": -273.131103515625, "logps/rejected": -264.2532653808594, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 4.956843376159668, "rewards/margins": 13.091224670410156, "rewards/rejected": -8.134382247924805, "step": 2468 }, { "epoch": 1.37, "learning_rate": 2.3789810762317664e-07, "logits/chosen": -6.01228666305542, "logits/rejected": -5.995189189910889, "logps/chosen": -238.24240112304688, "logps/rejected": -233.89364624023438, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 4.33717679977417, "rewards/margins": 12.362195014953613, "rewards/rejected": -8.025018692016602, "step": 2469 }, { "epoch": 1.37, "learning_rate": 2.375152537840522e-07, "logits/chosen": -6.0875725746154785, "logits/rejected": -5.991194725036621, "logps/chosen": -271.8900451660156, "logps/rejected": -137.74237060546875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 5.718740463256836, "rewards/margins": 12.436980247497559, "rewards/rejected": -6.718240261077881, "step": 2470 }, { "epoch": 1.37, "learning_rate": 2.371326122726081e-07, "logits/chosen": -6.121011734008789, "logits/rejected": -5.972813606262207, "logps/chosen": -304.0509338378906, "logps/rejected": -186.32186889648438, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 4.817135334014893, "rewards/margins": 11.714208602905273, "rewards/rejected": -6.897073268890381, "step": 2471 }, { "epoch": 1.37, "learning_rate": 2.3675018339836854e-07, "logits/chosen": -6.048725128173828, "logits/rejected": -6.119344711303711, "logps/chosen": -276.6568603515625, "logps/rejected": -240.6710205078125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 4.201079368591309, "rewards/margins": 9.764127731323242, "rewards/rejected": -5.563048362731934, "step": 2472 }, { "epoch": 1.37, "learning_rate": 2.3636796747068594e-07, "logits/chosen": -5.977577209472656, "logits/rejected": -6.0421977043151855, "logps/chosen": -185.85919189453125, "logps/rejected": -258.15447998046875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 3.714998722076416, "rewards/margins": 13.27947998046875, "rewards/rejected": -9.564481735229492, "step": 2473 }, { "epoch": 1.37, "learning_rate": 2.3598596479874006e-07, "logits/chosen": -5.9072346687316895, "logits/rejected": -5.977917671203613, "logps/chosen": -292.2288513183594, "logps/rejected": -217.40164184570312, "loss": 0.1406, "rewards/accuracies": 1.0, "rewards/chosen": 3.0558595657348633, "rewards/margins": 12.80269718170166, "rewards/rejected": -9.746838569641113, "step": 2474 }, { "epoch": 1.37, "learning_rate": 2.3560417569153794e-07, "logits/chosen": -5.896275520324707, "logits/rejected": -6.004260063171387, "logps/chosen": -208.87843322753906, "logps/rejected": -225.55215454101562, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 4.038536071777344, "rewards/margins": 11.842793464660645, "rewards/rejected": -7.804257392883301, "step": 2475 }, { "epoch": 1.37, "learning_rate": 2.3522260045791508e-07, "logits/chosen": -6.076737403869629, "logits/rejected": -6.0794501304626465, "logps/chosen": -312.8426513671875, "logps/rejected": -273.4827880859375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 6.375234603881836, "rewards/margins": 12.253477096557617, "rewards/rejected": -5.878242015838623, "step": 2476 }, { "epoch": 1.38, "learning_rate": 2.348412394065325e-07, "logits/chosen": -6.008640289306641, "logits/rejected": -5.984250068664551, "logps/chosen": -274.66461181640625, "logps/rejected": -233.45492553710938, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": 2.6553030014038086, "rewards/margins": 13.474204063415527, "rewards/rejected": -10.818901062011719, "step": 2477 }, { "epoch": 1.38, "learning_rate": 2.3446009284587914e-07, "logits/chosen": -5.934058666229248, "logits/rejected": -6.2918195724487305, "logps/chosen": -282.2406921386719, "logps/rejected": -337.3055419921875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 5.549520015716553, "rewards/margins": 13.592872619628906, "rewards/rejected": -8.043352127075195, "step": 2478 }, { "epoch": 1.38, "learning_rate": 2.3407916108426944e-07, "logits/chosen": -6.068138122558594, "logits/rejected": -6.013392448425293, "logps/chosen": -246.9834747314453, "logps/rejected": -192.03408813476562, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 4.726210594177246, "rewards/margins": 14.346437454223633, "rewards/rejected": -9.620227813720703, "step": 2479 }, { "epoch": 1.38, "learning_rate": 2.3369844442984482e-07, "logits/chosen": -5.98366117477417, "logits/rejected": -6.088159561157227, "logps/chosen": -276.24139404296875, "logps/rejected": -274.7080078125, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 5.318423271179199, "rewards/margins": 12.912933349609375, "rewards/rejected": -7.594510555267334, "step": 2480 }, { "epoch": 1.38, "learning_rate": 2.3331794319057257e-07, "logits/chosen": -5.907327651977539, "logits/rejected": -5.93843936920166, "logps/chosen": -172.49017333984375, "logps/rejected": -186.978271484375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 3.9883956909179688, "rewards/margins": 14.485700607299805, "rewards/rejected": -10.49730396270752, "step": 2481 }, { "epoch": 1.38, "learning_rate": 2.3293765767424534e-07, "logits/chosen": -6.072893142700195, "logits/rejected": -6.049513816833496, "logps/chosen": -396.7846374511719, "logps/rejected": -288.61688232421875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 6.404360771179199, "rewards/margins": 14.874698638916016, "rewards/rejected": -8.470337867736816, "step": 2482 }, { "epoch": 1.38, "learning_rate": 2.3255758818848188e-07, "logits/chosen": -6.007786750793457, "logits/rejected": -6.064075469970703, "logps/chosen": -257.44244384765625, "logps/rejected": -283.0362548828125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 3.0248966217041016, "rewards/margins": 15.09150505065918, "rewards/rejected": -12.066608428955078, "step": 2483 }, { "epoch": 1.38, "learning_rate": 2.3217773504072518e-07, "logits/chosen": -6.065225601196289, "logits/rejected": -6.023357391357422, "logps/chosen": -394.590576171875, "logps/rejected": -277.3525390625, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": 5.056812763214111, "rewards/margins": 12.924506187438965, "rewards/rejected": -7.867692947387695, "step": 2484 }, { "epoch": 1.38, "learning_rate": 2.317980985382448e-07, "logits/chosen": -6.075424671173096, "logits/rejected": -5.9734954833984375, "logps/chosen": -283.0997009277344, "logps/rejected": -140.9993133544922, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": 3.9819436073303223, "rewards/margins": 10.378604888916016, "rewards/rejected": -6.396661758422852, "step": 2485 }, { "epoch": 1.38, "learning_rate": 2.3141867898813356e-07, "logits/chosen": -5.972635269165039, "logits/rejected": -6.023864269256592, "logps/chosen": -298.0623779296875, "logps/rejected": -234.86431884765625, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": 6.26788330078125, "rewards/margins": 13.7358980178833, "rewards/rejected": -7.468014717102051, "step": 2486 }, { "epoch": 1.38, "learning_rate": 2.3103947669730967e-07, "logits/chosen": -6.042602062225342, "logits/rejected": -5.998401641845703, "logps/chosen": -209.62904357910156, "logps/rejected": -158.8727569580078, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 4.276974201202393, "rewards/margins": 9.610466003417969, "rewards/rejected": -5.33349084854126, "step": 2487 }, { "epoch": 1.38, "learning_rate": 2.306604919725151e-07, "logits/chosen": -6.084473133087158, "logits/rejected": -6.075992107391357, "logps/chosen": -193.20301818847656, "logps/rejected": -228.7218475341797, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 3.179737091064453, "rewards/margins": 13.051814079284668, "rewards/rejected": -9.872076988220215, "step": 2488 }, { "epoch": 1.38, "learning_rate": 2.30281725120316e-07, "logits/chosen": -6.009676456451416, "logits/rejected": -6.093044757843018, "logps/chosen": -295.18994140625, "logps/rejected": -370.56317138671875, "loss": 0.0864, "rewards/accuracies": 1.0, "rewards/chosen": 4.84388542175293, "rewards/margins": 13.42427921295166, "rewards/rejected": -8.580394744873047, "step": 2489 }, { "epoch": 1.38, "learning_rate": 2.299031764471027e-07, "logits/chosen": -6.047247409820557, "logits/rejected": -5.974254608154297, "logps/chosen": -313.2701416015625, "logps/rejected": -178.238037109375, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 3.8430051803588867, "rewards/margins": 12.897737503051758, "rewards/rejected": -9.054732322692871, "step": 2490 }, { "epoch": 1.38, "learning_rate": 2.2952484625908825e-07, "logits/chosen": -6.010015487670898, "logits/rejected": -6.168493270874023, "logps/chosen": -195.97463989257812, "logps/rejected": -315.7371520996094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 2.421909809112549, "rewards/margins": 12.52396297454834, "rewards/rejected": -10.102052688598633, "step": 2491 }, { "epoch": 1.38, "learning_rate": 2.291467348623095e-07, "logits/chosen": -6.140913963317871, "logits/rejected": -6.028045177459717, "logps/chosen": -264.5126647949219, "logps/rejected": -157.19552612304688, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 3.42226505279541, "rewards/margins": 10.94581413269043, "rewards/rejected": -7.5235490798950195, "step": 2492 }, { "epoch": 1.38, "learning_rate": 2.287688425626262e-07, "logits/chosen": -6.080526351928711, "logits/rejected": -6.081820487976074, "logps/chosen": -204.30233764648438, "logps/rejected": -187.80471801757812, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 2.640092134475708, "rewards/margins": 10.192900657653809, "rewards/rejected": -7.55280876159668, "step": 2493 }, { "epoch": 1.38, "learning_rate": 2.2839116966572102e-07, "logits/chosen": -6.07634162902832, "logits/rejected": -5.928041934967041, "logps/chosen": -294.86029052734375, "logps/rejected": -118.71656036376953, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 8.622209548950195, "rewards/margins": 12.632574081420898, "rewards/rejected": -4.010363578796387, "step": 2494 }, { "epoch": 1.39, "learning_rate": 2.2801371647709887e-07, "logits/chosen": -6.164182662963867, "logits/rejected": -6.056835174560547, "logps/chosen": -285.0874328613281, "logps/rejected": -217.33154296875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 4.49754524230957, "rewards/margins": 11.594566345214844, "rewards/rejected": -7.097021102905273, "step": 2495 }, { "epoch": 1.39, "learning_rate": 2.276364833020868e-07, "logits/chosen": -6.031524181365967, "logits/rejected": -6.092380523681641, "logps/chosen": -235.435546875, "logps/rejected": -189.88507080078125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 5.6147050857543945, "rewards/margins": 10.25464153289795, "rewards/rejected": -4.639936923980713, "step": 2496 }, { "epoch": 1.39, "learning_rate": 2.2725947044583438e-07, "logits/chosen": -6.02617883682251, "logits/rejected": -6.038632392883301, "logps/chosen": -282.2105712890625, "logps/rejected": -201.2494354248047, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 5.140401840209961, "rewards/margins": 10.50175666809082, "rewards/rejected": -5.361354827880859, "step": 2497 }, { "epoch": 1.39, "learning_rate": 2.2688267821331276e-07, "logits/chosen": -5.91141414642334, "logits/rejected": -5.987805366516113, "logps/chosen": -259.04638671875, "logps/rejected": -276.45526123046875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 4.701376914978027, "rewards/margins": 14.100801467895508, "rewards/rejected": -9.399423599243164, "step": 2498 }, { "epoch": 1.39, "learning_rate": 2.2650610690931466e-07, "logits/chosen": -6.034251689910889, "logits/rejected": -5.984560489654541, "logps/chosen": -268.88818359375, "logps/rejected": -265.6688537597656, "loss": 0.051, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4831466674804688, "rewards/margins": 12.787434577941895, "rewards/rejected": -9.304287910461426, "step": 2499 }, { "epoch": 1.39, "learning_rate": 2.261297568384537e-07, "logits/chosen": -6.126667022705078, "logits/rejected": -6.00230073928833, "logps/chosen": -230.6427001953125, "logps/rejected": -148.4762420654297, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 3.2845468521118164, "rewards/margins": 10.759870529174805, "rewards/rejected": -7.47532320022583, "step": 2500 }, { "epoch": 1.39, "learning_rate": 2.2575362830516503e-07, "logits/chosen": -5.993673801422119, "logits/rejected": -5.902313232421875, "logps/chosen": -229.0555419921875, "logps/rejected": -138.416015625, "loss": 0.0521, "rewards/accuracies": 0.9375, "rewards/chosen": 4.825735092163086, "rewards/margins": 10.789263725280762, "rewards/rejected": -5.963528633117676, "step": 2501 }, { "epoch": 1.39, "learning_rate": 2.2537772161370466e-07, "logits/chosen": -6.049550533294678, "logits/rejected": -6.063791751861572, "logps/chosen": -329.72357177734375, "logps/rejected": -286.80218505859375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 5.328552722930908, "rewards/margins": 13.74809455871582, "rewards/rejected": -8.41954231262207, "step": 2502 }, { "epoch": 1.39, "learning_rate": 2.2500203706814853e-07, "logits/chosen": -6.032975673675537, "logits/rejected": -6.04668664932251, "logps/chosen": -310.5806884765625, "logps/rejected": -275.80303955078125, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": 7.268392562866211, "rewards/margins": 14.534101486206055, "rewards/rejected": -7.265708923339844, "step": 2503 }, { "epoch": 1.39, "learning_rate": 2.2462657497239357e-07, "logits/chosen": -6.060533046722412, "logits/rejected": -6.003922462463379, "logps/chosen": -292.155517578125, "logps/rejected": -209.18243408203125, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": 6.527609825134277, "rewards/margins": 13.910055160522461, "rewards/rejected": -7.382445335388184, "step": 2504 }, { "epoch": 1.39, "learning_rate": 2.2425133563015592e-07, "logits/chosen": -5.947848320007324, "logits/rejected": -5.975152969360352, "logps/chosen": -270.4889831542969, "logps/rejected": -235.5066375732422, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": 4.196038246154785, "rewards/margins": 11.760560989379883, "rewards/rejected": -7.564523220062256, "step": 2505 }, { "epoch": 1.39, "learning_rate": 2.2387631934497286e-07, "logits/chosen": -6.002858638763428, "logits/rejected": -5.8978729248046875, "logps/chosen": -304.462158203125, "logps/rejected": -159.1411895751953, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 6.528561592102051, "rewards/margins": 13.067903518676758, "rewards/rejected": -6.539342403411865, "step": 2506 }, { "epoch": 1.39, "learning_rate": 2.2350152642019982e-07, "logits/chosen": -6.023138999938965, "logits/rejected": -5.975364685058594, "logps/chosen": -263.005859375, "logps/rejected": -220.26971435546875, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 1.4744362831115723, "rewards/margins": 9.527149200439453, "rewards/rejected": -8.052711486816406, "step": 2507 }, { "epoch": 1.39, "learning_rate": 2.2312695715901263e-07, "logits/chosen": -5.958362579345703, "logits/rejected": -5.982548713684082, "logps/chosen": -315.58343505859375, "logps/rejected": -164.0423583984375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 6.699167251586914, "rewards/margins": 11.795701026916504, "rewards/rejected": -5.09653377532959, "step": 2508 }, { "epoch": 1.39, "learning_rate": 2.2275261186440536e-07, "logits/chosen": -5.963801860809326, "logits/rejected": -6.023316383361816, "logps/chosen": -193.13980102539062, "logps/rejected": -260.9193420410156, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 3.3683736324310303, "rewards/margins": 12.1319580078125, "rewards/rejected": -8.76358413696289, "step": 2509 }, { "epoch": 1.39, "learning_rate": 2.2237849083919142e-07, "logits/chosen": -5.902318000793457, "logits/rejected": -5.969248294830322, "logps/chosen": -159.37045288085938, "logps/rejected": -234.04119873046875, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 2.140178680419922, "rewards/margins": 12.224502563476562, "rewards/rejected": -10.08432388305664, "step": 2510 }, { "epoch": 1.39, "learning_rate": 2.2200459438600294e-07, "logits/chosen": -6.158872127532959, "logits/rejected": -6.0377607345581055, "logps/chosen": -229.17755126953125, "logps/rejected": -166.9517822265625, "loss": 0.0078, "rewards/accuracies": 0.9375, "rewards/chosen": 4.580441474914551, "rewards/margins": 11.311458587646484, "rewards/rejected": -6.731017589569092, "step": 2511 }, { "epoch": 1.39, "learning_rate": 2.2163092280728967e-07, "logits/chosen": -6.005479335784912, "logits/rejected": -6.017887115478516, "logps/chosen": -183.07582092285156, "logps/rejected": -275.56048583984375, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": 3.247684955596924, "rewards/margins": 11.801922798156738, "rewards/rejected": -8.554237365722656, "step": 2512 }, { "epoch": 1.4, "learning_rate": 2.212574764053202e-07, "logits/chosen": -5.9635491371154785, "logits/rejected": -6.059039115905762, "logps/chosen": -305.7556457519531, "logps/rejected": -304.92340087890625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 5.596721172332764, "rewards/margins": 14.47549057006836, "rewards/rejected": -8.878768920898438, "step": 2513 }, { "epoch": 1.4, "learning_rate": 2.2088425548218066e-07, "logits/chosen": -6.067325592041016, "logits/rejected": -6.035930633544922, "logps/chosen": -278.95849609375, "logps/rejected": -195.18496704101562, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 5.412003517150879, "rewards/margins": 11.32504940032959, "rewards/rejected": -5.913046836853027, "step": 2514 }, { "epoch": 1.4, "learning_rate": 2.205112603397749e-07, "logits/chosen": -5.943610668182373, "logits/rejected": -5.999807357788086, "logps/chosen": -424.20208740234375, "logps/rejected": -238.18661499023438, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 4.301639556884766, "rewards/margins": 9.354875564575195, "rewards/rejected": -5.053236484527588, "step": 2515 }, { "epoch": 1.4, "learning_rate": 2.2013849127982397e-07, "logits/chosen": -5.992076396942139, "logits/rejected": -5.951979637145996, "logps/chosen": -292.69970703125, "logps/rejected": -197.45388793945312, "loss": 0.0616, "rewards/accuracies": 0.9375, "rewards/chosen": 6.577702522277832, "rewards/margins": 9.688790321350098, "rewards/rejected": -3.1110880374908447, "step": 2516 }, { "epoch": 1.4, "learning_rate": 2.1976594860386594e-07, "logits/chosen": -6.029429912567139, "logits/rejected": -6.018504619598389, "logps/chosen": -251.49179077148438, "logps/rejected": -136.2207489013672, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 3.853968620300293, "rewards/margins": 9.830253601074219, "rewards/rejected": -5.976284503936768, "step": 2517 }, { "epoch": 1.4, "learning_rate": 2.1939363261325606e-07, "logits/chosen": -6.1385884284973145, "logits/rejected": -6.022377967834473, "logps/chosen": -260.5699462890625, "logps/rejected": -160.526123046875, "loss": 0.062, "rewards/accuracies": 0.9375, "rewards/chosen": 4.340621471405029, "rewards/margins": 12.877281188964844, "rewards/rejected": -8.536660194396973, "step": 2518 }, { "epoch": 1.4, "learning_rate": 2.1902154360916608e-07, "logits/chosen": -6.04622220993042, "logits/rejected": -5.973210334777832, "logps/chosen": -256.12353515625, "logps/rejected": -200.1398468017578, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 2.2877283096313477, "rewards/margins": 10.860879898071289, "rewards/rejected": -8.573151588439941, "step": 2519 }, { "epoch": 1.4, "learning_rate": 2.186496818925843e-07, "logits/chosen": -6.064825534820557, "logits/rejected": -6.15644645690918, "logps/chosen": -253.15618896484375, "logps/rejected": -257.98388671875, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": 2.3904831409454346, "rewards/margins": 11.412914276123047, "rewards/rejected": -9.022430419921875, "step": 2520 }, { "epoch": 1.4, "learning_rate": 2.1827804776431476e-07, "logits/chosen": -5.972905158996582, "logits/rejected": -6.106732368469238, "logps/chosen": -227.87088012695312, "logps/rejected": -255.1871337890625, "loss": 0.0721, "rewards/accuracies": 1.0, "rewards/chosen": 1.587448000907898, "rewards/margins": 12.573034286499023, "rewards/rejected": -10.985586166381836, "step": 2521 }, { "epoch": 1.4, "learning_rate": 2.1790664152497767e-07, "logits/chosen": -6.139456748962402, "logits/rejected": -6.074533462524414, "logps/chosen": -352.8417053222656, "logps/rejected": -326.4112854003906, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 5.571272850036621, "rewards/margins": 12.954818725585938, "rewards/rejected": -7.383545875549316, "step": 2522 }, { "epoch": 1.4, "learning_rate": 2.1753546347500918e-07, "logits/chosen": -6.033705711364746, "logits/rejected": -5.8914899826049805, "logps/chosen": -285.49493408203125, "logps/rejected": -162.73655700683594, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": 4.298704624176025, "rewards/margins": 12.152239799499512, "rewards/rejected": -7.853534698486328, "step": 2523 }, { "epoch": 1.4, "learning_rate": 2.1716451391466006e-07, "logits/chosen": -5.989190578460693, "logits/rejected": -6.035830020904541, "logps/chosen": -268.0834045410156, "logps/rejected": -179.60020446777344, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 4.090267181396484, "rewards/margins": 10.07317066192627, "rewards/rejected": -5.982903480529785, "step": 2524 }, { "epoch": 1.4, "learning_rate": 2.167937931439972e-07, "logits/chosen": -6.024443626403809, "logits/rejected": -5.971081733703613, "logps/chosen": -390.90399169921875, "logps/rejected": -249.02401733398438, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 6.546749114990234, "rewards/margins": 13.156644821166992, "rewards/rejected": -6.609895706176758, "step": 2525 }, { "epoch": 1.4, "learning_rate": 2.1642330146290137e-07, "logits/chosen": -5.944376468658447, "logits/rejected": -5.975122928619385, "logps/chosen": -378.5636291503906, "logps/rejected": -440.0393371582031, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 6.237563133239746, "rewards/margins": 13.207307815551758, "rewards/rejected": -6.969744682312012, "step": 2526 }, { "epoch": 1.4, "learning_rate": 2.1605303917106939e-07, "logits/chosen": -6.055701732635498, "logits/rejected": -5.998732566833496, "logps/chosen": -273.31561279296875, "logps/rejected": -161.06192016601562, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 3.871995449066162, "rewards/margins": 10.422669410705566, "rewards/rejected": -6.5506744384765625, "step": 2527 }, { "epoch": 1.4, "learning_rate": 2.1568300656801125e-07, "logits/chosen": -6.032378673553467, "logits/rejected": -6.091825485229492, "logps/chosen": -298.57818603515625, "logps/rejected": -294.22601318359375, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 4.836411476135254, "rewards/margins": 11.554387092590332, "rewards/rejected": -6.717975616455078, "step": 2528 }, { "epoch": 1.4, "learning_rate": 2.1531320395305157e-07, "logits/chosen": -6.024990081787109, "logits/rejected": -5.988454818725586, "logps/chosen": -365.8590087890625, "logps/rejected": -197.77528381347656, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 3.35286545753479, "rewards/margins": 10.702868461608887, "rewards/rejected": -7.350003242492676, "step": 2529 }, { "epoch": 1.4, "learning_rate": 2.1494363162532915e-07, "logits/chosen": -5.979207992553711, "logits/rejected": -5.917230606079102, "logps/chosen": -200.51992797851562, "logps/rejected": -141.86862182617188, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 3.669618844985962, "rewards/margins": 9.943256378173828, "rewards/rejected": -6.273637294769287, "step": 2530 }, { "epoch": 1.41, "learning_rate": 2.1457428988379634e-07, "logits/chosen": -6.066526412963867, "logits/rejected": -6.085756301879883, "logps/chosen": -239.29385375976562, "logps/rejected": -221.65045166015625, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": 2.3107752799987793, "rewards/margins": 12.17037582397461, "rewards/rejected": -9.859601020812988, "step": 2531 }, { "epoch": 1.41, "learning_rate": 2.1420517902721913e-07, "logits/chosen": -5.98575496673584, "logits/rejected": -5.998167514801025, "logps/chosen": -282.658447265625, "logps/rejected": -265.154541015625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 5.560221195220947, "rewards/margins": 13.492501258850098, "rewards/rejected": -7.932280540466309, "step": 2532 }, { "epoch": 1.41, "learning_rate": 2.138362993541763e-07, "logits/chosen": -5.981104850769043, "logits/rejected": -6.024198532104492, "logps/chosen": -421.28411865234375, "logps/rejected": -425.3899841308594, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": 5.527599334716797, "rewards/margins": 13.538768768310547, "rewards/rejected": -8.01116943359375, "step": 2533 }, { "epoch": 1.41, "learning_rate": 2.1346765116306003e-07, "logits/chosen": -5.9687395095825195, "logits/rejected": -5.859867572784424, "logps/chosen": -215.55856323242188, "logps/rejected": -122.1614990234375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 3.4766178131103516, "rewards/margins": 9.875699996948242, "rewards/rejected": -6.399083137512207, "step": 2534 }, { "epoch": 1.41, "learning_rate": 2.1309923475207536e-07, "logits/chosen": -6.04272985458374, "logits/rejected": -5.897574424743652, "logps/chosen": -288.4026184082031, "logps/rejected": -114.6846923828125, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 4.911491870880127, "rewards/margins": 11.810800552368164, "rewards/rejected": -6.899309158325195, "step": 2535 }, { "epoch": 1.41, "learning_rate": 2.1273105041923927e-07, "logits/chosen": -5.986194610595703, "logits/rejected": -6.074341773986816, "logps/chosen": -157.0316925048828, "logps/rejected": -284.7236022949219, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": 0.9961214065551758, "rewards/margins": 12.067994117736816, "rewards/rejected": -11.07187271118164, "step": 2536 }, { "epoch": 1.41, "learning_rate": 2.1236309846238165e-07, "logits/chosen": -6.0274481773376465, "logits/rejected": -6.051440238952637, "logps/chosen": -228.21641540527344, "logps/rejected": -252.19027709960938, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 3.5525832176208496, "rewards/margins": 12.778308868408203, "rewards/rejected": -9.225727081298828, "step": 2537 }, { "epoch": 1.41, "learning_rate": 2.1199537917914385e-07, "logits/chosen": -6.041122913360596, "logits/rejected": -6.1287150382995605, "logps/chosen": -289.472412109375, "logps/rejected": -280.61968994140625, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": 2.847874402999878, "rewards/margins": 13.93426513671875, "rewards/rejected": -11.08639144897461, "step": 2538 }, { "epoch": 1.41, "learning_rate": 2.1162789286697936e-07, "logits/chosen": -6.034151554107666, "logits/rejected": -6.138542652130127, "logps/chosen": -154.8404541015625, "logps/rejected": -197.73135375976562, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": 1.2825595140457153, "rewards/margins": 9.673210144042969, "rewards/rejected": -8.39065170288086, "step": 2539 }, { "epoch": 1.41, "learning_rate": 2.1126063982315317e-07, "logits/chosen": -6.062189102172852, "logits/rejected": -6.087285995483398, "logps/chosen": -332.26025390625, "logps/rejected": -234.37030029296875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 5.747993469238281, "rewards/margins": 11.934232711791992, "rewards/rejected": -6.186239719390869, "step": 2540 }, { "epoch": 1.41, "learning_rate": 2.1089362034474173e-07, "logits/chosen": -6.007208347320557, "logits/rejected": -6.048912048339844, "logps/chosen": -221.39358520507812, "logps/rejected": -316.2041015625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 4.369772911071777, "rewards/margins": 14.517537117004395, "rewards/rejected": -10.147764205932617, "step": 2541 }, { "epoch": 1.41, "learning_rate": 2.1052683472863203e-07, "logits/chosen": -6.024223804473877, "logits/rejected": -6.142364025115967, "logps/chosen": -264.248779296875, "logps/rejected": -328.3428039550781, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": 3.2397851943969727, "rewards/margins": 11.068108558654785, "rewards/rejected": -7.8283233642578125, "step": 2542 }, { "epoch": 1.41, "learning_rate": 2.1016028327152236e-07, "logits/chosen": -6.057723522186279, "logits/rejected": -6.002135276794434, "logps/chosen": -216.26744079589844, "logps/rejected": -452.2799072265625, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": 1.2579883337020874, "rewards/margins": 13.22842788696289, "rewards/rejected": -11.970439910888672, "step": 2543 }, { "epoch": 1.41, "learning_rate": 2.0979396626992167e-07, "logits/chosen": -5.912196159362793, "logits/rejected": -5.932877540588379, "logps/chosen": -242.4927520751953, "logps/rejected": -262.86517333984375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 2.035351514816284, "rewards/margins": 12.278369903564453, "rewards/rejected": -10.243017196655273, "step": 2544 }, { "epoch": 1.41, "learning_rate": 2.0942788402014865e-07, "logits/chosen": -5.970818519592285, "logits/rejected": -6.096130847930908, "logps/chosen": -271.9091796875, "logps/rejected": -277.197509765625, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 6.109560966491699, "rewards/margins": 13.272045135498047, "rewards/rejected": -7.162484169006348, "step": 2545 }, { "epoch": 1.41, "learning_rate": 2.090620368183329e-07, "logits/chosen": -6.0770463943481445, "logits/rejected": -5.940942287445068, "logps/chosen": -299.45343017578125, "logps/rejected": -195.13690185546875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 6.151813507080078, "rewards/margins": 12.333322525024414, "rewards/rejected": -6.181509017944336, "step": 2546 }, { "epoch": 1.41, "learning_rate": 2.0869642496041284e-07, "logits/chosen": -6.049172401428223, "logits/rejected": -5.971097469329834, "logps/chosen": -245.6571807861328, "logps/rejected": -168.93231201171875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 4.931611061096191, "rewards/margins": 11.452988624572754, "rewards/rejected": -6.521378040313721, "step": 2547 }, { "epoch": 1.41, "learning_rate": 2.0833104874213798e-07, "logits/chosen": -6.067986488342285, "logits/rejected": -6.151178359985352, "logps/chosen": -235.38934326171875, "logps/rejected": -264.8143005371094, "loss": 0.077, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7493789196014404, "rewards/margins": 11.292613983154297, "rewards/rejected": -8.543233871459961, "step": 2548 }, { "epoch": 1.42, "learning_rate": 2.0796590845906598e-07, "logits/chosen": -6.115818977355957, "logits/rejected": -6.0171356201171875, "logps/chosen": -299.21990966796875, "logps/rejected": -176.65968322753906, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 4.864518642425537, "rewards/margins": 9.356886863708496, "rewards/rejected": -4.492368221282959, "step": 2549 }, { "epoch": 1.42, "learning_rate": 2.0760100440656387e-07, "logits/chosen": -5.976222991943359, "logits/rejected": -5.926055908203125, "logps/chosen": -174.59063720703125, "logps/rejected": -157.48721313476562, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 1.8286442756652832, "rewards/margins": 9.520068168640137, "rewards/rejected": -7.6914238929748535, "step": 2550 }, { "epoch": 1.42, "learning_rate": 2.0723633687980795e-07, "logits/chosen": -5.898292541503906, "logits/rejected": -5.955050945281982, "logps/chosen": -369.4996337890625, "logps/rejected": -279.2488098144531, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 4.682047367095947, "rewards/margins": 11.20654296875, "rewards/rejected": -6.5244951248168945, "step": 2551 }, { "epoch": 1.42, "learning_rate": 2.0687190617378308e-07, "logits/chosen": -5.987234115600586, "logits/rejected": -5.953174591064453, "logps/chosen": -293.5170593261719, "logps/rejected": -240.66957092285156, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 3.7765040397644043, "rewards/margins": 12.23190689086914, "rewards/rejected": -8.455402374267578, "step": 2552 }, { "epoch": 1.42, "learning_rate": 2.0650771258328258e-07, "logits/chosen": -6.057151794433594, "logits/rejected": -5.960883140563965, "logps/chosen": -235.68643188476562, "logps/rejected": -210.81808471679688, "loss": 0.024, "rewards/accuracies": 0.9375, "rewards/chosen": 3.554636001586914, "rewards/margins": 11.795801162719727, "rewards/rejected": -8.241165161132812, "step": 2553 }, { "epoch": 1.42, "learning_rate": 2.061437564029076e-07, "logits/chosen": -6.072116851806641, "logits/rejected": -5.956481456756592, "logps/chosen": -201.5703582763672, "logps/rejected": -151.43777465820312, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 4.694564342498779, "rewards/margins": 11.23710823059082, "rewards/rejected": -6.542544364929199, "step": 2554 }, { "epoch": 1.42, "learning_rate": 2.0578003792706767e-07, "logits/chosen": -5.9836320877075195, "logits/rejected": -5.906848907470703, "logps/chosen": -206.13775634765625, "logps/rejected": -183.54562377929688, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 4.947175025939941, "rewards/margins": 11.922760009765625, "rewards/rejected": -6.975585460662842, "step": 2555 }, { "epoch": 1.42, "learning_rate": 2.0541655744997998e-07, "logits/chosen": -6.048609256744385, "logits/rejected": -6.183287143707275, "logps/chosen": -270.46588134765625, "logps/rejected": -251.70562744140625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 6.916921615600586, "rewards/margins": 15.360092163085938, "rewards/rejected": -8.443170547485352, "step": 2556 }, { "epoch": 1.42, "learning_rate": 2.0505331526566893e-07, "logits/chosen": -5.975879192352295, "logits/rejected": -5.955498218536377, "logps/chosen": -210.07723999023438, "logps/rejected": -194.51168823242188, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 3.72554087638855, "rewards/margins": 11.929264068603516, "rewards/rejected": -8.203722953796387, "step": 2557 }, { "epoch": 1.42, "learning_rate": 2.046903116679665e-07, "logits/chosen": -5.975942611694336, "logits/rejected": -5.916098117828369, "logps/chosen": -262.93231201171875, "logps/rejected": -244.61204528808594, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 2.6955790519714355, "rewards/margins": 12.9404935836792, "rewards/rejected": -10.244915008544922, "step": 2558 }, { "epoch": 1.42, "learning_rate": 2.0432754695051136e-07, "logits/chosen": -6.089264392852783, "logits/rejected": -5.970090866088867, "logps/chosen": -237.78164672851562, "logps/rejected": -213.4195556640625, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 3.2650747299194336, "rewards/margins": 12.360595703125, "rewards/rejected": -9.09552001953125, "step": 2559 }, { "epoch": 1.42, "learning_rate": 2.039650214067491e-07, "logits/chosen": -5.981086730957031, "logits/rejected": -6.119392395019531, "logps/chosen": -235.14633178710938, "logps/rejected": -248.9031524658203, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 1.9608988761901855, "rewards/margins": 12.057050704956055, "rewards/rejected": -10.096151351928711, "step": 2560 }, { "epoch": 1.42, "learning_rate": 2.0360273532993195e-07, "logits/chosen": -6.0031208992004395, "logits/rejected": -6.022597789764404, "logps/chosen": -239.67005920410156, "logps/rejected": -205.66786193847656, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": 4.912619590759277, "rewards/margins": 12.010904312133789, "rewards/rejected": -7.09828519821167, "step": 2561 }, { "epoch": 1.42, "learning_rate": 2.0324068901311842e-07, "logits/chosen": -6.2022528648376465, "logits/rejected": -6.081840991973877, "logps/chosen": -256.18914794921875, "logps/rejected": -229.30682373046875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 3.149918556213379, "rewards/margins": 13.768390655517578, "rewards/rejected": -10.6184720993042, "step": 2562 }, { "epoch": 1.42, "learning_rate": 2.0287888274917287e-07, "logits/chosen": -5.949443817138672, "logits/rejected": -6.0406174659729, "logps/chosen": -319.9194641113281, "logps/rejected": -325.03802490234375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 2.8597381114959717, "rewards/margins": 13.43550968170166, "rewards/rejected": -10.57577133178711, "step": 2563 }, { "epoch": 1.42, "learning_rate": 2.0251731683076512e-07, "logits/chosen": -5.949721336364746, "logits/rejected": -6.001396179199219, "logps/chosen": -450.5175476074219, "logps/rejected": -312.615966796875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 6.1152472496032715, "rewards/margins": 10.223554611206055, "rewards/rejected": -4.108306884765625, "step": 2564 }, { "epoch": 1.42, "learning_rate": 2.0215599155037188e-07, "logits/chosen": -5.951605319976807, "logits/rejected": -5.966066360473633, "logps/chosen": -223.9708709716797, "logps/rejected": -219.4849853515625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 3.9354372024536133, "rewards/margins": 10.66722297668457, "rewards/rejected": -6.731785297393799, "step": 2565 }, { "epoch": 1.42, "learning_rate": 2.017949072002737e-07, "logits/chosen": -5.938808441162109, "logits/rejected": -6.043859481811523, "logps/chosen": -176.7351837158203, "logps/rejected": -173.6629180908203, "loss": 0.0887, "rewards/accuracies": 1.0, "rewards/chosen": 3.964599609375, "rewards/margins": 11.401601791381836, "rewards/rejected": -7.437002182006836, "step": 2566 }, { "epoch": 1.43, "learning_rate": 2.0143406407255737e-07, "logits/chosen": -5.991452217102051, "logits/rejected": -5.832886219024658, "logps/chosen": -239.81820678710938, "logps/rejected": -314.97381591796875, "loss": 0.0888, "rewards/accuracies": 0.875, "rewards/chosen": 4.582777976989746, "rewards/margins": 10.519075393676758, "rewards/rejected": -5.936297416687012, "step": 2567 }, { "epoch": 1.43, "learning_rate": 2.0107346245911361e-07, "logits/chosen": -6.012126922607422, "logits/rejected": -6.138110160827637, "logps/chosen": -398.1123962402344, "logps/rejected": -337.95037841796875, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 4.696969985961914, "rewards/margins": 14.57954216003418, "rewards/rejected": -9.882572174072266, "step": 2568 }, { "epoch": 1.43, "learning_rate": 2.007131026516385e-07, "logits/chosen": -6.011655807495117, "logits/rejected": -6.081642150878906, "logps/chosen": -225.72491455078125, "logps/rejected": -206.1255340576172, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 3.6353204250335693, "rewards/margins": 9.770086288452148, "rewards/rejected": -6.134764194488525, "step": 2569 }, { "epoch": 1.43, "learning_rate": 2.0035298494163238e-07, "logits/chosen": -6.08812141418457, "logits/rejected": -6.0742716789245605, "logps/chosen": -288.98553466796875, "logps/rejected": -190.59567260742188, "loss": 0.0472, "rewards/accuracies": 0.9375, "rewards/chosen": 5.152657985687256, "rewards/margins": 10.00971794128418, "rewards/rejected": -4.857059955596924, "step": 2570 }, { "epoch": 1.43, "learning_rate": 1.9999310962039934e-07, "logits/chosen": -5.8581061363220215, "logits/rejected": -5.863955497741699, "logps/chosen": -301.626220703125, "logps/rejected": -239.15408325195312, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": 6.700802803039551, "rewards/margins": 14.551299095153809, "rewards/rejected": -7.850495338439941, "step": 2571 }, { "epoch": 1.43, "learning_rate": 1.9963347697904785e-07, "logits/chosen": -5.99679708480835, "logits/rejected": -5.920683860778809, "logps/chosen": -448.4183654785156, "logps/rejected": -254.84353637695312, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 4.137386322021484, "rewards/margins": 12.370901107788086, "rewards/rejected": -8.233513832092285, "step": 2572 }, { "epoch": 1.43, "learning_rate": 1.992740873084899e-07, "logits/chosen": -6.033275604248047, "logits/rejected": -6.056143760681152, "logps/chosen": -168.00076293945312, "logps/rejected": -212.89205932617188, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 1.8030215501785278, "rewards/margins": 10.58691692352295, "rewards/rejected": -8.783894538879395, "step": 2573 }, { "epoch": 1.43, "learning_rate": 1.9891494089944115e-07, "logits/chosen": -6.012348175048828, "logits/rejected": -5.9931135177612305, "logps/chosen": -208.6188507080078, "logps/rejected": -170.95742797851562, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 2.4891982078552246, "rewards/margins": 11.511785507202148, "rewards/rejected": -9.022587776184082, "step": 2574 }, { "epoch": 1.43, "learning_rate": 1.9855603804241994e-07, "logits/chosen": -5.9835734367370605, "logits/rejected": -5.985574245452881, "logps/chosen": -253.18038940429688, "logps/rejected": -238.72900390625, "loss": 0.137, "rewards/accuracies": 1.0, "rewards/chosen": 5.0499420166015625, "rewards/margins": 12.771093368530273, "rewards/rejected": -7.721151351928711, "step": 2575 }, { "epoch": 1.43, "learning_rate": 1.9819737902774825e-07, "logits/chosen": -6.198188781738281, "logits/rejected": -5.998645305633545, "logps/chosen": -215.28175354003906, "logps/rejected": -108.9721450805664, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": 3.9395995140075684, "rewards/margins": 12.202099800109863, "rewards/rejected": -8.262500762939453, "step": 2576 }, { "epoch": 1.43, "learning_rate": 1.9783896414555023e-07, "logits/chosen": -6.116331100463867, "logits/rejected": -6.101071834564209, "logps/chosen": -228.96829223632812, "logps/rejected": -297.204833984375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.8968302011489868, "rewards/margins": 12.480437278747559, "rewards/rejected": -11.583608627319336, "step": 2577 }, { "epoch": 1.43, "learning_rate": 1.9748079368575293e-07, "logits/chosen": -6.012812614440918, "logits/rejected": -6.046624660491943, "logps/chosen": -225.18850708007812, "logps/rejected": -184.83538818359375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 4.4218525886535645, "rewards/margins": 13.218703269958496, "rewards/rejected": -8.79685115814209, "step": 2578 }, { "epoch": 1.43, "learning_rate": 1.971228679380858e-07, "logits/chosen": -6.137676239013672, "logits/rejected": -6.037575721740723, "logps/chosen": -233.1518096923828, "logps/rejected": -235.37158203125, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": 5.190288066864014, "rewards/margins": 13.978689193725586, "rewards/rejected": -8.788400650024414, "step": 2579 }, { "epoch": 1.43, "learning_rate": 1.9676518719207975e-07, "logits/chosen": -5.905722618103027, "logits/rejected": -6.002302646636963, "logps/chosen": -235.13442993164062, "logps/rejected": -220.76234436035156, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 5.935917377471924, "rewards/margins": 12.80733585357666, "rewards/rejected": -6.8714189529418945, "step": 2580 }, { "epoch": 1.43, "learning_rate": 1.9640775173706808e-07, "logits/chosen": -6.034961223602295, "logits/rejected": -5.9774627685546875, "logps/chosen": -278.577392578125, "logps/rejected": -236.79229736328125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.7198915481567383, "rewards/margins": 11.812349319458008, "rewards/rejected": -8.092456817626953, "step": 2581 }, { "epoch": 1.43, "learning_rate": 1.9605056186218555e-07, "logits/chosen": -6.067327499389648, "logits/rejected": -6.056968688964844, "logps/chosen": -345.26385498046875, "logps/rejected": -274.5216979980469, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 6.210459232330322, "rewards/margins": 13.940327644348145, "rewards/rejected": -7.729868412017822, "step": 2582 }, { "epoch": 1.43, "learning_rate": 1.9569361785636796e-07, "logits/chosen": -6.070265769958496, "logits/rejected": -6.036040782928467, "logps/chosen": -425.6120910644531, "logps/rejected": -200.62583923339844, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": 5.6094794273376465, "rewards/margins": 12.994691848754883, "rewards/rejected": -7.385212421417236, "step": 2583 }, { "epoch": 1.43, "learning_rate": 1.9533692000835267e-07, "logits/chosen": -5.959110260009766, "logits/rejected": -6.013546466827393, "logps/chosen": -448.4656982421875, "logps/rejected": -333.5651550292969, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 7.735292911529541, "rewards/margins": 17.301851272583008, "rewards/rejected": -9.566558837890625, "step": 2584 }, { "epoch": 1.44, "learning_rate": 1.9498046860667715e-07, "logits/chosen": -5.993696689605713, "logits/rejected": -5.91107702255249, "logps/chosen": -152.64459228515625, "logps/rejected": -131.81846618652344, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 2.661067485809326, "rewards/margins": 10.337930679321289, "rewards/rejected": -7.676862716674805, "step": 2585 }, { "epoch": 1.44, "learning_rate": 1.946242639396809e-07, "logits/chosen": -6.106401443481445, "logits/rejected": -5.980383396148682, "logps/chosen": -248.52676391601562, "logps/rejected": -169.09510803222656, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": 4.557333946228027, "rewards/margins": 11.314901351928711, "rewards/rejected": -6.757567405700684, "step": 2586 }, { "epoch": 1.44, "learning_rate": 1.942683062955024e-07, "logits/chosen": -6.014683723449707, "logits/rejected": -5.950277328491211, "logps/chosen": -229.64300537109375, "logps/rejected": -180.08786010742188, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": 4.585780143737793, "rewards/margins": 12.473139762878418, "rewards/rejected": -7.887358665466309, "step": 2587 }, { "epoch": 1.44, "learning_rate": 1.939125959620812e-07, "logits/chosen": -6.013711452484131, "logits/rejected": -6.02601432800293, "logps/chosen": -234.772216796875, "logps/rejected": -175.83187866210938, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 4.707393646240234, "rewards/margins": 12.422830581665039, "rewards/rejected": -7.715437412261963, "step": 2588 }, { "epoch": 1.44, "learning_rate": 1.9355713322715615e-07, "logits/chosen": -5.981118202209473, "logits/rejected": -5.9729413986206055, "logps/chosen": -436.4272155761719, "logps/rejected": -333.6960754394531, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 4.311884880065918, "rewards/margins": 11.938813209533691, "rewards/rejected": -7.626928329467773, "step": 2589 }, { "epoch": 1.44, "learning_rate": 1.9320191837826644e-07, "logits/chosen": -6.128877639770508, "logits/rejected": -6.00410795211792, "logps/chosen": -323.56707763671875, "logps/rejected": -232.51669311523438, "loss": 0.0313, "rewards/accuracies": 0.9375, "rewards/chosen": 4.555102348327637, "rewards/margins": 12.225784301757812, "rewards/rejected": -7.670682907104492, "step": 2590 }, { "epoch": 1.44, "learning_rate": 1.928469517027505e-07, "logits/chosen": -5.970818996429443, "logits/rejected": -5.98847770690918, "logps/chosen": -129.0853729248047, "logps/rejected": -181.50076293945312, "loss": 0.022, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1417534351348877, "rewards/margins": 10.98608112335205, "rewards/rejected": -9.844327926635742, "step": 2591 }, { "epoch": 1.44, "learning_rate": 1.9249223348774574e-07, "logits/chosen": -6.046792507171631, "logits/rejected": -6.014123916625977, "logps/chosen": -274.45635986328125, "logps/rejected": -215.3978271484375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 4.44957971572876, "rewards/margins": 13.482666015625, "rewards/rejected": -9.033086776733398, "step": 2592 }, { "epoch": 1.44, "learning_rate": 1.9213776402018889e-07, "logits/chosen": -6.110111236572266, "logits/rejected": -6.13524055480957, "logps/chosen": -276.3330078125, "logps/rejected": -253.45188903808594, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 4.340912342071533, "rewards/margins": 11.7179536819458, "rewards/rejected": -7.377040863037109, "step": 2593 }, { "epoch": 1.44, "learning_rate": 1.9178354358681548e-07, "logits/chosen": -5.99770975112915, "logits/rejected": -5.959561824798584, "logps/chosen": -211.8267364501953, "logps/rejected": -135.08154296875, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": 4.579092979431152, "rewards/margins": 11.12382698059082, "rewards/rejected": -6.544734001159668, "step": 2594 }, { "epoch": 1.44, "learning_rate": 1.914295724741596e-07, "logits/chosen": -6.083967685699463, "logits/rejected": -6.007506370544434, "logps/chosen": -267.84228515625, "logps/rejected": -228.9547119140625, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": 6.406710624694824, "rewards/margins": 12.872206687927246, "rewards/rejected": -6.4654951095581055, "step": 2595 }, { "epoch": 1.44, "learning_rate": 1.9107585096855345e-07, "logits/chosen": -6.026615619659424, "logits/rejected": -5.96568489074707, "logps/chosen": -368.3607177734375, "logps/rejected": -183.24624633789062, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": 5.456113815307617, "rewards/margins": 9.767122268676758, "rewards/rejected": -4.311008453369141, "step": 2596 }, { "epoch": 1.44, "learning_rate": 1.9072237935612722e-07, "logits/chosen": -6.024547576904297, "logits/rejected": -5.944974899291992, "logps/chosen": -355.2122497558594, "logps/rejected": -136.95384216308594, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 3.505960464477539, "rewards/margins": 11.137516021728516, "rewards/rejected": -7.631556034088135, "step": 2597 }, { "epoch": 1.44, "learning_rate": 1.9036915792280938e-07, "logits/chosen": -5.974887847900391, "logits/rejected": -5.994414329528809, "logps/chosen": -232.39459228515625, "logps/rejected": -197.40469360351562, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 2.6432478427886963, "rewards/margins": 11.477350234985352, "rewards/rejected": -8.834102630615234, "step": 2598 }, { "epoch": 1.44, "learning_rate": 1.9001618695432585e-07, "logits/chosen": -5.831428527832031, "logits/rejected": -5.917568683624268, "logps/chosen": -177.44100952148438, "logps/rejected": -257.11614990234375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 4.255346775054932, "rewards/margins": 11.662800788879395, "rewards/rejected": -7.407454013824463, "step": 2599 }, { "epoch": 1.44, "learning_rate": 1.8966346673619998e-07, "logits/chosen": -6.118019104003906, "logits/rejected": -5.929319858551025, "logps/chosen": -236.07151794433594, "logps/rejected": -128.91688537597656, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 4.852202415466309, "rewards/margins": 10.445243835449219, "rewards/rejected": -5.59304141998291, "step": 2600 }, { "epoch": 1.44, "learning_rate": 1.89310997553752e-07, "logits/chosen": -5.946934700012207, "logits/rejected": -6.161675453186035, "logps/chosen": -192.16085815429688, "logps/rejected": -328.1517333984375, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": 2.8325881958007812, "rewards/margins": 14.176117897033691, "rewards/rejected": -11.343528747558594, "step": 2601 }, { "epoch": 1.44, "learning_rate": 1.8895877969209938e-07, "logits/chosen": -6.042159557342529, "logits/rejected": -6.104034900665283, "logps/chosen": -390.7427673339844, "logps/rejected": -268.18927001953125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 4.250926971435547, "rewards/margins": 13.831295013427734, "rewards/rejected": -9.580367088317871, "step": 2602 }, { "epoch": 1.45, "learning_rate": 1.8860681343615638e-07, "logits/chosen": -5.941668510437012, "logits/rejected": -6.012010097503662, "logps/chosen": -399.18341064453125, "logps/rejected": -368.3060607910156, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 7.059154987335205, "rewards/margins": 14.39714527130127, "rewards/rejected": -7.337990760803223, "step": 2603 }, { "epoch": 1.45, "learning_rate": 1.8825509907063326e-07, "logits/chosen": -5.890479564666748, "logits/rejected": -5.947697639465332, "logps/chosen": -454.1359558105469, "logps/rejected": -426.4210205078125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 6.597092151641846, "rewards/margins": 15.311857223510742, "rewards/rejected": -8.714765548706055, "step": 2604 }, { "epoch": 1.45, "learning_rate": 1.8790363688003713e-07, "logits/chosen": -6.087886333465576, "logits/rejected": -6.044254302978516, "logps/chosen": -198.6640625, "logps/rejected": -167.0561065673828, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 3.1152968406677246, "rewards/margins": 11.723371505737305, "rewards/rejected": -8.608075141906738, "step": 2605 }, { "epoch": 1.45, "learning_rate": 1.8755242714867032e-07, "logits/chosen": -6.015127658843994, "logits/rejected": -5.908890724182129, "logps/chosen": -212.81048583984375, "logps/rejected": -164.3147430419922, "loss": 0.0373, "rewards/accuracies": 0.9375, "rewards/chosen": 3.924862861633301, "rewards/margins": 10.043133735656738, "rewards/rejected": -6.1182708740234375, "step": 2606 }, { "epoch": 1.45, "learning_rate": 1.872014701606321e-07, "logits/chosen": -5.960136890411377, "logits/rejected": -5.952495574951172, "logps/chosen": -457.826171875, "logps/rejected": -265.15850830078125, "loss": 0.0517, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3919286727905273, "rewards/margins": 15.032758712768555, "rewards/rejected": -11.640830993652344, "step": 2607 }, { "epoch": 1.45, "learning_rate": 1.8685076619981603e-07, "logits/chosen": -5.973432540893555, "logits/rejected": -6.068699836730957, "logps/chosen": -279.9331359863281, "logps/rejected": -186.30763244628906, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 7.894637107849121, "rewards/margins": 14.212752342224121, "rewards/rejected": -6.318115234375, "step": 2608 }, { "epoch": 1.45, "learning_rate": 1.8650031554991201e-07, "logits/chosen": -6.1078691482543945, "logits/rejected": -5.922670364379883, "logps/chosen": -263.4691467285156, "logps/rejected": -179.75062561035156, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 5.498929977416992, "rewards/margins": 14.827485084533691, "rewards/rejected": -9.3285551071167, "step": 2609 }, { "epoch": 1.45, "learning_rate": 1.8615011849440425e-07, "logits/chosen": -6.0499372482299805, "logits/rejected": -6.021924018859863, "logps/chosen": -339.6859436035156, "logps/rejected": -194.54730224609375, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": 3.9957265853881836, "rewards/margins": 11.053123474121094, "rewards/rejected": -7.057397365570068, "step": 2610 }, { "epoch": 1.45, "learning_rate": 1.8580017531657238e-07, "logits/chosen": -6.016930103302002, "logits/rejected": -6.029181480407715, "logps/chosen": -271.7675476074219, "logps/rejected": -240.31942749023438, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 4.374602317810059, "rewards/margins": 12.090285301208496, "rewards/rejected": -7.7156829833984375, "step": 2611 }, { "epoch": 1.45, "learning_rate": 1.854504862994905e-07, "logits/chosen": -5.943078994750977, "logits/rejected": -5.995457649230957, "logps/chosen": -241.9229736328125, "logps/rejected": -227.1125946044922, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 2.6740407943725586, "rewards/margins": 12.404743194580078, "rewards/rejected": -9.730703353881836, "step": 2612 }, { "epoch": 1.45, "learning_rate": 1.8510105172602692e-07, "logits/chosen": -5.961413383483887, "logits/rejected": -6.033151149749756, "logps/chosen": -236.25921630859375, "logps/rejected": -171.47171020507812, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": 2.257652759552002, "rewards/margins": 8.346678733825684, "rewards/rejected": -6.089025974273682, "step": 2613 }, { "epoch": 1.45, "learning_rate": 1.8475187187884427e-07, "logits/chosen": -5.9945549964904785, "logits/rejected": -6.0829854011535645, "logps/chosen": -195.82289123535156, "logps/rejected": -259.0721740722656, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 2.55029034614563, "rewards/margins": 13.949861526489258, "rewards/rejected": -11.399572372436523, "step": 2614 }, { "epoch": 1.45, "learning_rate": 1.8440294704039927e-07, "logits/chosen": -5.997231960296631, "logits/rejected": -6.043231964111328, "logps/chosen": -273.0850830078125, "logps/rejected": -286.49322509765625, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": 5.702023506164551, "rewards/margins": 12.752701759338379, "rewards/rejected": -7.050678253173828, "step": 2615 }, { "epoch": 1.45, "learning_rate": 1.8405427749294233e-07, "logits/chosen": -6.031641006469727, "logits/rejected": -6.018310546875, "logps/chosen": -261.73992919921875, "logps/rejected": -186.003662109375, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 6.6215972900390625, "rewards/margins": 11.576752662658691, "rewards/rejected": -4.955155372619629, "step": 2616 }, { "epoch": 1.45, "learning_rate": 1.837058635185172e-07, "logits/chosen": -6.074983596801758, "logits/rejected": -6.059910774230957, "logps/chosen": -321.47479248046875, "logps/rejected": -187.88662719726562, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 2.9333341121673584, "rewards/margins": 9.93860149383545, "rewards/rejected": -7.005267143249512, "step": 2617 }, { "epoch": 1.45, "learning_rate": 1.8335770539896063e-07, "logits/chosen": -6.022655487060547, "logits/rejected": -5.917633533477783, "logps/chosen": -272.3983154296875, "logps/rejected": -257.6538391113281, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 3.246199131011963, "rewards/margins": 10.070966720581055, "rewards/rejected": -6.824767112731934, "step": 2618 }, { "epoch": 1.45, "learning_rate": 1.8300980341590294e-07, "logits/chosen": -6.061032295227051, "logits/rejected": -6.04593563079834, "logps/chosen": -211.48956298828125, "logps/rejected": -218.5620880126953, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 3.7827823162078857, "rewards/margins": 10.594709396362305, "rewards/rejected": -6.811927318572998, "step": 2619 }, { "epoch": 1.45, "learning_rate": 1.8266215785076705e-07, "logits/chosen": -6.024047374725342, "logits/rejected": -6.066557884216309, "logps/chosen": -206.0623779296875, "logps/rejected": -218.1866912841797, "loss": 0.1471, "rewards/accuracies": 0.875, "rewards/chosen": 1.099966287612915, "rewards/margins": 11.068257331848145, "rewards/rejected": -9.968291282653809, "step": 2620 }, { "epoch": 1.46, "learning_rate": 1.8231476898476866e-07, "logits/chosen": -6.019387245178223, "logits/rejected": -6.024446487426758, "logps/chosen": -286.8615417480469, "logps/rejected": -263.2427062988281, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 5.493334770202637, "rewards/margins": 13.012566566467285, "rewards/rejected": -7.519231796264648, "step": 2621 }, { "epoch": 1.46, "learning_rate": 1.8196763709891522e-07, "logits/chosen": -5.946531772613525, "logits/rejected": -5.955026149749756, "logps/chosen": -371.4233093261719, "logps/rejected": -329.8896484375, "loss": 0.0119, "rewards/accuracies": 0.9375, "rewards/chosen": 5.741802215576172, "rewards/margins": 10.049148559570312, "rewards/rejected": -4.307346343994141, "step": 2622 }, { "epoch": 1.46, "learning_rate": 1.8162076247400688e-07, "logits/chosen": -6.088022708892822, "logits/rejected": -5.952983856201172, "logps/chosen": -301.873046875, "logps/rejected": -123.59239196777344, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 4.975376605987549, "rewards/margins": 12.162506103515625, "rewards/rejected": -7.187130451202393, "step": 2623 }, { "epoch": 1.46, "learning_rate": 1.8127414539063567e-07, "logits/chosen": -5.930994987487793, "logits/rejected": -5.846035957336426, "logps/chosen": -497.5343322753906, "logps/rejected": -157.48086547851562, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": 6.854043006896973, "rewards/margins": 11.773168563842773, "rewards/rejected": -4.919124603271484, "step": 2624 }, { "epoch": 1.46, "learning_rate": 1.809277861291848e-07, "logits/chosen": -6.016106605529785, "logits/rejected": -6.107669353485107, "logps/chosen": -249.01690673828125, "logps/rejected": -194.8541717529297, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 3.5541470050811768, "rewards/margins": 11.915385246276855, "rewards/rejected": -8.361239433288574, "step": 2625 }, { "epoch": 1.46, "learning_rate": 1.8058168496982963e-07, "logits/chosen": -5.997503757476807, "logits/rejected": -6.057467460632324, "logps/chosen": -568.5291137695312, "logps/rejected": -528.5801391601562, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": 7.571612358093262, "rewards/margins": 19.322301864624023, "rewards/rejected": -11.750690460205078, "step": 2626 }, { "epoch": 1.46, "learning_rate": 1.8023584219253573e-07, "logits/chosen": -6.081358909606934, "logits/rejected": -6.006854057312012, "logps/chosen": -309.7957763671875, "logps/rejected": -266.14324951171875, "loss": 0.2204, "rewards/accuracies": 0.9375, "rewards/chosen": 2.921898365020752, "rewards/margins": 12.003684043884277, "rewards/rejected": -9.081786155700684, "step": 2627 }, { "epoch": 1.46, "learning_rate": 1.7989025807706109e-07, "logits/chosen": -5.978677749633789, "logits/rejected": -6.1000189781188965, "logps/chosen": -265.4895935058594, "logps/rejected": -313.05517578125, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": 4.6477460861206055, "rewards/margins": 11.998933792114258, "rewards/rejected": -7.351187705993652, "step": 2628 }, { "epoch": 1.46, "learning_rate": 1.7954493290295309e-07, "logits/chosen": -6.04191780090332, "logits/rejected": -6.0801873207092285, "logps/chosen": -169.95803833007812, "logps/rejected": -188.8238983154297, "loss": 0.0147, "rewards/accuracies": 0.9375, "rewards/chosen": 2.686316728591919, "rewards/margins": 10.691366195678711, "rewards/rejected": -8.005048751831055, "step": 2629 }, { "epoch": 1.46, "learning_rate": 1.7919986694955057e-07, "logits/chosen": -6.072889804840088, "logits/rejected": -5.913082599639893, "logps/chosen": -293.94110107421875, "logps/rejected": -96.38447570800781, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": 6.859499454498291, "rewards/margins": 11.707469940185547, "rewards/rejected": -4.847970485687256, "step": 2630 }, { "epoch": 1.46, "learning_rate": 1.7885506049598197e-07, "logits/chosen": -5.951114177703857, "logits/rejected": -6.056576251983643, "logps/chosen": -223.59915161132812, "logps/rejected": -230.8191680908203, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 5.035905838012695, "rewards/margins": 14.032395362854004, "rewards/rejected": -8.996488571166992, "step": 2631 }, { "epoch": 1.46, "learning_rate": 1.7851051382116645e-07, "logits/chosen": -5.974483489990234, "logits/rejected": -6.051054954528809, "logps/chosen": -204.87351989746094, "logps/rejected": -215.9029998779297, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 2.577768325805664, "rewards/margins": 10.255881309509277, "rewards/rejected": -7.678112983703613, "step": 2632 }, { "epoch": 1.46, "learning_rate": 1.7816622720381281e-07, "logits/chosen": -6.100625514984131, "logits/rejected": -6.073219299316406, "logps/chosen": -306.24822998046875, "logps/rejected": -306.093994140625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 3.76953125, "rewards/margins": 13.045038223266602, "rewards/rejected": -9.275507926940918, "step": 2633 }, { "epoch": 1.46, "learning_rate": 1.7782220092241917e-07, "logits/chosen": -6.08226203918457, "logits/rejected": -5.962646484375, "logps/chosen": -239.31643676757812, "logps/rejected": -174.97637939453125, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": 3.978576183319092, "rewards/margins": 11.977398872375488, "rewards/rejected": -7.998823165893555, "step": 2634 }, { "epoch": 1.46, "learning_rate": 1.774784352552735e-07, "logits/chosen": -6.0509538650512695, "logits/rejected": -5.919201850891113, "logps/chosen": -200.24456787109375, "logps/rejected": -165.43515014648438, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 3.39654278755188, "rewards/margins": 11.00386905670166, "rewards/rejected": -7.607326030731201, "step": 2635 }, { "epoch": 1.46, "learning_rate": 1.771349304804529e-07, "logits/chosen": -5.9422478675842285, "logits/rejected": -5.961570739746094, "logps/chosen": -300.1994323730469, "logps/rejected": -231.92788696289062, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": 5.514967918395996, "rewards/margins": 14.971206665039062, "rewards/rejected": -9.456239700317383, "step": 2636 }, { "epoch": 1.46, "learning_rate": 1.7679168687582308e-07, "logits/chosen": -5.9948930740356445, "logits/rejected": -5.930080413818359, "logps/chosen": -207.45623779296875, "logps/rejected": -127.6595230102539, "loss": 0.1162, "rewards/accuracies": 1.0, "rewards/chosen": 2.9898226261138916, "rewards/margins": 11.58791732788086, "rewards/rejected": -8.598094940185547, "step": 2637 }, { "epoch": 1.46, "learning_rate": 1.7644870471903894e-07, "logits/chosen": -5.984455585479736, "logits/rejected": -6.013023376464844, "logps/chosen": -213.65957641601562, "logps/rejected": -305.35614013671875, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": 2.3459525108337402, "rewards/margins": 11.814865112304688, "rewards/rejected": -9.468912124633789, "step": 2638 }, { "epoch": 1.47, "learning_rate": 1.7610598428754336e-07, "logits/chosen": -5.894781112670898, "logits/rejected": -5.90716552734375, "logps/chosen": -223.56178283691406, "logps/rejected": -205.91366577148438, "loss": 0.0325, "rewards/accuracies": 0.9375, "rewards/chosen": 2.8911550045013428, "rewards/margins": 10.173944473266602, "rewards/rejected": -7.282789707183838, "step": 2639 }, { "epoch": 1.47, "learning_rate": 1.7576352585856806e-07, "logits/chosen": -6.047110557556152, "logits/rejected": -6.097609996795654, "logps/chosen": -225.98455810546875, "logps/rejected": -201.49493408203125, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": 3.113623857498169, "rewards/margins": 9.771121978759766, "rewards/rejected": -6.657497882843018, "step": 2640 }, { "epoch": 1.47, "learning_rate": 1.7542132970913248e-07, "logits/chosen": -5.8748698234558105, "logits/rejected": -5.926742076873779, "logps/chosen": -314.0448303222656, "logps/rejected": -573.21826171875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 3.2514796257019043, "rewards/margins": 13.995800018310547, "rewards/rejected": -10.744321823120117, "step": 2641 }, { "epoch": 1.47, "learning_rate": 1.7507939611604426e-07, "logits/chosen": -5.982841968536377, "logits/rejected": -5.907782554626465, "logps/chosen": -236.72677612304688, "logps/rejected": -218.74917602539062, "loss": 0.0277, "rewards/accuracies": 0.9375, "rewards/chosen": 4.35761833190918, "rewards/margins": 11.983137130737305, "rewards/rejected": -7.625518321990967, "step": 2642 }, { "epoch": 1.47, "learning_rate": 1.747377253558982e-07, "logits/chosen": -6.006142616271973, "logits/rejected": -6.071733474731445, "logps/chosen": -237.614990234375, "logps/rejected": -254.78294372558594, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 4.446221351623535, "rewards/margins": 13.330016136169434, "rewards/rejected": -8.883794784545898, "step": 2643 }, { "epoch": 1.47, "learning_rate": 1.743963177050763e-07, "logits/chosen": -5.980809688568115, "logits/rejected": -6.096837043762207, "logps/chosen": -205.00271606445312, "logps/rejected": -327.86968994140625, "loss": 0.0819, "rewards/accuracies": 1.0, "rewards/chosen": 2.581913471221924, "rewards/margins": 12.64066219329834, "rewards/rejected": -10.058749198913574, "step": 2644 }, { "epoch": 1.47, "learning_rate": 1.7405517343974885e-07, "logits/chosen": -5.968758583068848, "logits/rejected": -5.9134840965271, "logps/chosen": -296.542724609375, "logps/rejected": -208.9921875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 4.433172225952148, "rewards/margins": 11.089752197265625, "rewards/rejected": -6.656580448150635, "step": 2645 }, { "epoch": 1.47, "learning_rate": 1.7371429283587174e-07, "logits/chosen": -6.056885719299316, "logits/rejected": -6.1553568840026855, "logps/chosen": -172.60986328125, "logps/rejected": -169.98663330078125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 1.7410224676132202, "rewards/margins": 9.407829284667969, "rewards/rejected": -7.666806221008301, "step": 2646 }, { "epoch": 1.47, "learning_rate": 1.7337367616918868e-07, "logits/chosen": -6.004146575927734, "logits/rejected": -5.875487327575684, "logps/chosen": -257.8654479980469, "logps/rejected": -151.35104370117188, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 2.819913387298584, "rewards/margins": 8.785852432250977, "rewards/rejected": -5.965938568115234, "step": 2647 }, { "epoch": 1.47, "learning_rate": 1.730333237152289e-07, "logits/chosen": -6.0459465980529785, "logits/rejected": -6.013803958892822, "logps/chosen": -311.8554382324219, "logps/rejected": -161.1846160888672, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 8.13219928741455, "rewards/margins": 14.255232810974121, "rewards/rejected": -6.12303352355957, "step": 2648 }, { "epoch": 1.47, "learning_rate": 1.7269323574930861e-07, "logits/chosen": -6.074981689453125, "logits/rejected": -6.09755802154541, "logps/chosen": -262.19403076171875, "logps/rejected": -246.43438720703125, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": 4.467007160186768, "rewards/margins": 12.56635856628418, "rewards/rejected": -8.09935188293457, "step": 2649 }, { "epoch": 1.47, "learning_rate": 1.7235341254653003e-07, "logits/chosen": -5.911451816558838, "logits/rejected": -6.000173568725586, "logps/chosen": -223.405517578125, "logps/rejected": -248.19546508789062, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": 2.864556312561035, "rewards/margins": 13.697973251342773, "rewards/rejected": -10.833417892456055, "step": 2650 }, { "epoch": 1.47, "learning_rate": 1.7201385438178067e-07, "logits/chosen": -5.9642510414123535, "logits/rejected": -6.01364278793335, "logps/chosen": -299.75775146484375, "logps/rejected": -218.2108917236328, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": 3.9165663719177246, "rewards/margins": 11.868788719177246, "rewards/rejected": -7.9522223472595215, "step": 2651 }, { "epoch": 1.47, "learning_rate": 1.716745615297342e-07, "logits/chosen": -5.976836681365967, "logits/rejected": -6.103259563446045, "logps/chosen": -352.9756774902344, "logps/rejected": -342.0292663574219, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 6.8119306564331055, "rewards/margins": 13.196829795837402, "rewards/rejected": -6.384899616241455, "step": 2652 }, { "epoch": 1.47, "learning_rate": 1.713355342648494e-07, "logits/chosen": -6.008270740509033, "logits/rejected": -6.011086463928223, "logps/chosen": -254.97781372070312, "logps/rejected": -208.7782745361328, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 4.745916366577148, "rewards/margins": 14.686767578125, "rewards/rejected": -9.940851211547852, "step": 2653 }, { "epoch": 1.47, "learning_rate": 1.7099677286137065e-07, "logits/chosen": -5.983397006988525, "logits/rejected": -5.863387107849121, "logps/chosen": -525.8779296875, "logps/rejected": -183.19451904296875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": 7.401122093200684, "rewards/margins": 11.985167503356934, "rewards/rejected": -4.584046363830566, "step": 2654 }, { "epoch": 1.47, "learning_rate": 1.7065827759332635e-07, "logits/chosen": -6.01161003112793, "logits/rejected": -5.989853382110596, "logps/chosen": -241.48919677734375, "logps/rejected": -169.2913360595703, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 3.4070630073547363, "rewards/margins": 12.636808395385742, "rewards/rejected": -9.229745864868164, "step": 2655 }, { "epoch": 1.47, "learning_rate": 1.7032004873453066e-07, "logits/chosen": -5.983094692230225, "logits/rejected": -6.042935371398926, "logps/chosen": -190.5235595703125, "logps/rejected": -232.37269592285156, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 3.896198272705078, "rewards/margins": 13.445843696594238, "rewards/rejected": -9.54964542388916, "step": 2656 }, { "epoch": 1.48, "learning_rate": 1.6998208655858137e-07, "logits/chosen": -6.127315521240234, "logits/rejected": -6.0253190994262695, "logps/chosen": -282.1282958984375, "logps/rejected": -168.64468383789062, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 7.476901531219482, "rewards/margins": 13.280722618103027, "rewards/rejected": -5.803820610046387, "step": 2657 }, { "epoch": 1.48, "learning_rate": 1.696443913388611e-07, "logits/chosen": -5.886910915374756, "logits/rejected": -6.025235652923584, "logps/chosen": -251.17311096191406, "logps/rejected": -451.845458984375, "loss": 0.0101, "rewards/accuracies": 0.9375, "rewards/chosen": 4.18380069732666, "rewards/margins": 14.329597473144531, "rewards/rejected": -10.145797729492188, "step": 2658 }, { "epoch": 1.48, "learning_rate": 1.693069633485366e-07, "logits/chosen": -6.065347671508789, "logits/rejected": -6.084829807281494, "logps/chosen": -286.2860107421875, "logps/rejected": -289.48162841796875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 7.455116271972656, "rewards/margins": 14.610822677612305, "rewards/rejected": -7.155706405639648, "step": 2659 }, { "epoch": 1.48, "learning_rate": 1.689698028605578e-07, "logits/chosen": -6.050009727478027, "logits/rejected": -5.973926544189453, "logps/chosen": -262.716064453125, "logps/rejected": -191.22427368164062, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 1.8551875352859497, "rewards/margins": 9.614181518554688, "rewards/rejected": -7.758994102478027, "step": 2660 }, { "epoch": 1.48, "learning_rate": 1.686329101476589e-07, "logits/chosen": -5.957542896270752, "logits/rejected": -5.997342586517334, "logps/chosen": -272.0722351074219, "logps/rejected": -253.22628784179688, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 4.3840861320495605, "rewards/margins": 12.27316665649414, "rewards/rejected": -7.889080047607422, "step": 2661 }, { "epoch": 1.48, "learning_rate": 1.6829628548235714e-07, "logits/chosen": -6.031079292297363, "logits/rejected": -6.168103218078613, "logps/chosen": -327.6275634765625, "logps/rejected": -288.34283447265625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 5.040383815765381, "rewards/margins": 13.447820663452148, "rewards/rejected": -8.40743637084961, "step": 2662 }, { "epoch": 1.48, "learning_rate": 1.6795992913695333e-07, "logits/chosen": -6.0290985107421875, "logits/rejected": -5.982369422912598, "logps/chosen": -385.82379150390625, "logps/rejected": -179.455810546875, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 7.694351673126221, "rewards/margins": 12.524518013000488, "rewards/rejected": -4.830165863037109, "step": 2663 }, { "epoch": 1.48, "learning_rate": 1.6762384138353075e-07, "logits/chosen": -5.968094348907471, "logits/rejected": -5.8820648193359375, "logps/chosen": -616.2125244140625, "logps/rejected": -366.4725036621094, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 9.243731498718262, "rewards/margins": 15.86059284210205, "rewards/rejected": -6.616861343383789, "step": 2664 }, { "epoch": 1.48, "learning_rate": 1.6728802249395524e-07, "logits/chosen": -6.0777459144592285, "logits/rejected": -5.966627597808838, "logps/chosen": -326.9218444824219, "logps/rejected": -135.89730834960938, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 6.766003608703613, "rewards/margins": 13.140264511108398, "rewards/rejected": -6.374260425567627, "step": 2665 }, { "epoch": 1.48, "learning_rate": 1.6695247273987623e-07, "logits/chosen": -6.003714084625244, "logits/rejected": -5.994399070739746, "logps/chosen": -333.3348388671875, "logps/rejected": -215.80410766601562, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": 6.305205821990967, "rewards/margins": 11.813695907592773, "rewards/rejected": -5.50848913192749, "step": 2666 }, { "epoch": 1.48, "learning_rate": 1.666171923927242e-07, "logits/chosen": -5.923666000366211, "logits/rejected": -5.947751045227051, "logps/chosen": -358.921630859375, "logps/rejected": -328.1883239746094, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 7.3565802574157715, "rewards/margins": 14.177319526672363, "rewards/rejected": -6.820740699768066, "step": 2667 }, { "epoch": 1.48, "learning_rate": 1.6628218172371255e-07, "logits/chosen": -6.0352864265441895, "logits/rejected": -6.06903076171875, "logps/chosen": -287.52801513671875, "logps/rejected": -196.99456787109375, "loss": 0.0879, "rewards/accuracies": 0.9375, "rewards/chosen": 4.233116149902344, "rewards/margins": 10.68794059753418, "rewards/rejected": -6.454823970794678, "step": 2668 }, { "epoch": 1.48, "learning_rate": 1.6594744100383596e-07, "logits/chosen": -5.976251125335693, "logits/rejected": -5.9154438972473145, "logps/chosen": -478.39141845703125, "logps/rejected": -344.8285217285156, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 3.854379177093506, "rewards/margins": 7.842145919799805, "rewards/rejected": -3.987766742706299, "step": 2669 }, { "epoch": 1.48, "learning_rate": 1.6561297050387114e-07, "logits/chosen": -6.0121612548828125, "logits/rejected": -6.079837799072266, "logps/chosen": -188.78758239746094, "logps/rejected": -220.74240112304688, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 3.7835044860839844, "rewards/margins": 11.812305450439453, "rewards/rejected": -8.028800964355469, "step": 2670 }, { "epoch": 1.48, "learning_rate": 1.6527877049437623e-07, "logits/chosen": -6.021451950073242, "logits/rejected": -6.098676681518555, "logps/chosen": -239.0612030029297, "logps/rejected": -251.87950134277344, "loss": 0.0155, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4586024284362793, "rewards/margins": 11.574063301086426, "rewards/rejected": -8.115461349487305, "step": 2671 }, { "epoch": 1.48, "learning_rate": 1.649448412456901e-07, "logits/chosen": -6.154148101806641, "logits/rejected": -6.001794338226318, "logps/chosen": -209.48928833007812, "logps/rejected": -221.55807495117188, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.87203049659729, "rewards/margins": 13.209100723266602, "rewards/rejected": -11.337071418762207, "step": 2672 }, { "epoch": 1.48, "learning_rate": 1.6461118302793307e-07, "logits/chosen": -6.06785249710083, "logits/rejected": -5.989174842834473, "logps/chosen": -290.086669921875, "logps/rejected": -214.91722106933594, "loss": 0.0199, "rewards/accuracies": 0.9375, "rewards/chosen": 4.6450958251953125, "rewards/margins": 14.541082382202148, "rewards/rejected": -9.895987510681152, "step": 2673 }, { "epoch": 1.48, "learning_rate": 1.6427779611100606e-07, "logits/chosen": -6.054555892944336, "logits/rejected": -6.007603168487549, "logps/chosen": -401.7891540527344, "logps/rejected": -237.35968017578125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 3.0063562393188477, "rewards/margins": 12.271858215332031, "rewards/rejected": -9.2655029296875, "step": 2674 }, { "epoch": 1.49, "learning_rate": 1.6394468076459073e-07, "logits/chosen": -6.138070583343506, "logits/rejected": -6.091052532196045, "logps/chosen": -310.51275634765625, "logps/rejected": -243.17926025390625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 5.290284156799316, "rewards/margins": 14.045282363891602, "rewards/rejected": -8.754999160766602, "step": 2675 }, { "epoch": 1.49, "learning_rate": 1.6361183725814852e-07, "logits/chosen": -6.000548362731934, "logits/rejected": -6.091817855834961, "logps/chosen": -263.1171875, "logps/rejected": -307.3666076660156, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": 5.509580135345459, "rewards/margins": 15.979560852050781, "rewards/rejected": -10.469980239868164, "step": 2676 }, { "epoch": 1.49, "learning_rate": 1.632792658609216e-07, "logits/chosen": -5.984292507171631, "logits/rejected": -5.9551801681518555, "logps/chosen": -233.81622314453125, "logps/rejected": -154.072998046875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 3.561959743499756, "rewards/margins": 10.18616008758545, "rewards/rejected": -6.624199867248535, "step": 2677 }, { "epoch": 1.49, "learning_rate": 1.6294696684193155e-07, "logits/chosen": -6.045591831207275, "logits/rejected": -5.938069820404053, "logps/chosen": -201.91415405273438, "logps/rejected": -201.33338928222656, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 1.6189181804656982, "rewards/margins": 12.041847229003906, "rewards/rejected": -10.422928810119629, "step": 2678 }, { "epoch": 1.49, "learning_rate": 1.6261494046997986e-07, "logits/chosen": -6.154476165771484, "logits/rejected": -5.981076240539551, "logps/chosen": -284.60003662109375, "logps/rejected": -228.5103759765625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 4.05274772644043, "rewards/margins": 13.174483299255371, "rewards/rejected": -9.121736526489258, "step": 2679 }, { "epoch": 1.49, "learning_rate": 1.6228318701364762e-07, "logits/chosen": -5.966563701629639, "logits/rejected": -6.050137042999268, "logps/chosen": -213.47335815429688, "logps/rejected": -228.69662475585938, "loss": 0.0854, "rewards/accuracies": 1.0, "rewards/chosen": 1.885453701019287, "rewards/margins": 10.900932312011719, "rewards/rejected": -9.015478134155273, "step": 2680 }, { "epoch": 1.49, "learning_rate": 1.6195170674129466e-07, "logits/chosen": -5.991781234741211, "logits/rejected": -6.0240020751953125, "logps/chosen": -228.70872497558594, "logps/rejected": -291.2513427734375, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": 4.340075492858887, "rewards/margins": 13.60069465637207, "rewards/rejected": -9.260618209838867, "step": 2681 }, { "epoch": 1.49, "learning_rate": 1.6162049992106024e-07, "logits/chosen": -6.149246692657471, "logits/rejected": -6.001673698425293, "logps/chosen": -284.1844482421875, "logps/rejected": -248.42031860351562, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 2.8108620643615723, "rewards/margins": 11.508150100708008, "rewards/rejected": -8.697288513183594, "step": 2682 }, { "epoch": 1.49, "learning_rate": 1.6128956682086243e-07, "logits/chosen": -6.032205581665039, "logits/rejected": -5.996171951293945, "logps/chosen": -174.8988037109375, "logps/rejected": -185.57408142089844, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": 3.1520509719848633, "rewards/margins": 11.11344051361084, "rewards/rejected": -7.961389541625977, "step": 2683 }, { "epoch": 1.49, "learning_rate": 1.609589077083978e-07, "logits/chosen": -5.98722505569458, "logits/rejected": -6.016868591308594, "logps/chosen": -309.70477294921875, "logps/rejected": -255.6048583984375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 6.706547737121582, "rewards/margins": 13.621538162231445, "rewards/rejected": -6.914990425109863, "step": 2684 }, { "epoch": 1.49, "learning_rate": 1.606285228511412e-07, "logits/chosen": -5.895966053009033, "logits/rejected": -5.8626532554626465, "logps/chosen": -311.1020812988281, "logps/rejected": -342.51348876953125, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 6.516446113586426, "rewards/margins": 13.802959442138672, "rewards/rejected": -7.286513328552246, "step": 2685 }, { "epoch": 1.49, "learning_rate": 1.6029841251634529e-07, "logits/chosen": -5.969554901123047, "logits/rejected": -5.877676486968994, "logps/chosen": -197.61444091796875, "logps/rejected": -220.61508178710938, "loss": 0.1637, "rewards/accuracies": 1.0, "rewards/chosen": -0.54011470079422, "rewards/margins": 10.233964920043945, "rewards/rejected": -10.774080276489258, "step": 2686 }, { "epoch": 1.49, "learning_rate": 1.599685769710417e-07, "logits/chosen": -6.045764923095703, "logits/rejected": -6.097658157348633, "logps/chosen": -219.60574340820312, "logps/rejected": -323.97509765625, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": 1.4977984428405762, "rewards/margins": 13.384749412536621, "rewards/rejected": -11.886950492858887, "step": 2687 }, { "epoch": 1.49, "learning_rate": 1.596390164820387e-07, "logits/chosen": -5.951934814453125, "logits/rejected": -5.9281816482543945, "logps/chosen": -219.5841064453125, "logps/rejected": -154.8157958984375, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": 4.182682991027832, "rewards/margins": 11.428312301635742, "rewards/rejected": -7.24562931060791, "step": 2688 }, { "epoch": 1.49, "learning_rate": 1.5930973131592285e-07, "logits/chosen": -6.004570007324219, "logits/rejected": -6.015059947967529, "logps/chosen": -342.63916015625, "logps/rejected": -275.6766357421875, "loss": 0.0605, "rewards/accuracies": 0.9375, "rewards/chosen": 4.396542549133301, "rewards/margins": 13.057134628295898, "rewards/rejected": -8.660593032836914, "step": 2689 }, { "epoch": 1.49, "learning_rate": 1.5898072173905724e-07, "logits/chosen": -5.997983932495117, "logits/rejected": -5.989459037780762, "logps/chosen": -363.01507568359375, "logps/rejected": -344.2943115234375, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 3.1986002922058105, "rewards/margins": 11.517494201660156, "rewards/rejected": -8.318894386291504, "step": 2690 }, { "epoch": 1.49, "learning_rate": 1.5865198801758268e-07, "logits/chosen": -6.2503533363342285, "logits/rejected": -6.029561519622803, "logps/chosen": -468.7018737792969, "logps/rejected": -212.41209411621094, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": 5.691835403442383, "rewards/margins": 13.768715858459473, "rewards/rejected": -8.07688045501709, "step": 2691 }, { "epoch": 1.49, "learning_rate": 1.583235304174167e-07, "logits/chosen": -6.020793914794922, "logits/rejected": -6.0951080322265625, "logps/chosen": -242.8710479736328, "logps/rejected": -251.34329223632812, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 4.928524971008301, "rewards/margins": 12.528265953063965, "rewards/rejected": -7.599740505218506, "step": 2692 }, { "epoch": 1.5, "learning_rate": 1.5799534920425305e-07, "logits/chosen": -6.031859397888184, "logits/rejected": -6.044652462005615, "logps/chosen": -293.6159973144531, "logps/rejected": -245.43063354492188, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 5.509553909301758, "rewards/margins": 13.233457565307617, "rewards/rejected": -7.723904609680176, "step": 2693 }, { "epoch": 1.5, "learning_rate": 1.576674446435624e-07, "logits/chosen": -5.889019966125488, "logits/rejected": -6.008051872253418, "logps/chosen": -375.045166015625, "logps/rejected": -341.37921142578125, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 7.169346332550049, "rewards/margins": 13.488523483276367, "rewards/rejected": -6.319177627563477, "step": 2694 }, { "epoch": 1.5, "learning_rate": 1.5733981700059152e-07, "logits/chosen": -6.014262676239014, "logits/rejected": -6.008856296539307, "logps/chosen": -319.8405456542969, "logps/rejected": -199.6162872314453, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": 8.17529296875, "rewards/margins": 13.816397666931152, "rewards/rejected": -5.641104698181152, "step": 2695 }, { "epoch": 1.5, "learning_rate": 1.570124665403632e-07, "logits/chosen": -5.984405517578125, "logits/rejected": -5.9207024574279785, "logps/chosen": -282.703125, "logps/rejected": -145.16546630859375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 7.284914970397949, "rewards/margins": 12.839152336120605, "rewards/rejected": -5.554237365722656, "step": 2696 }, { "epoch": 1.5, "learning_rate": 1.566853935276758e-07, "logits/chosen": -6.013916492462158, "logits/rejected": -5.944499969482422, "logps/chosen": -278.98675537109375, "logps/rejected": -235.30816650390625, "loss": 0.0731, "rewards/accuracies": 0.875, "rewards/chosen": 4.7223944664001465, "rewards/margins": 10.649430274963379, "rewards/rejected": -5.927035331726074, "step": 2697 }, { "epoch": 1.5, "learning_rate": 1.5635859822710317e-07, "logits/chosen": -6.0178961753845215, "logits/rejected": -5.9469451904296875, "logps/chosen": -368.7945861816406, "logps/rejected": -232.78048706054688, "loss": 0.1079, "rewards/accuracies": 0.9375, "rewards/chosen": 5.426708221435547, "rewards/margins": 13.133989334106445, "rewards/rejected": -7.70728063583374, "step": 2698 }, { "epoch": 1.5, "learning_rate": 1.5603208090299496e-07, "logits/chosen": -6.046270847320557, "logits/rejected": -5.98099422454834, "logps/chosen": -229.75704956054688, "logps/rejected": -197.41519165039062, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 4.923891067504883, "rewards/margins": 11.540233612060547, "rewards/rejected": -6.616342544555664, "step": 2699 }, { "epoch": 1.5, "learning_rate": 1.5570584181947567e-07, "logits/chosen": -5.975000381469727, "logits/rejected": -5.9547905921936035, "logps/chosen": -216.99710083007812, "logps/rejected": -209.9534149169922, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": 2.084458112716675, "rewards/margins": 10.318975448608398, "rewards/rejected": -8.234518051147461, "step": 2700 }, { "epoch": 1.5, "learning_rate": 1.5537988124044494e-07, "logits/chosen": -5.988711833953857, "logits/rejected": -6.047671794891357, "logps/chosen": -292.6430358886719, "logps/rejected": -223.52011108398438, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 6.500491142272949, "rewards/margins": 11.619991302490234, "rewards/rejected": -5.119500160217285, "step": 2701 }, { "epoch": 1.5, "learning_rate": 1.5505419942957664e-07, "logits/chosen": -5.951976776123047, "logits/rejected": -5.933615684509277, "logps/chosen": -293.2245178222656, "logps/rejected": -246.4137420654297, "loss": 0.0281, "rewards/accuracies": 0.9375, "rewards/chosen": 4.972593307495117, "rewards/margins": 10.360706329345703, "rewards/rejected": -5.388113021850586, "step": 2702 }, { "epoch": 1.5, "learning_rate": 1.5472879665031961e-07, "logits/chosen": -6.037505626678467, "logits/rejected": -5.923588752746582, "logps/chosen": -254.53585815429688, "logps/rejected": -117.12690734863281, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 4.239035606384277, "rewards/margins": 11.006068229675293, "rewards/rejected": -6.767033100128174, "step": 2703 }, { "epoch": 1.5, "learning_rate": 1.5440367316589708e-07, "logits/chosen": -6.00106143951416, "logits/rejected": -5.985405921936035, "logps/chosen": -213.19593811035156, "logps/rejected": -244.3052978515625, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": 2.635725498199463, "rewards/margins": 9.331857681274414, "rewards/rejected": -6.696132659912109, "step": 2704 }, { "epoch": 1.5, "learning_rate": 1.5407882923930566e-07, "logits/chosen": -6.082079887390137, "logits/rejected": -5.987815856933594, "logps/chosen": -294.864990234375, "logps/rejected": -197.03744506835938, "loss": 0.0396, "rewards/accuracies": 0.9375, "rewards/chosen": 5.123889923095703, "rewards/margins": 12.950580596923828, "rewards/rejected": -7.826691150665283, "step": 2705 }, { "epoch": 1.5, "learning_rate": 1.5375426513331668e-07, "logits/chosen": -5.999274730682373, "logits/rejected": -5.88674783706665, "logps/chosen": -276.2969055175781, "logps/rejected": -188.52792358398438, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 5.600494384765625, "rewards/margins": 14.287010192871094, "rewards/rejected": -8.686515808105469, "step": 2706 }, { "epoch": 1.5, "learning_rate": 1.5342998111047417e-07, "logits/chosen": -5.95366907119751, "logits/rejected": -5.953266620635986, "logps/chosen": -267.54949951171875, "logps/rejected": -148.81997680664062, "loss": 0.035, "rewards/accuracies": 0.9375, "rewards/chosen": 4.716647148132324, "rewards/margins": 11.418110847473145, "rewards/rejected": -6.701464653015137, "step": 2707 }, { "epoch": 1.5, "learning_rate": 1.5310597743309683e-07, "logits/chosen": -6.078037261962891, "logits/rejected": -6.086089134216309, "logps/chosen": -159.359619140625, "logps/rejected": -139.59877014160156, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 1.8628144264221191, "rewards/margins": 8.701065063476562, "rewards/rejected": -6.838249683380127, "step": 2708 }, { "epoch": 1.5, "learning_rate": 1.5278225436327535e-07, "logits/chosen": -6.009430408477783, "logits/rejected": -6.0141496658325195, "logps/chosen": -285.48992919921875, "logps/rejected": -268.123291015625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": 3.5070085525512695, "rewards/margins": 15.296457290649414, "rewards/rejected": -11.789447784423828, "step": 2709 }, { "epoch": 1.5, "learning_rate": 1.524588121628743e-07, "logits/chosen": -6.045942306518555, "logits/rejected": -6.00799560546875, "logps/chosen": -226.4724578857422, "logps/rejected": -212.8748016357422, "loss": 0.042, "rewards/accuracies": 0.9375, "rewards/chosen": 2.383307456970215, "rewards/margins": 10.62232494354248, "rewards/rejected": -8.239017486572266, "step": 2710 }, { "epoch": 1.51, "learning_rate": 1.5213565109353045e-07, "logits/chosen": -5.950809478759766, "logits/rejected": -5.914853572845459, "logps/chosen": -248.465576171875, "logps/rejected": -313.8935546875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 4.493083953857422, "rewards/margins": 12.034433364868164, "rewards/rejected": -7.5413498878479, "step": 2711 }, { "epoch": 1.51, "learning_rate": 1.5181277141665355e-07, "logits/chosen": -6.0208868980407715, "logits/rejected": -5.905996799468994, "logps/chosen": -327.4942626953125, "logps/rejected": -282.0352783203125, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": 3.2568588256835938, "rewards/margins": 11.531147003173828, "rewards/rejected": -8.274288177490234, "step": 2712 }, { "epoch": 1.51, "learning_rate": 1.5149017339342574e-07, "logits/chosen": -5.990623474121094, "logits/rejected": -6.049631118774414, "logps/chosen": -415.6714782714844, "logps/rejected": -356.3099670410156, "loss": 0.0372, "rewards/accuracies": 0.9375, "rewards/chosen": 6.782413959503174, "rewards/margins": 14.427820205688477, "rewards/rejected": -7.645406723022461, "step": 2713 }, { "epoch": 1.51, "learning_rate": 1.511678572848009e-07, "logits/chosen": -6.2561116218566895, "logits/rejected": -6.1920599937438965, "logps/chosen": -303.5064697265625, "logps/rejected": -169.04388427734375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 3.6189098358154297, "rewards/margins": 14.082053184509277, "rewards/rejected": -10.463144302368164, "step": 2714 }, { "epoch": 1.51, "learning_rate": 1.5084582335150535e-07, "logits/chosen": -6.072682857513428, "logits/rejected": -6.118354320526123, "logps/chosen": -301.01708984375, "logps/rejected": -264.5145263671875, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": 6.266904354095459, "rewards/margins": 14.152412414550781, "rewards/rejected": -7.885508060455322, "step": 2715 }, { "epoch": 1.51, "learning_rate": 1.5052407185403693e-07, "logits/chosen": -6.065178871154785, "logits/rejected": -5.877956867218018, "logps/chosen": -270.7094421386719, "logps/rejected": -125.3255844116211, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 2.890289068222046, "rewards/margins": 10.553412437438965, "rewards/rejected": -7.663124084472656, "step": 2716 }, { "epoch": 1.51, "learning_rate": 1.502026030526652e-07, "logits/chosen": -6.037732124328613, "logits/rejected": -6.111299514770508, "logps/chosen": -169.23220825195312, "logps/rejected": -171.16014099121094, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 1.3506851196289062, "rewards/margins": 10.96854305267334, "rewards/rejected": -9.617857933044434, "step": 2717 }, { "epoch": 1.51, "learning_rate": 1.4988141720743074e-07, "logits/chosen": -6.173798561096191, "logits/rejected": -6.254303932189941, "logps/chosen": -280.80810546875, "logps/rejected": -310.8380126953125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 3.4570837020874023, "rewards/margins": 13.608133316040039, "rewards/rejected": -10.151050567626953, "step": 2718 }, { "epoch": 1.51, "learning_rate": 1.4956051457814523e-07, "logits/chosen": -6.018387317657471, "logits/rejected": -5.955741882324219, "logps/chosen": -281.49542236328125, "logps/rejected": -153.33441162109375, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 6.118990898132324, "rewards/margins": 10.953453063964844, "rewards/rejected": -4.8344621658325195, "step": 2719 }, { "epoch": 1.51, "learning_rate": 1.4923989542439159e-07, "logits/chosen": -6.056145668029785, "logits/rejected": -5.950112342834473, "logps/chosen": -199.6468048095703, "logps/rejected": -103.28825378417969, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 2.956143856048584, "rewards/margins": 10.764246940612793, "rewards/rejected": -7.808102607727051, "step": 2720 }, { "epoch": 1.51, "learning_rate": 1.489195600055232e-07, "logits/chosen": -5.992768287658691, "logits/rejected": -5.903437614440918, "logps/chosen": -259.4864196777344, "logps/rejected": -129.39517211914062, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 3.4759159088134766, "rewards/margins": 11.279136657714844, "rewards/rejected": -7.803219795227051, "step": 2721 }, { "epoch": 1.51, "learning_rate": 1.4859950858066423e-07, "logits/chosen": -5.977712154388428, "logits/rejected": -5.942886829376221, "logps/chosen": -370.4947509765625, "logps/rejected": -253.19862365722656, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": 5.822406768798828, "rewards/margins": 14.19758129119873, "rewards/rejected": -8.375175476074219, "step": 2722 }, { "epoch": 1.51, "learning_rate": 1.4827974140870847e-07, "logits/chosen": -6.023708343505859, "logits/rejected": -6.018555641174316, "logps/chosen": -467.1929931640625, "logps/rejected": -447.673583984375, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 5.899956226348877, "rewards/margins": 13.717194557189941, "rewards/rejected": -7.817238807678223, "step": 2723 }, { "epoch": 1.51, "learning_rate": 1.479602587483204e-07, "logits/chosen": -5.992818355560303, "logits/rejected": -6.012350082397461, "logps/chosen": -322.15850830078125, "logps/rejected": -217.41802978515625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 4.110634803771973, "rewards/margins": 11.623884201049805, "rewards/rejected": -7.513249397277832, "step": 2724 }, { "epoch": 1.51, "learning_rate": 1.4764106085793426e-07, "logits/chosen": -5.990987777709961, "logits/rejected": -6.110018253326416, "logps/chosen": -217.43939208984375, "logps/rejected": -229.1820831298828, "loss": 0.1165, "rewards/accuracies": 0.9375, "rewards/chosen": 1.251574993133545, "rewards/margins": 9.840919494628906, "rewards/rejected": -8.58934497833252, "step": 2725 }, { "epoch": 1.51, "learning_rate": 1.4732214799575354e-07, "logits/chosen": -5.993425369262695, "logits/rejected": -5.971353530883789, "logps/chosen": -217.13414001464844, "logps/rejected": -187.46730041503906, "loss": 0.0458, "rewards/accuracies": 0.9375, "rewards/chosen": 2.736628770828247, "rewards/margins": 9.866557121276855, "rewards/rejected": -7.129928112030029, "step": 2726 }, { "epoch": 1.51, "learning_rate": 1.4700352041975167e-07, "logits/chosen": -5.977334022521973, "logits/rejected": -5.95656681060791, "logps/chosen": -455.027587890625, "logps/rejected": -338.1572570800781, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": 5.7121453285217285, "rewards/margins": 15.992959022521973, "rewards/rejected": -10.280813217163086, "step": 2727 }, { "epoch": 1.51, "learning_rate": 1.4668517838767057e-07, "logits/chosen": -5.937817096710205, "logits/rejected": -5.958150863647461, "logps/chosen": -441.4105529785156, "logps/rejected": -563.2869873046875, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": 4.519432067871094, "rewards/margins": 16.614910125732422, "rewards/rejected": -12.095478057861328, "step": 2728 }, { "epoch": 1.52, "learning_rate": 1.4636712215702246e-07, "logits/chosen": -5.968850135803223, "logits/rejected": -6.066062927246094, "logps/chosen": -429.15771484375, "logps/rejected": -370.6822509765625, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 3.577993869781494, "rewards/margins": 11.220863342285156, "rewards/rejected": -7.64286994934082, "step": 2729 }, { "epoch": 1.52, "learning_rate": 1.46049351985087e-07, "logits/chosen": -6.041812896728516, "logits/rejected": -6.001117706298828, "logps/chosen": -278.9946594238281, "logps/rejected": -167.41046142578125, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": 5.659119606018066, "rewards/margins": 12.527056694030762, "rewards/rejected": -6.867937088012695, "step": 2730 }, { "epoch": 1.52, "learning_rate": 1.457318681289134e-07, "logits/chosen": -6.02103328704834, "logits/rejected": -5.985163688659668, "logps/chosen": -276.3202819824219, "logps/rejected": -169.49185180664062, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": 5.266014099121094, "rewards/margins": 9.564388275146484, "rewards/rejected": -4.298374176025391, "step": 2731 }, { "epoch": 1.52, "learning_rate": 1.454146708453186e-07, "logits/chosen": -6.103213310241699, "logits/rejected": -6.064296245574951, "logps/chosen": -338.5374450683594, "logps/rejected": -333.9747314453125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 5.114234924316406, "rewards/margins": 12.915889739990234, "rewards/rejected": -7.801654815673828, "step": 2732 }, { "epoch": 1.52, "learning_rate": 1.4509776039088818e-07, "logits/chosen": -6.057962417602539, "logits/rejected": -5.990216255187988, "logps/chosen": -202.306640625, "logps/rejected": -181.44322204589844, "loss": 0.0351, "rewards/accuracies": 0.875, "rewards/chosen": 2.4061999320983887, "rewards/margins": 10.758832931518555, "rewards/rejected": -8.352633476257324, "step": 2733 }, { "epoch": 1.52, "learning_rate": 1.4478113702197569e-07, "logits/chosen": -5.9256415367126465, "logits/rejected": -5.976197719573975, "logps/chosen": -412.39947509765625, "logps/rejected": -224.305419921875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 4.851347923278809, "rewards/margins": 11.763016700744629, "rewards/rejected": -6.911668300628662, "step": 2734 }, { "epoch": 1.52, "learning_rate": 1.4446480099470203e-07, "logits/chosen": -6.119576454162598, "logits/rejected": -5.925920009613037, "logps/chosen": -264.543212890625, "logps/rejected": -77.85153198242188, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": 3.5753068923950195, "rewards/margins": 10.485957145690918, "rewards/rejected": -6.91064977645874, "step": 2735 }, { "epoch": 1.52, "learning_rate": 1.4414875256495634e-07, "logits/chosen": -6.096478462219238, "logits/rejected": -6.011764049530029, "logps/chosen": -223.27105712890625, "logps/rejected": -216.85610961914062, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": 3.6513476371765137, "rewards/margins": 11.25513744354248, "rewards/rejected": -7.603789806365967, "step": 2736 }, { "epoch": 1.52, "learning_rate": 1.4383299198839415e-07, "logits/chosen": -6.028878688812256, "logits/rejected": -6.057061195373535, "logps/chosen": -229.69285583496094, "logps/rejected": -171.717529296875, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": 4.640629291534424, "rewards/margins": 12.88300895690918, "rewards/rejected": -8.242380142211914, "step": 2737 }, { "epoch": 1.52, "learning_rate": 1.435175195204394e-07, "logits/chosen": -5.90206241607666, "logits/rejected": -5.973958969116211, "logps/chosen": -258.28741455078125, "logps/rejected": -192.64585876464844, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/chosen": 6.424414157867432, "rewards/margins": 11.98512077331543, "rewards/rejected": -5.560707092285156, "step": 2738 }, { "epoch": 1.52, "learning_rate": 1.4320233541628195e-07, "logits/chosen": -6.1064863204956055, "logits/rejected": -6.156087875366211, "logps/chosen": -283.2687683105469, "logps/rejected": -236.23162841796875, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 6.496044158935547, "rewards/margins": 16.773696899414062, "rewards/rejected": -10.277652740478516, "step": 2739 }, { "epoch": 1.52, "learning_rate": 1.4288743993087865e-07, "logits/chosen": -6.031621932983398, "logits/rejected": -6.105675220489502, "logps/chosen": -275.4445495605469, "logps/rejected": -213.6908416748047, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 4.361866474151611, "rewards/margins": 13.006827354431152, "rewards/rejected": -8.644960403442383, "step": 2740 }, { "epoch": 1.52, "learning_rate": 1.4257283331895313e-07, "logits/chosen": -6.076555252075195, "logits/rejected": -5.99932336807251, "logps/chosen": -300.9097900390625, "logps/rejected": -225.17193603515625, "loss": 0.0161, "rewards/accuracies": 0.9375, "rewards/chosen": 4.079579830169678, "rewards/margins": 9.883621215820312, "rewards/rejected": -5.804041862487793, "step": 2741 }, { "epoch": 1.52, "learning_rate": 1.4225851583499514e-07, "logits/chosen": -5.947054862976074, "logits/rejected": -6.019962310791016, "logps/chosen": -210.63638305664062, "logps/rejected": -444.91070556640625, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": 3.4006166458129883, "rewards/margins": 12.128974914550781, "rewards/rejected": -8.72835922241211, "step": 2742 }, { "epoch": 1.52, "learning_rate": 1.4194448773326084e-07, "logits/chosen": -5.946873188018799, "logits/rejected": -5.953253269195557, "logps/chosen": -399.2258605957031, "logps/rejected": -453.0296325683594, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 4.200654029846191, "rewards/margins": 15.029913902282715, "rewards/rejected": -10.829259872436523, "step": 2743 }, { "epoch": 1.52, "learning_rate": 1.4163074926777186e-07, "logits/chosen": -5.915032386779785, "logits/rejected": -5.996628761291504, "logps/chosen": -189.2996368408203, "logps/rejected": -448.9058837890625, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 1.7082291841506958, "rewards/margins": 17.76930809020996, "rewards/rejected": -16.061080932617188, "step": 2744 }, { "epoch": 1.52, "learning_rate": 1.4131730069231547e-07, "logits/chosen": -6.021705150604248, "logits/rejected": -5.916335582733154, "logps/chosen": -249.93460083007812, "logps/rejected": -128.69161987304688, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": 3.7609148025512695, "rewards/margins": 11.015777587890625, "rewards/rejected": -7.2548627853393555, "step": 2745 }, { "epoch": 1.52, "learning_rate": 1.4100414226044526e-07, "logits/chosen": -5.93605375289917, "logits/rejected": -6.0107574462890625, "logps/chosen": -234.3870849609375, "logps/rejected": -234.8928680419922, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 7.034464359283447, "rewards/margins": 12.599281311035156, "rewards/rejected": -5.564816474914551, "step": 2746 }, { "epoch": 1.53, "learning_rate": 1.4069127422547928e-07, "logits/chosen": -6.059748649597168, "logits/rejected": -6.017024993896484, "logps/chosen": -292.39703369140625, "logps/rejected": -261.5130310058594, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 3.092867851257324, "rewards/margins": 9.849364280700684, "rewards/rejected": -6.756496429443359, "step": 2747 }, { "epoch": 1.53, "learning_rate": 1.4037869684050113e-07, "logits/chosen": -6.013854503631592, "logits/rejected": -6.020479202270508, "logps/chosen": -324.195068359375, "logps/rejected": -247.6628875732422, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": 6.244766712188721, "rewards/margins": 13.65498161315918, "rewards/rejected": -7.410214900970459, "step": 2748 }, { "epoch": 1.53, "learning_rate": 1.4006641035835887e-07, "logits/chosen": -6.045251846313477, "logits/rejected": -6.164672374725342, "logps/chosen": -250.27685546875, "logps/rejected": -281.697021484375, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 4.5798797607421875, "rewards/margins": 12.727866172790527, "rewards/rejected": -8.147985458374023, "step": 2749 }, { "epoch": 1.53, "learning_rate": 1.3975441503166574e-07, "logits/chosen": -6.207612991333008, "logits/rejected": -6.0329999923706055, "logps/chosen": -332.98486328125, "logps/rejected": -175.88059997558594, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 4.3434247970581055, "rewards/margins": 10.626733779907227, "rewards/rejected": -6.283308982849121, "step": 2750 }, { "epoch": 1.53, "learning_rate": 1.3944271111279937e-07, "logits/chosen": -5.95381498336792, "logits/rejected": -6.006143569946289, "logps/chosen": -268.89898681640625, "logps/rejected": -213.16632080078125, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 5.328825950622559, "rewards/margins": 11.012697219848633, "rewards/rejected": -5.683870792388916, "step": 2751 }, { "epoch": 1.53, "learning_rate": 1.391312988539012e-07, "logits/chosen": -6.069829940795898, "logits/rejected": -5.970266342163086, "logps/chosen": -448.1622009277344, "logps/rejected": -129.590576171875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 4.682988166809082, "rewards/margins": 10.577491760253906, "rewards/rejected": -5.894503593444824, "step": 2752 }, { "epoch": 1.53, "learning_rate": 1.3882017850687723e-07, "logits/chosen": -6.037511825561523, "logits/rejected": -6.022702693939209, "logps/chosen": -188.43582153320312, "logps/rejected": -130.246826171875, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": 3.2463202476501465, "rewards/margins": 9.978192329406738, "rewards/rejected": -6.731871128082275, "step": 2753 }, { "epoch": 1.53, "learning_rate": 1.3850935032339716e-07, "logits/chosen": -5.998416423797607, "logits/rejected": -5.966304779052734, "logps/chosen": -467.19818115234375, "logps/rejected": -286.58673095703125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 6.793298721313477, "rewards/margins": 13.644851684570312, "rewards/rejected": -6.851553440093994, "step": 2754 }, { "epoch": 1.53, "learning_rate": 1.3819881455489456e-07, "logits/chosen": -6.0796895027160645, "logits/rejected": -6.054487705230713, "logps/chosen": -226.21600341796875, "logps/rejected": -129.5921630859375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 3.779536008834839, "rewards/margins": 9.384248733520508, "rewards/rejected": -5.604712963104248, "step": 2755 }, { "epoch": 1.53, "learning_rate": 1.37888571452566e-07, "logits/chosen": -6.045642852783203, "logits/rejected": -6.078470230102539, "logps/chosen": -249.5653076171875, "logps/rejected": -219.66781616210938, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 4.450311660766602, "rewards/margins": 10.608003616333008, "rewards/rejected": -6.157691955566406, "step": 2756 }, { "epoch": 1.53, "learning_rate": 1.3757862126737184e-07, "logits/chosen": -6.027272701263428, "logits/rejected": -6.0814056396484375, "logps/chosen": -290.5831604003906, "logps/rejected": -277.333740234375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 5.626577854156494, "rewards/margins": 14.619644165039062, "rewards/rejected": -8.993066787719727, "step": 2757 }, { "epoch": 1.53, "learning_rate": 1.3726896425003493e-07, "logits/chosen": -5.970977306365967, "logits/rejected": -6.032231330871582, "logps/chosen": -236.6807098388672, "logps/rejected": -227.96974182128906, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": 4.39860725402832, "rewards/margins": 13.244943618774414, "rewards/rejected": -8.846335411071777, "step": 2758 }, { "epoch": 1.53, "learning_rate": 1.369596006510414e-07, "logits/chosen": -5.99475622177124, "logits/rejected": -6.089081764221191, "logps/chosen": -270.7714538574219, "logps/rejected": -312.5848693847656, "loss": 0.0318, "rewards/accuracies": 0.9375, "rewards/chosen": 4.603184223175049, "rewards/margins": 13.477392196655273, "rewards/rejected": -8.874207496643066, "step": 2759 }, { "epoch": 1.53, "learning_rate": 1.3665053072064013e-07, "logits/chosen": -6.02227783203125, "logits/rejected": -6.059125900268555, "logps/chosen": -355.25311279296875, "logps/rejected": -297.07415771484375, "loss": 0.0515, "rewards/accuracies": 0.9375, "rewards/chosen": 6.599355220794678, "rewards/margins": 15.310197830200195, "rewards/rejected": -8.710844039916992, "step": 2760 }, { "epoch": 1.53, "learning_rate": 1.363417547088419e-07, "logits/chosen": -5.955930709838867, "logits/rejected": -6.11051082611084, "logps/chosen": -155.2777862548828, "logps/rejected": -240.10018920898438, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 2.469320297241211, "rewards/margins": 10.098979949951172, "rewards/rejected": -7.629659652709961, "step": 2761 }, { "epoch": 1.53, "learning_rate": 1.3603327286542022e-07, "logits/chosen": -5.9304914474487305, "logits/rejected": -6.0305681228637695, "logps/chosen": -287.61468505859375, "logps/rejected": -263.94940185546875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": 3.710242748260498, "rewards/margins": 13.154425621032715, "rewards/rejected": -9.444183349609375, "step": 2762 }, { "epoch": 1.53, "learning_rate": 1.357250854399104e-07, "logits/chosen": -6.036404609680176, "logits/rejected": -6.077338695526123, "logps/chosen": -456.35101318359375, "logps/rejected": -394.90081787109375, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": 5.281140327453613, "rewards/margins": 13.542716979980469, "rewards/rejected": -8.261575698852539, "step": 2763 }, { "epoch": 1.53, "learning_rate": 1.3541719268160988e-07, "logits/chosen": -5.988699913024902, "logits/rejected": -5.969539642333984, "logps/chosen": -231.8837127685547, "logps/rejected": -151.84291076660156, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 5.644664764404297, "rewards/margins": 11.746066093444824, "rewards/rejected": -6.101401329040527, "step": 2764 }, { "epoch": 1.54, "learning_rate": 1.3510959483957745e-07, "logits/chosen": -6.012862682342529, "logits/rejected": -6.002038478851318, "logps/chosen": -324.34527587890625, "logps/rejected": -335.35943603515625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 6.035346031188965, "rewards/margins": 10.768843650817871, "rewards/rejected": -4.733497619628906, "step": 2765 }, { "epoch": 1.54, "learning_rate": 1.34802292162633e-07, "logits/chosen": -6.082261562347412, "logits/rejected": -6.024072170257568, "logps/chosen": -184.8409423828125, "logps/rejected": -195.43734741210938, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 3.0850141048431396, "rewards/margins": 12.52830696105957, "rewards/rejected": -9.443293571472168, "step": 2766 }, { "epoch": 1.54, "learning_rate": 1.344952848993588e-07, "logits/chosen": -6.125243663787842, "logits/rejected": -6.017767429351807, "logps/chosen": -240.71522521972656, "logps/rejected": -137.84999084472656, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": 5.505758285522461, "rewards/margins": 11.621942520141602, "rewards/rejected": -6.116184234619141, "step": 2767 }, { "epoch": 1.54, "learning_rate": 1.3418857329809686e-07, "logits/chosen": -6.024812698364258, "logits/rejected": -5.91227388381958, "logps/chosen": -272.911376953125, "logps/rejected": -165.77635192871094, "loss": 0.0297, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4981114864349365, "rewards/margins": 9.416009902954102, "rewards/rejected": -5.917897701263428, "step": 2768 }, { "epoch": 1.54, "learning_rate": 1.3388215760695098e-07, "logits/chosen": -6.021203994750977, "logits/rejected": -6.079339504241943, "logps/chosen": -205.21200561523438, "logps/rejected": -239.83489990234375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 3.504810333251953, "rewards/margins": 13.409347534179688, "rewards/rejected": -9.904536247253418, "step": 2769 }, { "epoch": 1.54, "learning_rate": 1.3357603807378492e-07, "logits/chosen": -6.062882423400879, "logits/rejected": -5.881381034851074, "logps/chosen": -396.4107971191406, "logps/rejected": -258.81927490234375, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": 5.9955644607543945, "rewards/margins": 13.092782974243164, "rewards/rejected": -7.0972185134887695, "step": 2770 }, { "epoch": 1.54, "learning_rate": 1.3327021494622327e-07, "logits/chosen": -6.132093906402588, "logits/rejected": -6.0301103591918945, "logps/chosen": -241.78448486328125, "logps/rejected": -157.74118041992188, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 3.770638942718506, "rewards/margins": 11.553764343261719, "rewards/rejected": -7.783125877380371, "step": 2771 }, { "epoch": 1.54, "learning_rate": 1.3296468847165093e-07, "logits/chosen": -6.010432243347168, "logits/rejected": -6.1243510246276855, "logps/chosen": -259.5086364746094, "logps/rejected": -230.49806213378906, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 4.715231895446777, "rewards/margins": 13.80246353149414, "rewards/rejected": -9.087230682373047, "step": 2772 }, { "epoch": 1.54, "learning_rate": 1.326594588972123e-07, "logits/chosen": -6.056521415710449, "logits/rejected": -6.044638156890869, "logps/chosen": -262.4285888671875, "logps/rejected": -287.13818359375, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": 3.997565269470215, "rewards/margins": 15.032721519470215, "rewards/rejected": -11.035157203674316, "step": 2773 }, { "epoch": 1.54, "learning_rate": 1.3235452646981222e-07, "logits/chosen": -5.975808143615723, "logits/rejected": -5.9725341796875, "logps/chosen": -283.40069580078125, "logps/rejected": -334.6389465332031, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 2.980783700942993, "rewards/margins": 11.991636276245117, "rewards/rejected": -9.010852813720703, "step": 2774 }, { "epoch": 1.54, "learning_rate": 1.320498914361148e-07, "logits/chosen": -5.905922889709473, "logits/rejected": -5.904779434204102, "logps/chosen": -367.72332763671875, "logps/rejected": -379.4211120605469, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 9.224478721618652, "rewards/margins": 16.179393768310547, "rewards/rejected": -6.954916000366211, "step": 2775 }, { "epoch": 1.54, "learning_rate": 1.317455540425439e-07, "logits/chosen": -6.031130790710449, "logits/rejected": -6.136335372924805, "logps/chosen": -204.1639404296875, "logps/rejected": -277.5942687988281, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 1.3445358276367188, "rewards/margins": 14.047225952148438, "rewards/rejected": -12.702689170837402, "step": 2776 }, { "epoch": 1.54, "learning_rate": 1.3144151453528208e-07, "logits/chosen": -6.045711517333984, "logits/rejected": -6.056891441345215, "logps/chosen": -197.36184692382812, "logps/rejected": -163.22708129882812, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 4.592886447906494, "rewards/margins": 10.576175689697266, "rewards/rejected": -5.98328971862793, "step": 2777 }, { "epoch": 1.54, "learning_rate": 1.311377731602716e-07, "logits/chosen": -5.995922565460205, "logits/rejected": -6.114121913909912, "logps/chosen": -217.07127380371094, "logps/rejected": -266.2978820800781, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": 2.7376270294189453, "rewards/margins": 12.900030136108398, "rewards/rejected": -10.162403106689453, "step": 2778 }, { "epoch": 1.54, "learning_rate": 1.3083433016321287e-07, "logits/chosen": -6.096219539642334, "logits/rejected": -6.047628879547119, "logps/chosen": -381.5577392578125, "logps/rejected": -232.4395751953125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 4.798701286315918, "rewards/margins": 12.023053169250488, "rewards/rejected": -7.2243523597717285, "step": 2779 }, { "epoch": 1.54, "learning_rate": 1.3053118578956552e-07, "logits/chosen": -6.0475969314575195, "logits/rejected": -6.006044387817383, "logps/chosen": -314.4101257324219, "logps/rejected": -231.78038024902344, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 6.765412330627441, "rewards/margins": 13.795330047607422, "rewards/rejected": -7.029919147491455, "step": 2780 }, { "epoch": 1.54, "learning_rate": 1.3022834028454744e-07, "logits/chosen": -6.012678623199463, "logits/rejected": -6.197514057159424, "logps/chosen": -214.91244506835938, "logps/rejected": -207.91183471679688, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": 2.382631301879883, "rewards/margins": 10.868974685668945, "rewards/rejected": -8.486344337463379, "step": 2781 }, { "epoch": 1.54, "learning_rate": 1.2992579389313447e-07, "logits/chosen": -5.956023216247559, "logits/rejected": -5.960333824157715, "logps/chosen": -165.83770751953125, "logps/rejected": -315.8829040527344, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 3.1089680194854736, "rewards/margins": 13.034099578857422, "rewards/rejected": -9.925130844116211, "step": 2782 }, { "epoch": 1.55, "learning_rate": 1.2962354686006083e-07, "logits/chosen": -5.98984432220459, "logits/rejected": -5.982302665710449, "logps/chosen": -218.40870666503906, "logps/rejected": -194.31649780273438, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 2.6288018226623535, "rewards/margins": 10.906102180480957, "rewards/rejected": -8.277300834655762, "step": 2783 }, { "epoch": 1.55, "learning_rate": 1.2932159942981858e-07, "logits/chosen": -6.031997203826904, "logits/rejected": -6.025669097900391, "logps/chosen": -253.53981018066406, "logps/rejected": -348.37335205078125, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": 3.9157698154449463, "rewards/margins": 12.235331535339355, "rewards/rejected": -8.319561004638672, "step": 2784 }, { "epoch": 1.55, "learning_rate": 1.2901995184665737e-07, "logits/chosen": -6.094098091125488, "logits/rejected": -6.080342769622803, "logps/chosen": -264.85504150390625, "logps/rejected": -158.12008666992188, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 5.370887756347656, "rewards/margins": 10.99645709991455, "rewards/rejected": -5.625570297241211, "step": 2785 }, { "epoch": 1.55, "learning_rate": 1.2871860435458416e-07, "logits/chosen": -5.9730329513549805, "logits/rejected": -6.099792003631592, "logps/chosen": -257.21929931640625, "logps/rejected": -324.6011962890625, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 1.9200817346572876, "rewards/margins": 12.699989318847656, "rewards/rejected": -10.779908180236816, "step": 2786 }, { "epoch": 1.55, "learning_rate": 1.2841755719736298e-07, "logits/chosen": -6.1736626625061035, "logits/rejected": -6.104909420013428, "logps/chosen": -391.67657470703125, "logps/rejected": -240.5519256591797, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 7.144718170166016, "rewards/margins": 12.588987350463867, "rewards/rejected": -5.44426965713501, "step": 2787 }, { "epoch": 1.55, "learning_rate": 1.2811681061851575e-07, "logits/chosen": -5.933032512664795, "logits/rejected": -5.973757266998291, "logps/chosen": -305.1463623046875, "logps/rejected": -151.46853637695312, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 6.328857421875, "rewards/margins": 11.621403694152832, "rewards/rejected": -5.292546272277832, "step": 2788 }, { "epoch": 1.55, "learning_rate": 1.278163648613203e-07, "logits/chosen": -6.05722713470459, "logits/rejected": -5.943060874938965, "logps/chosen": -292.36712646484375, "logps/rejected": -154.46884155273438, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 6.34999418258667, "rewards/margins": 12.758651733398438, "rewards/rejected": -6.408657073974609, "step": 2789 }, { "epoch": 1.55, "learning_rate": 1.275162201688118e-07, "logits/chosen": -5.992911338806152, "logits/rejected": -5.987405776977539, "logps/chosen": -247.32345581054688, "logps/rejected": -191.55075073242188, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 3.9656944274902344, "rewards/margins": 11.600057601928711, "rewards/rejected": -7.634364128112793, "step": 2790 }, { "epoch": 1.55, "learning_rate": 1.2721637678378132e-07, "logits/chosen": -5.989652633666992, "logits/rejected": -6.021525859832764, "logps/chosen": -302.0589904785156, "logps/rejected": -220.26634216308594, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 5.566849708557129, "rewards/margins": 12.943299293518066, "rewards/rejected": -7.376450061798096, "step": 2791 }, { "epoch": 1.55, "learning_rate": 1.269168349487766e-07, "logits/chosen": -6.13615083694458, "logits/rejected": -6.006818771362305, "logps/chosen": -386.24334716796875, "logps/rejected": -370.95794677734375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 4.4459638595581055, "rewards/margins": 11.645873069763184, "rewards/rejected": -7.199909210205078, "step": 2792 }, { "epoch": 1.55, "learning_rate": 1.2661759490610146e-07, "logits/chosen": -6.1174163818359375, "logits/rejected": -6.051069259643555, "logps/chosen": -276.9567565917969, "logps/rejected": -163.99298095703125, "loss": 0.0551, "rewards/accuracies": 0.9375, "rewards/chosen": 2.80507493019104, "rewards/margins": 9.547674179077148, "rewards/rejected": -6.7425994873046875, "step": 2793 }, { "epoch": 1.55, "learning_rate": 1.2631865689781517e-07, "logits/chosen": -6.018803596496582, "logits/rejected": -6.16185998916626, "logps/chosen": -380.5386962890625, "logps/rejected": -275.27154541015625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 5.695296287536621, "rewards/margins": 11.66894817352295, "rewards/rejected": -5.973651885986328, "step": 2794 }, { "epoch": 1.55, "learning_rate": 1.2602002116573323e-07, "logits/chosen": -5.959576606750488, "logits/rejected": -5.996778964996338, "logps/chosen": -219.55364990234375, "logps/rejected": -166.13088989257812, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 4.738890647888184, "rewards/margins": 9.976837158203125, "rewards/rejected": -5.237946510314941, "step": 2795 }, { "epoch": 1.55, "learning_rate": 1.2572168795142623e-07, "logits/chosen": -6.04942512512207, "logits/rejected": -6.02132511138916, "logps/chosen": -270.4757385253906, "logps/rejected": -152.97329711914062, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": 5.587876319885254, "rewards/margins": 11.959362030029297, "rewards/rejected": -6.371485710144043, "step": 2796 }, { "epoch": 1.55, "learning_rate": 1.2542365749622046e-07, "logits/chosen": -6.032748699188232, "logits/rejected": -6.040210723876953, "logps/chosen": -279.1451416015625, "logps/rejected": -218.99957275390625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 2.900317668914795, "rewards/margins": 11.961423873901367, "rewards/rejected": -9.06110668182373, "step": 2797 }, { "epoch": 1.55, "learning_rate": 1.2512593004119665e-07, "logits/chosen": -6.082256317138672, "logits/rejected": -6.16755485534668, "logps/chosen": -222.51803588867188, "logps/rejected": -196.4034881591797, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 2.5895261764526367, "rewards/margins": 9.950223922729492, "rewards/rejected": -7.3606977462768555, "step": 2798 }, { "epoch": 1.55, "learning_rate": 1.248285058271911e-07, "logits/chosen": -6.049485206604004, "logits/rejected": -6.017448425292969, "logps/chosen": -322.1624755859375, "logps/rejected": -210.3543701171875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": 4.276023864746094, "rewards/margins": 11.462790489196777, "rewards/rejected": -7.186766624450684, "step": 2799 }, { "epoch": 1.55, "learning_rate": 1.2453138509479438e-07, "logits/chosen": -6.070240020751953, "logits/rejected": -6.086127281188965, "logps/chosen": -209.15199279785156, "logps/rejected": -186.65933227539062, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 3.1869237422943115, "rewards/margins": 12.177387237548828, "rewards/rejected": -8.990463256835938, "step": 2800 }, { "epoch": 1.56, "learning_rate": 1.2423456808435167e-07, "logits/chosen": -5.858648777008057, "logits/rejected": -5.889249801635742, "logps/chosen": -420.12701416015625, "logps/rejected": -340.9334716796875, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": 7.499903678894043, "rewards/margins": 12.304803848266602, "rewards/rejected": -4.804901599884033, "step": 2801 }, { "epoch": 1.56, "learning_rate": 1.2393805503596276e-07, "logits/chosen": -5.919240951538086, "logits/rejected": -6.000970840454102, "logps/chosen": -232.34642028808594, "logps/rejected": -260.0671081542969, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.860440731048584, "rewards/margins": 15.516359329223633, "rewards/rejected": -11.65591812133789, "step": 2802 }, { "epoch": 1.56, "learning_rate": 1.236418461894811e-07, "logits/chosen": -6.067031383514404, "logits/rejected": -6.071226119995117, "logps/chosen": -231.01588439941406, "logps/rejected": -219.6621856689453, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": 4.417662620544434, "rewards/margins": 13.054545402526855, "rewards/rejected": -8.636882781982422, "step": 2803 }, { "epoch": 1.56, "learning_rate": 1.2334594178451424e-07, "logits/chosen": -6.066131591796875, "logits/rejected": -5.927060604095459, "logps/chosen": -247.66827392578125, "logps/rejected": -196.3426055908203, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 4.374057769775391, "rewards/margins": 11.119134902954102, "rewards/rejected": -6.745076656341553, "step": 2804 }, { "epoch": 1.56, "learning_rate": 1.2305034206042376e-07, "logits/chosen": -5.966875076293945, "logits/rejected": -6.060980319976807, "logps/chosen": -163.94589233398438, "logps/rejected": -258.5101013183594, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": 1.8532794713974, "rewards/margins": 12.137706756591797, "rewards/rejected": -10.28442668914795, "step": 2805 }, { "epoch": 1.56, "learning_rate": 1.227550472563242e-07, "logits/chosen": -6.195480823516846, "logits/rejected": -6.156513690948486, "logps/chosen": -253.54486083984375, "logps/rejected": -212.48143005371094, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 4.381162166595459, "rewards/margins": 11.087835311889648, "rewards/rejected": -6.706673622131348, "step": 2806 }, { "epoch": 1.56, "learning_rate": 1.2246005761108408e-07, "logits/chosen": -5.935011386871338, "logits/rejected": -5.930628776550293, "logps/chosen": -281.6213073730469, "logps/rejected": -271.01983642578125, "loss": 0.0605, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9554083347320557, "rewards/margins": 10.392202377319336, "rewards/rejected": -6.436793804168701, "step": 2807 }, { "epoch": 1.56, "learning_rate": 1.2216537336332434e-07, "logits/chosen": -5.991063594818115, "logits/rejected": -5.974117279052734, "logps/chosen": -420.39666748046875, "logps/rejected": -503.5919494628906, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 6.481162071228027, "rewards/margins": 17.92129898071289, "rewards/rejected": -11.44013500213623, "step": 2808 }, { "epoch": 1.56, "learning_rate": 1.218709947514198e-07, "logits/chosen": -6.0075225830078125, "logits/rejected": -6.0423150062561035, "logps/chosen": -225.23483276367188, "logps/rejected": -225.67864990234375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 4.510097026824951, "rewards/margins": 10.421993255615234, "rewards/rejected": -5.9118971824646, "step": 2809 }, { "epoch": 1.56, "learning_rate": 1.2157692201349716e-07, "logits/chosen": -6.002410888671875, "logits/rejected": -6.031530857086182, "logps/chosen": -207.69822692871094, "logps/rejected": -275.82806396484375, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": 2.3352065086364746, "rewards/margins": 12.145207405090332, "rewards/rejected": -9.810001373291016, "step": 2810 }, { "epoch": 1.56, "learning_rate": 1.2128315538743644e-07, "logits/chosen": -5.993069648742676, "logits/rejected": -6.002892017364502, "logps/chosen": -391.827880859375, "logps/rejected": -335.18731689453125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 4.800339221954346, "rewards/margins": 13.194023132324219, "rewards/rejected": -8.393684387207031, "step": 2811 }, { "epoch": 1.56, "learning_rate": 1.2098969511086926e-07, "logits/chosen": -6.063396453857422, "logits/rejected": -6.0609893798828125, "logps/chosen": -331.499755859375, "logps/rejected": -230.48216247558594, "loss": 0.036, "rewards/accuracies": 0.9375, "rewards/chosen": 5.227898597717285, "rewards/margins": 9.445962905883789, "rewards/rejected": -4.218064785003662, "step": 2812 }, { "epoch": 1.56, "learning_rate": 1.2069654142118013e-07, "logits/chosen": -6.0562744140625, "logits/rejected": -6.006141662597656, "logps/chosen": -275.0367736816406, "logps/rejected": -210.71627807617188, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 3.0532305240631104, "rewards/margins": 12.841581344604492, "rewards/rejected": -9.788352012634277, "step": 2813 }, { "epoch": 1.56, "learning_rate": 1.204036945555053e-07, "logits/chosen": -6.060159206390381, "logits/rejected": -5.991422176361084, "logps/chosen": -323.3311767578125, "logps/rejected": -307.7439880371094, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 4.1084771156311035, "rewards/margins": 15.83923053741455, "rewards/rejected": -11.730753898620605, "step": 2814 }, { "epoch": 1.56, "learning_rate": 1.2011115475073265e-07, "logits/chosen": -5.905133247375488, "logits/rejected": -5.949141502380371, "logps/chosen": -327.7174072265625, "logps/rejected": -232.52386474609375, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": 4.44539737701416, "rewards/margins": 10.94782543182373, "rewards/rejected": -6.50242805480957, "step": 2815 }, { "epoch": 1.56, "learning_rate": 1.1981892224350194e-07, "logits/chosen": -5.962993621826172, "logits/rejected": -5.846383094787598, "logps/chosen": -207.9647216796875, "logps/rejected": -175.2650604248047, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 3.1769309043884277, "rewards/margins": 10.694282531738281, "rewards/rejected": -7.517351150512695, "step": 2816 }, { "epoch": 1.56, "learning_rate": 1.1952699727020393e-07, "logits/chosen": -6.11765718460083, "logits/rejected": -5.975888252258301, "logps/chosen": -143.46658325195312, "logps/rejected": -149.87083435058594, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 2.305647850036621, "rewards/margins": 10.018367767333984, "rewards/rejected": -7.712718963623047, "step": 2817 }, { "epoch": 1.56, "learning_rate": 1.1923538006698154e-07, "logits/chosen": -6.015953063964844, "logits/rejected": -6.047368049621582, "logps/chosen": -360.1620178222656, "logps/rejected": -312.6199035644531, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 3.738555431365967, "rewards/margins": 14.402993202209473, "rewards/rejected": -10.664438247680664, "step": 2818 }, { "epoch": 1.57, "learning_rate": 1.1894407086972774e-07, "logits/chosen": -6.012429237365723, "logits/rejected": -5.93794059753418, "logps/chosen": -261.03662109375, "logps/rejected": -145.80325317382812, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 4.912535190582275, "rewards/margins": 11.101301193237305, "rewards/rejected": -6.188766002655029, "step": 2819 }, { "epoch": 1.57, "learning_rate": 1.1865306991408657e-07, "logits/chosen": -5.893239974975586, "logits/rejected": -5.918240547180176, "logps/chosen": -240.63380432128906, "logps/rejected": -224.077880859375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 3.9503440856933594, "rewards/margins": 12.152474403381348, "rewards/rejected": -8.202129364013672, "step": 2820 }, { "epoch": 1.57, "learning_rate": 1.1836237743545308e-07, "logits/chosen": -5.946480751037598, "logits/rejected": -5.991326332092285, "logps/chosen": -455.52349853515625, "logps/rejected": -285.02508544921875, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": 5.996417999267578, "rewards/margins": 12.484256744384766, "rewards/rejected": -6.4878387451171875, "step": 2821 }, { "epoch": 1.57, "learning_rate": 1.1807199366897258e-07, "logits/chosen": -5.997977256774902, "logits/rejected": -6.129364013671875, "logps/chosen": -289.28564453125, "logps/rejected": -296.075927734375, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 5.471940517425537, "rewards/margins": 13.22244644165039, "rewards/rejected": -7.750505447387695, "step": 2822 }, { "epoch": 1.57, "learning_rate": 1.1778191884954076e-07, "logits/chosen": -6.07027006149292, "logits/rejected": -6.12927770614624, "logps/chosen": -203.92501831054688, "logps/rejected": -270.1787109375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 3.569019079208374, "rewards/margins": 14.588720321655273, "rewards/rejected": -11.01970100402832, "step": 2823 }, { "epoch": 1.57, "learning_rate": 1.1749215321180312e-07, "logits/chosen": -6.071409225463867, "logits/rejected": -6.1526265144348145, "logps/chosen": -214.88595581054688, "logps/rejected": -252.93508911132812, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": 1.7732954025268555, "rewards/margins": 11.307409286499023, "rewards/rejected": -9.534114837646484, "step": 2824 }, { "epoch": 1.57, "learning_rate": 1.1720269699015529e-07, "logits/chosen": -6.119973182678223, "logits/rejected": -6.09299898147583, "logps/chosen": -301.036865234375, "logps/rejected": -313.77410888671875, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": 4.632352828979492, "rewards/margins": 15.483612060546875, "rewards/rejected": -10.8512601852417, "step": 2825 }, { "epoch": 1.57, "learning_rate": 1.1691355041874273e-07, "logits/chosen": -6.147007942199707, "logits/rejected": -6.066320419311523, "logps/chosen": -317.861328125, "logps/rejected": -377.66717529296875, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 5.198760509490967, "rewards/margins": 16.36918067932129, "rewards/rejected": -11.170419692993164, "step": 2826 }, { "epoch": 1.57, "learning_rate": 1.1662471373145999e-07, "logits/chosen": -5.983674049377441, "logits/rejected": -6.036198616027832, "logps/chosen": -234.08514404296875, "logps/rejected": -249.75155639648438, "loss": 0.065, "rewards/accuracies": 0.9375, "rewards/chosen": 3.761202335357666, "rewards/margins": 11.615625381469727, "rewards/rejected": -7.854422092437744, "step": 2827 }, { "epoch": 1.57, "learning_rate": 1.1633618716195154e-07, "logits/chosen": -5.956722259521484, "logits/rejected": -5.900917053222656, "logps/chosen": -322.78814697265625, "logps/rejected": -162.36962890625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 6.742478370666504, "rewards/margins": 9.875818252563477, "rewards/rejected": -3.1333396434783936, "step": 2828 }, { "epoch": 1.57, "learning_rate": 1.160479709436103e-07, "logits/chosen": -6.052881240844727, "logits/rejected": -6.00319242477417, "logps/chosen": -173.44155883789062, "logps/rejected": -148.0575714111328, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 3.061885356903076, "rewards/margins": 10.859334945678711, "rewards/rejected": -7.797450065612793, "step": 2829 }, { "epoch": 1.57, "learning_rate": 1.1576006530957865e-07, "logits/chosen": -6.0821332931518555, "logits/rejected": -5.951809883117676, "logps/chosen": -260.00811767578125, "logps/rejected": -165.48712158203125, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 3.9461898803710938, "rewards/margins": 9.689714431762695, "rewards/rejected": -5.743524551391602, "step": 2830 }, { "epoch": 1.57, "learning_rate": 1.1547247049274767e-07, "logits/chosen": -5.971907615661621, "logits/rejected": -6.024977207183838, "logps/chosen": -327.8743896484375, "logps/rejected": -260.45855712890625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 5.21850061416626, "rewards/margins": 14.254093170166016, "rewards/rejected": -9.035593032836914, "step": 2831 }, { "epoch": 1.57, "learning_rate": 1.15185186725757e-07, "logits/chosen": -6.133383750915527, "logits/rejected": -6.06072473526001, "logps/chosen": -263.5525207519531, "logps/rejected": -291.08660888671875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 2.618239402770996, "rewards/margins": 12.319585800170898, "rewards/rejected": -9.701345443725586, "step": 2832 }, { "epoch": 1.57, "learning_rate": 1.1489821424099438e-07, "logits/chosen": -5.9849090576171875, "logits/rejected": -6.011178970336914, "logps/chosen": -307.1939697265625, "logps/rejected": -194.25283813476562, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 3.04742431640625, "rewards/margins": 12.705891609191895, "rewards/rejected": -9.658468246459961, "step": 2833 }, { "epoch": 1.57, "learning_rate": 1.1461155327059607e-07, "logits/chosen": -6.018040657043457, "logits/rejected": -6.070941925048828, "logps/chosen": -360.3785705566406, "logps/rejected": -275.95123291015625, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": 5.338044166564941, "rewards/margins": 12.854649543762207, "rewards/rejected": -7.516604423522949, "step": 2834 }, { "epoch": 1.57, "learning_rate": 1.1432520404644641e-07, "logits/chosen": -5.910093307495117, "logits/rejected": -5.939589500427246, "logps/chosen": -179.50613403320312, "logps/rejected": -150.83152770996094, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 3.7841672897338867, "rewards/margins": 12.374728202819824, "rewards/rejected": -8.590559959411621, "step": 2835 }, { "epoch": 1.57, "learning_rate": 1.1403916680017711e-07, "logits/chosen": -5.983973979949951, "logits/rejected": -6.085618019104004, "logps/chosen": -233.5832977294922, "logps/rejected": -263.99896240234375, "loss": 0.0185, "rewards/accuracies": 0.9375, "rewards/chosen": 3.915897846221924, "rewards/margins": 13.452802658081055, "rewards/rejected": -9.536904335021973, "step": 2836 }, { "epoch": 1.58, "learning_rate": 1.1375344176316815e-07, "logits/chosen": -6.045503616333008, "logits/rejected": -6.096192359924316, "logps/chosen": -281.2466735839844, "logps/rejected": -185.76344299316406, "loss": 0.0479, "rewards/accuracies": 0.9375, "rewards/chosen": 4.870368957519531, "rewards/margins": 9.900390625, "rewards/rejected": -5.0300211906433105, "step": 2837 }, { "epoch": 1.58, "learning_rate": 1.1346802916654607e-07, "logits/chosen": -6.018929958343506, "logits/rejected": -5.908872127532959, "logps/chosen": -308.10174560546875, "logps/rejected": -160.54122924804688, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 6.436609268188477, "rewards/margins": 13.04667854309082, "rewards/rejected": -6.610068321228027, "step": 2838 }, { "epoch": 1.58, "learning_rate": 1.1318292924118584e-07, "logits/chosen": -6.036866188049316, "logits/rejected": -5.931656837463379, "logps/chosen": -241.36865234375, "logps/rejected": -191.71051025390625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 5.350590705871582, "rewards/margins": 12.635456085205078, "rewards/rejected": -7.2848639488220215, "step": 2839 }, { "epoch": 1.58, "learning_rate": 1.128981422177086e-07, "logits/chosen": -5.867807865142822, "logits/rejected": -5.929267883300781, "logps/chosen": -482.4255065917969, "logps/rejected": -241.0175323486328, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 7.905529022216797, "rewards/margins": 12.175821304321289, "rewards/rejected": -4.270291805267334, "step": 2840 }, { "epoch": 1.58, "learning_rate": 1.1261366832648255e-07, "logits/chosen": -6.020145416259766, "logits/rejected": -6.107805252075195, "logps/chosen": -191.68231201171875, "logps/rejected": -222.78350830078125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 2.3349385261535645, "rewards/margins": 10.612297058105469, "rewards/rejected": -8.277359008789062, "step": 2841 }, { "epoch": 1.58, "learning_rate": 1.1232950779762284e-07, "logits/chosen": -6.199501037597656, "logits/rejected": -6.073214054107666, "logps/chosen": -323.1374206542969, "logps/rejected": -136.99761962890625, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 5.3144307136535645, "rewards/margins": 14.145458221435547, "rewards/rejected": -8.83102798461914, "step": 2842 }, { "epoch": 1.58, "learning_rate": 1.1204566086099099e-07, "logits/chosen": -6.039080619812012, "logits/rejected": -6.00943660736084, "logps/chosen": -338.1979064941406, "logps/rejected": -244.34854125976562, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": 5.394972801208496, "rewards/margins": 12.913909912109375, "rewards/rejected": -7.518937110900879, "step": 2843 }, { "epoch": 1.58, "learning_rate": 1.1176212774619497e-07, "logits/chosen": -5.991674423217773, "logits/rejected": -6.014147758483887, "logps/chosen": -243.31800842285156, "logps/rejected": -285.04827880859375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 3.7412471771240234, "rewards/margins": 12.65972900390625, "rewards/rejected": -8.918481826782227, "step": 2844 }, { "epoch": 1.58, "learning_rate": 1.1147890868258853e-07, "logits/chosen": -5.822721004486084, "logits/rejected": -6.060988426208496, "logps/chosen": -307.89227294921875, "logps/rejected": -332.9800720214844, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 4.189206600189209, "rewards/margins": 14.039691925048828, "rewards/rejected": -9.850485801696777, "step": 2845 }, { "epoch": 1.58, "learning_rate": 1.1119600389927181e-07, "logits/chosen": -5.958341598510742, "logits/rejected": -6.131561279296875, "logps/chosen": -226.1390838623047, "logps/rejected": -261.664794921875, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": 4.086145877838135, "rewards/margins": 11.098764419555664, "rewards/rejected": -7.012618541717529, "step": 2846 }, { "epoch": 1.58, "learning_rate": 1.1091341362509066e-07, "logits/chosen": -6.052499294281006, "logits/rejected": -6.000421524047852, "logps/chosen": -241.6478271484375, "logps/rejected": -249.72288513183594, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 5.659179210662842, "rewards/margins": 13.672660827636719, "rewards/rejected": -8.013481140136719, "step": 2847 }, { "epoch": 1.58, "learning_rate": 1.1063113808863611e-07, "logits/chosen": -5.949954032897949, "logits/rejected": -6.071805477142334, "logps/chosen": -356.9288330078125, "logps/rejected": -229.46170043945312, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 5.401878356933594, "rewards/margins": 12.938848495483398, "rewards/rejected": -7.536970615386963, "step": 2848 }, { "epoch": 1.58, "learning_rate": 1.1034917751824523e-07, "logits/chosen": -5.970493316650391, "logits/rejected": -6.118124961853027, "logps/chosen": -283.5058898925781, "logps/rejected": -293.4771423339844, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": 4.337657451629639, "rewards/margins": 14.122971534729004, "rewards/rejected": -9.785313606262207, "step": 2849 }, { "epoch": 1.58, "learning_rate": 1.1006753214199965e-07, "logits/chosen": -6.011882305145264, "logits/rejected": -6.071715354919434, "logps/chosen": -235.78164672851562, "logps/rejected": -210.8234100341797, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": 4.565822601318359, "rewards/margins": 9.527935028076172, "rewards/rejected": -4.962112903594971, "step": 2850 }, { "epoch": 1.58, "learning_rate": 1.0978620218772655e-07, "logits/chosen": -6.009098529815674, "logits/rejected": -6.015998840332031, "logps/chosen": -211.0067901611328, "logps/rejected": -203.1649932861328, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 3.299102544784546, "rewards/margins": 10.435234069824219, "rewards/rejected": -7.136131763458252, "step": 2851 }, { "epoch": 1.58, "learning_rate": 1.0950518788299773e-07, "logits/chosen": -6.0740461349487305, "logits/rejected": -6.105055809020996, "logps/chosen": -173.15289306640625, "logps/rejected": -151.5443878173828, "loss": 0.0777, "rewards/accuracies": 0.9375, "rewards/chosen": 2.273479461669922, "rewards/margins": 12.016761779785156, "rewards/rejected": -9.743282318115234, "step": 2852 }, { "epoch": 1.58, "learning_rate": 1.092244894551298e-07, "logits/chosen": -6.132009029388428, "logits/rejected": -6.052165508270264, "logps/chosen": -242.86923217773438, "logps/rejected": -189.89808654785156, "loss": 0.012, "rewards/accuracies": 0.9375, "rewards/chosen": 5.948429584503174, "rewards/margins": 12.483292579650879, "rewards/rejected": -6.534862995147705, "step": 2853 }, { "epoch": 1.58, "learning_rate": 1.0894410713118352e-07, "logits/chosen": -6.096768379211426, "logits/rejected": -6.126471519470215, "logps/chosen": -247.40621948242188, "logps/rejected": -279.27362060546875, "loss": 0.0276, "rewards/accuracies": 0.9375, "rewards/chosen": 5.3820271492004395, "rewards/margins": 13.842123985290527, "rewards/rejected": -8.46009635925293, "step": 2854 }, { "epoch": 1.59, "learning_rate": 1.0866404113796435e-07, "logits/chosen": -6.020496368408203, "logits/rejected": -5.9493889808654785, "logps/chosen": -290.1221008300781, "logps/rejected": -340.8052062988281, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 4.188403606414795, "rewards/margins": 14.21217155456543, "rewards/rejected": -10.02376651763916, "step": 2855 }, { "epoch": 1.59, "learning_rate": 1.0838429170202174e-07, "logits/chosen": -6.034076690673828, "logits/rejected": -5.924398899078369, "logps/chosen": -250.4320526123047, "logps/rejected": -235.43560791015625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 5.333853721618652, "rewards/margins": 12.50230598449707, "rewards/rejected": -7.168452739715576, "step": 2856 }, { "epoch": 1.59, "learning_rate": 1.0810485904964883e-07, "logits/chosen": -6.132635116577148, "logits/rejected": -6.0944132804870605, "logps/chosen": -303.5135192871094, "logps/rejected": -273.779296875, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 3.748772382736206, "rewards/margins": 13.304435729980469, "rewards/rejected": -9.555663108825684, "step": 2857 }, { "epoch": 1.59, "learning_rate": 1.0782574340688294e-07, "logits/chosen": -5.950340270996094, "logits/rejected": -5.971930503845215, "logps/chosen": -257.97955322265625, "logps/rejected": -234.2775421142578, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 4.362268447875977, "rewards/margins": 12.005830764770508, "rewards/rejected": -7.643560886383057, "step": 2858 }, { "epoch": 1.59, "learning_rate": 1.0754694499950445e-07, "logits/chosen": -6.048643112182617, "logits/rejected": -5.971091270446777, "logps/chosen": -272.991943359375, "logps/rejected": -152.35496520996094, "loss": 0.0131, "rewards/accuracies": 0.9375, "rewards/chosen": 4.108638286590576, "rewards/margins": 11.476299285888672, "rewards/rejected": -7.367661476135254, "step": 2859 }, { "epoch": 1.59, "learning_rate": 1.0726846405303752e-07, "logits/chosen": -5.977331161499023, "logits/rejected": -5.972989082336426, "logps/chosen": -421.21234130859375, "logps/rejected": -381.41986083984375, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 6.183767795562744, "rewards/margins": 14.580241203308105, "rewards/rejected": -8.396472930908203, "step": 2860 }, { "epoch": 1.59, "learning_rate": 1.0699030079274957e-07, "logits/chosen": -5.975149631500244, "logits/rejected": -5.995989799499512, "logps/chosen": -175.6493682861328, "logps/rejected": -147.04367065429688, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 3.0555708408355713, "rewards/margins": 10.226007461547852, "rewards/rejected": -7.170435905456543, "step": 2861 }, { "epoch": 1.59, "learning_rate": 1.0671245544365055e-07, "logits/chosen": -5.953890800476074, "logits/rejected": -6.04473352432251, "logps/chosen": -224.08706665039062, "logps/rejected": -232.41448974609375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 4.155515193939209, "rewards/margins": 11.672567367553711, "rewards/rejected": -7.517051696777344, "step": 2862 }, { "epoch": 1.59, "learning_rate": 1.0643492823049371e-07, "logits/chosen": -6.029045104980469, "logits/rejected": -5.979645729064941, "logps/chosen": -189.28355407714844, "logps/rejected": -271.64898681640625, "loss": 0.0581, "rewards/accuracies": 0.9375, "rewards/chosen": 1.8227463960647583, "rewards/margins": 11.874757766723633, "rewards/rejected": -10.052011489868164, "step": 2863 }, { "epoch": 1.59, "learning_rate": 1.0615771937777474e-07, "logits/chosen": -6.096158504486084, "logits/rejected": -5.967397212982178, "logps/chosen": -200.74667358398438, "logps/rejected": -122.6671142578125, "loss": 0.0257, "rewards/accuracies": 0.9375, "rewards/chosen": 3.614602565765381, "rewards/margins": 10.754767417907715, "rewards/rejected": -7.140165328979492, "step": 2864 }, { "epoch": 1.59, "learning_rate": 1.0588082910973207e-07, "logits/chosen": -6.043371677398682, "logits/rejected": -5.964413166046143, "logps/chosen": -166.02207946777344, "logps/rejected": -212.4761962890625, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": 1.2952104806900024, "rewards/margins": 9.66843318939209, "rewards/rejected": -8.373222351074219, "step": 2865 }, { "epoch": 1.59, "learning_rate": 1.0560425765034602e-07, "logits/chosen": -5.983571529388428, "logits/rejected": -6.085182189941406, "logps/chosen": -247.96585083007812, "logps/rejected": -398.06756591796875, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 2.766695976257324, "rewards/margins": 14.5193510055542, "rewards/rejected": -11.752655029296875, "step": 2866 }, { "epoch": 1.59, "learning_rate": 1.0532800522333901e-07, "logits/chosen": -6.090530872344971, "logits/rejected": -5.968194961547852, "logps/chosen": -267.8844299316406, "logps/rejected": -232.0277099609375, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 1.4666869640350342, "rewards/margins": 10.360312461853027, "rewards/rejected": -8.893624305725098, "step": 2867 }, { "epoch": 1.59, "learning_rate": 1.0505207205217603e-07, "logits/chosen": -6.0134196281433105, "logits/rejected": -5.965149879455566, "logps/chosen": -232.42706298828125, "logps/rejected": -243.42434692382812, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 2.6859946250915527, "rewards/margins": 11.640007019042969, "rewards/rejected": -8.954011917114258, "step": 2868 }, { "epoch": 1.59, "learning_rate": 1.047764583600631e-07, "logits/chosen": -6.046103477478027, "logits/rejected": -6.085557460784912, "logps/chosen": -253.44952392578125, "logps/rejected": -399.78118896484375, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": 4.411923885345459, "rewards/margins": 15.483689308166504, "rewards/rejected": -11.07176399230957, "step": 2869 }, { "epoch": 1.59, "learning_rate": 1.0450116436994833e-07, "logits/chosen": -5.935895919799805, "logits/rejected": -5.948087692260742, "logps/chosen": -210.91363525390625, "logps/rejected": -257.1748962402344, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 2.795046806335449, "rewards/margins": 11.204952239990234, "rewards/rejected": -8.409904479980469, "step": 2870 }, { "epoch": 1.59, "learning_rate": 1.0422619030452062e-07, "logits/chosen": -5.9848761558532715, "logits/rejected": -6.090298652648926, "logps/chosen": -296.8497314453125, "logps/rejected": -278.2664489746094, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": 7.631110668182373, "rewards/margins": 14.000097274780273, "rewards/rejected": -6.368987083435059, "step": 2871 }, { "epoch": 1.59, "learning_rate": 1.0395153638621068e-07, "logits/chosen": -6.109851837158203, "logits/rejected": -6.090228080749512, "logps/chosen": -298.6154479980469, "logps/rejected": -184.47125244140625, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 7.276213645935059, "rewards/margins": 12.387221336364746, "rewards/rejected": -5.1110076904296875, "step": 2872 }, { "epoch": 1.6, "learning_rate": 1.0367720283719012e-07, "logits/chosen": -6.002288818359375, "logits/rejected": -5.991993427276611, "logps/chosen": -213.85260009765625, "logps/rejected": -204.4442901611328, "loss": 0.0543, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6645836234092712, "rewards/margins": 8.557046890258789, "rewards/rejected": -7.892462730407715, "step": 2873 }, { "epoch": 1.6, "learning_rate": 1.0340318987937097e-07, "logits/chosen": -5.957359313964844, "logits/rejected": -5.998506546020508, "logps/chosen": -230.24290466308594, "logps/rejected": -147.59420776367188, "loss": 0.048, "rewards/accuracies": 0.9375, "rewards/chosen": 6.505707263946533, "rewards/margins": 11.532544136047363, "rewards/rejected": -5.0268378257751465, "step": 2874 }, { "epoch": 1.6, "learning_rate": 1.0312949773440643e-07, "logits/chosen": -5.980722427368164, "logits/rejected": -6.013923168182373, "logps/chosen": -204.71913146972656, "logps/rejected": -266.907470703125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 2.4281129837036133, "rewards/margins": 14.269471168518066, "rewards/rejected": -11.841358184814453, "step": 2875 }, { "epoch": 1.6, "learning_rate": 1.0285612662369004e-07, "logits/chosen": -5.951308727264404, "logits/rejected": -6.002694606781006, "logps/chosen": -269.41387939453125, "logps/rejected": -257.1358337402344, "loss": 0.068, "rewards/accuracies": 1.0, "rewards/chosen": 4.650489330291748, "rewards/margins": 10.53127384185791, "rewards/rejected": -5.88078498840332, "step": 2876 }, { "epoch": 1.6, "learning_rate": 1.0258307676835565e-07, "logits/chosen": -6.194177627563477, "logits/rejected": -6.1285881996154785, "logps/chosen": -225.45343017578125, "logps/rejected": -208.91799926757812, "loss": 0.0793, "rewards/accuracies": 1.0, "rewards/chosen": 2.601200580596924, "rewards/margins": 10.434972763061523, "rewards/rejected": -7.8337721824646, "step": 2877 }, { "epoch": 1.6, "learning_rate": 1.0231034838927705e-07, "logits/chosen": -6.237893581390381, "logits/rejected": -5.970682144165039, "logps/chosen": -364.759033203125, "logps/rejected": -208.63954162597656, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": 6.6496381759643555, "rewards/margins": 13.329297065734863, "rewards/rejected": -6.679657936096191, "step": 2878 }, { "epoch": 1.6, "learning_rate": 1.0203794170706831e-07, "logits/chosen": -5.92809534072876, "logits/rejected": -5.914521217346191, "logps/chosen": -519.8268432617188, "logps/rejected": -290.9979553222656, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 7.262414932250977, "rewards/margins": 16.064525604248047, "rewards/rejected": -8.802111625671387, "step": 2879 }, { "epoch": 1.6, "learning_rate": 1.017658569420829e-07, "logits/chosen": -6.079270362854004, "logits/rejected": -5.9274067878723145, "logps/chosen": -266.0438232421875, "logps/rejected": -231.3452911376953, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 4.118644714355469, "rewards/margins": 12.079299926757812, "rewards/rejected": -7.960655689239502, "step": 2880 }, { "epoch": 1.6, "learning_rate": 1.0149409431441419e-07, "logits/chosen": -5.999963760375977, "logits/rejected": -6.035822868347168, "logps/chosen": -175.74945068359375, "logps/rejected": -260.6950988769531, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": 2.4020321369171143, "rewards/margins": 12.865407943725586, "rewards/rejected": -10.463376998901367, "step": 2881 }, { "epoch": 1.6, "learning_rate": 1.0122265404389502e-07, "logits/chosen": -6.038468360900879, "logits/rejected": -6.047767162322998, "logps/chosen": -250.70654296875, "logps/rejected": -216.63681030273438, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": 4.624185562133789, "rewards/margins": 13.935192108154297, "rewards/rejected": -9.311005592346191, "step": 2882 }, { "epoch": 1.6, "learning_rate": 1.0095153635009706e-07, "logits/chosen": -6.063244342803955, "logits/rejected": -5.980764865875244, "logps/chosen": -466.62353515625, "logps/rejected": -409.21148681640625, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 9.60054874420166, "rewards/margins": 20.873859405517578, "rewards/rejected": -11.273311614990234, "step": 2883 }, { "epoch": 1.6, "learning_rate": 1.0068074145233146e-07, "logits/chosen": -6.0852131843566895, "logits/rejected": -6.103041648864746, "logps/chosen": -715.1011352539062, "logps/rejected": -604.395263671875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 9.890740394592285, "rewards/margins": 18.375656127929688, "rewards/rejected": -8.484915733337402, "step": 2884 }, { "epoch": 1.6, "learning_rate": 1.004102695696481e-07, "logits/chosen": -5.972754001617432, "logits/rejected": -5.967830657958984, "logps/chosen": -212.57382202148438, "logps/rejected": -195.31625366210938, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": 2.3685548305511475, "rewards/margins": 11.324254989624023, "rewards/rejected": -8.955699920654297, "step": 2885 }, { "epoch": 1.6, "learning_rate": 1.0014012092083568e-07, "logits/chosen": -5.940658092498779, "logits/rejected": -5.965943813323975, "logps/chosen": -237.32342529296875, "logps/rejected": -188.13795471191406, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 3.3479409217834473, "rewards/margins": 10.489386558532715, "rewards/rejected": -7.141445159912109, "step": 2886 }, { "epoch": 1.6, "learning_rate": 9.987029572442119e-08, "logits/chosen": -5.953441619873047, "logits/rejected": -5.96394157409668, "logps/chosen": -200.29278564453125, "logps/rejected": -201.5696258544922, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 3.734294891357422, "rewards/margins": 11.542786598205566, "rewards/rejected": -7.8084917068481445, "step": 2887 }, { "epoch": 1.6, "learning_rate": 9.960079419866984e-08, "logits/chosen": -6.131807327270508, "logits/rejected": -5.953025817871094, "logps/chosen": -199.70477294921875, "logps/rejected": -103.61785888671875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": 2.5812158584594727, "rewards/margins": 10.51686954498291, "rewards/rejected": -7.9356536865234375, "step": 2888 }, { "epoch": 1.6, "learning_rate": 9.933161656158578e-08, "logits/chosen": -6.03707218170166, "logits/rejected": -6.084810733795166, "logps/chosen": -202.0910186767578, "logps/rejected": -236.81138610839844, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": 3.628312110900879, "rewards/margins": 9.914254188537598, "rewards/rejected": -6.285942077636719, "step": 2889 }, { "epoch": 1.6, "learning_rate": 9.906276303091038e-08, "logits/chosen": -6.078021049499512, "logits/rejected": -6.092504024505615, "logps/chosen": -305.5800476074219, "logps/rejected": -176.79107666015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 6.631860733032227, "rewards/margins": 11.486784934997559, "rewards/rejected": -4.854923725128174, "step": 2890 }, { "epoch": 1.6, "learning_rate": 9.879423382412328e-08, "logits/chosen": -6.019787788391113, "logits/rejected": -5.9941325187683105, "logps/chosen": -286.1612548828125, "logps/rejected": -218.59877014160156, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 5.296291828155518, "rewards/margins": 11.535430908203125, "rewards/rejected": -6.239139556884766, "step": 2891 }, { "epoch": 1.61, "learning_rate": 9.852602915844133e-08, "logits/chosen": -5.996232509613037, "logits/rejected": -5.920939922332764, "logps/chosen": -200.7182159423828, "logps/rejected": -353.56365966796875, "loss": 0.0388, "rewards/accuracies": 0.9375, "rewards/chosen": 2.829385280609131, "rewards/margins": 9.613852500915527, "rewards/rejected": -6.784467697143555, "step": 2892 }, { "epoch": 1.61, "learning_rate": 9.825814925081938e-08, "logits/chosen": -5.899144649505615, "logits/rejected": -5.889498233795166, "logps/chosen": -155.55255126953125, "logps/rejected": -154.0028076171875, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": 2.5211150646209717, "rewards/margins": 10.13492488861084, "rewards/rejected": -7.613809108734131, "step": 2893 }, { "epoch": 1.61, "learning_rate": 9.799059431794937e-08, "logits/chosen": -6.0393218994140625, "logits/rejected": -5.952813148498535, "logps/chosen": -295.95257568359375, "logps/rejected": -214.12631225585938, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": 4.999926567077637, "rewards/margins": 10.97579574584961, "rewards/rejected": -5.975868225097656, "step": 2894 }, { "epoch": 1.61, "learning_rate": 9.772336457626013e-08, "logits/chosen": -6.016574859619141, "logits/rejected": -6.033007621765137, "logps/chosen": -191.64358520507812, "logps/rejected": -249.1317138671875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 3.2732596397399902, "rewards/margins": 14.310201644897461, "rewards/rejected": -11.036942481994629, "step": 2895 }, { "epoch": 1.61, "learning_rate": 9.745646024191783e-08, "logits/chosen": -6.090215682983398, "logits/rejected": -6.023275375366211, "logps/chosen": -380.8063049316406, "logps/rejected": -159.98269653320312, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 6.0956220626831055, "rewards/margins": 12.841835975646973, "rewards/rejected": -6.746212959289551, "step": 2896 }, { "epoch": 1.61, "learning_rate": 9.718988153082486e-08, "logits/chosen": -5.995373249053955, "logits/rejected": -5.946491718292236, "logps/chosen": -241.64071655273438, "logps/rejected": -271.628662109375, "loss": 0.016, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8597172498703003, "rewards/margins": 10.020309448242188, "rewards/rejected": -9.160592079162598, "step": 2897 }, { "epoch": 1.61, "learning_rate": 9.692362865862114e-08, "logits/chosen": -6.0267720222473145, "logits/rejected": -5.928347110748291, "logps/chosen": -227.3172607421875, "logps/rejected": -173.4749755859375, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 1.9763822555541992, "rewards/margins": 10.2398681640625, "rewards/rejected": -8.263484954833984, "step": 2898 }, { "epoch": 1.61, "learning_rate": 9.665770184068195e-08, "logits/chosen": -5.860696315765381, "logits/rejected": -5.925963878631592, "logps/chosen": -513.478271484375, "logps/rejected": -382.1166687011719, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": 6.8619704246521, "rewards/margins": 16.503009796142578, "rewards/rejected": -9.64103889465332, "step": 2899 }, { "epoch": 1.61, "learning_rate": 9.639210129211967e-08, "logits/chosen": -6.016479015350342, "logits/rejected": -6.133016109466553, "logps/chosen": -256.322265625, "logps/rejected": -354.67828369140625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 3.3750877380371094, "rewards/margins": 14.575530052185059, "rewards/rejected": -11.200441360473633, "step": 2900 }, { "epoch": 1.61, "learning_rate": 9.612682722778204e-08, "logits/chosen": -5.990909576416016, "logits/rejected": -5.980179786682129, "logps/chosen": -231.3060302734375, "logps/rejected": -187.623291015625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 5.3068766593933105, "rewards/margins": 11.896161079406738, "rewards/rejected": -6.589284420013428, "step": 2901 }, { "epoch": 1.61, "learning_rate": 9.586187986225324e-08, "logits/chosen": -6.004758834838867, "logits/rejected": -6.073721408843994, "logps/chosen": -258.0481872558594, "logps/rejected": -217.9583740234375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 4.05662202835083, "rewards/margins": 10.211776733398438, "rewards/rejected": -6.155154228210449, "step": 2902 }, { "epoch": 1.61, "learning_rate": 9.559725940985314e-08, "logits/chosen": -5.908451080322266, "logits/rejected": -5.929376125335693, "logps/chosen": -432.432373046875, "logps/rejected": -416.39794921875, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": 5.323230743408203, "rewards/margins": 12.17775821685791, "rewards/rejected": -6.854527950286865, "step": 2903 }, { "epoch": 1.61, "learning_rate": 9.533296608463676e-08, "logits/chosen": -6.107001781463623, "logits/rejected": -6.143955230712891, "logps/chosen": -278.0469665527344, "logps/rejected": -228.4391632080078, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": 6.165131568908691, "rewards/margins": 12.86436653137207, "rewards/rejected": -6.699235439300537, "step": 2904 }, { "epoch": 1.61, "learning_rate": 9.506900010039493e-08, "logits/chosen": -5.969015121459961, "logits/rejected": -6.008816242218018, "logps/chosen": -230.358154296875, "logps/rejected": -188.34706115722656, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 1.260632038116455, "rewards/margins": 8.177372932434082, "rewards/rejected": -6.916741371154785, "step": 2905 }, { "epoch": 1.61, "learning_rate": 9.480536167065339e-08, "logits/chosen": -6.034803867340088, "logits/rejected": -6.030231475830078, "logps/chosen": -193.70433044433594, "logps/rejected": -174.205810546875, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": 3.1631360054016113, "rewards/margins": 10.277585983276367, "rewards/rejected": -7.1144490242004395, "step": 2906 }, { "epoch": 1.61, "learning_rate": 9.454205100867335e-08, "logits/chosen": -5.896932601928711, "logits/rejected": -5.9359540939331055, "logps/chosen": -258.6864929199219, "logps/rejected": -300.489990234375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 4.304810523986816, "rewards/margins": 13.304105758666992, "rewards/rejected": -8.999295234680176, "step": 2907 }, { "epoch": 1.61, "learning_rate": 9.427906832745037e-08, "logits/chosen": -6.06261682510376, "logits/rejected": -5.8548736572265625, "logps/chosen": -289.84814453125, "logps/rejected": -134.42355346679688, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 5.781238555908203, "rewards/margins": 12.956933975219727, "rewards/rejected": -7.17569637298584, "step": 2908 }, { "epoch": 1.61, "learning_rate": 9.401641383971476e-08, "logits/chosen": -5.882730484008789, "logits/rejected": -5.9062113761901855, "logps/chosen": -364.71160888671875, "logps/rejected": -379.3120422363281, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 3.837625026702881, "rewards/margins": 12.028554916381836, "rewards/rejected": -8.190929412841797, "step": 2909 }, { "epoch": 1.62, "learning_rate": 9.37540877579317e-08, "logits/chosen": -6.064323425292969, "logits/rejected": -6.069031715393066, "logps/chosen": -286.9578552246094, "logps/rejected": -271.60693359375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 6.821237087249756, "rewards/margins": 14.427139282226562, "rewards/rejected": -7.605902671813965, "step": 2910 }, { "epoch": 1.62, "learning_rate": 9.34920902943005e-08, "logits/chosen": -6.0113444328308105, "logits/rejected": -5.922946453094482, "logps/chosen": -392.79864501953125, "logps/rejected": -305.0773620605469, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 4.754177093505859, "rewards/margins": 10.685832977294922, "rewards/rejected": -5.931655406951904, "step": 2911 }, { "epoch": 1.62, "learning_rate": 9.323042166075479e-08, "logits/chosen": -5.8583173751831055, "logits/rejected": -5.9899396896362305, "logps/chosen": -390.85565185546875, "logps/rejected": -413.28228759765625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 5.767989635467529, "rewards/margins": 16.67424964904785, "rewards/rejected": -10.906261444091797, "step": 2912 }, { "epoch": 1.62, "learning_rate": 9.296908206896182e-08, "logits/chosen": -5.993100166320801, "logits/rejected": -6.100948810577393, "logps/chosen": -229.3717498779297, "logps/rejected": -286.35504150390625, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 2.214233875274658, "rewards/margins": 13.413921356201172, "rewards/rejected": -11.199687957763672, "step": 2913 }, { "epoch": 1.62, "learning_rate": 9.270807173032313e-08, "logits/chosen": -6.079043388366699, "logits/rejected": -6.0388078689575195, "logps/chosen": -277.1158752441406, "logps/rejected": -174.07241821289062, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 3.833918571472168, "rewards/margins": 10.790079116821289, "rewards/rejected": -6.956160545349121, "step": 2914 }, { "epoch": 1.62, "learning_rate": 9.244739085597376e-08, "logits/chosen": -6.006513595581055, "logits/rejected": -5.953994274139404, "logps/chosen": -218.85736083984375, "logps/rejected": -127.08988952636719, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 5.319230079650879, "rewards/margins": 12.188194274902344, "rewards/rejected": -6.868963718414307, "step": 2915 }, { "epoch": 1.62, "learning_rate": 9.218703965678204e-08, "logits/chosen": -5.972326755523682, "logits/rejected": -5.935267448425293, "logps/chosen": -349.0746154785156, "logps/rejected": -198.0733642578125, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": 4.951913833618164, "rewards/margins": 9.887985229492188, "rewards/rejected": -4.936071872711182, "step": 2916 }, { "epoch": 1.62, "learning_rate": 9.192701834334998e-08, "logits/chosen": -6.0389933586120605, "logits/rejected": -6.041510581970215, "logps/chosen": -265.16986083984375, "logps/rejected": -221.26914978027344, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 5.1850481033325195, "rewards/margins": 11.591822624206543, "rewards/rejected": -6.406774520874023, "step": 2917 }, { "epoch": 1.62, "learning_rate": 9.166732712601228e-08, "logits/chosen": -6.111318588256836, "logits/rejected": -6.039165496826172, "logps/chosen": -247.43466186523438, "logps/rejected": -130.62301635742188, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 3.224043130874634, "rewards/margins": 10.134026527404785, "rewards/rejected": -6.9099836349487305, "step": 2918 }, { "epoch": 1.62, "learning_rate": 9.140796621483726e-08, "logits/chosen": -6.029085159301758, "logits/rejected": -6.066253185272217, "logps/chosen": -210.36183166503906, "logps/rejected": -293.3975830078125, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": 1.5581636428833008, "rewards/margins": 14.217700958251953, "rewards/rejected": -12.659536361694336, "step": 2919 }, { "epoch": 1.62, "learning_rate": 9.114893581962552e-08, "logits/chosen": -6.021968841552734, "logits/rejected": -6.048561096191406, "logps/chosen": -262.9723815917969, "logps/rejected": -213.8948974609375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 4.528273582458496, "rewards/margins": 13.49497127532959, "rewards/rejected": -8.966696739196777, "step": 2920 }, { "epoch": 1.62, "learning_rate": 9.089023614991031e-08, "logits/chosen": -5.912638187408447, "logits/rejected": -5.881893157958984, "logps/chosen": -351.4481201171875, "logps/rejected": -274.0789489746094, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": 4.3594160079956055, "rewards/margins": 13.92251205444336, "rewards/rejected": -9.563096046447754, "step": 2921 }, { "epoch": 1.62, "learning_rate": 9.06318674149577e-08, "logits/chosen": -6.094287872314453, "logits/rejected": -6.050556182861328, "logps/chosen": -452.2512512207031, "logps/rejected": -366.6538391113281, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 5.2372894287109375, "rewards/margins": 13.324494361877441, "rewards/rejected": -8.087204933166504, "step": 2922 }, { "epoch": 1.62, "learning_rate": 9.037382982376579e-08, "logits/chosen": -5.966549396514893, "logits/rejected": -6.073575019836426, "logps/chosen": -223.7469940185547, "logps/rejected": -293.10650634765625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": 3.0035452842712402, "rewards/margins": 12.909674644470215, "rewards/rejected": -9.906129837036133, "step": 2923 }, { "epoch": 1.62, "learning_rate": 9.011612358506504e-08, "logits/chosen": -6.010828495025635, "logits/rejected": -5.963281631469727, "logps/chosen": -378.6438903808594, "logps/rejected": -356.18994140625, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 4.204070568084717, "rewards/margins": 14.435042381286621, "rewards/rejected": -10.230972290039062, "step": 2924 }, { "epoch": 1.62, "learning_rate": 8.985874890731748e-08, "logits/chosen": -6.081313133239746, "logits/rejected": -5.95942497253418, "logps/chosen": -285.7758483886719, "logps/rejected": -207.3057403564453, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 8.039533615112305, "rewards/margins": 13.30737018585205, "rewards/rejected": -5.267836570739746, "step": 2925 }, { "epoch": 1.62, "learning_rate": 8.96017059987173e-08, "logits/chosen": -5.902316093444824, "logits/rejected": -5.842967987060547, "logps/chosen": -414.54522705078125, "logps/rejected": -332.4378967285156, "loss": 0.0389, "rewards/accuracies": 0.9375, "rewards/chosen": 4.845070838928223, "rewards/margins": 11.754095077514648, "rewards/rejected": -6.909024238586426, "step": 2926 }, { "epoch": 1.62, "learning_rate": 8.934499506719034e-08, "logits/chosen": -6.028094291687012, "logits/rejected": -6.022563934326172, "logps/chosen": -272.8620910644531, "logps/rejected": -237.73828125, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": 4.934987545013428, "rewards/margins": 12.709230422973633, "rewards/rejected": -7.774243354797363, "step": 2927 }, { "epoch": 1.63, "learning_rate": 8.908861632039349e-08, "logits/chosen": -6.040456295013428, "logits/rejected": -5.969866752624512, "logps/chosen": -316.80889892578125, "logps/rejected": -271.41473388671875, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": 5.098078727722168, "rewards/margins": 13.412172317504883, "rewards/rejected": -8.314093589782715, "step": 2928 }, { "epoch": 1.63, "learning_rate": 8.883256996571548e-08, "logits/chosen": -5.994283676147461, "logits/rejected": -6.069708824157715, "logps/chosen": -279.0348815917969, "logps/rejected": -243.97061157226562, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 6.522339820861816, "rewards/margins": 14.740205764770508, "rewards/rejected": -8.217865943908691, "step": 2929 }, { "epoch": 1.63, "learning_rate": 8.857685621027566e-08, "logits/chosen": -6.029524803161621, "logits/rejected": -6.091716766357422, "logps/chosen": -319.9864196777344, "logps/rejected": -323.9927978515625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 4.650683403015137, "rewards/margins": 12.968233108520508, "rewards/rejected": -8.317549705505371, "step": 2930 }, { "epoch": 1.63, "learning_rate": 8.83214752609246e-08, "logits/chosen": -6.247509002685547, "logits/rejected": -6.072140216827393, "logps/chosen": -248.59286499023438, "logps/rejected": -203.95143127441406, "loss": 0.0396, "rewards/accuracies": 0.875, "rewards/chosen": 1.1314316987991333, "rewards/margins": 9.813167572021484, "rewards/rejected": -8.681735038757324, "step": 2931 }, { "epoch": 1.63, "learning_rate": 8.806642732424363e-08, "logits/chosen": -5.969196319580078, "logits/rejected": -6.148697853088379, "logps/chosen": -206.88430786132812, "logps/rejected": -222.32669067382812, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": 4.704859256744385, "rewards/margins": 12.137848854064941, "rewards/rejected": -7.432989597320557, "step": 2932 }, { "epoch": 1.63, "learning_rate": 8.781171260654485e-08, "logits/chosen": -6.021313667297363, "logits/rejected": -6.048718452453613, "logps/chosen": -285.6016845703125, "logps/rejected": -221.32321166992188, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": 2.102210283279419, "rewards/margins": 11.745828628540039, "rewards/rejected": -9.643617630004883, "step": 2933 }, { "epoch": 1.63, "learning_rate": 8.755733131387039e-08, "logits/chosen": -6.1305646896362305, "logits/rejected": -5.9084601402282715, "logps/chosen": -364.2196044921875, "logps/rejected": -139.00082397460938, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 6.998398780822754, "rewards/margins": 12.291993141174316, "rewards/rejected": -5.293593406677246, "step": 2934 }, { "epoch": 1.63, "learning_rate": 8.730328365199308e-08, "logits/chosen": -5.993167400360107, "logits/rejected": -6.06973123550415, "logps/chosen": -211.24295043945312, "logps/rejected": -288.8747253417969, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 2.31724214553833, "rewards/margins": 11.654389381408691, "rewards/rejected": -9.337148666381836, "step": 2935 }, { "epoch": 1.63, "learning_rate": 8.704956982641581e-08, "logits/chosen": -5.994955062866211, "logits/rejected": -5.995466709136963, "logps/chosen": -279.27154541015625, "logps/rejected": -171.94088745117188, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 4.431837558746338, "rewards/margins": 11.658744812011719, "rewards/rejected": -7.226906776428223, "step": 2936 }, { "epoch": 1.63, "learning_rate": 8.67961900423711e-08, "logits/chosen": -6.084140777587891, "logits/rejected": -5.969263076782227, "logps/chosen": -244.24798583984375, "logps/rejected": -196.49044799804688, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 5.311582565307617, "rewards/margins": 13.072846412658691, "rewards/rejected": -7.761263847351074, "step": 2937 }, { "epoch": 1.63, "learning_rate": 8.654314450482175e-08, "logits/chosen": -5.997596263885498, "logits/rejected": -5.990222454071045, "logps/chosen": -221.97715759277344, "logps/rejected": -161.49940490722656, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 4.716736793518066, "rewards/margins": 12.210960388183594, "rewards/rejected": -7.4942240715026855, "step": 2938 }, { "epoch": 1.63, "learning_rate": 8.629043341845954e-08, "logits/chosen": -5.97299337387085, "logits/rejected": -6.121030807495117, "logps/chosen": -219.4750213623047, "logps/rejected": -293.9043884277344, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 3.161426305770874, "rewards/margins": 13.731513023376465, "rewards/rejected": -10.570087432861328, "step": 2939 }, { "epoch": 1.63, "learning_rate": 8.603805698770667e-08, "logits/chosen": -5.96317195892334, "logits/rejected": -6.0058722496032715, "logps/chosen": -222.46585083007812, "logps/rejected": -237.89511108398438, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 5.414397239685059, "rewards/margins": 12.851239204406738, "rewards/rejected": -7.436842918395996, "step": 2940 }, { "epoch": 1.63, "learning_rate": 8.578601541671365e-08, "logits/chosen": -6.040308475494385, "logits/rejected": -6.027368545532227, "logps/chosen": -362.2896728515625, "logps/rejected": -152.655517578125, "loss": 0.0265, "rewards/accuracies": 0.9375, "rewards/chosen": 2.439361095428467, "rewards/margins": 9.50437068939209, "rewards/rejected": -7.065009117126465, "step": 2941 }, { "epoch": 1.63, "learning_rate": 8.553430890936053e-08, "logits/chosen": -6.017702102661133, "logits/rejected": -5.9754462242126465, "logps/chosen": -226.12510681152344, "logps/rejected": -272.8492126464844, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 4.372379302978516, "rewards/margins": 12.342960357666016, "rewards/rejected": -7.970582485198975, "step": 2942 }, { "epoch": 1.63, "learning_rate": 8.528293766925642e-08, "logits/chosen": -5.992337226867676, "logits/rejected": -6.0190653800964355, "logps/chosen": -281.664794921875, "logps/rejected": -197.84890747070312, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 7.590235710144043, "rewards/margins": 12.708642959594727, "rewards/rejected": -5.118407726287842, "step": 2943 }, { "epoch": 1.63, "learning_rate": 8.503190189973914e-08, "logits/chosen": -5.9332804679870605, "logits/rejected": -5.960633754730225, "logps/chosen": -188.72784423828125, "logps/rejected": -189.91281127929688, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 2.443134307861328, "rewards/margins": 11.613826751708984, "rewards/rejected": -9.170692443847656, "step": 2944 }, { "epoch": 1.63, "learning_rate": 8.478120180387521e-08, "logits/chosen": -6.027333736419678, "logits/rejected": -6.063182830810547, "logps/chosen": -277.7394104003906, "logps/rejected": -206.38912963867188, "loss": 0.0344, "rewards/accuracies": 0.9375, "rewards/chosen": 6.428039073944092, "rewards/margins": 14.373571395874023, "rewards/rejected": -7.945531845092773, "step": 2945 }, { "epoch": 1.64, "learning_rate": 8.453083758445928e-08, "logits/chosen": -5.998078346252441, "logits/rejected": -6.005947113037109, "logps/chosen": -152.04995727539062, "logps/rejected": -130.48764038085938, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 2.993314743041992, "rewards/margins": 10.270216941833496, "rewards/rejected": -7.276901721954346, "step": 2946 }, { "epoch": 1.64, "learning_rate": 8.428080944401478e-08, "logits/chosen": -6.106603622436523, "logits/rejected": -6.054840564727783, "logps/chosen": -351.24810791015625, "logps/rejected": -236.49530029296875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 7.602092742919922, "rewards/margins": 15.069611549377441, "rewards/rejected": -7.4675188064575195, "step": 2947 }, { "epoch": 1.64, "learning_rate": 8.403111758479304e-08, "logits/chosen": -5.921381950378418, "logits/rejected": -6.0504655838012695, "logps/chosen": -325.26422119140625, "logps/rejected": -193.59539794921875, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": 5.799956798553467, "rewards/margins": 10.76292610168457, "rewards/rejected": -4.962969779968262, "step": 2948 }, { "epoch": 1.64, "learning_rate": 8.378176220877325e-08, "logits/chosen": -6.173277854919434, "logits/rejected": -5.972439765930176, "logps/chosen": -324.8735046386719, "logps/rejected": -146.92141723632812, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 4.554228782653809, "rewards/margins": 12.47277545928955, "rewards/rejected": -7.918546676635742, "step": 2949 }, { "epoch": 1.64, "learning_rate": 8.353274351766281e-08, "logits/chosen": -5.886512279510498, "logits/rejected": -5.982074737548828, "logps/chosen": -242.90380859375, "logps/rejected": -354.74176025390625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 4.118388652801514, "rewards/margins": 13.727448463439941, "rewards/rejected": -9.60905933380127, "step": 2950 }, { "epoch": 1.64, "learning_rate": 8.32840617128962e-08, "logits/chosen": -5.940676689147949, "logits/rejected": -6.047344207763672, "logps/chosen": -276.30023193359375, "logps/rejected": -242.0906219482422, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": 3.488025188446045, "rewards/margins": 11.571619033813477, "rewards/rejected": -8.083593368530273, "step": 2951 }, { "epoch": 1.64, "learning_rate": 8.303571699563588e-08, "logits/chosen": -5.962515830993652, "logits/rejected": -6.043581008911133, "logps/chosen": -238.4265594482422, "logps/rejected": -234.749267578125, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": 3.4977195262908936, "rewards/margins": 10.936683654785156, "rewards/rejected": -7.438963413238525, "step": 2952 }, { "epoch": 1.64, "learning_rate": 8.278770956677139e-08, "logits/chosen": -6.123962879180908, "logits/rejected": -5.994239807128906, "logps/chosen": -232.96127319335938, "logps/rejected": -174.68898010253906, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": 5.154850959777832, "rewards/margins": 11.529001235961914, "rewards/rejected": -6.374150276184082, "step": 2953 }, { "epoch": 1.64, "learning_rate": 8.254003962691974e-08, "logits/chosen": -5.922451019287109, "logits/rejected": -6.063834190368652, "logps/chosen": -289.96990966796875, "logps/rejected": -294.06658935546875, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 4.573908805847168, "rewards/margins": 13.24582290649414, "rewards/rejected": -8.671914100646973, "step": 2954 }, { "epoch": 1.64, "learning_rate": 8.229270737642436e-08, "logits/chosen": -5.976837158203125, "logits/rejected": -6.003082275390625, "logps/chosen": -336.2562255859375, "logps/rejected": -249.36048889160156, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 6.316896915435791, "rewards/margins": 12.845727920532227, "rewards/rejected": -6.528830528259277, "step": 2955 }, { "epoch": 1.64, "learning_rate": 8.204571301535595e-08, "logits/chosen": -6.076760768890381, "logits/rejected": -6.113009452819824, "logps/chosen": -268.0446472167969, "logps/rejected": -148.84385681152344, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 4.511950969696045, "rewards/margins": 12.937302589416504, "rewards/rejected": -8.425352096557617, "step": 2956 }, { "epoch": 1.64, "learning_rate": 8.179905674351207e-08, "logits/chosen": -5.871298789978027, "logits/rejected": -5.904255390167236, "logps/chosen": -246.97659301757812, "logps/rejected": -135.1129913330078, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": 4.62296724319458, "rewards/margins": 9.581207275390625, "rewards/rejected": -4.958239555358887, "step": 2957 }, { "epoch": 1.64, "learning_rate": 8.155273876041613e-08, "logits/chosen": -6.02165412902832, "logits/rejected": -5.916053295135498, "logps/chosen": -382.861328125, "logps/rejected": -118.74171447753906, "loss": 0.1203, "rewards/accuracies": 1.0, "rewards/chosen": 3.632354736328125, "rewards/margins": 10.438494682312012, "rewards/rejected": -6.806139945983887, "step": 2958 }, { "epoch": 1.64, "learning_rate": 8.130675926531855e-08, "logits/chosen": -6.132005214691162, "logits/rejected": -6.0424652099609375, "logps/chosen": -278.0456848144531, "logps/rejected": -122.22396087646484, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 3.1380765438079834, "rewards/margins": 10.225927352905273, "rewards/rejected": -7.087851047515869, "step": 2959 }, { "epoch": 1.64, "learning_rate": 8.106111845719538e-08, "logits/chosen": -6.1140007972717285, "logits/rejected": -5.897777557373047, "logps/chosen": -292.2056579589844, "logps/rejected": -225.5740509033203, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": 4.508923530578613, "rewards/margins": 11.709056854248047, "rewards/rejected": -7.200133800506592, "step": 2960 }, { "epoch": 1.64, "learning_rate": 8.081581653474944e-08, "logits/chosen": -5.984936714172363, "logits/rejected": -5.922850608825684, "logps/chosen": -222.0291748046875, "logps/rejected": -173.46939086914062, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 3.546841621398926, "rewards/margins": 10.369361877441406, "rewards/rejected": -6.822519302368164, "step": 2961 }, { "epoch": 1.64, "learning_rate": 8.057085369640875e-08, "logits/chosen": -6.0225419998168945, "logits/rejected": -5.956106185913086, "logps/chosen": -277.28387451171875, "logps/rejected": -194.37359619140625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 2.5976967811584473, "rewards/margins": 11.977192878723145, "rewards/rejected": -9.379495620727539, "step": 2962 }, { "epoch": 1.64, "learning_rate": 8.032623014032708e-08, "logits/chosen": -6.020261764526367, "logits/rejected": -6.021141052246094, "logps/chosen": -208.98077392578125, "logps/rejected": -187.91781616210938, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 2.7376561164855957, "rewards/margins": 11.413257598876953, "rewards/rejected": -8.6756010055542, "step": 2963 }, { "epoch": 1.65, "learning_rate": 8.008194606438417e-08, "logits/chosen": -6.03889799118042, "logits/rejected": -6.146528720855713, "logps/chosen": -267.0921630859375, "logps/rejected": -276.9073486328125, "loss": 0.0424, "rewards/accuracies": 0.9375, "rewards/chosen": 2.998586893081665, "rewards/margins": 13.560686111450195, "rewards/rejected": -10.56209945678711, "step": 2964 }, { "epoch": 1.65, "learning_rate": 7.983800166618482e-08, "logits/chosen": -5.9498443603515625, "logits/rejected": -6.018560409545898, "logps/chosen": -461.17694091796875, "logps/rejected": -344.3772888183594, "loss": 0.0178, "rewards/accuracies": 0.9375, "rewards/chosen": 4.658782958984375, "rewards/margins": 11.748698234558105, "rewards/rejected": -7.0899152755737305, "step": 2965 }, { "epoch": 1.65, "learning_rate": 7.959439714305933e-08, "logits/chosen": -5.965007781982422, "logits/rejected": -6.028919219970703, "logps/chosen": -618.6470947265625, "logps/rejected": -466.2110290527344, "loss": 0.0738, "rewards/accuracies": 1.0, "rewards/chosen": 5.604326248168945, "rewards/margins": 14.094490051269531, "rewards/rejected": -8.490163803100586, "step": 2966 }, { "epoch": 1.65, "learning_rate": 7.935113269206273e-08, "logits/chosen": -5.966736316680908, "logits/rejected": -6.122391700744629, "logps/chosen": -233.38055419921875, "logps/rejected": -245.04849243164062, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 2.5819642543792725, "rewards/margins": 10.370790481567383, "rewards/rejected": -7.788825988769531, "step": 2967 }, { "epoch": 1.65, "learning_rate": 7.910820850997496e-08, "logits/chosen": -6.087993621826172, "logits/rejected": -6.09414005279541, "logps/chosen": -295.8456726074219, "logps/rejected": -185.35806274414062, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 4.514838218688965, "rewards/margins": 11.304511070251465, "rewards/rejected": -6.789672374725342, "step": 2968 }, { "epoch": 1.65, "learning_rate": 7.886562479330128e-08, "logits/chosen": -5.9673967361450195, "logits/rejected": -6.0268144607543945, "logps/chosen": -457.5639953613281, "logps/rejected": -275.22698974609375, "loss": 0.0538, "rewards/accuracies": 0.9375, "rewards/chosen": 4.339057445526123, "rewards/margins": 10.093500137329102, "rewards/rejected": -5.754443168640137, "step": 2969 }, { "epoch": 1.65, "learning_rate": 7.862338173827088e-08, "logits/chosen": -5.990646839141846, "logits/rejected": -6.053885459899902, "logps/chosen": -183.59864807128906, "logps/rejected": -250.17254638671875, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": 3.516220808029175, "rewards/margins": 12.6066255569458, "rewards/rejected": -9.090405464172363, "step": 2970 }, { "epoch": 1.65, "learning_rate": 7.838147954083779e-08, "logits/chosen": -6.009847164154053, "logits/rejected": -5.839357852935791, "logps/chosen": -198.24591064453125, "logps/rejected": -158.1627197265625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 2.0975842475891113, "rewards/margins": 9.943086624145508, "rewards/rejected": -7.845501899719238, "step": 2971 }, { "epoch": 1.65, "learning_rate": 7.813991839667993e-08, "logits/chosen": -5.973042964935303, "logits/rejected": -5.97108268737793, "logps/chosen": -247.25086975097656, "logps/rejected": -275.2332763671875, "loss": 0.0272, "rewards/accuracies": 0.9375, "rewards/chosen": 3.917753219604492, "rewards/margins": 13.19390869140625, "rewards/rejected": -9.276155471801758, "step": 2972 }, { "epoch": 1.65, "learning_rate": 7.789869850119979e-08, "logits/chosen": -6.088698387145996, "logits/rejected": -6.047184944152832, "logps/chosen": -195.01461791992188, "logps/rejected": -180.01315307617188, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 2.8126137256622314, "rewards/margins": 11.622303009033203, "rewards/rejected": -8.809688568115234, "step": 2973 }, { "epoch": 1.65, "learning_rate": 7.765782004952365e-08, "logits/chosen": -6.0620012283325195, "logits/rejected": -6.045625686645508, "logps/chosen": -376.06793212890625, "logps/rejected": -230.61776733398438, "loss": 0.1377, "rewards/accuracies": 1.0, "rewards/chosen": 4.922882080078125, "rewards/margins": 12.477124214172363, "rewards/rejected": -7.554242134094238, "step": 2974 }, { "epoch": 1.65, "learning_rate": 7.741728323650132e-08, "logits/chosen": -6.060927391052246, "logits/rejected": -6.087921142578125, "logps/chosen": -184.57730102539062, "logps/rejected": -224.26658630371094, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 1.8031035661697388, "rewards/margins": 11.624600410461426, "rewards/rejected": -9.821496963500977, "step": 2975 }, { "epoch": 1.65, "learning_rate": 7.71770882567066e-08, "logits/chosen": -6.034937381744385, "logits/rejected": -5.968766212463379, "logps/chosen": -233.4550018310547, "logps/rejected": -219.53945922851562, "loss": 0.0096, "rewards/accuracies": 0.9375, "rewards/chosen": 4.0873565673828125, "rewards/margins": 15.030385971069336, "rewards/rejected": -10.943029403686523, "step": 2976 }, { "epoch": 1.65, "learning_rate": 7.693723530443675e-08, "logits/chosen": -6.139464855194092, "logits/rejected": -6.024096965789795, "logps/chosen": -331.37969970703125, "logps/rejected": -292.0008239746094, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 5.1192827224731445, "rewards/margins": 14.674312591552734, "rewards/rejected": -9.555030822753906, "step": 2977 }, { "epoch": 1.65, "learning_rate": 7.66977245737122e-08, "logits/chosen": -6.1092963218688965, "logits/rejected": -5.950553894042969, "logps/chosen": -317.32781982421875, "logps/rejected": -170.9049072265625, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 4.288549423217773, "rewards/margins": 11.302752494812012, "rewards/rejected": -7.014203071594238, "step": 2978 }, { "epoch": 1.65, "learning_rate": 7.645855625827657e-08, "logits/chosen": -6.06371545791626, "logits/rejected": -6.001906394958496, "logps/chosen": -249.70123291015625, "logps/rejected": -184.95765686035156, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 5.509952545166016, "rewards/margins": 12.102925300598145, "rewards/rejected": -6.592972755432129, "step": 2979 }, { "epoch": 1.65, "learning_rate": 7.621973055159663e-08, "logits/chosen": -5.946242332458496, "logits/rejected": -6.051117897033691, "logps/chosen": -226.36532592773438, "logps/rejected": -301.41693115234375, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 2.9028546810150146, "rewards/margins": 12.500259399414062, "rewards/rejected": -9.597405433654785, "step": 2980 }, { "epoch": 1.65, "learning_rate": 7.598124764686181e-08, "logits/chosen": -5.928071022033691, "logits/rejected": -5.942156791687012, "logps/chosen": -275.28515625, "logps/rejected": -369.32965087890625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 4.849396228790283, "rewards/margins": 12.76239013671875, "rewards/rejected": -7.91299295425415, "step": 2981 }, { "epoch": 1.66, "learning_rate": 7.574310773698445e-08, "logits/chosen": -5.956836700439453, "logits/rejected": -6.120039939880371, "logps/chosen": -250.0181427001953, "logps/rejected": -399.76336669921875, "loss": 0.0361, "rewards/accuracies": 0.9375, "rewards/chosen": 4.3176469802856445, "rewards/margins": 12.7000150680542, "rewards/rejected": -8.382369041442871, "step": 2982 }, { "epoch": 1.66, "learning_rate": 7.550531101459934e-08, "logits/chosen": -6.091392517089844, "logits/rejected": -6.008818626403809, "logps/chosen": -288.50732421875, "logps/rejected": -149.64666748046875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 3.956158399581909, "rewards/margins": 10.424341201782227, "rewards/rejected": -6.4681830406188965, "step": 2983 }, { "epoch": 1.66, "learning_rate": 7.526785767206356e-08, "logits/chosen": -5.985771179199219, "logits/rejected": -6.057104110717773, "logps/chosen": -227.0063018798828, "logps/rejected": -225.80838012695312, "loss": 0.0885, "rewards/accuracies": 1.0, "rewards/chosen": 4.70982551574707, "rewards/margins": 13.214374542236328, "rewards/rejected": -8.504548072814941, "step": 2984 }, { "epoch": 1.66, "learning_rate": 7.503074790145664e-08, "logits/chosen": -6.103967666625977, "logits/rejected": -6.141610145568848, "logps/chosen": -272.1993103027344, "logps/rejected": -216.63137817382812, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 4.519289016723633, "rewards/margins": 14.893020629882812, "rewards/rejected": -10.37373161315918, "step": 2985 }, { "epoch": 1.66, "learning_rate": 7.479398189458003e-08, "logits/chosen": -6.076441764831543, "logits/rejected": -6.031317710876465, "logps/chosen": -274.5562744140625, "logps/rejected": -264.6766052246094, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": 4.352228164672852, "rewards/margins": 11.2984619140625, "rewards/rejected": -6.946233749389648, "step": 2986 }, { "epoch": 1.66, "learning_rate": 7.455755984295725e-08, "logits/chosen": -6.1140666007995605, "logits/rejected": -6.050215721130371, "logps/chosen": -226.884033203125, "logps/rejected": -200.97320556640625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 4.785608291625977, "rewards/margins": 11.36406135559082, "rewards/rejected": -6.578453540802002, "step": 2987 }, { "epoch": 1.66, "learning_rate": 7.432148193783344e-08, "logits/chosen": -6.043057441711426, "logits/rejected": -5.930812835693359, "logps/chosen": -212.24398803710938, "logps/rejected": -178.2705078125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 3.4530954360961914, "rewards/margins": 11.272159576416016, "rewards/rejected": -7.819064140319824, "step": 2988 }, { "epoch": 1.66, "learning_rate": 7.408574837017523e-08, "logits/chosen": -6.020721435546875, "logits/rejected": -5.967554569244385, "logps/chosen": -219.2356719970703, "logps/rejected": -194.71438598632812, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": 2.8175392150878906, "rewards/margins": 10.488863945007324, "rewards/rejected": -7.671325206756592, "step": 2989 }, { "epoch": 1.66, "learning_rate": 7.385035933067107e-08, "logits/chosen": -5.946898460388184, "logits/rejected": -5.850220203399658, "logps/chosen": -530.8070068359375, "logps/rejected": -154.96739196777344, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 6.831748962402344, "rewards/margins": 13.267330169677734, "rewards/rejected": -6.435580730438232, "step": 2990 }, { "epoch": 1.66, "learning_rate": 7.361531500973051e-08, "logits/chosen": -6.317861080169678, "logits/rejected": -6.01243782043457, "logps/chosen": -316.5677185058594, "logps/rejected": -164.58743286132812, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": 7.054232597351074, "rewards/margins": 15.165736198425293, "rewards/rejected": -8.111503601074219, "step": 2991 }, { "epoch": 1.66, "learning_rate": 7.338061559748432e-08, "logits/chosen": -6.084962844848633, "logits/rejected": -6.0395917892456055, "logps/chosen": -343.0085754394531, "logps/rejected": -199.05526733398438, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": 4.140514850616455, "rewards/margins": 11.84725284576416, "rewards/rejected": -7.706738471984863, "step": 2992 }, { "epoch": 1.66, "learning_rate": 7.314626128378409e-08, "logits/chosen": -6.062917232513428, "logits/rejected": -5.965209484100342, "logps/chosen": -271.41448974609375, "logps/rejected": -216.03775024414062, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 3.3239760398864746, "rewards/margins": 12.036521911621094, "rewards/rejected": -8.712545394897461, "step": 2993 }, { "epoch": 1.66, "learning_rate": 7.291225225820247e-08, "logits/chosen": -5.9640793800354, "logits/rejected": -5.9775896072387695, "logps/chosen": -243.913818359375, "logps/rejected": -212.46978759765625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 3.7419424057006836, "rewards/margins": 9.95113754272461, "rewards/rejected": -6.209195137023926, "step": 2994 }, { "epoch": 1.66, "learning_rate": 7.267858871003285e-08, "logits/chosen": -6.0448150634765625, "logits/rejected": -6.0934600830078125, "logps/chosen": -206.77581787109375, "logps/rejected": -288.0152893066406, "loss": 0.0802, "rewards/accuracies": 1.0, "rewards/chosen": 1.8937345743179321, "rewards/margins": 10.974512100219727, "rewards/rejected": -9.080777168273926, "step": 2995 }, { "epoch": 1.66, "learning_rate": 7.244527082828877e-08, "logits/chosen": -6.030818939208984, "logits/rejected": -5.927506923675537, "logps/chosen": -306.7991943359375, "logps/rejected": -156.54153442382812, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": 5.388582229614258, "rewards/margins": 11.450935363769531, "rewards/rejected": -6.062353134155273, "step": 2996 }, { "epoch": 1.66, "learning_rate": 7.221229880170465e-08, "logits/chosen": -6.051681041717529, "logits/rejected": -6.026858329772949, "logps/chosen": -394.0212097167969, "logps/rejected": -312.51959228515625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 5.1423563957214355, "rewards/margins": 11.382225036621094, "rewards/rejected": -6.239869117736816, "step": 2997 }, { "epoch": 1.66, "learning_rate": 7.197967281873457e-08, "logits/chosen": -6.079954147338867, "logits/rejected": -5.984462738037109, "logps/chosen": -276.3881530761719, "logps/rejected": -165.59078979492188, "loss": 0.0403, "rewards/accuracies": 0.9375, "rewards/chosen": 5.2524824142456055, "rewards/margins": 12.199918746948242, "rewards/rejected": -6.947436332702637, "step": 2998 }, { "epoch": 1.66, "learning_rate": 7.174739306755351e-08, "logits/chosen": -5.8623762130737305, "logits/rejected": -5.943058013916016, "logps/chosen": -257.76812744140625, "logps/rejected": -288.69281005859375, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": 3.8643875122070312, "rewards/margins": 14.459285736083984, "rewards/rejected": -10.594898223876953, "step": 2999 }, { "epoch": 1.67, "learning_rate": 7.15154597360555e-08, "logits/chosen": -5.975401401519775, "logits/rejected": -5.986094951629639, "logps/chosen": -203.5496368408203, "logps/rejected": -257.104248046875, "loss": 0.0256, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3474910259246826, "rewards/margins": 9.367696762084961, "rewards/rejected": -7.020205497741699, "step": 3000 }, { "epoch": 1.67, "learning_rate": 7.128387301185501e-08, "logits/chosen": -6.130771636962891, "logits/rejected": -6.0873637199401855, "logps/chosen": -294.6129455566406, "logps/rejected": -422.7804260253906, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": 5.817625522613525, "rewards/margins": 12.590174674987793, "rewards/rejected": -6.772549152374268, "step": 3001 }, { "epoch": 1.67, "learning_rate": 7.105263308228565e-08, "logits/chosen": -6.0045485496521, "logits/rejected": -6.1232075691223145, "logps/chosen": -230.7366180419922, "logps/rejected": -267.38360595703125, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": 3.5880346298217773, "rewards/margins": 13.62077522277832, "rewards/rejected": -10.032740592956543, "step": 3002 }, { "epoch": 1.67, "learning_rate": 7.082174013440079e-08, "logits/chosen": -6.041110515594482, "logits/rejected": -5.987691879272461, "logps/chosen": -152.3191375732422, "logps/rejected": -123.1510009765625, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": 2.929034948348999, "rewards/margins": 10.365106582641602, "rewards/rejected": -7.436071872711182, "step": 3003 }, { "epoch": 1.67, "learning_rate": 7.059119435497323e-08, "logits/chosen": -5.926520347595215, "logits/rejected": -6.052339553833008, "logps/chosen": -197.92242431640625, "logps/rejected": -312.7999572753906, "loss": 0.0802, "rewards/accuracies": 1.0, "rewards/chosen": 4.229375839233398, "rewards/margins": 12.540109634399414, "rewards/rejected": -8.310734748840332, "step": 3004 }, { "epoch": 1.67, "learning_rate": 7.036099593049449e-08, "logits/chosen": -6.042266845703125, "logits/rejected": -5.990746021270752, "logps/chosen": -268.2113952636719, "logps/rejected": -131.12413024902344, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 6.4112114906311035, "rewards/margins": 13.131622314453125, "rewards/rejected": -6.720410346984863, "step": 3005 }, { "epoch": 1.67, "learning_rate": 7.013114504717544e-08, "logits/chosen": -5.945170879364014, "logits/rejected": -5.902004241943359, "logps/chosen": -284.7344970703125, "logps/rejected": -211.23141479492188, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 4.504105567932129, "rewards/margins": 11.442632675170898, "rewards/rejected": -6.938527584075928, "step": 3006 }, { "epoch": 1.67, "learning_rate": 6.990164189094588e-08, "logits/chosen": -6.148385047912598, "logits/rejected": -6.0477824211120605, "logps/chosen": -330.35791015625, "logps/rejected": -279.1726379394531, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 6.714444160461426, "rewards/margins": 12.809586524963379, "rewards/rejected": -6.095141887664795, "step": 3007 }, { "epoch": 1.67, "learning_rate": 6.967248664745423e-08, "logits/chosen": -6.054754257202148, "logits/rejected": -5.845457553863525, "logps/chosen": -272.50421142578125, "logps/rejected": -143.7416534423828, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": 4.165502548217773, "rewards/margins": 11.789706230163574, "rewards/rejected": -7.624202728271484, "step": 3008 }, { "epoch": 1.67, "learning_rate": 6.944367950206737e-08, "logits/chosen": -6.198802947998047, "logits/rejected": -6.0392913818359375, "logps/chosen": -233.4945526123047, "logps/rejected": -138.75137329101562, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 3.8976809978485107, "rewards/margins": 10.982933044433594, "rewards/rejected": -7.08525276184082, "step": 3009 }, { "epoch": 1.67, "learning_rate": 6.921522063987056e-08, "logits/chosen": -5.965145587921143, "logits/rejected": -5.968708038330078, "logps/chosen": -274.68157958984375, "logps/rejected": -202.2471923828125, "loss": 0.1772, "rewards/accuracies": 1.0, "rewards/chosen": 5.1048736572265625, "rewards/margins": 13.315998077392578, "rewards/rejected": -8.211124420166016, "step": 3010 }, { "epoch": 1.67, "learning_rate": 6.898711024566761e-08, "logits/chosen": -6.027024745941162, "logits/rejected": -6.061106204986572, "logps/chosen": -269.6143798828125, "logps/rejected": -303.1522216796875, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 2.4389028549194336, "rewards/margins": 12.706840515136719, "rewards/rejected": -10.267936706542969, "step": 3011 }, { "epoch": 1.67, "learning_rate": 6.87593485039803e-08, "logits/chosen": -5.982591152191162, "logits/rejected": -6.026750564575195, "logps/chosen": -313.4852294921875, "logps/rejected": -183.26763916015625, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": 9.711404800415039, "rewards/margins": 15.584371566772461, "rewards/rejected": -5.872965335845947, "step": 3012 }, { "epoch": 1.67, "learning_rate": 6.85319355990484e-08, "logits/chosen": -6.074381351470947, "logits/rejected": -6.095249176025391, "logps/chosen": -202.0228271484375, "logps/rejected": -200.32754516601562, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 3.4564123153686523, "rewards/margins": 10.846410751342773, "rewards/rejected": -7.389998435974121, "step": 3013 }, { "epoch": 1.67, "learning_rate": 6.830487171482935e-08, "logits/chosen": -5.996294021606445, "logits/rejected": -5.9665207862854, "logps/chosen": -422.778564453125, "logps/rejected": -259.8580627441406, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 3.956577777862549, "rewards/margins": 12.276071548461914, "rewards/rejected": -8.319494247436523, "step": 3014 }, { "epoch": 1.67, "learning_rate": 6.807815703499842e-08, "logits/chosen": -5.922464847564697, "logits/rejected": -5.903497695922852, "logps/chosen": -380.79937744140625, "logps/rejected": -271.76947021484375, "loss": 0.0364, "rewards/accuracies": 0.9375, "rewards/chosen": 3.866090774536133, "rewards/margins": 11.966658592224121, "rewards/rejected": -8.100567817687988, "step": 3015 }, { "epoch": 1.67, "learning_rate": 6.785179174294847e-08, "logits/chosen": -5.956384658813477, "logits/rejected": -5.86934232711792, "logps/chosen": -368.1810302734375, "logps/rejected": -234.17645263671875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 3.569190502166748, "rewards/margins": 8.498283386230469, "rewards/rejected": -4.929092884063721, "step": 3016 }, { "epoch": 1.67, "learning_rate": 6.762577602178953e-08, "logits/chosen": -5.865481376647949, "logits/rejected": -5.892748832702637, "logps/chosen": -511.5052185058594, "logps/rejected": -387.5272216796875, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 5.14800500869751, "rewards/margins": 13.901423454284668, "rewards/rejected": -8.753418922424316, "step": 3017 }, { "epoch": 1.68, "learning_rate": 6.740011005434908e-08, "logits/chosen": -5.974188804626465, "logits/rejected": -6.0316853523254395, "logps/chosen": -246.1296844482422, "logps/rejected": -278.1377258300781, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 3.805687665939331, "rewards/margins": 14.01844596862793, "rewards/rejected": -10.21275806427002, "step": 3018 }, { "epoch": 1.68, "learning_rate": 6.717479402317122e-08, "logits/chosen": -6.0197038650512695, "logits/rejected": -6.0923027992248535, "logps/chosen": -300.68695068359375, "logps/rejected": -388.6064758300781, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": 1.6471889019012451, "rewards/margins": 13.1235933303833, "rewards/rejected": -11.476404190063477, "step": 3019 }, { "epoch": 1.68, "learning_rate": 6.694982811051785e-08, "logits/chosen": -6.0121307373046875, "logits/rejected": -5.996685028076172, "logps/chosen": -252.66232299804688, "logps/rejected": -151.095458984375, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/chosen": 4.556631088256836, "rewards/margins": 11.990194320678711, "rewards/rejected": -7.433563232421875, "step": 3020 }, { "epoch": 1.68, "learning_rate": 6.672521249836688e-08, "logits/chosen": -6.173697471618652, "logits/rejected": -5.908968448638916, "logps/chosen": -277.23193359375, "logps/rejected": -167.98245239257812, "loss": 0.0952, "rewards/accuracies": 1.0, "rewards/chosen": 3.704350233078003, "rewards/margins": 11.218684196472168, "rewards/rejected": -7.514334201812744, "step": 3021 }, { "epoch": 1.68, "learning_rate": 6.650094736841294e-08, "logits/chosen": -6.018752098083496, "logits/rejected": -5.931331634521484, "logps/chosen": -236.1832275390625, "logps/rejected": -217.26173400878906, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 2.3064279556274414, "rewards/margins": 8.602682113647461, "rewards/rejected": -6.2962541580200195, "step": 3022 }, { "epoch": 1.68, "learning_rate": 6.627703290206743e-08, "logits/chosen": -6.045271396636963, "logits/rejected": -6.07540225982666, "logps/chosen": -225.41769409179688, "logps/rejected": -212.3480224609375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 6.130838871002197, "rewards/margins": 15.67434310913086, "rewards/rejected": -9.54350471496582, "step": 3023 }, { "epoch": 1.68, "learning_rate": 6.605346928045807e-08, "logits/chosen": -5.9794535636901855, "logits/rejected": -6.067192077636719, "logps/chosen": -220.98635864257812, "logps/rejected": -249.23500061035156, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": 2.7241625785827637, "rewards/margins": 11.449421882629395, "rewards/rejected": -8.725258827209473, "step": 3024 }, { "epoch": 1.68, "learning_rate": 6.583025668442866e-08, "logits/chosen": -5.930889129638672, "logits/rejected": -5.979698657989502, "logps/chosen": -198.60256958007812, "logps/rejected": -221.09213256835938, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": 4.5255584716796875, "rewards/margins": 13.04724407196045, "rewards/rejected": -8.521686553955078, "step": 3025 }, { "epoch": 1.68, "learning_rate": 6.560739529453896e-08, "logits/chosen": -6.083132743835449, "logits/rejected": -6.005427837371826, "logps/chosen": -205.82919311523438, "logps/rejected": -160.37152099609375, "loss": 0.0473, "rewards/accuracies": 0.9375, "rewards/chosen": 2.934002161026001, "rewards/margins": 7.8699140548706055, "rewards/rejected": -4.935912132263184, "step": 3026 }, { "epoch": 1.68, "learning_rate": 6.538488529106484e-08, "logits/chosen": -6.143794536590576, "logits/rejected": -5.885894775390625, "logps/chosen": -374.4681091308594, "logps/rejected": -115.41167449951172, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": 6.650513172149658, "rewards/margins": 12.43956470489502, "rewards/rejected": -5.789052486419678, "step": 3027 }, { "epoch": 1.68, "learning_rate": 6.516272685399793e-08, "logits/chosen": -6.0419793128967285, "logits/rejected": -6.1858649253845215, "logps/chosen": -327.9786376953125, "logps/rejected": -288.0475158691406, "loss": 0.0252, "rewards/accuracies": 0.9375, "rewards/chosen": 7.083033561706543, "rewards/margins": 15.519079208374023, "rewards/rejected": -8.436046600341797, "step": 3028 }, { "epoch": 1.68, "learning_rate": 6.49409201630451e-08, "logits/chosen": -5.904950141906738, "logits/rejected": -5.904261589050293, "logps/chosen": -206.9025115966797, "logps/rejected": -155.0464630126953, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 3.1905269622802734, "rewards/margins": 9.938837051391602, "rewards/rejected": -6.748310089111328, "step": 3029 }, { "epoch": 1.68, "learning_rate": 6.471946539762929e-08, "logits/chosen": -6.062444686889648, "logits/rejected": -6.036952495574951, "logps/chosen": -287.4741516113281, "logps/rejected": -248.60702514648438, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 2.959409713745117, "rewards/margins": 13.941844940185547, "rewards/rejected": -10.98243522644043, "step": 3030 }, { "epoch": 1.68, "learning_rate": 6.449836273688819e-08, "logits/chosen": -5.891733169555664, "logits/rejected": -5.973986625671387, "logps/chosen": -542.514892578125, "logps/rejected": -413.5769958496094, "loss": 0.0218, "rewards/accuracies": 0.9375, "rewards/chosen": 9.592389106750488, "rewards/margins": 14.880972862243652, "rewards/rejected": -5.288584232330322, "step": 3031 }, { "epoch": 1.68, "learning_rate": 6.427761235967494e-08, "logits/chosen": -5.957650184631348, "logits/rejected": -6.209136962890625, "logps/chosen": -258.5564270019531, "logps/rejected": -295.7547302246094, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": 4.998948097229004, "rewards/margins": 13.07931137084961, "rewards/rejected": -8.080362319946289, "step": 3032 }, { "epoch": 1.68, "learning_rate": 6.405721444455786e-08, "logits/chosen": -6.001251220703125, "logits/rejected": -6.127846717834473, "logps/chosen": -199.30703735351562, "logps/rejected": -284.99896240234375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 2.902221202850342, "rewards/margins": 11.915894508361816, "rewards/rejected": -9.013672828674316, "step": 3033 }, { "epoch": 1.68, "learning_rate": 6.383716916981991e-08, "logits/chosen": -6.023115634918213, "logits/rejected": -6.066342830657959, "logps/chosen": -367.10003662109375, "logps/rejected": -401.4236755371094, "loss": 0.0365, "rewards/accuracies": 0.9375, "rewards/chosen": 4.305096626281738, "rewards/margins": 14.468371391296387, "rewards/rejected": -10.163274765014648, "step": 3034 }, { "epoch": 1.68, "learning_rate": 6.361747671345879e-08, "logits/chosen": -6.025340557098389, "logits/rejected": -6.0617780685424805, "logps/chosen": -214.30047607421875, "logps/rejected": -157.52413940429688, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": 2.860448122024536, "rewards/margins": 11.320294380187988, "rewards/rejected": -8.459845542907715, "step": 3035 }, { "epoch": 1.69, "learning_rate": 6.339813725318694e-08, "logits/chosen": -6.036321640014648, "logits/rejected": -6.0459794998168945, "logps/chosen": -464.70831298828125, "logps/rejected": -351.3236083984375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 5.300577163696289, "rewards/margins": 11.082561492919922, "rewards/rejected": -5.781984806060791, "step": 3036 }, { "epoch": 1.69, "learning_rate": 6.317915096643133e-08, "logits/chosen": -6.083065509796143, "logits/rejected": -5.999674320220947, "logps/chosen": -398.23431396484375, "logps/rejected": -233.07371520996094, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": 7.112329483032227, "rewards/margins": 14.553945541381836, "rewards/rejected": -7.441617488861084, "step": 3037 }, { "epoch": 1.69, "learning_rate": 6.296051803033286e-08, "logits/chosen": -6.128612995147705, "logits/rejected": -6.081937789916992, "logps/chosen": -367.5958251953125, "logps/rejected": -284.4263000488281, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 4.374582290649414, "rewards/margins": 13.315807342529297, "rewards/rejected": -8.9412260055542, "step": 3038 }, { "epoch": 1.69, "learning_rate": 6.27422386217471e-08, "logits/chosen": -6.133150100708008, "logits/rejected": -5.9373979568481445, "logps/chosen": -279.5715637207031, "logps/rejected": -124.30081176757812, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 3.7191860675811768, "rewards/margins": 11.868891716003418, "rewards/rejected": -8.149704933166504, "step": 3039 }, { "epoch": 1.69, "learning_rate": 6.252431291724302e-08, "logits/chosen": -6.041529655456543, "logits/rejected": -6.002298831939697, "logps/chosen": -294.5882568359375, "logps/rejected": -187.84751892089844, "loss": 0.0234, "rewards/accuracies": 0.9375, "rewards/chosen": 5.449419021606445, "rewards/margins": 11.50232982635498, "rewards/rejected": -6.052911281585693, "step": 3040 }, { "epoch": 1.69, "learning_rate": 6.230674109310436e-08, "logits/chosen": -6.023902893066406, "logits/rejected": -5.967774868011475, "logps/chosen": -269.527587890625, "logps/rejected": -298.73388671875, "loss": 0.0284, "rewards/accuracies": 0.9375, "rewards/chosen": 5.44582462310791, "rewards/margins": 11.978130340576172, "rewards/rejected": -6.5323052406311035, "step": 3041 }, { "epoch": 1.69, "learning_rate": 6.208952332532786e-08, "logits/chosen": -6.1099958419799805, "logits/rejected": -5.980863571166992, "logps/chosen": -251.5201416015625, "logps/rejected": -168.96261596679688, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 5.4581475257873535, "rewards/margins": 12.812719345092773, "rewards/rejected": -7.354572772979736, "step": 3042 }, { "epoch": 1.69, "learning_rate": 6.187265978962392e-08, "logits/chosen": -5.940368175506592, "logits/rejected": -5.887044906616211, "logps/chosen": -255.87982177734375, "logps/rejected": -226.1016082763672, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": 1.9646968841552734, "rewards/margins": 8.888067245483398, "rewards/rejected": -6.923370361328125, "step": 3043 }, { "epoch": 1.69, "learning_rate": 6.165615066141672e-08, "logits/chosen": -6.060143947601318, "logits/rejected": -5.948959827423096, "logps/chosen": -278.85577392578125, "logps/rejected": -214.74771118164062, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 4.6725993156433105, "rewards/margins": 13.182647705078125, "rewards/rejected": -8.510048866271973, "step": 3044 }, { "epoch": 1.69, "learning_rate": 6.143999611584361e-08, "logits/chosen": -5.98731803894043, "logits/rejected": -5.942116737365723, "logps/chosen": -253.0955352783203, "logps/rejected": -158.31063842773438, "loss": 0.0234, "rewards/accuracies": 0.9375, "rewards/chosen": 4.823714256286621, "rewards/margins": 10.14722728729248, "rewards/rejected": -5.323512554168701, "step": 3045 }, { "epoch": 1.69, "learning_rate": 6.122419632775522e-08, "logits/chosen": -6.083251476287842, "logits/rejected": -6.064135551452637, "logps/chosen": -284.0517272949219, "logps/rejected": -211.8226318359375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 3.9853577613830566, "rewards/margins": 11.970808029174805, "rewards/rejected": -7.985450744628906, "step": 3046 }, { "epoch": 1.69, "learning_rate": 6.100875147171486e-08, "logits/chosen": -6.021329879760742, "logits/rejected": -6.0419206619262695, "logps/chosen": -366.8172607421875, "logps/rejected": -280.6394958496094, "loss": 0.0361, "rewards/accuracies": 1.0, "rewards/chosen": 5.0183258056640625, "rewards/margins": 12.592775344848633, "rewards/rejected": -7.5744500160217285, "step": 3047 }, { "epoch": 1.69, "learning_rate": 6.079366172199906e-08, "logits/chosen": -6.060211181640625, "logits/rejected": -5.913261413574219, "logps/chosen": -276.62237548828125, "logps/rejected": -148.68919372558594, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 5.0170392990112305, "rewards/margins": 12.53494644165039, "rewards/rejected": -7.517906665802002, "step": 3048 }, { "epoch": 1.69, "learning_rate": 6.057892725259717e-08, "logits/chosen": -6.096179008483887, "logits/rejected": -6.073298931121826, "logps/chosen": -271.1475524902344, "logps/rejected": -198.24794006347656, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 4.821780204772949, "rewards/margins": 12.696120262145996, "rewards/rejected": -7.874339580535889, "step": 3049 }, { "epoch": 1.69, "learning_rate": 6.036454823721077e-08, "logits/chosen": -6.114993572235107, "logits/rejected": -5.802801609039307, "logps/chosen": -257.44482421875, "logps/rejected": -182.91256713867188, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 3.8360648155212402, "rewards/margins": 11.396147727966309, "rewards/rejected": -7.56008243560791, "step": 3050 }, { "epoch": 1.69, "learning_rate": 6.015052484925425e-08, "logits/chosen": -6.085386753082275, "logits/rejected": -6.262566089630127, "logps/chosen": -263.9991760253906, "logps/rejected": -304.69677734375, "loss": 0.0616, "rewards/accuracies": 0.875, "rewards/chosen": 2.8225390911102295, "rewards/margins": 9.68652629852295, "rewards/rejected": -6.863986968994141, "step": 3051 }, { "epoch": 1.69, "learning_rate": 5.993685726185404e-08, "logits/chosen": -5.965727806091309, "logits/rejected": -5.950990200042725, "logps/chosen": -209.45419311523438, "logps/rejected": -173.3172607421875, "loss": 0.019, "rewards/accuracies": 0.9375, "rewards/chosen": 3.958679437637329, "rewards/margins": 10.028388977050781, "rewards/rejected": -6.069709300994873, "step": 3052 }, { "epoch": 1.69, "learning_rate": 5.972354564784904e-08, "logits/chosen": -6.066851615905762, "logits/rejected": -6.130951881408691, "logps/chosen": -366.39532470703125, "logps/rejected": -258.27984619140625, "loss": 0.1001, "rewards/accuracies": 1.0, "rewards/chosen": 8.103391647338867, "rewards/margins": 16.85601234436035, "rewards/rejected": -8.752620697021484, "step": 3053 }, { "epoch": 1.7, "learning_rate": 5.95105901797901e-08, "logits/chosen": -6.099660873413086, "logits/rejected": -6.155003070831299, "logps/chosen": -215.037109375, "logps/rejected": -192.50840759277344, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 2.9998950958251953, "rewards/margins": 9.934925079345703, "rewards/rejected": -6.935030937194824, "step": 3054 }, { "epoch": 1.7, "learning_rate": 5.929799102994004e-08, "logits/chosen": -5.95860481262207, "logits/rejected": -5.994747161865234, "logps/chosen": -216.52796936035156, "logps/rejected": -195.98284912109375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 1.9867360591888428, "rewards/margins": 9.176972389221191, "rewards/rejected": -7.1902360916137695, "step": 3055 }, { "epoch": 1.7, "learning_rate": 5.9085748370273084e-08, "logits/chosen": -6.013650894165039, "logits/rejected": -6.033389091491699, "logps/chosen": -269.3013916015625, "logps/rejected": -301.8494873046875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 5.248567581176758, "rewards/margins": 14.184253692626953, "rewards/rejected": -8.935686111450195, "step": 3056 }, { "epoch": 1.7, "learning_rate": 5.88738623724756e-08, "logits/chosen": -6.1171956062316895, "logits/rejected": -6.0132012367248535, "logps/chosen": -286.3804626464844, "logps/rejected": -203.3363037109375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 3.0496766567230225, "rewards/margins": 12.229127883911133, "rewards/rejected": -9.179450988769531, "step": 3057 }, { "epoch": 1.7, "learning_rate": 5.866233320794522e-08, "logits/chosen": -6.119907379150391, "logits/rejected": -5.9533491134643555, "logps/chosen": -262.01446533203125, "logps/rejected": -111.37353515625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 7.605354309082031, "rewards/margins": 12.75680923461914, "rewards/rejected": -5.151454925537109, "step": 3058 }, { "epoch": 1.7, "learning_rate": 5.8451161047790744e-08, "logits/chosen": -5.931660175323486, "logits/rejected": -6.004467010498047, "logps/chosen": -130.57315063476562, "logps/rejected": -285.8338317871094, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 2.0617270469665527, "rewards/margins": 11.363212585449219, "rewards/rejected": -9.301484107971191, "step": 3059 }, { "epoch": 1.7, "learning_rate": 5.824034606283257e-08, "logits/chosen": -5.894250869750977, "logits/rejected": -5.981788635253906, "logps/chosen": -225.70718383789062, "logps/rejected": -263.0787048339844, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 4.953303337097168, "rewards/margins": 14.402276039123535, "rewards/rejected": -9.448972702026367, "step": 3060 }, { "epoch": 1.7, "learning_rate": 5.802988842360168e-08, "logits/chosen": -6.021913528442383, "logits/rejected": -5.977307319641113, "logps/chosen": -358.71490478515625, "logps/rejected": -205.65431213378906, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 5.406247138977051, "rewards/margins": 12.267593383789062, "rewards/rejected": -6.861347198486328, "step": 3061 }, { "epoch": 1.7, "learning_rate": 5.781978830034062e-08, "logits/chosen": -5.926337718963623, "logits/rejected": -6.018370628356934, "logps/chosen": -254.4441680908203, "logps/rejected": -252.00144958496094, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 3.6371748447418213, "rewards/margins": 11.065170288085938, "rewards/rejected": -7.427995681762695, "step": 3062 }, { "epoch": 1.7, "learning_rate": 5.761004586300233e-08, "logits/chosen": -5.92737340927124, "logits/rejected": -5.984467029571533, "logps/chosen": -294.66436767578125, "logps/rejected": -413.1388244628906, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 3.266937732696533, "rewards/margins": 11.647798538208008, "rewards/rejected": -8.380861282348633, "step": 3063 }, { "epoch": 1.7, "learning_rate": 5.740066128125021e-08, "logits/chosen": -5.985557556152344, "logits/rejected": -6.089511394500732, "logps/chosen": -225.28692626953125, "logps/rejected": -201.77792358398438, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 5.537487983703613, "rewards/margins": 12.69571304321289, "rewards/rejected": -7.158225059509277, "step": 3064 }, { "epoch": 1.7, "learning_rate": 5.7191634724458706e-08, "logits/chosen": -6.083792686462402, "logits/rejected": -5.912182807922363, "logps/chosen": -245.93362426757812, "logps/rejected": -92.03793334960938, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 4.033785820007324, "rewards/margins": 9.532855987548828, "rewards/rejected": -5.499070644378662, "step": 3065 }, { "epoch": 1.7, "learning_rate": 5.6982966361712295e-08, "logits/chosen": -6.047337055206299, "logits/rejected": -6.12042236328125, "logps/chosen": -235.04901123046875, "logps/rejected": -206.56800842285156, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": 2.717271327972412, "rewards/margins": 11.819947242736816, "rewards/rejected": -9.102676391601562, "step": 3066 }, { "epoch": 1.7, "learning_rate": 5.677465636180595e-08, "logits/chosen": -5.970736980438232, "logits/rejected": -5.964545249938965, "logps/chosen": -311.35321044921875, "logps/rejected": -243.5032958984375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": 3.983321189880371, "rewards/margins": 11.087775230407715, "rewards/rejected": -7.104454040527344, "step": 3067 }, { "epoch": 1.7, "learning_rate": 5.656670489324444e-08, "logits/chosen": -5.9674391746521, "logits/rejected": -5.979220867156982, "logps/chosen": -319.4289855957031, "logps/rejected": -286.3504943847656, "loss": 0.1607, "rewards/accuracies": 0.9375, "rewards/chosen": 4.7643632888793945, "rewards/margins": 11.537187576293945, "rewards/rejected": -6.772824764251709, "step": 3068 }, { "epoch": 1.7, "learning_rate": 5.635911212424288e-08, "logits/chosen": -6.021859169006348, "logits/rejected": -5.842446327209473, "logps/chosen": -549.2466430664062, "logps/rejected": -248.90122985839844, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 4.596744537353516, "rewards/margins": 9.708568572998047, "rewards/rejected": -5.111823558807373, "step": 3069 }, { "epoch": 1.7, "learning_rate": 5.6151878222725824e-08, "logits/chosen": -6.122286796569824, "logits/rejected": -6.06038761138916, "logps/chosen": -249.00355529785156, "logps/rejected": -219.6654510498047, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 3.486818313598633, "rewards/margins": 10.567834854125977, "rewards/rejected": -7.08101749420166, "step": 3070 }, { "epoch": 1.7, "learning_rate": 5.594500335632785e-08, "logits/chosen": -6.077260494232178, "logits/rejected": -6.0467963218688965, "logps/chosen": -225.03578186035156, "logps/rejected": -256.99432373046875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.5296163558959961, "rewards/margins": 12.139841079711914, "rewards/rejected": -11.610224723815918, "step": 3071 }, { "epoch": 1.71, "learning_rate": 5.5738487692393145e-08, "logits/chosen": -5.988642692565918, "logits/rejected": -6.019105911254883, "logps/chosen": -281.6529541015625, "logps/rejected": -217.874267578125, "loss": 0.013, "rewards/accuracies": 0.9375, "rewards/chosen": 4.2709455490112305, "rewards/margins": 11.283760070800781, "rewards/rejected": -7.012814044952393, "step": 3072 }, { "epoch": 1.71, "learning_rate": 5.5532331397974917e-08, "logits/chosen": -6.153305530548096, "logits/rejected": -6.034122467041016, "logps/chosen": -307.29193115234375, "logps/rejected": -116.77099609375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 7.890339374542236, "rewards/margins": 13.960890769958496, "rewards/rejected": -6.070551872253418, "step": 3073 }, { "epoch": 1.71, "learning_rate": 5.532653463983616e-08, "logits/chosen": -5.94186544418335, "logits/rejected": -6.022814750671387, "logps/chosen": -174.87660217285156, "logps/rejected": -195.2638397216797, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": 2.2372498512268066, "rewards/margins": 10.976037979125977, "rewards/rejected": -8.738788604736328, "step": 3074 }, { "epoch": 1.71, "learning_rate": 5.512109758444877e-08, "logits/chosen": -6.167531490325928, "logits/rejected": -5.984500885009766, "logps/chosen": -262.67303466796875, "logps/rejected": -201.2362518310547, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": 5.4659271240234375, "rewards/margins": 11.708921432495117, "rewards/rejected": -6.24299430847168, "step": 3075 }, { "epoch": 1.71, "learning_rate": 5.491602039799387e-08, "logits/chosen": -6.023509979248047, "logits/rejected": -5.979726314544678, "logps/chosen": -286.8944091796875, "logps/rejected": -209.21612548828125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 7.041693687438965, "rewards/margins": 12.078056335449219, "rewards/rejected": -5.036363124847412, "step": 3076 }, { "epoch": 1.71, "learning_rate": 5.471130324636114e-08, "logits/chosen": -6.097691059112549, "logits/rejected": -6.083992958068848, "logps/chosen": -239.73838806152344, "logps/rejected": -233.25149536132812, "loss": 0.0165, "rewards/accuracies": 0.9375, "rewards/chosen": 2.188770294189453, "rewards/margins": 10.758190155029297, "rewards/rejected": -8.569419860839844, "step": 3077 }, { "epoch": 1.71, "learning_rate": 5.450694629514912e-08, "logits/chosen": -5.993875503540039, "logits/rejected": -5.898068904876709, "logps/chosen": -188.33819580078125, "logps/rejected": -181.12979125976562, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": 2.8559865951538086, "rewards/margins": 9.223846435546875, "rewards/rejected": -6.367860317230225, "step": 3078 }, { "epoch": 1.71, "learning_rate": 5.430294970966548e-08, "logits/chosen": -5.993077754974365, "logits/rejected": -6.000080108642578, "logps/chosen": -266.3914489746094, "logps/rejected": -386.47735595703125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": 3.249128580093384, "rewards/margins": 14.571331024169922, "rewards/rejected": -11.322203636169434, "step": 3079 }, { "epoch": 1.71, "learning_rate": 5.409931365492565e-08, "logits/chosen": -5.959367752075195, "logits/rejected": -6.003582000732422, "logps/chosen": -385.302734375, "logps/rejected": -347.4934997558594, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 1.030395746231079, "rewards/margins": 11.677934646606445, "rewards/rejected": -10.647539138793945, "step": 3080 }, { "epoch": 1.71, "learning_rate": 5.3896038295653926e-08, "logits/chosen": -6.011002540588379, "logits/rejected": -5.859122276306152, "logps/chosen": -283.202880859375, "logps/rejected": -168.75665283203125, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 4.903715133666992, "rewards/margins": 10.508755683898926, "rewards/rejected": -5.605039596557617, "step": 3081 }, { "epoch": 1.71, "learning_rate": 5.3693123796282544e-08, "logits/chosen": -5.976171493530273, "logits/rejected": -6.07952880859375, "logps/chosen": -165.0991973876953, "logps/rejected": -265.2060546875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.20169684290885925, "rewards/margins": 10.239444732666016, "rewards/rejected": -10.441142082214355, "step": 3082 }, { "epoch": 1.71, "learning_rate": 5.3490570320952e-08, "logits/chosen": -6.2063984870910645, "logits/rejected": -6.028231620788574, "logps/chosen": -234.15621948242188, "logps/rejected": -141.15036010742188, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 4.258328437805176, "rewards/margins": 9.646363258361816, "rewards/rejected": -5.388034343719482, "step": 3083 }, { "epoch": 1.71, "learning_rate": 5.328837803351083e-08, "logits/chosen": -6.0075178146362305, "logits/rejected": -5.922654628753662, "logps/chosen": -234.87925720214844, "logps/rejected": -159.4978485107422, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 4.328326225280762, "rewards/margins": 12.805630683898926, "rewards/rejected": -8.47730541229248, "step": 3084 }, { "epoch": 1.71, "learning_rate": 5.308654709751509e-08, "logits/chosen": -6.00212287902832, "logits/rejected": -5.946113586425781, "logps/chosen": -379.5400390625, "logps/rejected": -427.5499572753906, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 3.522099018096924, "rewards/margins": 14.581007957458496, "rewards/rejected": -11.058908462524414, "step": 3085 }, { "epoch": 1.71, "learning_rate": 5.2885077676228795e-08, "logits/chosen": -6.0213398933410645, "logits/rejected": -6.074385643005371, "logps/chosen": -340.0432434082031, "logps/rejected": -475.4520263671875, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 3.760298252105713, "rewards/margins": 15.679304122924805, "rewards/rejected": -11.91900634765625, "step": 3086 }, { "epoch": 1.71, "learning_rate": 5.2683969932623526e-08, "logits/chosen": -6.062809467315674, "logits/rejected": -6.0427775382995605, "logps/chosen": -296.3680725097656, "logps/rejected": -232.34661865234375, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 4.806145191192627, "rewards/margins": 13.301450729370117, "rewards/rejected": -8.495306015014648, "step": 3087 }, { "epoch": 1.71, "learning_rate": 5.248322402937821e-08, "logits/chosen": -6.059209823608398, "logits/rejected": -5.854488849639893, "logps/chosen": -352.1544494628906, "logps/rejected": -192.69036865234375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 7.22580099105835, "rewards/margins": 14.356878280639648, "rewards/rejected": -7.131077289581299, "step": 3088 }, { "epoch": 1.71, "learning_rate": 5.228284012887907e-08, "logits/chosen": -5.8955607414245605, "logits/rejected": -6.012448787689209, "logps/chosen": -310.62286376953125, "logps/rejected": -270.8877258300781, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 5.203709602355957, "rewards/margins": 13.406963348388672, "rewards/rejected": -8.203253746032715, "step": 3089 }, { "epoch": 1.72, "learning_rate": 5.2082818393219374e-08, "logits/chosen": -6.0312395095825195, "logits/rejected": -6.05174446105957, "logps/chosen": -271.90966796875, "logps/rejected": -212.4095458984375, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": 6.5274248123168945, "rewards/margins": 12.822026252746582, "rewards/rejected": -6.2946014404296875, "step": 3090 }, { "epoch": 1.72, "learning_rate": 5.18831589841997e-08, "logits/chosen": -6.035470962524414, "logits/rejected": -6.081275463104248, "logps/chosen": -243.54722595214844, "logps/rejected": -198.4825439453125, "loss": 0.0351, "rewards/accuracies": 0.9375, "rewards/chosen": 6.176976203918457, "rewards/margins": 11.806438446044922, "rewards/rejected": -5.629462242126465, "step": 3091 }, { "epoch": 1.72, "learning_rate": 5.168386206332742e-08, "logits/chosen": -6.114795684814453, "logits/rejected": -6.0692458152771, "logps/chosen": -308.41265869140625, "logps/rejected": -171.49118041992188, "loss": 0.0361, "rewards/accuracies": 0.9375, "rewards/chosen": 5.40402889251709, "rewards/margins": 11.505867958068848, "rewards/rejected": -6.101839065551758, "step": 3092 }, { "epoch": 1.72, "learning_rate": 5.1484927791816736e-08, "logits/chosen": -5.9407548904418945, "logits/rejected": -6.163428783416748, "logps/chosen": -230.6265106201172, "logps/rejected": -356.22235107421875, "loss": 0.0212, "rewards/accuracies": 0.9375, "rewards/chosen": 4.2200822830200195, "rewards/margins": 13.157197952270508, "rewards/rejected": -8.937115669250488, "step": 3093 }, { "epoch": 1.72, "learning_rate": 5.128635633058831e-08, "logits/chosen": -6.074255466461182, "logits/rejected": -6.076847076416016, "logps/chosen": -198.8577880859375, "logps/rejected": -203.31265258789062, "loss": 0.0275, "rewards/accuracies": 0.9375, "rewards/chosen": 2.299476146697998, "rewards/margins": 10.257932662963867, "rewards/rejected": -7.958456039428711, "step": 3094 }, { "epoch": 1.72, "learning_rate": 5.108814784026949e-08, "logits/chosen": -5.866878986358643, "logits/rejected": -6.098538398742676, "logps/chosen": -246.5657196044922, "logps/rejected": -365.9087219238281, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 4.071759223937988, "rewards/margins": 13.518782615661621, "rewards/rejected": -9.447023391723633, "step": 3095 }, { "epoch": 1.72, "learning_rate": 5.089030248119408e-08, "logits/chosen": -5.935311794281006, "logits/rejected": -6.070479869842529, "logps/chosen": -270.0668640136719, "logps/rejected": -161.23605346679688, "loss": 0.0488, "rewards/accuracies": 1.0, "rewards/chosen": 6.013741970062256, "rewards/margins": 13.514362335205078, "rewards/rejected": -7.500618934631348, "step": 3096 }, { "epoch": 1.72, "learning_rate": 5.069282041340184e-08, "logits/chosen": -6.033421039581299, "logits/rejected": -5.91287088394165, "logps/chosen": -444.8770751953125, "logps/rejected": -166.096923828125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 7.330997467041016, "rewards/margins": 13.701522827148438, "rewards/rejected": -6.370525360107422, "step": 3097 }, { "epoch": 1.72, "learning_rate": 5.049570179663909e-08, "logits/chosen": -6.010442733764648, "logits/rejected": -6.07065486907959, "logps/chosen": -237.64926147460938, "logps/rejected": -363.5059814453125, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 3.3883862495422363, "rewards/margins": 11.803190231323242, "rewards/rejected": -8.414803504943848, "step": 3098 }, { "epoch": 1.72, "learning_rate": 5.029894679035751e-08, "logits/chosen": -5.893828392028809, "logits/rejected": -5.969742298126221, "logps/chosen": -216.87640380859375, "logps/rejected": -266.92669677734375, "loss": 0.0302, "rewards/accuracies": 0.9375, "rewards/chosen": 1.9304983615875244, "rewards/margins": 8.678044319152832, "rewards/rejected": -6.74754524230957, "step": 3099 }, { "epoch": 1.72, "learning_rate": 5.010255555371551e-08, "logits/chosen": -6.026538848876953, "logits/rejected": -5.966455459594727, "logps/chosen": -317.20477294921875, "logps/rejected": -408.6556396484375, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 5.267683506011963, "rewards/margins": 14.525035858154297, "rewards/rejected": -9.257352828979492, "step": 3100 }, { "epoch": 1.72, "learning_rate": 4.9906528245576504e-08, "logits/chosen": -6.048227310180664, "logits/rejected": -5.969931125640869, "logps/chosen": -241.94529724121094, "logps/rejected": -291.6957702636719, "loss": 0.0231, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4084174633026123, "rewards/margins": 12.558717727661133, "rewards/rejected": -9.150300025939941, "step": 3101 }, { "epoch": 1.72, "learning_rate": 4.971086502450994e-08, "logits/chosen": -6.0307183265686035, "logits/rejected": -5.981076240539551, "logps/chosen": -364.4385986328125, "logps/rejected": -227.92190551757812, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 5.9142632484436035, "rewards/margins": 14.964225769042969, "rewards/rejected": -9.049962997436523, "step": 3102 }, { "epoch": 1.72, "learning_rate": 4.951556604879048e-08, "logits/chosen": -5.956589698791504, "logits/rejected": -5.9940314292907715, "logps/chosen": -337.6165771484375, "logps/rejected": -288.7755432128906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 5.4939069747924805, "rewards/margins": 13.791439056396484, "rewards/rejected": -8.297532081604004, "step": 3103 }, { "epoch": 1.72, "learning_rate": 4.9320631476398256e-08, "logits/chosen": -6.155769348144531, "logits/rejected": -6.051029682159424, "logps/chosen": -315.2653503417969, "logps/rejected": -230.60427856445312, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": 3.379429340362549, "rewards/margins": 13.712031364440918, "rewards/rejected": -10.332601547241211, "step": 3104 }, { "epoch": 1.72, "learning_rate": 4.9126061465018854e-08, "logits/chosen": -6.006890773773193, "logits/rejected": -6.045581340789795, "logps/chosen": -278.0140075683594, "logps/rejected": -203.32742309570312, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 7.212503433227539, "rewards/margins": 14.423919677734375, "rewards/rejected": -7.211416721343994, "step": 3105 }, { "epoch": 1.72, "learning_rate": 4.893185617204254e-08, "logits/chosen": -6.017293453216553, "logits/rejected": -6.035758972167969, "logps/chosen": -256.3590087890625, "logps/rejected": -222.19094848632812, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 4.992152214050293, "rewards/margins": 11.124139785766602, "rewards/rejected": -6.131988525390625, "step": 3106 }, { "epoch": 1.72, "learning_rate": 4.873801575456482e-08, "logits/chosen": -5.970181465148926, "logits/rejected": -6.063824653625488, "logps/chosen": -255.84771728515625, "logps/rejected": -284.3170166015625, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 3.7502708435058594, "rewards/margins": 10.213571548461914, "rewards/rejected": -6.463300704956055, "step": 3107 }, { "epoch": 1.73, "learning_rate": 4.854454036938599e-08, "logits/chosen": -5.9681715965271, "logits/rejected": -5.948732376098633, "logps/chosen": -233.95193481445312, "logps/rejected": -175.05088806152344, "loss": 0.077, "rewards/accuracies": 0.9375, "rewards/chosen": 3.350726842880249, "rewards/margins": 9.012983322143555, "rewards/rejected": -5.662255764007568, "step": 3108 }, { "epoch": 1.73, "learning_rate": 4.835143017301119e-08, "logits/chosen": -5.974286079406738, "logits/rejected": -6.1248955726623535, "logps/chosen": -234.774658203125, "logps/rejected": -308.16534423828125, "loss": 0.0617, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1340116262435913, "rewards/margins": 11.548327445983887, "rewards/rejected": -10.41431713104248, "step": 3109 }, { "epoch": 1.73, "learning_rate": 4.8158685321649906e-08, "logits/chosen": -6.117705345153809, "logits/rejected": -6.104244232177734, "logps/chosen": -201.31820678710938, "logps/rejected": -180.5050506591797, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": 2.47841215133667, "rewards/margins": 10.920961380004883, "rewards/rejected": -8.442549705505371, "step": 3110 }, { "epoch": 1.73, "learning_rate": 4.7966305971216145e-08, "logits/chosen": -5.898336887359619, "logits/rejected": -5.978230953216553, "logps/chosen": -455.320068359375, "logps/rejected": -503.6248779296875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 7.238512992858887, "rewards/margins": 18.316957473754883, "rewards/rejected": -11.078445434570312, "step": 3111 }, { "epoch": 1.73, "learning_rate": 4.777429227732843e-08, "logits/chosen": -6.005342960357666, "logits/rejected": -5.920749187469482, "logps/chosen": -294.0703125, "logps/rejected": -199.55850219726562, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 3.670379638671875, "rewards/margins": 12.165363311767578, "rewards/rejected": -8.494983673095703, "step": 3112 }, { "epoch": 1.73, "learning_rate": 4.7582644395309346e-08, "logits/chosen": -6.038458824157715, "logits/rejected": -6.011198043823242, "logps/chosen": -312.88836669921875, "logps/rejected": -202.52877807617188, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": 7.553170204162598, "rewards/margins": 13.159494400024414, "rewards/rejected": -5.606324195861816, "step": 3113 }, { "epoch": 1.73, "learning_rate": 4.739136248018577e-08, "logits/chosen": -5.959808826446533, "logits/rejected": -6.0720086097717285, "logps/chosen": -296.15484619140625, "logps/rejected": -250.30691528320312, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 3.5121753215789795, "rewards/margins": 11.85595989227295, "rewards/rejected": -8.343783378601074, "step": 3114 }, { "epoch": 1.73, "learning_rate": 4.720044668668816e-08, "logits/chosen": -5.952045440673828, "logits/rejected": -6.014026641845703, "logps/chosen": -271.7541198730469, "logps/rejected": -292.66180419921875, "loss": 0.0266, "rewards/accuracies": 0.9375, "rewards/chosen": 5.6895012855529785, "rewards/margins": 13.244150161743164, "rewards/rejected": -7.554647922515869, "step": 3115 }, { "epoch": 1.73, "learning_rate": 4.700989716925119e-08, "logits/chosen": -6.007760047912598, "logits/rejected": -6.155845642089844, "logps/chosen": -255.53591918945312, "logps/rejected": -330.04437255859375, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": 2.8286831378936768, "rewards/margins": 11.948873519897461, "rewards/rejected": -9.120190620422363, "step": 3116 }, { "epoch": 1.73, "learning_rate": 4.681971408201313e-08, "logits/chosen": -6.079806327819824, "logits/rejected": -6.034332275390625, "logps/chosen": -234.3949432373047, "logps/rejected": -171.12310791015625, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 3.303929567337036, "rewards/margins": 10.957693099975586, "rewards/rejected": -7.653764724731445, "step": 3117 }, { "epoch": 1.73, "learning_rate": 4.6629897578815635e-08, "logits/chosen": -6.033299922943115, "logits/rejected": -6.090053558349609, "logps/chosen": -250.60494995117188, "logps/rejected": -265.9146423339844, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": 3.782590866088867, "rewards/margins": 13.47060775756836, "rewards/rejected": -9.688016891479492, "step": 3118 }, { "epoch": 1.73, "learning_rate": 4.644044781320422e-08, "logits/chosen": -6.002591133117676, "logits/rejected": -6.030586242675781, "logps/chosen": -294.3070373535156, "logps/rejected": -207.8349609375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 5.479800224304199, "rewards/margins": 11.83839225769043, "rewards/rejected": -6.358591556549072, "step": 3119 }, { "epoch": 1.73, "learning_rate": 4.625136493842724e-08, "logits/chosen": -6.038978576660156, "logits/rejected": -6.0174360275268555, "logps/chosen": -225.6347198486328, "logps/rejected": -252.04974365234375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 3.212611198425293, "rewards/margins": 9.239789009094238, "rewards/rejected": -6.027177810668945, "step": 3120 }, { "epoch": 1.73, "learning_rate": 4.60626491074369e-08, "logits/chosen": -6.034667015075684, "logits/rejected": -5.940497398376465, "logps/chosen": -291.29168701171875, "logps/rejected": -113.08123016357422, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 5.403570175170898, "rewards/margins": 10.827291488647461, "rewards/rejected": -5.423722267150879, "step": 3121 }, { "epoch": 1.73, "learning_rate": 4.5874300472887815e-08, "logits/chosen": -6.049075126647949, "logits/rejected": -6.010773658752441, "logps/chosen": -336.0018615722656, "logps/rejected": -220.64671325683594, "loss": 0.1478, "rewards/accuracies": 1.0, "rewards/chosen": 7.297828197479248, "rewards/margins": 14.428178787231445, "rewards/rejected": -7.130350112915039, "step": 3122 }, { "epoch": 1.73, "learning_rate": 4.568631918713822e-08, "logits/chosen": -6.03626012802124, "logits/rejected": -6.042337417602539, "logps/chosen": -479.87298583984375, "logps/rejected": -316.88946533203125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 4.356880187988281, "rewards/margins": 13.822895050048828, "rewards/rejected": -9.46601390838623, "step": 3123 }, { "epoch": 1.73, "learning_rate": 4.549870540224854e-08, "logits/chosen": -5.938600540161133, "logits/rejected": -5.991674900054932, "logps/chosen": -204.84117126464844, "logps/rejected": -310.0762939453125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 1.6115138530731201, "rewards/margins": 12.341158866882324, "rewards/rejected": -10.729643821716309, "step": 3124 }, { "epoch": 1.73, "learning_rate": 4.531145926998248e-08, "logits/chosen": -6.07737398147583, "logits/rejected": -6.151154518127441, "logps/chosen": -364.70721435546875, "logps/rejected": -362.7749328613281, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 4.544427394866943, "rewards/margins": 13.157366752624512, "rewards/rejected": -8.612939834594727, "step": 3125 }, { "epoch": 1.74, "learning_rate": 4.5124580941806154e-08, "logits/chosen": -5.921620845794678, "logits/rejected": -6.000998497009277, "logps/chosen": -199.7633056640625, "logps/rejected": -237.09725952148438, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": 4.737062454223633, "rewards/margins": 12.973556518554688, "rewards/rejected": -8.236494064331055, "step": 3126 }, { "epoch": 1.74, "learning_rate": 4.493807056888793e-08, "logits/chosen": -6.015649318695068, "logits/rejected": -6.014899253845215, "logps/chosen": -280.03009033203125, "logps/rejected": -268.7154235839844, "loss": 0.03, "rewards/accuracies": 0.9375, "rewards/chosen": 2.7894444465637207, "rewards/margins": 12.445575714111328, "rewards/rejected": -9.656131744384766, "step": 3127 }, { "epoch": 1.74, "learning_rate": 4.475192830209884e-08, "logits/chosen": -5.956293106079102, "logits/rejected": -5.934299468994141, "logps/chosen": -244.52743530273438, "logps/rejected": -152.89309692382812, "loss": 0.0133, "rewards/accuracies": 0.9375, "rewards/chosen": 4.83973503112793, "rewards/margins": 13.398271560668945, "rewards/rejected": -8.558536529541016, "step": 3128 }, { "epoch": 1.74, "learning_rate": 4.4566154292011994e-08, "logits/chosen": -5.993948459625244, "logits/rejected": -5.972169876098633, "logps/chosen": -287.2497863769531, "logps/rejected": -211.3408660888672, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": 8.462908744812012, "rewards/margins": 12.528244972229004, "rewards/rejected": -4.065335273742676, "step": 3129 }, { "epoch": 1.74, "learning_rate": 4.4380748688902846e-08, "logits/chosen": -5.933614730834961, "logits/rejected": -5.921207427978516, "logps/chosen": -270.306884765625, "logps/rejected": -271.3818359375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 3.677197217941284, "rewards/margins": 11.952054023742676, "rewards/rejected": -8.274856567382812, "step": 3130 }, { "epoch": 1.74, "learning_rate": 4.419571164274849e-08, "logits/chosen": -6.078084945678711, "logits/rejected": -6.082030773162842, "logps/chosen": -282.9170227050781, "logps/rejected": -190.07504272460938, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 6.606863021850586, "rewards/margins": 12.204660415649414, "rewards/rejected": -5.597797870635986, "step": 3131 }, { "epoch": 1.74, "learning_rate": 4.401104330322797e-08, "logits/chosen": -6.1233930587768555, "logits/rejected": -5.955466270446777, "logps/chosen": -360.4988098144531, "logps/rejected": -192.39144897460938, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 7.671918869018555, "rewards/margins": 17.124052047729492, "rewards/rejected": -9.452132225036621, "step": 3132 }, { "epoch": 1.74, "learning_rate": 4.3826743819722236e-08, "logits/chosen": -6.0487799644470215, "logits/rejected": -6.066554546356201, "logps/chosen": -487.3913879394531, "logps/rejected": -375.3868408203125, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 6.665790557861328, "rewards/margins": 16.85747718811035, "rewards/rejected": -10.191686630249023, "step": 3133 }, { "epoch": 1.74, "learning_rate": 4.364281334131376e-08, "logits/chosen": -5.953242301940918, "logits/rejected": -6.085042476654053, "logps/chosen": -213.37014770507812, "logps/rejected": -264.24945068359375, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": 3.6404762268066406, "rewards/margins": 13.109541893005371, "rewards/rejected": -9.46906566619873, "step": 3134 }, { "epoch": 1.74, "learning_rate": 4.3459252016786744e-08, "logits/chosen": -6.031089782714844, "logits/rejected": -5.971142768859863, "logps/chosen": -235.36878967285156, "logps/rejected": -151.74969482421875, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": 3.534226417541504, "rewards/margins": 11.027137756347656, "rewards/rejected": -7.492911338806152, "step": 3135 }, { "epoch": 1.74, "learning_rate": 4.32760599946263e-08, "logits/chosen": -5.908489227294922, "logits/rejected": -6.007774353027344, "logps/chosen": -158.1973114013672, "logps/rejected": -258.760498046875, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": 1.5870749950408936, "rewards/margins": 13.401063919067383, "rewards/rejected": -11.813989639282227, "step": 3136 }, { "epoch": 1.74, "learning_rate": 4.309323742301918e-08, "logits/chosen": -6.038437843322754, "logits/rejected": -6.028903961181641, "logps/chosen": -247.6995849609375, "logps/rejected": -194.2353515625, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": 4.113769054412842, "rewards/margins": 12.665643692016602, "rewards/rejected": -8.551875114440918, "step": 3137 }, { "epoch": 1.74, "learning_rate": 4.291078444985335e-08, "logits/chosen": -5.998804569244385, "logits/rejected": -6.093951225280762, "logps/chosen": -270.8594970703125, "logps/rejected": -242.24652099609375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": 4.232644081115723, "rewards/margins": 14.255460739135742, "rewards/rejected": -10.02281665802002, "step": 3138 }, { "epoch": 1.74, "learning_rate": 4.272870122271738e-08, "logits/chosen": -6.179986476898193, "logits/rejected": -6.0149946212768555, "logps/chosen": -171.8929901123047, "logps/rejected": -121.94234466552734, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 4.095998764038086, "rewards/margins": 11.438101768493652, "rewards/rejected": -7.342103481292725, "step": 3139 }, { "epoch": 1.74, "learning_rate": 4.2546987888901266e-08, "logits/chosen": -6.087535858154297, "logits/rejected": -6.026854515075684, "logps/chosen": -214.64964294433594, "logps/rejected": -134.072509765625, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 4.28535270690918, "rewards/margins": 12.585002899169922, "rewards/rejected": -8.29964828491211, "step": 3140 }, { "epoch": 1.74, "learning_rate": 4.236564459539516e-08, "logits/chosen": -6.125412464141846, "logits/rejected": -6.037472724914551, "logps/chosen": -270.6800537109375, "logps/rejected": -183.84872436523438, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 4.190088272094727, "rewards/margins": 12.850893020629883, "rewards/rejected": -8.660802841186523, "step": 3141 }, { "epoch": 1.74, "learning_rate": 4.218467148889071e-08, "logits/chosen": -5.971584320068359, "logits/rejected": -5.952587127685547, "logps/chosen": -185.23818969726562, "logps/rejected": -183.85186767578125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 5.210775375366211, "rewards/margins": 13.69638442993164, "rewards/rejected": -8.48560905456543, "step": 3142 }, { "epoch": 1.74, "learning_rate": 4.200406871577944e-08, "logits/chosen": -6.05499792098999, "logits/rejected": -6.025324821472168, "logps/chosen": -305.8414306640625, "logps/rejected": -184.95407104492188, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": 7.308553695678711, "rewards/margins": 14.721010208129883, "rewards/rejected": -7.412456512451172, "step": 3143 }, { "epoch": 1.75, "learning_rate": 4.1823836422153424e-08, "logits/chosen": -5.9965338706970215, "logits/rejected": -6.038435459136963, "logps/chosen": -190.36630249023438, "logps/rejected": -250.11798095703125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 2.7462644577026367, "rewards/margins": 11.786200523376465, "rewards/rejected": -9.039935111999512, "step": 3144 }, { "epoch": 1.75, "learning_rate": 4.164397475380527e-08, "logits/chosen": -6.0350022315979, "logits/rejected": -5.965267181396484, "logps/chosen": -308.3589782714844, "logps/rejected": -166.31430053710938, "loss": 0.0433, "rewards/accuracies": 0.9375, "rewards/chosen": 6.424431800842285, "rewards/margins": 9.857704162597656, "rewards/rejected": -3.4332728385925293, "step": 3145 }, { "epoch": 1.75, "learning_rate": 4.146448385622764e-08, "logits/chosen": -6.036958694458008, "logits/rejected": -6.013307571411133, "logps/chosen": -308.2975769042969, "logps/rejected": -443.16278076171875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 2.189882278442383, "rewards/margins": 15.082615852355957, "rewards/rejected": -12.89273452758789, "step": 3146 }, { "epoch": 1.75, "learning_rate": 4.12853638746134e-08, "logits/chosen": -6.011021137237549, "logits/rejected": -5.9382219314575195, "logps/chosen": -626.5303955078125, "logps/rejected": -388.03973388671875, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": 9.548181533813477, "rewards/margins": 12.434452056884766, "rewards/rejected": -2.886270046234131, "step": 3147 }, { "epoch": 1.75, "learning_rate": 4.1106614953855146e-08, "logits/chosen": -6.170637607574463, "logits/rejected": -6.149865627288818, "logps/chosen": -281.984130859375, "logps/rejected": -252.5240478515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 3.482119560241699, "rewards/margins": 12.731840133666992, "rewards/rejected": -9.24972152709961, "step": 3148 }, { "epoch": 1.75, "learning_rate": 4.0928237238545495e-08, "logits/chosen": -5.987085342407227, "logits/rejected": -5.976478099822998, "logps/chosen": -229.98715209960938, "logps/rejected": -205.60012817382812, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 4.141526699066162, "rewards/margins": 13.189689636230469, "rewards/rejected": -9.048162460327148, "step": 3149 }, { "epoch": 1.75, "learning_rate": 4.0750230872976684e-08, "logits/chosen": -6.0576629638671875, "logits/rejected": -5.983837127685547, "logps/chosen": -304.8893737792969, "logps/rejected": -182.80166625976562, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": 5.681779861450195, "rewards/margins": 12.026876449584961, "rewards/rejected": -6.345096588134766, "step": 3150 }, { "epoch": 1.75, "learning_rate": 4.0572596001140714e-08, "logits/chosen": -5.982859134674072, "logits/rejected": -5.992598533630371, "logps/chosen": -250.72021484375, "logps/rejected": -280.6966552734375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 4.355348587036133, "rewards/margins": 13.509145736694336, "rewards/rejected": -9.15379810333252, "step": 3151 }, { "epoch": 1.75, "learning_rate": 4.039533276672896e-08, "logits/chosen": -6.049258232116699, "logits/rejected": -6.013559341430664, "logps/chosen": -235.59796142578125, "logps/rejected": -212.0501708984375, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 4.936710357666016, "rewards/margins": 11.729517936706543, "rewards/rejected": -6.792807579040527, "step": 3152 }, { "epoch": 1.75, "learning_rate": 4.021844131313212e-08, "logits/chosen": -5.967405319213867, "logits/rejected": -5.975096702575684, "logps/chosen": -207.78717041015625, "logps/rejected": -149.184326171875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 5.0818047523498535, "rewards/margins": 14.466411590576172, "rewards/rejected": -9.38460636138916, "step": 3153 }, { "epoch": 1.75, "learning_rate": 4.004192178344029e-08, "logits/chosen": -5.966555595397949, "logits/rejected": -5.982985973358154, "logps/chosen": -329.5432434082031, "logps/rejected": -168.0028533935547, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": 10.891618728637695, "rewards/margins": 13.29373550415039, "rewards/rejected": -2.4021167755126953, "step": 3154 }, { "epoch": 1.75, "learning_rate": 3.986577432044264e-08, "logits/chosen": -5.875327110290527, "logits/rejected": -5.956025123596191, "logps/chosen": -192.2930450439453, "logps/rejected": -204.62750244140625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 3.2369799613952637, "rewards/margins": 13.06338882446289, "rewards/rejected": -9.826410293579102, "step": 3155 }, { "epoch": 1.75, "learning_rate": 3.968999906662751e-08, "logits/chosen": -6.0430521965026855, "logits/rejected": -6.064146518707275, "logps/chosen": -344.71148681640625, "logps/rejected": -353.4832763671875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 4.210607528686523, "rewards/margins": 13.190404891967773, "rewards/rejected": -8.97979736328125, "step": 3156 }, { "epoch": 1.75, "learning_rate": 3.951459616418185e-08, "logits/chosen": -5.956183433532715, "logits/rejected": -6.04448127746582, "logps/chosen": -173.50253295898438, "logps/rejected": -228.87045288085938, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 2.942735433578491, "rewards/margins": 13.541984558105469, "rewards/rejected": -10.599248886108398, "step": 3157 }, { "epoch": 1.75, "learning_rate": 3.933956575499148e-08, "logits/chosen": -6.079063415527344, "logits/rejected": -5.941859722137451, "logps/chosen": -331.87384033203125, "logps/rejected": -348.46270751953125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 2.4499735832214355, "rewards/margins": 11.218145370483398, "rewards/rejected": -8.768170356750488, "step": 3158 }, { "epoch": 1.75, "learning_rate": 3.916490798064137e-08, "logits/chosen": -5.9562153816223145, "logits/rejected": -5.924863338470459, "logps/chosen": -198.60223388671875, "logps/rejected": -140.6978759765625, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": 4.764321327209473, "rewards/margins": 10.645078659057617, "rewards/rejected": -5.8807573318481445, "step": 3159 }, { "epoch": 1.75, "learning_rate": 3.8990622982414455e-08, "logits/chosen": -6.023008346557617, "logits/rejected": -5.892488479614258, "logps/chosen": -498.82635498046875, "logps/rejected": -230.3189239501953, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 7.492980003356934, "rewards/margins": 12.250665664672852, "rewards/rejected": -4.75768518447876, "step": 3160 }, { "epoch": 1.75, "learning_rate": 3.881671090129246e-08, "logits/chosen": -5.943544864654541, "logits/rejected": -5.903099060058594, "logps/chosen": -262.1587219238281, "logps/rejected": -262.7166442871094, "loss": 0.0281, "rewards/accuracies": 0.9375, "rewards/chosen": 3.20847749710083, "rewards/margins": 13.247539520263672, "rewards/rejected": -10.039061546325684, "step": 3161 }, { "epoch": 1.76, "learning_rate": 3.864317187795524e-08, "logits/chosen": -5.99838924407959, "logits/rejected": -5.999433517456055, "logps/chosen": -222.39022827148438, "logps/rejected": -237.4224853515625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 2.7050087451934814, "rewards/margins": 12.178239822387695, "rewards/rejected": -9.47323226928711, "step": 3162 }, { "epoch": 1.76, "learning_rate": 3.847000605278111e-08, "logits/chosen": -6.036466598510742, "logits/rejected": -6.038665771484375, "logps/chosen": -270.13726806640625, "logps/rejected": -199.6455535888672, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": 5.545434951782227, "rewards/margins": 12.192317008972168, "rewards/rejected": -6.646882057189941, "step": 3163 }, { "epoch": 1.76, "learning_rate": 3.829721356584648e-08, "logits/chosen": -5.964164733886719, "logits/rejected": -6.029023170471191, "logps/chosen": -450.6402893066406, "logps/rejected": -380.54425048828125, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 6.527959823608398, "rewards/margins": 14.914631843566895, "rewards/rejected": -8.38667106628418, "step": 3164 }, { "epoch": 1.76, "learning_rate": 3.8124794556925466e-08, "logits/chosen": -5.834508895874023, "logits/rejected": -5.969691753387451, "logps/chosen": -202.81893920898438, "logps/rejected": -246.41595458984375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 1.8395791053771973, "rewards/margins": 11.239267349243164, "rewards/rejected": -9.399688720703125, "step": 3165 }, { "epoch": 1.76, "learning_rate": 3.7952749165490404e-08, "logits/chosen": -6.018211841583252, "logits/rejected": -5.897972106933594, "logps/chosen": -238.9119110107422, "logps/rejected": -202.82656860351562, "loss": 0.0339, "rewards/accuracies": 0.9375, "rewards/chosen": 1.0209333896636963, "rewards/margins": 12.732792854309082, "rewards/rejected": -11.711859703063965, "step": 3166 }, { "epoch": 1.76, "learning_rate": 3.7781077530711247e-08, "logits/chosen": -6.064733028411865, "logits/rejected": -6.146043300628662, "logps/chosen": -185.9036865234375, "logps/rejected": -243.529052734375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 4.266242027282715, "rewards/margins": 12.073122024536133, "rewards/rejected": -7.806879997253418, "step": 3167 }, { "epoch": 1.76, "learning_rate": 3.7609779791455744e-08, "logits/chosen": -6.126744747161865, "logits/rejected": -6.010741233825684, "logps/chosen": -309.99798583984375, "logps/rejected": -192.32769775390625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 3.123727321624756, "rewards/margins": 11.481608390808105, "rewards/rejected": -8.357879638671875, "step": 3168 }, { "epoch": 1.76, "learning_rate": 3.743885608628888e-08, "logits/chosen": -6.109157562255859, "logits/rejected": -5.977028846740723, "logps/chosen": -222.023193359375, "logps/rejected": -87.7401123046875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 4.276263236999512, "rewards/margins": 9.94305419921875, "rewards/rejected": -5.6667914390563965, "step": 3169 }, { "epoch": 1.76, "learning_rate": 3.7268306553473574e-08, "logits/chosen": -6.039055824279785, "logits/rejected": -5.968430519104004, "logps/chosen": -254.96913146972656, "logps/rejected": -193.07244873046875, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": 3.508387327194214, "rewards/margins": 10.585895538330078, "rewards/rejected": -7.077507972717285, "step": 3170 }, { "epoch": 1.76, "learning_rate": 3.709813133096956e-08, "logits/chosen": -5.973755836486816, "logits/rejected": -6.025217056274414, "logps/chosen": -138.7285919189453, "logps/rejected": -202.22689819335938, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 1.285015344619751, "rewards/margins": 10.057092666625977, "rewards/rejected": -8.772077560424805, "step": 3171 }, { "epoch": 1.76, "learning_rate": 3.692833055643407e-08, "logits/chosen": -5.895073890686035, "logits/rejected": -5.955254554748535, "logps/chosen": -399.3901672363281, "logps/rejected": -320.4809875488281, "loss": 0.0588, "rewards/accuracies": 0.9375, "rewards/chosen": 7.890941619873047, "rewards/margins": 14.858343124389648, "rewards/rejected": -6.967400550842285, "step": 3172 }, { "epoch": 1.76, "learning_rate": 3.6758904367221513e-08, "logits/chosen": -6.0050153732299805, "logits/rejected": -5.96477746963501, "logps/chosen": -301.1196594238281, "logps/rejected": -170.69158935546875, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 2.8785479068756104, "rewards/margins": 9.836089134216309, "rewards/rejected": -6.9575419425964355, "step": 3173 }, { "epoch": 1.76, "learning_rate": 3.6589852900383024e-08, "logits/chosen": -6.040439605712891, "logits/rejected": -5.9039835929870605, "logps/chosen": -211.9623565673828, "logps/rejected": -186.8352508544922, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 4.641569137573242, "rewards/margins": 12.863946914672852, "rewards/rejected": -8.222378730773926, "step": 3174 }, { "epoch": 1.76, "learning_rate": 3.642117629266678e-08, "logits/chosen": -6.0207366943359375, "logits/rejected": -6.079293251037598, "logps/chosen": -291.8460998535156, "logps/rejected": -266.2092590332031, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": 5.228752136230469, "rewards/margins": 12.628454208374023, "rewards/rejected": -7.399702072143555, "step": 3175 }, { "epoch": 1.76, "learning_rate": 3.6252874680517744e-08, "logits/chosen": -5.97189474105835, "logits/rejected": -6.038590908050537, "logps/chosen": -251.7406005859375, "logps/rejected": -156.89212036132812, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 3.417290210723877, "rewards/margins": 9.630443572998047, "rewards/rejected": -6.213153839111328, "step": 3176 }, { "epoch": 1.76, "learning_rate": 3.608494820007757e-08, "logits/chosen": -6.113256931304932, "logits/rejected": -6.032410621643066, "logps/chosen": -314.73846435546875, "logps/rejected": -251.9326629638672, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 4.595800876617432, "rewards/margins": 12.382390975952148, "rewards/rejected": -7.786589622497559, "step": 3177 }, { "epoch": 1.76, "learning_rate": 3.591739698718438e-08, "logits/chosen": -6.154900550842285, "logits/rejected": -6.021714687347412, "logps/chosen": -221.2210693359375, "logps/rejected": -218.63926696777344, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 3.3803956508636475, "rewards/margins": 12.093376159667969, "rewards/rejected": -8.712981224060059, "step": 3178 }, { "epoch": 1.76, "learning_rate": 3.575022117737253e-08, "logits/chosen": -6.1276397705078125, "logits/rejected": -5.981954574584961, "logps/chosen": -188.6533966064453, "logps/rejected": -75.68603515625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 2.7862119674682617, "rewards/margins": 8.576462745666504, "rewards/rejected": -5.7902512550354, "step": 3179 }, { "epoch": 1.77, "learning_rate": 3.55834209058733e-08, "logits/chosen": -5.913086891174316, "logits/rejected": -6.059279441833496, "logps/chosen": -391.51983642578125, "logps/rejected": -311.7207946777344, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 6.12348747253418, "rewards/margins": 13.939148902893066, "rewards/rejected": -7.815662384033203, "step": 3180 }, { "epoch": 1.77, "learning_rate": 3.5416996307613525e-08, "logits/chosen": -6.029246807098389, "logits/rejected": -6.111421585083008, "logps/chosen": -286.4197082519531, "logps/rejected": -319.36480712890625, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 5.686724662780762, "rewards/margins": 15.227230072021484, "rewards/rejected": -9.540505409240723, "step": 3181 }, { "epoch": 1.77, "learning_rate": 3.525094751721664e-08, "logits/chosen": -6.178725242614746, "logits/rejected": -6.012394905090332, "logps/chosen": -185.6473388671875, "logps/rejected": -106.68649291992188, "loss": 0.0421, "rewards/accuracies": 0.9375, "rewards/chosen": 3.4719531536102295, "rewards/margins": 10.796661376953125, "rewards/rejected": -7.32470703125, "step": 3182 }, { "epoch": 1.77, "learning_rate": 3.5085274669001796e-08, "logits/chosen": -6.082277774810791, "logits/rejected": -6.038702487945557, "logps/chosen": -311.05438232421875, "logps/rejected": -232.31253051757812, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 4.607050895690918, "rewards/margins": 13.188531875610352, "rewards/rejected": -8.581480026245117, "step": 3183 }, { "epoch": 1.77, "learning_rate": 3.491997789698414e-08, "logits/chosen": -6.143559455871582, "logits/rejected": -6.025759696960449, "logps/chosen": -388.96942138671875, "logps/rejected": -139.76947021484375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 6.746820449829102, "rewards/margins": 13.620769500732422, "rewards/rejected": -6.873948097229004, "step": 3184 }, { "epoch": 1.77, "learning_rate": 3.475505733487472e-08, "logits/chosen": -5.981389999389648, "logits/rejected": -6.057913303375244, "logps/chosen": -213.7115478515625, "logps/rejected": -283.8265686035156, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 1.798733115196228, "rewards/margins": 14.632644653320312, "rewards/rejected": -12.833910942077637, "step": 3185 }, { "epoch": 1.77, "learning_rate": 3.4590513116079965e-08, "logits/chosen": -5.847316741943359, "logits/rejected": -5.987168788909912, "logps/chosen": -340.2420654296875, "logps/rejected": -502.728515625, "loss": 0.011, "rewards/accuracies": 0.9375, "rewards/chosen": 3.830650568008423, "rewards/margins": 13.94483470916748, "rewards/rejected": -10.11418342590332, "step": 3186 }, { "epoch": 1.77, "learning_rate": 3.442634537370215e-08, "logits/chosen": -6.129734516143799, "logits/rejected": -6.04506778717041, "logps/chosen": -305.381103515625, "logps/rejected": -221.07778930664062, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 5.602567672729492, "rewards/margins": 10.588695526123047, "rewards/rejected": -4.986127853393555, "step": 3187 }, { "epoch": 1.77, "learning_rate": 3.4262554240538865e-08, "logits/chosen": -6.050853729248047, "logits/rejected": -6.0887627601623535, "logps/chosen": -247.95074462890625, "logps/rejected": -250.8294677734375, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 3.5765371322631836, "rewards/margins": 13.257575988769531, "rewards/rejected": -9.681038856506348, "step": 3188 }, { "epoch": 1.77, "learning_rate": 3.4099139849083304e-08, "logits/chosen": -6.021592617034912, "logits/rejected": -6.097744464874268, "logps/chosen": -196.38916015625, "logps/rejected": -305.78570556640625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 1.4699926376342773, "rewards/margins": 12.924895286560059, "rewards/rejected": -11.454902648925781, "step": 3189 }, { "epoch": 1.77, "learning_rate": 3.3936102331523606e-08, "logits/chosen": -6.004419326782227, "logits/rejected": -6.056353569030762, "logps/chosen": -343.21405029296875, "logps/rejected": -270.7932434082031, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": 8.020413398742676, "rewards/margins": 15.666154861450195, "rewards/rejected": -7.645740985870361, "step": 3190 }, { "epoch": 1.77, "learning_rate": 3.377344181974301e-08, "logits/chosen": -6.226051330566406, "logits/rejected": -6.1595892906188965, "logps/chosen": -236.0819091796875, "logps/rejected": -197.27256774902344, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": 3.449967861175537, "rewards/margins": 10.282172203063965, "rewards/rejected": -6.832204341888428, "step": 3191 }, { "epoch": 1.77, "learning_rate": 3.3611158445320034e-08, "logits/chosen": -6.0117268562316895, "logits/rejected": -6.041057586669922, "logps/chosen": -271.32537841796875, "logps/rejected": -394.16314697265625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 2.99874210357666, "rewards/margins": 15.08236026763916, "rewards/rejected": -12.0836181640625, "step": 3192 }, { "epoch": 1.77, "learning_rate": 3.344925233952805e-08, "logits/chosen": -6.0895490646362305, "logits/rejected": -6.052757740020752, "logps/chosen": -257.947509765625, "logps/rejected": -259.4323425292969, "loss": 0.0194, "rewards/accuracies": 0.9375, "rewards/chosen": 1.48734450340271, "rewards/margins": 12.196898460388184, "rewards/rejected": -10.709554672241211, "step": 3193 }, { "epoch": 1.77, "learning_rate": 3.3287723633335377e-08, "logits/chosen": -6.030014991760254, "logits/rejected": -6.103655815124512, "logps/chosen": -259.52618408203125, "logps/rejected": -213.87576293945312, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 4.779175758361816, "rewards/margins": 12.565696716308594, "rewards/rejected": -7.786520957946777, "step": 3194 }, { "epoch": 1.77, "learning_rate": 3.312657245740463e-08, "logits/chosen": -6.005715370178223, "logits/rejected": -5.949854850769043, "logps/chosen": -291.68121337890625, "logps/rejected": -301.0186767578125, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": 2.4558558464050293, "rewards/margins": 11.642407417297363, "rewards/rejected": -9.186552047729492, "step": 3195 }, { "epoch": 1.77, "learning_rate": 3.296579894209345e-08, "logits/chosen": -6.092263698577881, "logits/rejected": -6.0254364013671875, "logps/chosen": -280.1826477050781, "logps/rejected": -242.88905334472656, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": 4.8902153968811035, "rewards/margins": 12.362074851989746, "rewards/rejected": -7.471858978271484, "step": 3196 }, { "epoch": 1.77, "learning_rate": 3.280540321745395e-08, "logits/chosen": -6.028257369995117, "logits/rejected": -6.173937797546387, "logps/chosen": -283.8371887207031, "logps/rejected": -356.3996276855469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 4.908246040344238, "rewards/margins": 17.285907745361328, "rewards/rejected": -12.377662658691406, "step": 3197 }, { "epoch": 1.78, "learning_rate": 3.264538541323231e-08, "logits/chosen": -5.877161026000977, "logits/rejected": -5.942417144775391, "logps/chosen": -397.133544921875, "logps/rejected": -270.9036560058594, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 7.33818244934082, "rewards/margins": 13.535539627075195, "rewards/rejected": -6.197357177734375, "step": 3198 }, { "epoch": 1.78, "learning_rate": 3.2485745658869414e-08, "logits/chosen": -6.0633344650268555, "logits/rejected": -5.900744915008545, "logps/chosen": -237.1622772216797, "logps/rejected": -216.11090087890625, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": 2.5241427421569824, "rewards/margins": 11.560720443725586, "rewards/rejected": -9.036578178405762, "step": 3199 }, { "epoch": 1.78, "learning_rate": 3.2326484083499916e-08, "logits/chosen": -5.9886474609375, "logits/rejected": -5.999460697174072, "logps/chosen": -282.5713806152344, "logps/rejected": -209.64596557617188, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 5.509191513061523, "rewards/margins": 10.988883018493652, "rewards/rejected": -5.479691505432129, "step": 3200 }, { "epoch": 1.78, "learning_rate": 3.21676008159531e-08, "logits/chosen": -6.027900695800781, "logits/rejected": -5.9177565574646, "logps/chosen": -322.6004943847656, "logps/rejected": -369.638916015625, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": 4.8784589767456055, "rewards/margins": 13.645622253417969, "rewards/rejected": -8.76716423034668, "step": 3201 }, { "epoch": 1.78, "learning_rate": 3.200909598475171e-08, "logits/chosen": -5.977107048034668, "logits/rejected": -5.970044136047363, "logps/chosen": -221.8776397705078, "logps/rejected": -199.7263946533203, "loss": 0.0325, "rewards/accuracies": 0.9375, "rewards/chosen": 5.181483268737793, "rewards/margins": 11.34091567993164, "rewards/rejected": -6.1594319343566895, "step": 3202 }, { "epoch": 1.78, "learning_rate": 3.1850969718112744e-08, "logits/chosen": -6.150049686431885, "logits/rejected": -5.965806484222412, "logps/chosen": -272.9778137207031, "logps/rejected": -125.27851867675781, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 6.624884605407715, "rewards/margins": 11.342440605163574, "rewards/rejected": -4.717555999755859, "step": 3203 }, { "epoch": 1.78, "learning_rate": 3.1693222143946595e-08, "logits/chosen": -6.104860782623291, "logits/rejected": -6.104393482208252, "logps/chosen": -223.90435791015625, "logps/rejected": -161.2103729248047, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 3.8611228466033936, "rewards/margins": 12.017007827758789, "rewards/rejected": -8.155885696411133, "step": 3204 }, { "epoch": 1.78, "learning_rate": 3.15358533898577e-08, "logits/chosen": -6.084542751312256, "logits/rejected": -6.068207740783691, "logps/chosen": -222.7711181640625, "logps/rejected": -253.4425048828125, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": 1.7110744714736938, "rewards/margins": 11.583883285522461, "rewards/rejected": -9.872810363769531, "step": 3205 }, { "epoch": 1.78, "learning_rate": 3.13788635831439e-08, "logits/chosen": -6.017459392547607, "logits/rejected": -6.0583295822143555, "logps/chosen": -209.9630126953125, "logps/rejected": -280.62628173828125, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": 3.495630979537964, "rewards/margins": 15.23379898071289, "rewards/rejected": -11.738168716430664, "step": 3206 }, { "epoch": 1.78, "learning_rate": 3.122225285079638e-08, "logits/chosen": -6.117242336273193, "logits/rejected": -6.0082550048828125, "logps/chosen": -238.42193603515625, "logps/rejected": -203.7409210205078, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 3.034022331237793, "rewards/margins": 12.706239700317383, "rewards/rejected": -9.672216415405273, "step": 3207 }, { "epoch": 1.78, "learning_rate": 3.1066021319499926e-08, "logits/chosen": -5.981601715087891, "logits/rejected": -5.95050048828125, "logps/chosen": -209.93417358398438, "logps/rejected": -118.31316375732422, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 2.1530239582061768, "rewards/margins": 10.258371353149414, "rewards/rejected": -8.105347633361816, "step": 3208 }, { "epoch": 1.78, "learning_rate": 3.091016911563243e-08, "logits/chosen": -6.015995502471924, "logits/rejected": -6.074357986450195, "logps/chosen": -213.71194458007812, "logps/rejected": -228.0697479248047, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 3.240100383758545, "rewards/margins": 12.90080738067627, "rewards/rejected": -9.660707473754883, "step": 3209 }, { "epoch": 1.78, "learning_rate": 3.075469636526507e-08, "logits/chosen": -6.0449934005737305, "logits/rejected": -6.0892863273620605, "logps/chosen": -258.3692321777344, "logps/rejected": -249.5950469970703, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 5.663055419921875, "rewards/margins": 12.874452590942383, "rewards/rejected": -7.21139669418335, "step": 3210 }, { "epoch": 1.78, "learning_rate": 3.059960319416183e-08, "logits/chosen": -6.097693920135498, "logits/rejected": -6.037413120269775, "logps/chosen": -263.4211120605469, "logps/rejected": -173.05099487304688, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 4.712998867034912, "rewards/margins": 13.261062622070312, "rewards/rejected": -8.548063278198242, "step": 3211 }, { "epoch": 1.78, "learning_rate": 3.044488972777987e-08, "logits/chosen": -6.021651744842529, "logits/rejected": -6.066755771636963, "logps/chosen": -302.86785888671875, "logps/rejected": -232.994873046875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 5.981984615325928, "rewards/margins": 11.020780563354492, "rewards/rejected": -5.038796424865723, "step": 3212 }, { "epoch": 1.78, "learning_rate": 3.0290556091269124e-08, "logits/chosen": -6.06587553024292, "logits/rejected": -6.1146697998046875, "logps/chosen": -322.39569091796875, "logps/rejected": -258.88433837890625, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": 4.980628490447998, "rewards/margins": 12.184884071350098, "rewards/rejected": -7.204255104064941, "step": 3213 }, { "epoch": 1.78, "learning_rate": 3.013660240947224e-08, "logits/chosen": -6.001654624938965, "logits/rejected": -5.995920658111572, "logps/chosen": -233.07044982910156, "logps/rejected": -287.6556396484375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 3.681396007537842, "rewards/margins": 11.779610633850098, "rewards/rejected": -8.098215103149414, "step": 3214 }, { "epoch": 1.78, "learning_rate": 2.998302880692477e-08, "logits/chosen": -5.970330238342285, "logits/rejected": -6.04831600189209, "logps/chosen": -267.1485290527344, "logps/rejected": -310.8965759277344, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": 3.1250531673431396, "rewards/margins": 12.159295082092285, "rewards/rejected": -9.034241676330566, "step": 3215 }, { "epoch": 1.79, "learning_rate": 2.982983540785433e-08, "logits/chosen": -6.033151626586914, "logits/rejected": -5.984832763671875, "logps/chosen": -184.14556884765625, "logps/rejected": -121.41027069091797, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 3.1301302909851074, "rewards/margins": 9.332544326782227, "rewards/rejected": -6.202413558959961, "step": 3216 }, { "epoch": 1.79, "learning_rate": 2.9677022336181413e-08, "logits/chosen": -5.907684326171875, "logits/rejected": -5.837426662445068, "logps/chosen": -364.1690368652344, "logps/rejected": -317.7650146484375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 5.921772480010986, "rewards/margins": 13.986418724060059, "rewards/rejected": -8.06464672088623, "step": 3217 }, { "epoch": 1.79, "learning_rate": 2.95245897155188e-08, "logits/chosen": -5.8855180740356445, "logits/rejected": -5.944430828094482, "logps/chosen": -509.52838134765625, "logps/rejected": -386.73193359375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 8.007213592529297, "rewards/margins": 15.159625053405762, "rewards/rejected": -7.152411460876465, "step": 3218 }, { "epoch": 1.79, "learning_rate": 2.9372537669171195e-08, "logits/chosen": -6.045845031738281, "logits/rejected": -6.06446647644043, "logps/chosen": -262.3627014160156, "logps/rejected": -179.19369506835938, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 2.570618152618408, "rewards/margins": 7.941236972808838, "rewards/rejected": -5.37061882019043, "step": 3219 }, { "epoch": 1.79, "learning_rate": 2.922086632013582e-08, "logits/chosen": -5.951648712158203, "logits/rejected": -5.985367774963379, "logps/chosen": -273.72406005859375, "logps/rejected": -221.6746826171875, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 5.098891735076904, "rewards/margins": 12.212545394897461, "rewards/rejected": -7.113655090332031, "step": 3220 }, { "epoch": 1.79, "learning_rate": 2.9069575791101696e-08, "logits/chosen": -6.069296836853027, "logits/rejected": -6.087876319885254, "logps/chosen": -274.647705078125, "logps/rejected": -354.79803466796875, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": 1.6995031833648682, "rewards/margins": 14.067392349243164, "rewards/rejected": -12.367889404296875, "step": 3221 }, { "epoch": 1.79, "learning_rate": 2.8918666204450105e-08, "logits/chosen": -6.125612258911133, "logits/rejected": -5.953704833984375, "logps/chosen": -285.28778076171875, "logps/rejected": -182.49911499023438, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 4.042618751525879, "rewards/margins": 10.663883209228516, "rewards/rejected": -6.621264457702637, "step": 3222 }, { "epoch": 1.79, "learning_rate": 2.8768137682253835e-08, "logits/chosen": -5.988738059997559, "logits/rejected": -5.935521602630615, "logps/chosen": -306.05322265625, "logps/rejected": -196.8937530517578, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 5.622448444366455, "rewards/margins": 12.221059799194336, "rewards/rejected": -6.598610877990723, "step": 3223 }, { "epoch": 1.79, "learning_rate": 2.8617990346277655e-08, "logits/chosen": -6.050510883331299, "logits/rejected": -6.017711639404297, "logps/chosen": -258.8898010253906, "logps/rejected": -228.153564453125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 3.7253167629241943, "rewards/margins": 11.087921142578125, "rewards/rejected": -7.36260461807251, "step": 3224 }, { "epoch": 1.79, "learning_rate": 2.846822431797774e-08, "logits/chosen": -6.0032758712768555, "logits/rejected": -5.90881872177124, "logps/chosen": -334.4164733886719, "logps/rejected": -320.8211975097656, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 3.9424920082092285, "rewards/margins": 12.144545555114746, "rewards/rejected": -8.20205307006836, "step": 3225 }, { "epoch": 1.79, "learning_rate": 2.8318839718502128e-08, "logits/chosen": -6.164638519287109, "logits/rejected": -6.037758827209473, "logps/chosen": -241.4820556640625, "logps/rejected": -206.54791259765625, "loss": 0.0444, "rewards/accuracies": 0.9375, "rewards/chosen": 2.4986486434936523, "rewards/margins": 11.168805122375488, "rewards/rejected": -8.670156478881836, "step": 3226 }, { "epoch": 1.79, "learning_rate": 2.816983666869016e-08, "logits/chosen": -5.93841552734375, "logits/rejected": -5.939122200012207, "logps/chosen": -297.5689392089844, "logps/rejected": -335.5250549316406, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 1.4407894611358643, "rewards/margins": 8.727620124816895, "rewards/rejected": -7.286830902099609, "step": 3227 }, { "epoch": 1.79, "learning_rate": 2.802121528907242e-08, "logits/chosen": -5.988894462585449, "logits/rejected": -6.090473175048828, "logps/chosen": -132.33523559570312, "logps/rejected": -262.1431884765625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 2.735999584197998, "rewards/margins": 12.807158470153809, "rewards/rejected": -10.071159362792969, "step": 3228 }, { "epoch": 1.79, "learning_rate": 2.787297569987107e-08, "logits/chosen": -6.194943428039551, "logits/rejected": -6.023226261138916, "logps/chosen": -300.0578308105469, "logps/rejected": -221.880126953125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 3.4585137367248535, "rewards/margins": 11.792947769165039, "rewards/rejected": -8.334433555603027, "step": 3229 }, { "epoch": 1.79, "learning_rate": 2.7725118020998972e-08, "logits/chosen": -6.002037048339844, "logits/rejected": -5.990523338317871, "logps/chosen": -197.65521240234375, "logps/rejected": -204.38540649414062, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 1.3275946378707886, "rewards/margins": 11.217484474182129, "rewards/rejected": -9.889888763427734, "step": 3230 }, { "epoch": 1.79, "learning_rate": 2.7577642372060673e-08, "logits/chosen": -5.964648723602295, "logits/rejected": -6.038343906402588, "logps/chosen": -190.143310546875, "logps/rejected": -239.84524536132812, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": 2.887425422668457, "rewards/margins": 11.144573211669922, "rewards/rejected": -8.257147789001465, "step": 3231 }, { "epoch": 1.79, "learning_rate": 2.7430548872351133e-08, "logits/chosen": -6.073660373687744, "logits/rejected": -6.00446081161499, "logps/chosen": -237.4790802001953, "logps/rejected": -237.71160888671875, "loss": 0.032, "rewards/accuracies": 0.9375, "rewards/chosen": 1.321238398551941, "rewards/margins": 11.606738090515137, "rewards/rejected": -10.285500526428223, "step": 3232 }, { "epoch": 1.79, "learning_rate": 2.7283837640856346e-08, "logits/chosen": -6.094845771789551, "logits/rejected": -6.00308895111084, "logps/chosen": -262.8099365234375, "logps/rejected": -148.66050720214844, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 4.546489238739014, "rewards/margins": 11.17628288269043, "rewards/rejected": -6.629793167114258, "step": 3233 }, { "epoch": 1.8, "learning_rate": 2.7137508796253206e-08, "logits/chosen": -6.030771255493164, "logits/rejected": -6.035626411437988, "logps/chosen": -453.7962341308594, "logps/rejected": -487.2325134277344, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": 3.898221254348755, "rewards/margins": 14.440180778503418, "rewards/rejected": -10.541959762573242, "step": 3234 }, { "epoch": 1.8, "learning_rate": 2.69915624569092e-08, "logits/chosen": -5.9910454750061035, "logits/rejected": -6.078117370605469, "logps/chosen": -208.61917114257812, "logps/rejected": -256.96875, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 2.278667449951172, "rewards/margins": 13.624423027038574, "rewards/rejected": -11.345754623413086, "step": 3235 }, { "epoch": 1.8, "learning_rate": 2.6845998740882557e-08, "logits/chosen": -5.995613098144531, "logits/rejected": -5.823143482208252, "logps/chosen": -276.35186767578125, "logps/rejected": -324.10699462890625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 5.914069652557373, "rewards/margins": 15.208332061767578, "rewards/rejected": -9.294261932373047, "step": 3236 }, { "epoch": 1.8, "learning_rate": 2.6700817765921645e-08, "logits/chosen": -6.066481113433838, "logits/rejected": -5.980118751525879, "logps/chosen": -326.7569885253906, "logps/rejected": -132.5823974609375, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": 5.578564643859863, "rewards/margins": 12.843042373657227, "rewards/rejected": -7.264477729797363, "step": 3237 }, { "epoch": 1.8, "learning_rate": 2.6556019649465522e-08, "logits/chosen": -6.004019737243652, "logits/rejected": -6.051750659942627, "logps/chosen": -287.18939208984375, "logps/rejected": -194.22097778320312, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": 4.451593399047852, "rewards/margins": 13.180001258850098, "rewards/rejected": -8.72840690612793, "step": 3238 }, { "epoch": 1.8, "learning_rate": 2.6411604508643548e-08, "logits/chosen": -6.023707866668701, "logits/rejected": -5.946256637573242, "logps/chosen": -294.10107421875, "logps/rejected": -144.06027221679688, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 7.634124279022217, "rewards/margins": 14.602211952209473, "rewards/rejected": -6.968087673187256, "step": 3239 }, { "epoch": 1.8, "learning_rate": 2.6267572460275056e-08, "logits/chosen": -6.112353801727295, "logits/rejected": -6.041937351226807, "logps/chosen": -257.3204040527344, "logps/rejected": -301.39129638671875, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": 3.593935489654541, "rewards/margins": 13.613945007324219, "rewards/rejected": -10.02000904083252, "step": 3240 }, { "epoch": 1.8, "learning_rate": 2.6123923620869792e-08, "logits/chosen": -5.976321220397949, "logits/rejected": -5.997453689575195, "logps/chosen": -224.4913330078125, "logps/rejected": -291.190185546875, "loss": 0.0342, "rewards/accuracies": 0.9375, "rewards/chosen": 3.373166084289551, "rewards/margins": 10.106030464172363, "rewards/rejected": -6.732863426208496, "step": 3241 }, { "epoch": 1.8, "learning_rate": 2.5980658106627197e-08, "logits/chosen": -5.948840141296387, "logits/rejected": -6.087182998657227, "logps/chosen": -300.38397216796875, "logps/rejected": -339.6183776855469, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 6.6789326667785645, "rewards/margins": 13.171884536743164, "rewards/rejected": -6.492952823638916, "step": 3242 }, { "epoch": 1.8, "learning_rate": 2.5837776033436954e-08, "logits/chosen": -6.117251873016357, "logits/rejected": -5.899454116821289, "logps/chosen": -283.64593505859375, "logps/rejected": -199.2191162109375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 3.617182970046997, "rewards/margins": 10.503917694091797, "rewards/rejected": -6.886734962463379, "step": 3243 }, { "epoch": 1.8, "learning_rate": 2.5695277516878333e-08, "logits/chosen": -6.044486045837402, "logits/rejected": -6.054878234863281, "logps/chosen": -302.2994384765625, "logps/rejected": -230.2154541015625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 5.057141304016113, "rewards/margins": 14.719377517700195, "rewards/rejected": -9.662236213684082, "step": 3244 }, { "epoch": 1.8, "learning_rate": 2.5553162672220464e-08, "logits/chosen": -6.020636558532715, "logits/rejected": -5.9211015701293945, "logps/chosen": -355.3374938964844, "logps/rejected": -239.62380981445312, "loss": 0.1052, "rewards/accuracies": 0.875, "rewards/chosen": 4.367380142211914, "rewards/margins": 8.76144790649414, "rewards/rejected": -4.394068241119385, "step": 3245 }, { "epoch": 1.8, "learning_rate": 2.5411431614422053e-08, "logits/chosen": -5.960625648498535, "logits/rejected": -6.1254191398620605, "logps/chosen": -192.56814575195312, "logps/rejected": -257.1708679199219, "loss": 0.0132, "rewards/accuracies": 0.9375, "rewards/chosen": 1.5150203704833984, "rewards/margins": 13.2183256149292, "rewards/rejected": -11.7033052444458, "step": 3246 }, { "epoch": 1.8, "learning_rate": 2.5270084458131392e-08, "logits/chosen": -6.094138145446777, "logits/rejected": -6.10984992980957, "logps/chosen": -191.1673583984375, "logps/rejected": -143.7901611328125, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": 3.612034797668457, "rewards/margins": 11.7101411819458, "rewards/rejected": -8.098106384277344, "step": 3247 }, { "epoch": 1.8, "learning_rate": 2.5129121317686353e-08, "logits/chosen": -6.003761291503906, "logits/rejected": -6.059739112854004, "logps/chosen": -292.2894287109375, "logps/rejected": -176.7078094482422, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": 7.3642168045043945, "rewards/margins": 14.714174270629883, "rewards/rejected": -7.349957466125488, "step": 3248 }, { "epoch": 1.8, "learning_rate": 2.4988542307113837e-08, "logits/chosen": -6.043878555297852, "logits/rejected": -6.095915794372559, "logps/chosen": -207.1843719482422, "logps/rejected": -298.827392578125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": 1.8562711477279663, "rewards/margins": 12.421189308166504, "rewards/rejected": -10.564918518066406, "step": 3249 }, { "epoch": 1.8, "learning_rate": 2.484834754013043e-08, "logits/chosen": -5.954558849334717, "logits/rejected": -6.037201881408691, "logps/chosen": -369.12335205078125, "logps/rejected": -284.5611267089844, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 7.345415115356445, "rewards/margins": 13.710695266723633, "rewards/rejected": -6.365279197692871, "step": 3250 }, { "epoch": 1.8, "learning_rate": 2.470853713014148e-08, "logits/chosen": -6.069169998168945, "logits/rejected": -5.979931831359863, "logps/chosen": -283.434326171875, "logps/rejected": -235.32571411132812, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 3.1436190605163574, "rewards/margins": 10.50223159790039, "rewards/rejected": -7.358613014221191, "step": 3251 }, { "epoch": 1.81, "learning_rate": 2.45691111902418e-08, "logits/chosen": -6.144805431365967, "logits/rejected": -6.071228504180908, "logps/chosen": -337.7286682128906, "logps/rejected": -199.4727325439453, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": 5.059209823608398, "rewards/margins": 13.475451469421387, "rewards/rejected": -8.416242599487305, "step": 3252 }, { "epoch": 1.81, "learning_rate": 2.4430069833214994e-08, "logits/chosen": -5.9067535400390625, "logits/rejected": -6.006453514099121, "logps/chosen": -338.97235107421875, "logps/rejected": -491.8143005371094, "loss": 0.0235, "rewards/accuracies": 0.9375, "rewards/chosen": 5.77059268951416, "rewards/margins": 10.312833786010742, "rewards/rejected": -4.542241096496582, "step": 3253 }, { "epoch": 1.81, "learning_rate": 2.4291413171533547e-08, "logits/chosen": -6.079102993011475, "logits/rejected": -5.938828468322754, "logps/chosen": -475.57574462890625, "logps/rejected": -496.0801086425781, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 7.446325778961182, "rewards/margins": 18.123268127441406, "rewards/rejected": -10.676942825317383, "step": 3254 }, { "epoch": 1.81, "learning_rate": 2.415314131735885e-08, "logits/chosen": -5.987002849578857, "logits/rejected": -5.990908145904541, "logps/chosen": -236.3185272216797, "logps/rejected": -215.59759521484375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 3.9322752952575684, "rewards/margins": 11.358233451843262, "rewards/rejected": -7.425958156585693, "step": 3255 }, { "epoch": 1.81, "learning_rate": 2.4015254382541038e-08, "logits/chosen": -5.9179768562316895, "logits/rejected": -5.993685722351074, "logps/chosen": -205.86692810058594, "logps/rejected": -203.67129516601562, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 4.588017463684082, "rewards/margins": 11.80866813659668, "rewards/rejected": -7.220650672912598, "step": 3256 }, { "epoch": 1.81, "learning_rate": 2.3877752478618896e-08, "logits/chosen": -5.977340221405029, "logits/rejected": -6.027808666229248, "logps/chosen": -197.67718505859375, "logps/rejected": -196.61793518066406, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 3.2285544872283936, "rewards/margins": 9.71206283569336, "rewards/rejected": -6.4835076332092285, "step": 3257 }, { "epoch": 1.81, "learning_rate": 2.374063571681956e-08, "logits/chosen": -5.9717583656311035, "logits/rejected": -6.040890693664551, "logps/chosen": -237.7195587158203, "logps/rejected": -353.07818603515625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 3.4857711791992188, "rewards/margins": 13.975187301635742, "rewards/rejected": -10.489416122436523, "step": 3258 }, { "epoch": 1.81, "learning_rate": 2.3603904208058688e-08, "logits/chosen": -6.0169525146484375, "logits/rejected": -5.99926233291626, "logps/chosen": -223.88656616210938, "logps/rejected": -141.11880493164062, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 5.824236869812012, "rewards/margins": 12.832180976867676, "rewards/rejected": -7.007944107055664, "step": 3259 }, { "epoch": 1.81, "learning_rate": 2.3467558062940596e-08, "logits/chosen": -5.987382411956787, "logits/rejected": -6.05640983581543, "logps/chosen": -175.6946258544922, "logps/rejected": -200.7577667236328, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": 2.912074327468872, "rewards/margins": 11.933923721313477, "rewards/rejected": -9.021849632263184, "step": 3260 }, { "epoch": 1.81, "learning_rate": 2.333159739175744e-08, "logits/chosen": -6.14028263092041, "logits/rejected": -5.83730411529541, "logps/chosen": -357.7048645019531, "logps/rejected": -144.526611328125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 5.992561340332031, "rewards/margins": 14.688636779785156, "rewards/rejected": -8.696075439453125, "step": 3261 }, { "epoch": 1.81, "learning_rate": 2.319602230448986e-08, "logits/chosen": -6.116500377655029, "logits/rejected": -5.8919243812561035, "logps/chosen": -241.010009765625, "logps/rejected": -87.4364013671875, "loss": 0.0238, "rewards/accuracies": 0.875, "rewards/chosen": 4.195842742919922, "rewards/margins": 10.1873779296875, "rewards/rejected": -5.99153470993042, "step": 3262 }, { "epoch": 1.81, "learning_rate": 2.3060832910806405e-08, "logits/chosen": -5.981832981109619, "logits/rejected": -6.113840103149414, "logps/chosen": -292.6789855957031, "logps/rejected": -293.406005859375, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 3.5558791160583496, "rewards/margins": 10.843652725219727, "rewards/rejected": -7.287773132324219, "step": 3263 }, { "epoch": 1.81, "learning_rate": 2.2926029320063766e-08, "logits/chosen": -6.108280181884766, "logits/rejected": -6.058965682983398, "logps/chosen": -260.5918884277344, "logps/rejected": -159.3732452392578, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": 3.8329176902770996, "rewards/margins": 11.481337547302246, "rewards/rejected": -7.6484198570251465, "step": 3264 }, { "epoch": 1.81, "learning_rate": 2.2791611641306553e-08, "logits/chosen": -6.150182723999023, "logits/rejected": -6.07251501083374, "logps/chosen": -242.9721221923828, "logps/rejected": -171.57948303222656, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 4.941057205200195, "rewards/margins": 12.43869400024414, "rewards/rejected": -7.497636795043945, "step": 3265 }, { "epoch": 1.81, "learning_rate": 2.265757998326706e-08, "logits/chosen": -5.993236541748047, "logits/rejected": -6.100301742553711, "logps/chosen": -338.023681640625, "logps/rejected": -434.8652648925781, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": 4.120901107788086, "rewards/margins": 14.22021484375, "rewards/rejected": -10.09931468963623, "step": 3266 }, { "epoch": 1.81, "learning_rate": 2.2523934454365456e-08, "logits/chosen": -5.953132629394531, "logits/rejected": -5.963675498962402, "logps/chosen": -290.9394836425781, "logps/rejected": -245.26971435546875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 4.060863971710205, "rewards/margins": 9.692785263061523, "rewards/rejected": -5.631921291351318, "step": 3267 }, { "epoch": 1.81, "learning_rate": 2.2390675162709537e-08, "logits/chosen": -6.043674468994141, "logits/rejected": -6.111480712890625, "logps/chosen": -233.01541137695312, "logps/rejected": -249.21615600585938, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 3.6964032649993896, "rewards/margins": 14.294485092163086, "rewards/rejected": -10.598081588745117, "step": 3268 }, { "epoch": 1.81, "learning_rate": 2.2257802216094744e-08, "logits/chosen": -5.978850364685059, "logits/rejected": -5.935583114624023, "logps/chosen": -275.5911865234375, "logps/rejected": -206.73562622070312, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": 5.951263427734375, "rewards/margins": 11.551459312438965, "rewards/rejected": -5.600196361541748, "step": 3269 }, { "epoch": 1.82, "learning_rate": 2.2125315722003766e-08, "logits/chosen": -6.003530502319336, "logits/rejected": -5.980175018310547, "logps/chosen": -405.296630859375, "logps/rejected": -379.4683837890625, "loss": 0.0315, "rewards/accuracies": 0.9375, "rewards/chosen": 4.271256446838379, "rewards/margins": 14.723990440368652, "rewards/rejected": -10.452733993530273, "step": 3270 }, { "epoch": 1.82, "learning_rate": 2.1993215787606935e-08, "logits/chosen": -6.095696449279785, "logits/rejected": -6.040614604949951, "logps/chosen": -216.94725036621094, "logps/rejected": -164.45933532714844, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 3.4505715370178223, "rewards/margins": 11.560577392578125, "rewards/rejected": -8.110005378723145, "step": 3271 }, { "epoch": 1.82, "learning_rate": 2.1861502519761708e-08, "logits/chosen": -6.082139015197754, "logits/rejected": -6.085935115814209, "logps/chosen": -230.48681640625, "logps/rejected": -326.3502197265625, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 2.618851661682129, "rewards/margins": 13.827241897583008, "rewards/rejected": -11.208390235900879, "step": 3272 }, { "epoch": 1.82, "learning_rate": 2.1730176025012813e-08, "logits/chosen": -5.957664489746094, "logits/rejected": -6.007284164428711, "logps/chosen": -307.7679138183594, "logps/rejected": -286.1225891113281, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": 5.881277084350586, "rewards/margins": 13.104446411132812, "rewards/rejected": -7.223169326782227, "step": 3273 }, { "epoch": 1.82, "learning_rate": 2.1599236409592327e-08, "logits/chosen": -6.157125473022461, "logits/rejected": -5.984504222869873, "logps/chosen": -285.9309997558594, "logps/rejected": -189.5457763671875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 5.394282341003418, "rewards/margins": 13.417007446289062, "rewards/rejected": -8.022726058959961, "step": 3274 }, { "epoch": 1.82, "learning_rate": 2.1468683779418963e-08, "logits/chosen": -6.125405788421631, "logits/rejected": -6.183160305023193, "logps/chosen": -274.21356201171875, "logps/rejected": -243.25857543945312, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 3.7857666015625, "rewards/margins": 11.766927719116211, "rewards/rejected": -7.9811601638793945, "step": 3275 }, { "epoch": 1.82, "learning_rate": 2.133851824009869e-08, "logits/chosen": -6.047177314758301, "logits/rejected": -6.063991546630859, "logps/chosen": -224.7556610107422, "logps/rejected": -206.8470458984375, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 2.636833667755127, "rewards/margins": 10.12826156616211, "rewards/rejected": -7.491428375244141, "step": 3276 }, { "epoch": 1.82, "learning_rate": 2.1208739896924344e-08, "logits/chosen": -6.023198127746582, "logits/rejected": -6.092850208282471, "logps/chosen": -293.47344970703125, "logps/rejected": -323.9674987792969, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": 3.126631736755371, "rewards/margins": 12.64784049987793, "rewards/rejected": -9.521209716796875, "step": 3277 }, { "epoch": 1.82, "learning_rate": 2.10793488548755e-08, "logits/chosen": -6.0359625816345215, "logits/rejected": -5.905218601226807, "logps/chosen": -296.3853759765625, "logps/rejected": -169.99850463867188, "loss": 0.0293, "rewards/accuracies": 0.9375, "rewards/chosen": 5.441250801086426, "rewards/margins": 10.862822532653809, "rewards/rejected": -5.421571731567383, "step": 3278 }, { "epoch": 1.82, "learning_rate": 2.0950345218618327e-08, "logits/chosen": -6.108270645141602, "logits/rejected": -6.088973522186279, "logps/chosen": -276.9489440917969, "logps/rejected": -215.42120361328125, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": 2.707644462585449, "rewards/margins": 11.454750061035156, "rewards/rejected": -8.747105598449707, "step": 3279 }, { "epoch": 1.82, "learning_rate": 2.0821729092505678e-08, "logits/chosen": -6.089471817016602, "logits/rejected": -6.047364711761475, "logps/chosen": -186.97354125976562, "logps/rejected": -133.2260284423828, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 6.8429856300354, "rewards/margins": 12.980792999267578, "rewards/rejected": -6.137807369232178, "step": 3280 }, { "epoch": 1.82, "learning_rate": 2.0693500580577226e-08, "logits/chosen": -5.990869045257568, "logits/rejected": -5.956447601318359, "logps/chosen": -431.6676025390625, "logps/rejected": -296.5794677734375, "loss": 0.0172, "rewards/accuracies": 0.9375, "rewards/chosen": 6.645805358886719, "rewards/margins": 12.648326873779297, "rewards/rejected": -6.002520561218262, "step": 3281 }, { "epoch": 1.82, "learning_rate": 2.056565978655861e-08, "logits/chosen": -6.012895107269287, "logits/rejected": -6.118795871734619, "logps/chosen": -489.68115234375, "logps/rejected": -641.6204833984375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 3.5110671520233154, "rewards/margins": 16.173444747924805, "rewards/rejected": -12.662378311157227, "step": 3282 }, { "epoch": 1.82, "learning_rate": 2.0438206813862267e-08, "logits/chosen": -6.163917541503906, "logits/rejected": -6.0887861251831055, "logps/chosen": -234.07608032226562, "logps/rejected": -162.92333984375, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 4.801220417022705, "rewards/margins": 12.880814552307129, "rewards/rejected": -8.079593658447266, "step": 3283 }, { "epoch": 1.82, "learning_rate": 2.031114176558657e-08, "logits/chosen": -5.996232986450195, "logits/rejected": -6.052849769592285, "logps/chosen": -230.25540161132812, "logps/rejected": -261.5813903808594, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": 3.7163374423980713, "rewards/margins": 14.300254821777344, "rewards/rejected": -10.583917617797852, "step": 3284 }, { "epoch": 1.82, "learning_rate": 2.018446474451635e-08, "logits/chosen": -6.105276107788086, "logits/rejected": -5.961866855621338, "logps/chosen": -220.5656280517578, "logps/rejected": -179.75357055664062, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": 5.369378566741943, "rewards/margins": 11.954108238220215, "rewards/rejected": -6.5847296714782715, "step": 3285 }, { "epoch": 1.82, "learning_rate": 2.0058175853122528e-08, "logits/chosen": -5.964850425720215, "logits/rejected": -6.042847156524658, "logps/chosen": -357.8571472167969, "logps/rejected": -342.2859802246094, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 4.919969081878662, "rewards/margins": 12.724574089050293, "rewards/rejected": -7.804605484008789, "step": 3286 }, { "epoch": 1.82, "learning_rate": 1.9932275193561886e-08, "logits/chosen": -6.05385684967041, "logits/rejected": -6.0218000411987305, "logps/chosen": -209.01026916503906, "logps/rejected": -136.72726440429688, "loss": 0.2319, "rewards/accuracies": 0.9375, "rewards/chosen": 3.25284481048584, "rewards/margins": 10.533966064453125, "rewards/rejected": -7.281120300292969, "step": 3287 }, { "epoch": 1.83, "learning_rate": 1.9806762867677294e-08, "logits/chosen": -6.067639350891113, "logits/rejected": -6.190415382385254, "logps/chosen": -494.39892578125, "logps/rejected": -507.68463134765625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 3.6789841651916504, "rewards/margins": 14.172292709350586, "rewards/rejected": -10.493309020996094, "step": 3288 }, { "epoch": 1.83, "learning_rate": 1.9681638976997485e-08, "logits/chosen": -6.102186679840088, "logits/rejected": -6.086432456970215, "logps/chosen": -167.3407440185547, "logps/rejected": -197.79837036132812, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 2.0757267475128174, "rewards/margins": 11.03568172454834, "rewards/rejected": -8.959955215454102, "step": 3289 }, { "epoch": 1.83, "learning_rate": 1.9556903622737097e-08, "logits/chosen": -6.089737892150879, "logits/rejected": -5.969570159912109, "logps/chosen": -312.677978515625, "logps/rejected": -119.89016723632812, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 7.164000988006592, "rewards/margins": 13.389739990234375, "rewards/rejected": -6.225737571716309, "step": 3290 }, { "epoch": 1.83, "learning_rate": 1.943255690579615e-08, "logits/chosen": -6.004214763641357, "logits/rejected": -6.1177978515625, "logps/chosen": -269.24688720703125, "logps/rejected": -323.2248229980469, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": 6.108297348022461, "rewards/margins": 15.002123832702637, "rewards/rejected": -8.89382553100586, "step": 3291 }, { "epoch": 1.83, "learning_rate": 1.9308598926760623e-08, "logits/chosen": -6.061928749084473, "logits/rejected": -5.970823287963867, "logps/chosen": -299.2355651855469, "logps/rejected": -197.03146362304688, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": 7.138223171234131, "rewards/margins": 13.711504936218262, "rewards/rejected": -6.573282718658447, "step": 3292 }, { "epoch": 1.83, "learning_rate": 1.9185029785901863e-08, "logits/chosen": -6.03497314453125, "logits/rejected": -6.004091262817383, "logps/chosen": -274.669189453125, "logps/rejected": -231.72933959960938, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": 5.993549346923828, "rewards/margins": 13.85655689239502, "rewards/rejected": -7.863007068634033, "step": 3293 }, { "epoch": 1.83, "learning_rate": 1.9061849583176636e-08, "logits/chosen": -6.0551910400390625, "logits/rejected": -6.021816253662109, "logps/chosen": -191.41744995117188, "logps/rejected": -171.32485961914062, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 2.8553361892700195, "rewards/margins": 11.830022811889648, "rewards/rejected": -8.974687576293945, "step": 3294 }, { "epoch": 1.83, "learning_rate": 1.8939058418227405e-08, "logits/chosen": -6.150834560394287, "logits/rejected": -6.060513973236084, "logps/chosen": -229.75440979003906, "logps/rejected": -178.4113311767578, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": 3.946598529815674, "rewards/margins": 10.804866790771484, "rewards/rejected": -6.8582682609558105, "step": 3295 }, { "epoch": 1.83, "learning_rate": 1.8816656390381437e-08, "logits/chosen": -6.058394908905029, "logits/rejected": -6.057329177856445, "logps/chosen": -225.0355682373047, "logps/rejected": -207.16799926757812, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 3.3424525260925293, "rewards/margins": 12.784292221069336, "rewards/rejected": -9.441839218139648, "step": 3296 }, { "epoch": 1.83, "learning_rate": 1.8694643598651694e-08, "logits/chosen": -6.041703224182129, "logits/rejected": -6.045993328094482, "logps/chosen": -297.9451904296875, "logps/rejected": -217.4275665283203, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 5.688531875610352, "rewards/margins": 15.265497207641602, "rewards/rejected": -9.57696533203125, "step": 3297 }, { "epoch": 1.83, "learning_rate": 1.8573020141736005e-08, "logits/chosen": -6.065128326416016, "logits/rejected": -6.108821392059326, "logps/chosen": -239.5850830078125, "logps/rejected": -172.67352294921875, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": 6.972054481506348, "rewards/margins": 10.445112228393555, "rewards/rejected": -3.473057746887207, "step": 3298 }, { "epoch": 1.83, "learning_rate": 1.8451786118017233e-08, "logits/chosen": -6.066531658172607, "logits/rejected": -5.946734428405762, "logps/chosen": -270.57366943359375, "logps/rejected": -182.32308959960938, "loss": 0.0262, "rewards/accuracies": 0.875, "rewards/chosen": 5.619869232177734, "rewards/margins": 11.328754425048828, "rewards/rejected": -5.708884239196777, "step": 3299 }, { "epoch": 1.83, "learning_rate": 1.8330941625563535e-08, "logits/chosen": -6.1922149658203125, "logits/rejected": -6.0814056396484375, "logps/chosen": -322.14251708984375, "logps/rejected": -272.4261169433594, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": 5.83840274810791, "rewards/margins": 13.525158882141113, "rewards/rejected": -7.686756134033203, "step": 3300 }, { "epoch": 1.83, "learning_rate": 1.8210486762127498e-08, "logits/chosen": -5.970024108886719, "logits/rejected": -5.980034351348877, "logps/chosen": -217.26559448242188, "logps/rejected": -259.9263916015625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 3.2821767330169678, "rewards/margins": 13.051446914672852, "rewards/rejected": -9.769268989562988, "step": 3301 }, { "epoch": 1.83, "learning_rate": 1.8090421625147013e-08, "logits/chosen": -6.112585067749023, "logits/rejected": -5.983955383300781, "logps/chosen": -274.19732666015625, "logps/rejected": -242.85723876953125, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 3.99153995513916, "rewards/margins": 13.528579711914062, "rewards/rejected": -9.537038803100586, "step": 3302 }, { "epoch": 1.83, "learning_rate": 1.7970746311744333e-08, "logits/chosen": -6.271697521209717, "logits/rejected": -5.9770379066467285, "logps/chosen": -324.464599609375, "logps/rejected": -148.81419372558594, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 5.551766872406006, "rewards/margins": 12.918859481811523, "rewards/rejected": -7.367091655731201, "step": 3303 }, { "epoch": 1.83, "learning_rate": 1.7851460918726746e-08, "logits/chosen": -6.020170211791992, "logits/rejected": -6.0982866287231445, "logps/chosen": -294.4625244140625, "logps/rejected": -288.9921875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 4.2920613288879395, "rewards/margins": 12.287336349487305, "rewards/rejected": -7.995275020599365, "step": 3304 }, { "epoch": 1.83, "learning_rate": 1.7732565542585676e-08, "logits/chosen": -6.065863132476807, "logits/rejected": -6.186495780944824, "logps/chosen": -242.14260864257812, "logps/rejected": -264.2710266113281, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 4.5047831535339355, "rewards/margins": 14.257736206054688, "rewards/rejected": -9.752952575683594, "step": 3305 }, { "epoch": 1.84, "learning_rate": 1.761406027949741e-08, "logits/chosen": -6.014711380004883, "logits/rejected": -6.073516368865967, "logps/chosen": -173.57241821289062, "logps/rejected": -188.4756622314453, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": 3.3429172039031982, "rewards/margins": 11.059961318969727, "rewards/rejected": -7.717043876647949, "step": 3306 }, { "epoch": 1.84, "learning_rate": 1.7495945225322605e-08, "logits/chosen": -5.962069511413574, "logits/rejected": -5.975912570953369, "logps/chosen": -234.69784545898438, "logps/rejected": -204.1746826171875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 2.702373504638672, "rewards/margins": 11.407154083251953, "rewards/rejected": -8.704780578613281, "step": 3307 }, { "epoch": 1.84, "learning_rate": 1.737822047560611e-08, "logits/chosen": -5.981295108795166, "logits/rejected": -5.941031455993652, "logps/chosen": -192.68360900878906, "logps/rejected": -100.48515319824219, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": 2.654165744781494, "rewards/margins": 9.083084106445312, "rewards/rejected": -6.428918361663818, "step": 3308 }, { "epoch": 1.84, "learning_rate": 1.7260886125577245e-08, "logits/chosen": -5.9726152420043945, "logits/rejected": -6.094130516052246, "logps/chosen": -349.025634765625, "logps/rejected": -211.90863037109375, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": 7.576676368713379, "rewards/margins": 14.130451202392578, "rewards/rejected": -6.553774833679199, "step": 3309 }, { "epoch": 1.84, "learning_rate": 1.7143942270149425e-08, "logits/chosen": -6.113301753997803, "logits/rejected": -6.162169933319092, "logps/chosen": -187.16046142578125, "logps/rejected": -250.9759979248047, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 2.2525882720947266, "rewards/margins": 10.897563934326172, "rewards/rejected": -8.644975662231445, "step": 3310 }, { "epoch": 1.84, "learning_rate": 1.7027389003920312e-08, "logits/chosen": -5.919389724731445, "logits/rejected": -6.038262367248535, "logps/chosen": -229.22317504882812, "logps/rejected": -553.4820556640625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 4.343230247497559, "rewards/margins": 13.006050109863281, "rewards/rejected": -8.662818908691406, "step": 3311 }, { "epoch": 1.84, "learning_rate": 1.6911226421171378e-08, "logits/chosen": -6.214164733886719, "logits/rejected": -5.9667158126831055, "logps/chosen": -230.23956298828125, "logps/rejected": -89.06584930419922, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 3.7181997299194336, "rewards/margins": 10.859790802001953, "rewards/rejected": -7.141590595245361, "step": 3312 }, { "epoch": 1.84, "learning_rate": 1.679545461586823e-08, "logits/chosen": -6.051271915435791, "logits/rejected": -5.952699661254883, "logps/chosen": -305.1065673828125, "logps/rejected": -193.09793090820312, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": 5.620204925537109, "rewards/margins": 13.364339828491211, "rewards/rejected": -7.744133949279785, "step": 3313 }, { "epoch": 1.84, "learning_rate": 1.6680073681660357e-08, "logits/chosen": -6.0945258140563965, "logits/rejected": -6.178229331970215, "logps/chosen": -247.73611450195312, "logps/rejected": -254.55984497070312, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 2.862027883529663, "rewards/margins": 12.481433868408203, "rewards/rejected": -9.619405746459961, "step": 3314 }, { "epoch": 1.84, "learning_rate": 1.6565083711881088e-08, "logits/chosen": -5.975027084350586, "logits/rejected": -6.066329002380371, "logps/chosen": -223.82470703125, "logps/rejected": -230.4442138671875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 4.081107139587402, "rewards/margins": 14.391390800476074, "rewards/rejected": -10.310283660888672, "step": 3315 }, { "epoch": 1.84, "learning_rate": 1.645048479954747e-08, "logits/chosen": -6.065247058868408, "logits/rejected": -5.94964075088501, "logps/chosen": -202.0082550048828, "logps/rejected": -214.42205810546875, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": 2.8460533618927, "rewards/margins": 11.262662887573242, "rewards/rejected": -8.416608810424805, "step": 3316 }, { "epoch": 1.84, "learning_rate": 1.6336277037360126e-08, "logits/chosen": -6.1407928466796875, "logits/rejected": -6.20366096496582, "logps/chosen": -212.23126220703125, "logps/rejected": -253.8479461669922, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": 2.7747995853424072, "rewards/margins": 14.696281433105469, "rewards/rejected": -11.92148208618164, "step": 3317 }, { "epoch": 1.84, "learning_rate": 1.622246051770343e-08, "logits/chosen": -5.990303993225098, "logits/rejected": -6.057481288909912, "logps/chosen": -201.75210571289062, "logps/rejected": -249.6453857421875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 3.851018190383911, "rewards/margins": 10.92568588256836, "rewards/rejected": -7.074667930603027, "step": 3318 }, { "epoch": 1.84, "learning_rate": 1.610903533264518e-08, "logits/chosen": -5.946440696716309, "logits/rejected": -5.84768009185791, "logps/chosen": -219.06051635742188, "logps/rejected": -112.38284301757812, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": 4.237006187438965, "rewards/margins": 10.517810821533203, "rewards/rejected": -6.280804634094238, "step": 3319 }, { "epoch": 1.84, "learning_rate": 1.59960015739366e-08, "logits/chosen": -6.004345893859863, "logits/rejected": -6.030749797821045, "logps/chosen": -341.54595947265625, "logps/rejected": -264.8021545410156, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": 4.131326675415039, "rewards/margins": 13.860111236572266, "rewards/rejected": -9.728784561157227, "step": 3320 }, { "epoch": 1.84, "learning_rate": 1.5883359333012435e-08, "logits/chosen": -5.9979095458984375, "logits/rejected": -5.991394996643066, "logps/chosen": -222.5258331298828, "logps/rejected": -137.7519989013672, "loss": 0.0221, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3170018196105957, "rewards/margins": 9.455440521240234, "rewards/rejected": -7.138439178466797, "step": 3321 }, { "epoch": 1.84, "learning_rate": 1.577110870099041e-08, "logits/chosen": -5.973868370056152, "logits/rejected": -6.073707580566406, "logps/chosen": -256.4501953125, "logps/rejected": -255.05987548828125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 5.430474281311035, "rewards/margins": 13.395852088928223, "rewards/rejected": -7.965377330780029, "step": 3322 }, { "epoch": 1.84, "learning_rate": 1.5659249768671833e-08, "logits/chosen": -6.017251491546631, "logits/rejected": -5.975042343139648, "logps/chosen": -235.5242919921875, "logps/rejected": -157.51809692382812, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": 4.050312042236328, "rewards/margins": 13.217472076416016, "rewards/rejected": -9.167160034179688, "step": 3323 }, { "epoch": 1.85, "learning_rate": 1.554778262654094e-08, "logits/chosen": -6.016928195953369, "logits/rejected": -6.035608291625977, "logps/chosen": -288.968505859375, "logps/rejected": -193.65380859375, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": 4.092065811157227, "rewards/margins": 10.312679290771484, "rewards/rejected": -6.220613956451416, "step": 3324 }, { "epoch": 1.85, "learning_rate": 1.543670736476521e-08, "logits/chosen": -6.059978485107422, "logits/rejected": -5.970602035522461, "logps/chosen": -275.4258728027344, "logps/rejected": -102.91764831542969, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 7.149151802062988, "rewards/margins": 13.14815616607666, "rewards/rejected": -5.999004364013672, "step": 3325 }, { "epoch": 1.85, "learning_rate": 1.5326024073194832e-08, "logits/chosen": -5.905907154083252, "logits/rejected": -5.928322792053223, "logps/chosen": -254.10147094726562, "logps/rejected": -195.15724182128906, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": 6.183888912200928, "rewards/margins": 11.34099006652832, "rewards/rejected": -5.157100677490234, "step": 3326 }, { "epoch": 1.85, "learning_rate": 1.521573284136324e-08, "logits/chosen": -6.060632705688477, "logits/rejected": -6.122459888458252, "logps/chosen": -274.9136657714844, "logps/rejected": -202.7731475830078, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 5.976874828338623, "rewards/margins": 14.01775074005127, "rewards/rejected": -8.040875434875488, "step": 3327 }, { "epoch": 1.85, "learning_rate": 1.510583375848662e-08, "logits/chosen": -6.123024940490723, "logits/rejected": -5.969224452972412, "logps/chosen": -247.58958435058594, "logps/rejected": -136.02548217773438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 5.089559555053711, "rewards/margins": 11.93594741821289, "rewards/rejected": -6.84638786315918, "step": 3328 }, { "epoch": 1.85, "learning_rate": 1.499632691346375e-08, "logits/chosen": -5.999688148498535, "logits/rejected": -5.869468688964844, "logps/chosen": -364.5802917480469, "logps/rejected": -182.02960205078125, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 4.994538307189941, "rewards/margins": 11.771713256835938, "rewards/rejected": -6.7771759033203125, "step": 3329 }, { "epoch": 1.85, "learning_rate": 1.4887212394876502e-08, "logits/chosen": -6.078413963317871, "logits/rejected": -6.005262851715088, "logps/chosen": -246.6546630859375, "logps/rejected": -154.80075073242188, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 6.323077201843262, "rewards/margins": 15.095447540283203, "rewards/rejected": -8.772371292114258, "step": 3330 }, { "epoch": 1.85, "learning_rate": 1.4778490290988932e-08, "logits/chosen": -6.0139360427856445, "logits/rejected": -5.888696670532227, "logps/chosen": -320.0486145019531, "logps/rejected": -433.1320495605469, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 7.159575462341309, "rewards/margins": 15.508100509643555, "rewards/rejected": -8.348526000976562, "step": 3331 }, { "epoch": 1.85, "learning_rate": 1.467016068974819e-08, "logits/chosen": -5.957645416259766, "logits/rejected": -5.975652694702148, "logps/chosen": -293.5139465332031, "logps/rejected": -237.95169067382812, "loss": 0.0218, "rewards/accuracies": 0.9375, "rewards/chosen": 5.705361366271973, "rewards/margins": 14.30624008178711, "rewards/rejected": -8.600879669189453, "step": 3332 }, { "epoch": 1.85, "learning_rate": 1.4562223678783514e-08, "logits/chosen": -5.99807596206665, "logits/rejected": -6.029305934906006, "logps/chosen": -367.55914306640625, "logps/rejected": -282.4165344238281, "loss": 0.0694, "rewards/accuracies": 0.9375, "rewards/chosen": 4.5116119384765625, "rewards/margins": 11.74406623840332, "rewards/rejected": -7.232453346252441, "step": 3333 }, { "epoch": 1.85, "learning_rate": 1.4454679345406617e-08, "logits/chosen": -6.068562984466553, "logits/rejected": -6.025590896606445, "logps/chosen": -265.1132507324219, "logps/rejected": -258.446044921875, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": 4.560513496398926, "rewards/margins": 14.40533447265625, "rewards/rejected": -9.844820976257324, "step": 3334 }, { "epoch": 1.85, "learning_rate": 1.4347527776611745e-08, "logits/chosen": -5.98836088180542, "logits/rejected": -5.983325958251953, "logps/chosen": -433.828857421875, "logps/rejected": -368.0505676269531, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": 7.548989772796631, "rewards/margins": 16.796527862548828, "rewards/rejected": -9.247539520263672, "step": 3335 }, { "epoch": 1.85, "learning_rate": 1.4240769059075341e-08, "logits/chosen": -6.062321662902832, "logits/rejected": -6.103654861450195, "logps/chosen": -251.01492309570312, "logps/rejected": -228.04592895507812, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/chosen": 4.82377290725708, "rewards/margins": 15.576898574829102, "rewards/rejected": -10.75312614440918, "step": 3336 }, { "epoch": 1.85, "learning_rate": 1.4134403279156105e-08, "logits/chosen": -5.941500186920166, "logits/rejected": -5.987128734588623, "logps/chosen": -280.4034423828125, "logps/rejected": -277.9828186035156, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 5.940541744232178, "rewards/margins": 15.397187232971191, "rewards/rejected": -9.456644058227539, "step": 3337 }, { "epoch": 1.85, "learning_rate": 1.4028430522894763e-08, "logits/chosen": -6.017164707183838, "logits/rejected": -6.175673007965088, "logps/chosen": -282.5832824707031, "logps/rejected": -293.36865234375, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": 5.014176368713379, "rewards/margins": 12.209280967712402, "rewards/rejected": -7.19510555267334, "step": 3338 }, { "epoch": 1.85, "learning_rate": 1.3922850876014191e-08, "logits/chosen": -6.011686325073242, "logits/rejected": -6.006749629974365, "logps/chosen": -279.7491760253906, "logps/rejected": -221.53939819335938, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 5.169230937957764, "rewards/margins": 11.532682418823242, "rewards/rejected": -6.363451957702637, "step": 3339 }, { "epoch": 1.85, "learning_rate": 1.381766442391935e-08, "logits/chosen": -6.075627326965332, "logits/rejected": -5.960385799407959, "logps/chosen": -285.0904541015625, "logps/rejected": -126.67306518554688, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": 4.960415840148926, "rewards/margins": 10.721516609191895, "rewards/rejected": -5.761099815368652, "step": 3340 }, { "epoch": 1.85, "learning_rate": 1.3712871251697012e-08, "logits/chosen": -6.214061737060547, "logits/rejected": -5.978861331939697, "logps/chosen": -303.7740783691406, "logps/rejected": -127.87993621826172, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 6.451715469360352, "rewards/margins": 15.680940628051758, "rewards/rejected": -9.229226112365723, "step": 3341 }, { "epoch": 1.86, "learning_rate": 1.3608471444115977e-08, "logits/chosen": -6.063032627105713, "logits/rejected": -6.07285213470459, "logps/chosen": -466.6789855957031, "logps/rejected": -258.52630615234375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 5.97473669052124, "rewards/margins": 13.051017761230469, "rewards/rejected": -7.0762810707092285, "step": 3342 }, { "epoch": 1.86, "learning_rate": 1.3504465085626637e-08, "logits/chosen": -6.128002643585205, "logits/rejected": -6.063518524169922, "logps/chosen": -198.17510986328125, "logps/rejected": -191.67486572265625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 3.4569742679595947, "rewards/margins": 12.317376136779785, "rewards/rejected": -8.860403060913086, "step": 3343 }, { "epoch": 1.86, "learning_rate": 1.340085226036125e-08, "logits/chosen": -6.019538879394531, "logits/rejected": -5.986756801605225, "logps/chosen": -318.7757873535156, "logps/rejected": -202.90391540527344, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 5.469503879547119, "rewards/margins": 11.282957077026367, "rewards/rejected": -5.813453674316406, "step": 3344 }, { "epoch": 1.86, "learning_rate": 1.3297633052133716e-08, "logits/chosen": -6.025663375854492, "logits/rejected": -6.053094387054443, "logps/chosen": -186.77328491210938, "logps/rejected": -263.93634033203125, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 2.790248394012451, "rewards/margins": 11.50636100769043, "rewards/rejected": -8.716113090515137, "step": 3345 }, { "epoch": 1.86, "learning_rate": 1.3194807544439635e-08, "logits/chosen": -5.964415073394775, "logits/rejected": -6.026451110839844, "logps/chosen": -360.3322448730469, "logps/rejected": -466.9540100097656, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 5.316598892211914, "rewards/margins": 14.548282623291016, "rewards/rejected": -9.231683731079102, "step": 3346 }, { "epoch": 1.86, "learning_rate": 1.3092375820455915e-08, "logits/chosen": -5.908056259155273, "logits/rejected": -5.979411602020264, "logps/chosen": -338.54864501953125, "logps/rejected": -300.5401916503906, "loss": 0.0272, "rewards/accuracies": 0.9375, "rewards/chosen": 5.849550724029541, "rewards/margins": 12.736907958984375, "rewards/rejected": -6.887357234954834, "step": 3347 }, { "epoch": 1.86, "learning_rate": 1.299033796304111e-08, "logits/chosen": -6.019075870513916, "logits/rejected": -6.1561784744262695, "logps/chosen": -258.24273681640625, "logps/rejected": -294.6044616699219, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 4.019927024841309, "rewards/margins": 15.019638061523438, "rewards/rejected": -10.999711036682129, "step": 3348 }, { "epoch": 1.86, "learning_rate": 1.2888694054735083e-08, "logits/chosen": -6.010910511016846, "logits/rejected": -5.893653869628906, "logps/chosen": -351.7397766113281, "logps/rejected": -193.8931121826172, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 4.627850532531738, "rewards/margins": 11.696149826049805, "rewards/rejected": -7.068299293518066, "step": 3349 }, { "epoch": 1.86, "learning_rate": 1.2787444177759065e-08, "logits/chosen": -5.984038352966309, "logits/rejected": -6.002257823944092, "logps/chosen": -406.81866455078125, "logps/rejected": -434.6800537109375, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 3.5439257621765137, "rewards/margins": 14.135004043579102, "rewards/rejected": -10.59107780456543, "step": 3350 }, { "epoch": 1.86, "learning_rate": 1.2686588414015542e-08, "logits/chosen": -6.027121543884277, "logits/rejected": -5.9462103843688965, "logps/chosen": -225.4637451171875, "logps/rejected": -221.07586669921875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 5.284140586853027, "rewards/margins": 15.248334884643555, "rewards/rejected": -9.964194297790527, "step": 3351 }, { "epoch": 1.86, "learning_rate": 1.2586126845088085e-08, "logits/chosen": -5.94842529296875, "logits/rejected": -5.998381614685059, "logps/chosen": -297.32440185546875, "logps/rejected": -294.20440673828125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 6.308582782745361, "rewards/margins": 13.461554527282715, "rewards/rejected": -7.1529717445373535, "step": 3352 }, { "epoch": 1.86, "learning_rate": 1.248605955224169e-08, "logits/chosen": -6.1103692054748535, "logits/rejected": -5.9426469802856445, "logps/chosen": -296.3826904296875, "logps/rejected": -180.24539184570312, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 7.014254093170166, "rewards/margins": 11.963459968566895, "rewards/rejected": -4.94920539855957, "step": 3353 }, { "epoch": 1.86, "learning_rate": 1.2386386616422162e-08, "logits/chosen": -5.938445568084717, "logits/rejected": -6.006350517272949, "logps/chosen": -267.79290771484375, "logps/rejected": -187.94085693359375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 4.999947547912598, "rewards/margins": 9.938475608825684, "rewards/rejected": -4.938528060913086, "step": 3354 }, { "epoch": 1.86, "learning_rate": 1.228710811825623e-08, "logits/chosen": -6.056388854980469, "logits/rejected": -5.965238094329834, "logps/chosen": -188.15179443359375, "logps/rejected": -180.69293212890625, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 2.8642778396606445, "rewards/margins": 12.20155143737793, "rewards/rejected": -9.337272644042969, "step": 3355 }, { "epoch": 1.86, "learning_rate": 1.2188224138051873e-08, "logits/chosen": -6.049042701721191, "logits/rejected": -5.981021881103516, "logps/chosen": -277.6458740234375, "logps/rejected": -220.5860595703125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 2.3970413208007812, "rewards/margins": 12.740461349487305, "rewards/rejected": -10.343420028686523, "step": 3356 }, { "epoch": 1.86, "learning_rate": 1.208973475579761e-08, "logits/chosen": -6.197788238525391, "logits/rejected": -5.97410249710083, "logps/chosen": -239.64718627929688, "logps/rejected": -123.73579406738281, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 4.950303077697754, "rewards/margins": 11.716531753540039, "rewards/rejected": -6.766228675842285, "step": 3357 }, { "epoch": 1.86, "learning_rate": 1.19916400511631e-08, "logits/chosen": -6.03148078918457, "logits/rejected": -6.013067245483398, "logps/chosen": -184.52337646484375, "logps/rejected": -160.58897399902344, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 3.124378204345703, "rewards/margins": 11.713932991027832, "rewards/rejected": -8.589555740356445, "step": 3358 }, { "epoch": 1.86, "learning_rate": 1.1893940103498479e-08, "logits/chosen": -6.0666680335998535, "logits/rejected": -6.064769268035889, "logps/chosen": -369.8250427246094, "logps/rejected": -266.7373046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 8.474491119384766, "rewards/margins": 15.58229923248291, "rewards/rejected": -7.1078081130981445, "step": 3359 }, { "epoch": 1.87, "learning_rate": 1.1796634991834476e-08, "logits/chosen": -6.063809394836426, "logits/rejected": -6.034440994262695, "logps/chosen": -289.21746826171875, "logps/rejected": -281.80950927734375, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": 4.292657852172852, "rewards/margins": 12.75204086303711, "rewards/rejected": -8.459383010864258, "step": 3360 }, { "epoch": 1.87, "learning_rate": 1.1699724794882848e-08, "logits/chosen": -6.0762128829956055, "logits/rejected": -5.888421058654785, "logps/chosen": -303.2669982910156, "logps/rejected": -157.573486328125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 6.284823417663574, "rewards/margins": 13.270628929138184, "rewards/rejected": -6.985804557800293, "step": 3361 }, { "epoch": 1.87, "learning_rate": 1.1603209591035445e-08, "logits/chosen": -5.9722442626953125, "logits/rejected": -5.993304252624512, "logps/chosen": -264.9093322753906, "logps/rejected": -187.490234375, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 6.342223167419434, "rewards/margins": 15.128053665161133, "rewards/rejected": -8.7858304977417, "step": 3362 }, { "epoch": 1.87, "learning_rate": 1.1507089458364983e-08, "logits/chosen": -6.189849853515625, "logits/rejected": -5.990263938903809, "logps/chosen": -273.1810302734375, "logps/rejected": -112.6644515991211, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": 6.050262451171875, "rewards/margins": 12.932615280151367, "rewards/rejected": -6.882352828979492, "step": 3363 }, { "epoch": 1.87, "learning_rate": 1.1411364474624264e-08, "logits/chosen": -6.063385486602783, "logits/rejected": -6.104442596435547, "logps/chosen": -439.3880615234375, "logps/rejected": -420.0786437988281, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 5.548847675323486, "rewards/margins": 14.424117088317871, "rewards/rejected": -8.875268936157227, "step": 3364 }, { "epoch": 1.87, "learning_rate": 1.1316034717246624e-08, "logits/chosen": -6.022680282592773, "logits/rejected": -6.099756240844727, "logps/chosen": -242.7550506591797, "logps/rejected": -332.6608581542969, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 2.3728034496307373, "rewards/margins": 13.248762130737305, "rewards/rejected": -10.875957489013672, "step": 3365 }, { "epoch": 1.87, "learning_rate": 1.1221100263345773e-08, "logits/chosen": -6.0356974601745605, "logits/rejected": -6.049222469329834, "logps/chosen": -215.7609100341797, "logps/rejected": -207.99708557128906, "loss": 0.0794, "rewards/accuracies": 1.0, "rewards/chosen": 4.406538963317871, "rewards/margins": 10.120767593383789, "rewards/rejected": -5.714228630065918, "step": 3366 }, { "epoch": 1.87, "learning_rate": 1.11265611897155e-08, "logits/chosen": -6.045170307159424, "logits/rejected": -5.950547218322754, "logps/chosen": -225.9496612548828, "logps/rejected": -141.54598999023438, "loss": 0.0665, "rewards/accuracies": 0.9375, "rewards/chosen": 4.677237033843994, "rewards/margins": 12.404596328735352, "rewards/rejected": -7.727359771728516, "step": 3367 }, { "epoch": 1.87, "learning_rate": 1.1032417572829744e-08, "logits/chosen": -6.0154948234558105, "logits/rejected": -6.055547714233398, "logps/chosen": -300.407470703125, "logps/rejected": -495.4078369140625, "loss": 0.0382, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2929227352142334, "rewards/margins": 13.06631851196289, "rewards/rejected": -9.773395538330078, "step": 3368 }, { "epoch": 1.87, "learning_rate": 1.09386694888427e-08, "logits/chosen": -6.137104034423828, "logits/rejected": -6.149275779724121, "logps/chosen": -315.48846435546875, "logps/rejected": -233.55319213867188, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": 5.467929840087891, "rewards/margins": 13.011184692382812, "rewards/rejected": -7.543254375457764, "step": 3369 }, { "epoch": 1.87, "learning_rate": 1.0845317013588595e-08, "logits/chosen": -6.076040744781494, "logits/rejected": -5.9115190505981445, "logps/chosen": -350.7526550292969, "logps/rejected": -191.75897216796875, "loss": 0.0203, "rewards/accuracies": 0.9375, "rewards/chosen": 3.3866119384765625, "rewards/margins": 7.878116607666016, "rewards/rejected": -4.491504669189453, "step": 3370 }, { "epoch": 1.87, "learning_rate": 1.075236022258147e-08, "logits/chosen": -6.119692802429199, "logits/rejected": -6.025387287139893, "logps/chosen": -266.86651611328125, "logps/rejected": -160.1429901123047, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 3.537597894668579, "rewards/margins": 11.399589538574219, "rewards/rejected": -7.861992359161377, "step": 3371 }, { "epoch": 1.87, "learning_rate": 1.0659799191015506e-08, "logits/chosen": -6.0096635818481445, "logits/rejected": -6.063332557678223, "logps/chosen": -152.8979949951172, "logps/rejected": -214.88131713867188, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.8977532386779785, "rewards/margins": 11.964483261108398, "rewards/rejected": -11.066730499267578, "step": 3372 }, { "epoch": 1.87, "learning_rate": 1.0567633993764535e-08, "logits/chosen": -5.964729309082031, "logits/rejected": -5.936261177062988, "logps/chosen": -266.0382385253906, "logps/rejected": -273.01824951171875, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": 3.3307273387908936, "rewards/margins": 11.230825424194336, "rewards/rejected": -7.900097846984863, "step": 3373 }, { "epoch": 1.87, "learning_rate": 1.047586470538242e-08, "logits/chosen": -6.06882905960083, "logits/rejected": -6.041383266448975, "logps/chosen": -241.29464721679688, "logps/rejected": -228.33070373535156, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 3.5420286655426025, "rewards/margins": 11.223018646240234, "rewards/rejected": -7.680990695953369, "step": 3374 }, { "epoch": 1.87, "learning_rate": 1.0384491400102613e-08, "logits/chosen": -6.003843307495117, "logits/rejected": -6.014012336730957, "logps/chosen": -208.58258056640625, "logps/rejected": -346.5247497558594, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 1.6718981266021729, "rewards/margins": 13.833708763122559, "rewards/rejected": -12.161810874938965, "step": 3375 }, { "epoch": 1.87, "learning_rate": 1.0293514151838267e-08, "logits/chosen": -6.0906548500061035, "logits/rejected": -5.96999979019165, "logps/chosen": -239.93231201171875, "logps/rejected": -273.11016845703125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 4.4553070068359375, "rewards/margins": 12.480538368225098, "rewards/rejected": -8.025230407714844, "step": 3376 }, { "epoch": 1.87, "learning_rate": 1.0202933034182237e-08, "logits/chosen": -6.064099311828613, "logits/rejected": -5.9919939041137695, "logps/chosen": -256.3941955566406, "logps/rejected": -140.86485290527344, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 3.3716282844543457, "rewards/margins": 9.276965141296387, "rewards/rejected": -5.905337333679199, "step": 3377 }, { "epoch": 1.88, "learning_rate": 1.0112748120406855e-08, "logits/chosen": -6.008428573608398, "logits/rejected": -5.857933044433594, "logps/chosen": -479.5400695800781, "logps/rejected": -220.4672088623047, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 4.216286659240723, "rewards/margins": 11.345346450805664, "rewards/rejected": -7.129059314727783, "step": 3378 }, { "epoch": 1.88, "learning_rate": 1.0022959483464044e-08, "logits/chosen": -6.06264066696167, "logits/rejected": -6.004977226257324, "logps/chosen": -272.47210693359375, "logps/rejected": -170.0643768310547, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 3.827693462371826, "rewards/margins": 9.516958236694336, "rewards/rejected": -5.689264297485352, "step": 3379 }, { "epoch": 1.88, "learning_rate": 9.933567195985093e-09, "logits/chosen": -6.003566741943359, "logits/rejected": -6.079854488372803, "logps/chosen": -337.25274658203125, "logps/rejected": -381.07135009765625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 6.4313178062438965, "rewards/margins": 12.810564041137695, "rewards/rejected": -6.379245758056641, "step": 3380 }, { "epoch": 1.88, "learning_rate": 9.844571330280548e-09, "logits/chosen": -6.052260875701904, "logits/rejected": -5.931991100311279, "logps/chosen": -278.4114074707031, "logps/rejected": -161.21630859375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 2.7969424724578857, "rewards/margins": 11.972387313842773, "rewards/rejected": -9.175445556640625, "step": 3381 }, { "epoch": 1.88, "learning_rate": 9.75597195834077e-09, "logits/chosen": -6.000558853149414, "logits/rejected": -6.033109188079834, "logps/chosen": -212.52023315429688, "logps/rejected": -233.3191375732422, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": 3.9695448875427246, "rewards/margins": 13.593780517578125, "rewards/rejected": -9.624235153198242, "step": 3382 }, { "epoch": 1.88, "learning_rate": 9.667769151834759e-09, "logits/chosen": -6.033174514770508, "logits/rejected": -6.133277893066406, "logps/chosen": -234.59262084960938, "logps/rejected": -243.8161163330078, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 5.234903812408447, "rewards/margins": 12.82613468170166, "rewards/rejected": -7.591230392456055, "step": 3383 }, { "epoch": 1.88, "learning_rate": 9.579962982111222e-09, "logits/chosen": -6.0523152351379395, "logits/rejected": -5.956982612609863, "logps/chosen": -268.01837158203125, "logps/rejected": -93.61296844482422, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 4.631059169769287, "rewards/margins": 11.239044189453125, "rewards/rejected": -6.6079864501953125, "step": 3384 }, { "epoch": 1.88, "learning_rate": 9.492553520197733e-09, "logits/chosen": -6.036990165710449, "logits/rejected": -6.12095832824707, "logps/chosen": -206.90618896484375, "logps/rejected": -295.35223388671875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 2.051318407058716, "rewards/margins": 12.86054801940918, "rewards/rejected": -10.80923080444336, "step": 3385 }, { "epoch": 1.88, "learning_rate": 9.405540836801117e-09, "logits/chosen": -6.013848781585693, "logits/rejected": -6.0263447761535645, "logps/chosen": -268.39996337890625, "logps/rejected": -242.45095825195312, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 5.114694118499756, "rewards/margins": 12.847939491271973, "rewards/rejected": -7.733245849609375, "step": 3386 }, { "epoch": 1.88, "learning_rate": 9.318925002307187e-09, "logits/chosen": -6.055620193481445, "logits/rejected": -6.021042823791504, "logps/chosen": -426.76495361328125, "logps/rejected": -231.66354370117188, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 7.766218185424805, "rewards/margins": 14.184858322143555, "rewards/rejected": -6.418641090393066, "step": 3387 }, { "epoch": 1.88, "learning_rate": 9.232706086780618e-09, "logits/chosen": -5.9172163009643555, "logits/rejected": -5.837883949279785, "logps/chosen": -214.11488342285156, "logps/rejected": -260.5693054199219, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 2.259939193725586, "rewards/margins": 10.930294036865234, "rewards/rejected": -8.670354843139648, "step": 3388 }, { "epoch": 1.88, "learning_rate": 9.146884159965284e-09, "logits/chosen": -6.01556396484375, "logits/rejected": -6.029621124267578, "logps/chosen": -329.4875183105469, "logps/rejected": -218.72769165039062, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 5.34652042388916, "rewards/margins": 12.078370094299316, "rewards/rejected": -6.73184871673584, "step": 3389 }, { "epoch": 1.88, "learning_rate": 9.061459291283657e-09, "logits/chosen": -6.092802047729492, "logits/rejected": -6.12442684173584, "logps/chosen": -187.9405975341797, "logps/rejected": -268.4600830078125, "loss": 0.0241, "rewards/accuracies": 0.9375, "rewards/chosen": 1.4735019207000732, "rewards/margins": 14.317913055419922, "rewards/rejected": -12.844411849975586, "step": 3390 }, { "epoch": 1.88, "learning_rate": 8.976431549837237e-09, "logits/chosen": -6.1107258796691895, "logits/rejected": -6.040934085845947, "logps/chosen": -224.71438598632812, "logps/rejected": -151.86715698242188, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": 4.889483451843262, "rewards/margins": 10.86007022857666, "rewards/rejected": -5.97058629989624, "step": 3391 }, { "epoch": 1.88, "learning_rate": 8.891801004406119e-09, "logits/chosen": -6.1433610916137695, "logits/rejected": -6.059093475341797, "logps/chosen": -181.0308074951172, "logps/rejected": -202.30113220214844, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": 3.1504435539245605, "rewards/margins": 9.746073722839355, "rewards/rejected": -6.595630645751953, "step": 3392 }, { "epoch": 1.88, "learning_rate": 8.807567723449205e-09, "logits/chosen": -6.024277687072754, "logits/rejected": -6.004981517791748, "logps/chosen": -254.20260620117188, "logps/rejected": -117.22332000732422, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 5.581762313842773, "rewards/margins": 11.662337303161621, "rewards/rejected": -6.080574989318848, "step": 3393 }, { "epoch": 1.88, "learning_rate": 8.723731775103938e-09, "logits/chosen": -6.089639663696289, "logits/rejected": -6.0123491287231445, "logps/chosen": -209.62289428710938, "logps/rejected": -157.5995635986328, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": 5.298503875732422, "rewards/margins": 12.347349166870117, "rewards/rejected": -7.048845291137695, "step": 3394 }, { "epoch": 1.88, "learning_rate": 8.640293227186513e-09, "logits/chosen": -6.026703357696533, "logits/rejected": -6.040779113769531, "logps/chosen": -246.5997314453125, "logps/rejected": -255.30706787109375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 2.7372334003448486, "rewards/margins": 12.801000595092773, "rewards/rejected": -10.063767433166504, "step": 3395 }, { "epoch": 1.89, "learning_rate": 8.557252147191495e-09, "logits/chosen": -6.094252586364746, "logits/rejected": -6.024603843688965, "logps/chosen": -264.1853332519531, "logps/rejected": -256.0248718261719, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": 5.55550479888916, "rewards/margins": 13.535715103149414, "rewards/rejected": -7.980210304260254, "step": 3396 }, { "epoch": 1.89, "learning_rate": 8.474608602292044e-09, "logits/chosen": -6.084725379943848, "logits/rejected": -5.961300373077393, "logps/chosen": -288.51141357421875, "logps/rejected": -157.28955078125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 6.126706123352051, "rewards/margins": 12.130047798156738, "rewards/rejected": -6.003343105316162, "step": 3397 }, { "epoch": 1.89, "learning_rate": 8.39236265933968e-09, "logits/chosen": -6.076655387878418, "logits/rejected": -6.019850254058838, "logps/chosen": -278.4696044921875, "logps/rejected": -202.940185546875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 5.394187927246094, "rewards/margins": 13.829452514648438, "rewards/rejected": -8.435264587402344, "step": 3398 }, { "epoch": 1.89, "learning_rate": 8.31051438486441e-09, "logits/chosen": -5.93653678894043, "logits/rejected": -5.964478492736816, "logps/chosen": -232.35598754882812, "logps/rejected": -179.93576049804688, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 5.429666519165039, "rewards/margins": 11.571229934692383, "rewards/rejected": -6.141563892364502, "step": 3399 }, { "epoch": 1.89, "learning_rate": 8.22906384507438e-09, "logits/chosen": -5.971207618713379, "logits/rejected": -6.070615768432617, "logps/chosen": -246.98455810546875, "logps/rejected": -295.76312255859375, "loss": 0.0174, "rewards/accuracies": 0.9375, "rewards/chosen": 2.78292179107666, "rewards/margins": 11.764786720275879, "rewards/rejected": -8.981864929199219, "step": 3400 }, { "epoch": 1.89, "learning_rate": 8.148011105856167e-09, "logits/chosen": -6.033870220184326, "logits/rejected": -5.984285354614258, "logps/chosen": -343.91424560546875, "logps/rejected": -257.6102294921875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 5.5241498947143555, "rewards/margins": 11.578330993652344, "rewards/rejected": -6.0541815757751465, "step": 3401 }, { "epoch": 1.89, "learning_rate": 8.067356232774436e-09, "logits/chosen": -6.00203800201416, "logits/rejected": -5.95134973526001, "logps/chosen": -347.2447509765625, "logps/rejected": -242.1746826171875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 5.047000885009766, "rewards/margins": 12.76749038696289, "rewards/rejected": -7.720488548278809, "step": 3402 }, { "epoch": 1.89, "learning_rate": 7.987099291072108e-09, "logits/chosen": -5.936339378356934, "logits/rejected": -5.999271392822266, "logps/chosen": -254.31982421875, "logps/rejected": -213.66380310058594, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": 3.0931100845336914, "rewards/margins": 10.251466751098633, "rewards/rejected": -7.158357620239258, "step": 3403 }, { "epoch": 1.89, "learning_rate": 7.90724034567014e-09, "logits/chosen": -5.957056522369385, "logits/rejected": -6.018537521362305, "logps/chosen": -384.23590087890625, "logps/rejected": -344.1216735839844, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 6.413564205169678, "rewards/margins": 13.226845741271973, "rewards/rejected": -6.813282012939453, "step": 3404 }, { "epoch": 1.89, "learning_rate": 7.827779461167693e-09, "logits/chosen": -6.047393798828125, "logits/rejected": -6.022944450378418, "logps/chosen": -277.6308288574219, "logps/rejected": -211.29769897460938, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 3.8817338943481445, "rewards/margins": 9.638738632202148, "rewards/rejected": -5.7570037841796875, "step": 3405 }, { "epoch": 1.89, "learning_rate": 7.748716701841684e-09, "logits/chosen": -6.067195892333984, "logits/rejected": -6.101151466369629, "logps/chosen": -271.5376281738281, "logps/rejected": -210.87786865234375, "loss": 0.0662, "rewards/accuracies": 0.9375, "rewards/chosen": 3.246593713760376, "rewards/margins": 12.26140022277832, "rewards/rejected": -9.014806747436523, "step": 3406 }, { "epoch": 1.89, "learning_rate": 7.67005213164712e-09, "logits/chosen": -6.044814109802246, "logits/rejected": -5.990665912628174, "logps/chosen": -208.89797973632812, "logps/rejected": -233.0523223876953, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": 1.1288652420043945, "rewards/margins": 11.096348762512207, "rewards/rejected": -9.967484474182129, "step": 3407 }, { "epoch": 1.89, "learning_rate": 7.591785814217045e-09, "logits/chosen": -6.067615032196045, "logits/rejected": -6.116896629333496, "logps/chosen": -286.80450439453125, "logps/rejected": -213.313232421875, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": 4.933253288269043, "rewards/margins": 12.53899097442627, "rewards/rejected": -7.60573673248291, "step": 3408 }, { "epoch": 1.89, "learning_rate": 7.513917812862035e-09, "logits/chosen": -6.036545753479004, "logits/rejected": -6.029287338256836, "logps/chosen": -201.49386596679688, "logps/rejected": -174.14625549316406, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": 3.3444552421569824, "rewards/margins": 10.27320671081543, "rewards/rejected": -6.928751468658447, "step": 3409 }, { "epoch": 1.89, "learning_rate": 7.436448190570766e-09, "logits/chosen": -5.901634693145752, "logits/rejected": -5.9992475509643555, "logps/chosen": -371.9295959472656, "logps/rejected": -238.3789520263672, "loss": 0.025, "rewards/accuracies": 0.9375, "rewards/chosen": 8.550060272216797, "rewards/margins": 13.070058822631836, "rewards/rejected": -4.5199995040893555, "step": 3410 }, { "epoch": 1.89, "learning_rate": 7.359377010009382e-09, "logits/chosen": -6.094067096710205, "logits/rejected": -6.021387100219727, "logps/chosen": -337.4674072265625, "logps/rejected": -139.11044311523438, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 4.916130542755127, "rewards/margins": 13.134599685668945, "rewards/rejected": -8.218469619750977, "step": 3411 }, { "epoch": 1.89, "learning_rate": 7.282704333522016e-09, "logits/chosen": -6.043653964996338, "logits/rejected": -6.054142475128174, "logps/chosen": -195.7540740966797, "logps/rejected": -196.6707305908203, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": 2.7454018592834473, "rewards/margins": 13.395870208740234, "rewards/rejected": -10.650468826293945, "step": 3412 }, { "epoch": 1.89, "learning_rate": 7.206430223130278e-09, "logits/chosen": -6.022243499755859, "logits/rejected": -5.9789557456970215, "logps/chosen": -303.80908203125, "logps/rejected": -431.7923278808594, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 7.245274066925049, "rewards/margins": 14.935575485229492, "rewards/rejected": -7.690301895141602, "step": 3413 }, { "epoch": 1.9, "learning_rate": 7.130554740533312e-09, "logits/chosen": -5.960560321807861, "logits/rejected": -5.973687648773193, "logps/chosen": -253.434326171875, "logps/rejected": -285.96917724609375, "loss": 0.2048, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2134056091308594, "rewards/margins": 7.272968292236328, "rewards/rejected": -4.059562683105469, "step": 3414 }, { "epoch": 1.9, "learning_rate": 7.055077947107857e-09, "logits/chosen": -6.029378890991211, "logits/rejected": -6.013434410095215, "logps/chosen": -491.7319641113281, "logps/rejected": -258.48681640625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.186849355697632, "rewards/margins": 11.11105728149414, "rewards/rejected": -7.924208641052246, "step": 3415 }, { "epoch": 1.9, "learning_rate": 6.9799999039082955e-09, "logits/chosen": -6.0032782554626465, "logits/rejected": -6.126401901245117, "logps/chosen": -252.36219787597656, "logps/rejected": -229.35025024414062, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": 5.683971881866455, "rewards/margins": 13.84600830078125, "rewards/rejected": -8.162036895751953, "step": 3416 }, { "epoch": 1.9, "learning_rate": 6.905320671666326e-09, "logits/chosen": -5.958062171936035, "logits/rejected": -5.970751762390137, "logps/chosen": -327.1742248535156, "logps/rejected": -279.98846435546875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 6.545469284057617, "rewards/margins": 12.162190437316895, "rewards/rejected": -5.616721153259277, "step": 3417 }, { "epoch": 1.9, "learning_rate": 6.831040310790959e-09, "logits/chosen": -5.931173324584961, "logits/rejected": -6.010575771331787, "logps/chosen": -94.913818359375, "logps/rejected": -166.50648498535156, "loss": 0.084, "rewards/accuracies": 1.0, "rewards/chosen": 1.2221391201019287, "rewards/margins": 9.080092430114746, "rewards/rejected": -7.857952117919922, "step": 3418 }, { "epoch": 1.9, "learning_rate": 6.7571588813686855e-09, "logits/chosen": -6.009402751922607, "logits/rejected": -6.129831790924072, "logps/chosen": -330.7508850097656, "logps/rejected": -240.39352416992188, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 4.5422186851501465, "rewards/margins": 13.704368591308594, "rewards/rejected": -9.162149429321289, "step": 3419 }, { "epoch": 1.9, "learning_rate": 6.683676443163311e-09, "logits/chosen": -5.95150089263916, "logits/rejected": -5.935224533081055, "logps/chosen": -304.08758544921875, "logps/rejected": -177.75531005859375, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 8.504707336425781, "rewards/margins": 14.082077026367188, "rewards/rejected": -5.577369689941406, "step": 3420 }, { "epoch": 1.9, "learning_rate": 6.610593055615732e-09, "logits/chosen": -6.070681571960449, "logits/rejected": -5.971473217010498, "logps/chosen": -217.86590576171875, "logps/rejected": -202.94505310058594, "loss": 0.0493, "rewards/accuracies": 0.9375, "rewards/chosen": 2.520367383956909, "rewards/margins": 10.209870338439941, "rewards/rejected": -7.6895036697387695, "step": 3421 }, { "epoch": 1.9, "learning_rate": 6.537908777844214e-09, "logits/chosen": -6.019057273864746, "logits/rejected": -6.038736343383789, "logps/chosen": -285.86065673828125, "logps/rejected": -301.797607421875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 4.460233688354492, "rewards/margins": 13.858405113220215, "rewards/rejected": -9.398172378540039, "step": 3422 }, { "epoch": 1.9, "learning_rate": 6.465623668644116e-09, "logits/chosen": -5.946149826049805, "logits/rejected": -6.149974822998047, "logps/chosen": -290.95751953125, "logps/rejected": -295.45611572265625, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": 4.9739837646484375, "rewards/margins": 13.041237831115723, "rewards/rejected": -8.067255020141602, "step": 3423 }, { "epoch": 1.9, "learning_rate": 6.393737786487885e-09, "logits/chosen": -6.116482734680176, "logits/rejected": -6.025895118713379, "logps/chosen": -194.30921936035156, "logps/rejected": -151.7678985595703, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 2.329692840576172, "rewards/margins": 10.53228759765625, "rewards/rejected": -8.202594757080078, "step": 3424 }, { "epoch": 1.9, "learning_rate": 6.3222511895250075e-09, "logits/chosen": -6.01584529876709, "logits/rejected": -6.102022171020508, "logps/chosen": -273.57867431640625, "logps/rejected": -261.0615539550781, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 5.074468612670898, "rewards/margins": 12.682829856872559, "rewards/rejected": -7.608361721038818, "step": 3425 }, { "epoch": 1.9, "learning_rate": 6.2511639355821155e-09, "logits/chosen": -6.047306060791016, "logits/rejected": -5.976645469665527, "logps/chosen": -332.04339599609375, "logps/rejected": -150.5264434814453, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 5.94491720199585, "rewards/margins": 10.462546348571777, "rewards/rejected": -4.517629623413086, "step": 3426 }, { "epoch": 1.9, "learning_rate": 6.180476082162656e-09, "logits/chosen": -5.968803405761719, "logits/rejected": -5.928895950317383, "logps/chosen": -369.5594177246094, "logps/rejected": -217.30752563476562, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 6.0249714851379395, "rewards/margins": 13.43715763092041, "rewards/rejected": -7.4121856689453125, "step": 3427 }, { "epoch": 1.9, "learning_rate": 6.110187686446999e-09, "logits/chosen": -5.970504283905029, "logits/rejected": -5.952651023864746, "logps/chosen": -235.31283569335938, "logps/rejected": -144.39308166503906, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 7.186537742614746, "rewards/margins": 14.008245468139648, "rewards/rejected": -6.821707725524902, "step": 3428 }, { "epoch": 1.9, "learning_rate": 6.040298805292498e-09, "logits/chosen": -6.016402244567871, "logits/rejected": -5.972483158111572, "logps/chosen": -183.07615661621094, "logps/rejected": -146.63995361328125, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 2.390387535095215, "rewards/margins": 9.71371841430664, "rewards/rejected": -7.323329925537109, "step": 3429 }, { "epoch": 1.9, "learning_rate": 5.970809495233264e-09, "logits/chosen": -6.055994987487793, "logits/rejected": -6.044909954071045, "logps/chosen": -226.16891479492188, "logps/rejected": -161.91282653808594, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": 6.224034309387207, "rewards/margins": 12.371467590332031, "rewards/rejected": -6.147432804107666, "step": 3430 }, { "epoch": 1.9, "learning_rate": 5.901719812480166e-09, "logits/chosen": -6.120797157287598, "logits/rejected": -6.092977523803711, "logps/chosen": -286.3763427734375, "logps/rejected": -219.79415893554688, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 4.605075836181641, "rewards/margins": 12.185853004455566, "rewards/rejected": -7.580777168273926, "step": 3431 }, { "epoch": 1.91, "learning_rate": 5.833029812920831e-09, "logits/chosen": -5.987419605255127, "logits/rejected": -5.939540386199951, "logps/chosen": -309.69671630859375, "logps/rejected": -167.16134643554688, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 5.653559684753418, "rewards/margins": 12.590211868286133, "rewards/rejected": -6.936651229858398, "step": 3432 }, { "epoch": 1.91, "learning_rate": 5.764739552119702e-09, "logits/chosen": -6.010579586029053, "logits/rejected": -6.005764484405518, "logps/chosen": -297.7249450683594, "logps/rejected": -257.0526123046875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 3.9797868728637695, "rewards/margins": 12.711803436279297, "rewards/rejected": -8.732016563415527, "step": 3433 }, { "epoch": 1.91, "learning_rate": 5.696849085317645e-09, "logits/chosen": -6.047107696533203, "logits/rejected": -5.992990493774414, "logps/chosen": -245.83480834960938, "logps/rejected": -178.65301513671875, "loss": 0.0321, "rewards/accuracies": 0.9375, "rewards/chosen": 4.4553608894348145, "rewards/margins": 12.136004447937012, "rewards/rejected": -7.680643081665039, "step": 3434 }, { "epoch": 1.91, "learning_rate": 5.629358467432288e-09, "logits/chosen": -6.07940673828125, "logits/rejected": -6.101558685302734, "logps/chosen": -298.9095764160156, "logps/rejected": -236.42831420898438, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 4.378322601318359, "rewards/margins": 12.076964378356934, "rewards/rejected": -7.698642730712891, "step": 3435 }, { "epoch": 1.91, "learning_rate": 5.562267753057626e-09, "logits/chosen": -5.935094833374023, "logits/rejected": -5.964250564575195, "logps/chosen": -394.81060791015625, "logps/rejected": -254.543701171875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 4.198535442352295, "rewards/margins": 8.955339431762695, "rewards/rejected": -4.7568039894104, "step": 3436 }, { "epoch": 1.91, "learning_rate": 5.495576996464468e-09, "logits/chosen": -6.072501182556152, "logits/rejected": -6.19529914855957, "logps/chosen": -221.45870971679688, "logps/rejected": -320.6878662109375, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": 3.7600955963134766, "rewards/margins": 13.482549667358398, "rewards/rejected": -9.722454071044922, "step": 3437 }, { "epoch": 1.91, "learning_rate": 5.429286251599885e-09, "logits/chosen": -6.083869457244873, "logits/rejected": -5.995389461517334, "logps/chosen": -297.17828369140625, "logps/rejected": -176.6085205078125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 4.781745910644531, "rewards/margins": 11.833959579467773, "rewards/rejected": -7.052214622497559, "step": 3438 }, { "epoch": 1.91, "learning_rate": 5.36339557208737e-09, "logits/chosen": -6.002504348754883, "logits/rejected": -5.962428569793701, "logps/chosen": -214.05722045898438, "logps/rejected": -227.2814178466797, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 3.778137445449829, "rewards/margins": 11.5155611038208, "rewards/rejected": -7.737423419952393, "step": 3439 }, { "epoch": 1.91, "learning_rate": 5.297905011226844e-09, "logits/chosen": -6.1488237380981445, "logits/rejected": -6.166616439819336, "logps/chosen": -316.0910949707031, "logps/rejected": -204.35858154296875, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": 5.174012184143066, "rewards/margins": 11.240657806396484, "rewards/rejected": -6.066645622253418, "step": 3440 }, { "epoch": 1.91, "learning_rate": 5.232814621994597e-09, "logits/chosen": -6.029797077178955, "logits/rejected": -6.0606608390808105, "logps/chosen": -212.9543914794922, "logps/rejected": -206.71731567382812, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 4.38971471786499, "rewards/margins": 12.318502426147461, "rewards/rejected": -7.928787708282471, "step": 3441 }, { "epoch": 1.91, "learning_rate": 5.168124457043177e-09, "logits/chosen": -5.981224536895752, "logits/rejected": -5.999009132385254, "logps/chosen": -237.3447723388672, "logps/rejected": -186.84706115722656, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": 4.3772196769714355, "rewards/margins": 11.216529846191406, "rewards/rejected": -6.839310169219971, "step": 3442 }, { "epoch": 1.91, "learning_rate": 5.103834568701338e-09, "logits/chosen": -6.050859451293945, "logits/rejected": -6.047548770904541, "logps/chosen": -274.82855224609375, "logps/rejected": -192.30775451660156, "loss": 0.0116, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5238990783691406, "rewards/margins": 11.366555213928223, "rewards/rejected": -7.842656135559082, "step": 3443 }, { "epoch": 1.91, "learning_rate": 5.039945008974144e-09, "logits/chosen": -5.968369960784912, "logits/rejected": -5.978664398193359, "logps/chosen": -146.57266235351562, "logps/rejected": -168.77655029296875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 2.0563807487487793, "rewards/margins": 9.362213134765625, "rewards/rejected": -7.305832862854004, "step": 3444 }, { "epoch": 1.91, "learning_rate": 4.9764558295427025e-09, "logits/chosen": -5.997447967529297, "logits/rejected": -6.057222366333008, "logps/chosen": -302.02325439453125, "logps/rejected": -368.9807434082031, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": 4.207529544830322, "rewards/margins": 12.090185165405273, "rewards/rejected": -7.882656097412109, "step": 3445 }, { "epoch": 1.91, "learning_rate": 4.91336708176443e-09, "logits/chosen": -6.150251388549805, "logits/rejected": -5.931697845458984, "logps/chosen": -288.7473449707031, "logps/rejected": -195.951171875, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 5.803863525390625, "rewards/margins": 14.058349609375, "rewards/rejected": -8.254487037658691, "step": 3446 }, { "epoch": 1.91, "learning_rate": 4.850678816672671e-09, "logits/chosen": -6.128890514373779, "logits/rejected": -6.078154563903809, "logps/chosen": -224.17755126953125, "logps/rejected": -170.26089477539062, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 3.3546245098114014, "rewards/margins": 10.386969566345215, "rewards/rejected": -7.032344818115234, "step": 3447 }, { "epoch": 1.91, "learning_rate": 4.788391084976862e-09, "logits/chosen": -6.001502513885498, "logits/rejected": -6.223222732543945, "logps/chosen": -341.6020202636719, "logps/rejected": -244.72933959960938, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": 4.98610782623291, "rewards/margins": 12.862455368041992, "rewards/rejected": -7.876347064971924, "step": 3448 }, { "epoch": 1.91, "learning_rate": 4.72650393706242e-09, "logits/chosen": -6.054933071136475, "logits/rejected": -5.9867963790893555, "logps/chosen": -271.103271484375, "logps/rejected": -193.56375122070312, "loss": 0.1563, "rewards/accuracies": 0.9375, "rewards/chosen": 1.2540905475616455, "rewards/margins": 8.827423095703125, "rewards/rejected": -7.573331832885742, "step": 3449 }, { "epoch": 1.92, "learning_rate": 4.665017422990802e-09, "logits/chosen": -5.934591293334961, "logits/rejected": -5.9754133224487305, "logps/chosen": -317.89105224609375, "logps/rejected": -279.932373046875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 2.154722213745117, "rewards/margins": 11.43388557434082, "rewards/rejected": -9.279163360595703, "step": 3450 }, { "epoch": 1.92, "learning_rate": 4.6039315924992775e-09, "logits/chosen": -6.096431732177734, "logits/rejected": -6.1512131690979, "logps/chosen": -248.93228149414062, "logps/rejected": -262.0028076171875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 5.067176818847656, "rewards/margins": 13.485374450683594, "rewards/rejected": -8.418196678161621, "step": 3451 }, { "epoch": 1.92, "learning_rate": 4.543246495001097e-09, "logits/chosen": -5.932162284851074, "logits/rejected": -5.970460891723633, "logps/chosen": -283.4754638671875, "logps/rejected": -283.6087341308594, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 2.2022664546966553, "rewards/margins": 10.985886573791504, "rewards/rejected": -8.78361988067627, "step": 3452 }, { "epoch": 1.92, "learning_rate": 4.48296217958527e-09, "logits/chosen": -6.122232437133789, "logits/rejected": -6.0424089431762695, "logps/chosen": -258.32525634765625, "logps/rejected": -211.71385192871094, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": 2.1373658180236816, "rewards/margins": 10.372626304626465, "rewards/rejected": -8.235260963439941, "step": 3453 }, { "epoch": 1.92, "learning_rate": 4.42307869501668e-09, "logits/chosen": -5.9869384765625, "logits/rejected": -6.065641403198242, "logps/chosen": -388.314697265625, "logps/rejected": -200.63983154296875, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": 7.096622467041016, "rewards/margins": 14.634696960449219, "rewards/rejected": -7.538073539733887, "step": 3454 }, { "epoch": 1.92, "learning_rate": 4.36359608973591e-09, "logits/chosen": -6.05165958404541, "logits/rejected": -6.033117771148682, "logps/chosen": -282.488037109375, "logps/rejected": -215.5809783935547, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 4.921645164489746, "rewards/margins": 13.418578147888184, "rewards/rejected": -8.496932983398438, "step": 3455 }, { "epoch": 1.92, "learning_rate": 4.304514411859306e-09, "logits/chosen": -6.040031433105469, "logits/rejected": -6.000308036804199, "logps/chosen": -254.91990661621094, "logps/rejected": -135.8778076171875, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": 3.6880602836608887, "rewards/margins": 9.729394912719727, "rewards/rejected": -6.041335105895996, "step": 3456 }, { "epoch": 1.92, "learning_rate": 4.245833709178859e-09, "logits/chosen": -6.065311908721924, "logits/rejected": -6.110269546508789, "logps/chosen": -312.23870849609375, "logps/rejected": -308.11456298828125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 5.325702667236328, "rewards/margins": 14.022807121276855, "rewards/rejected": -8.697104454040527, "step": 3457 }, { "epoch": 1.92, "learning_rate": 4.1875540291622104e-09, "logits/chosen": -6.037755966186523, "logits/rejected": -6.058319568634033, "logps/chosen": -225.42080688476562, "logps/rejected": -220.7959747314453, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": 2.640984058380127, "rewards/margins": 12.465490341186523, "rewards/rejected": -9.824505805969238, "step": 3458 }, { "epoch": 1.92, "learning_rate": 4.1296754189527604e-09, "logits/chosen": -5.973651885986328, "logits/rejected": -5.915680408477783, "logps/chosen": -301.33514404296875, "logps/rejected": -201.20880126953125, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": 5.488066673278809, "rewards/margins": 13.230592727661133, "rewards/rejected": -7.742525100708008, "step": 3459 }, { "epoch": 1.92, "learning_rate": 4.072197925369225e-09, "logits/chosen": -5.9583001136779785, "logits/rejected": -5.964210510253906, "logps/chosen": -548.4039306640625, "logps/rejected": -308.8366394042969, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 6.384961128234863, "rewards/margins": 13.00136947631836, "rewards/rejected": -6.616408348083496, "step": 3460 }, { "epoch": 1.92, "learning_rate": 4.015121594906024e-09, "logits/chosen": -5.985142707824707, "logits/rejected": -5.975220203399658, "logps/chosen": -441.6514892578125, "logps/rejected": -229.7705078125, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 4.109958648681641, "rewards/margins": 10.05091667175293, "rewards/rejected": -5.940958023071289, "step": 3461 }, { "epoch": 1.92, "learning_rate": 3.958446473733001e-09, "logits/chosen": -6.040571212768555, "logits/rejected": -6.036523818969727, "logps/chosen": -293.9393310546875, "logps/rejected": -205.3480987548828, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 6.129590034484863, "rewards/margins": 12.001909255981445, "rewards/rejected": -5.872319221496582, "step": 3462 }, { "epoch": 1.92, "learning_rate": 3.902172607695487e-09, "logits/chosen": -5.981443405151367, "logits/rejected": -5.930351734161377, "logps/chosen": -336.4249267578125, "logps/rejected": -253.22201538085938, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 4.058808326721191, "rewards/margins": 11.354291915893555, "rewards/rejected": -7.2954840660095215, "step": 3463 }, { "epoch": 1.92, "learning_rate": 3.846300042314232e-09, "logits/chosen": -5.925289630889893, "logits/rejected": -5.915060043334961, "logps/chosen": -224.10372924804688, "logps/rejected": -138.6991424560547, "loss": 0.0802, "rewards/accuracies": 0.9375, "rewards/chosen": 5.061964511871338, "rewards/margins": 10.910636901855469, "rewards/rejected": -5.848671913146973, "step": 3464 }, { "epoch": 1.92, "learning_rate": 3.790828822785308e-09, "logits/chosen": -6.045083999633789, "logits/rejected": -5.972336292266846, "logps/chosen": -309.34771728515625, "logps/rejected": -147.08827209472656, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": 5.98162317276001, "rewards/margins": 12.428108215332031, "rewards/rejected": -6.446484565734863, "step": 3465 }, { "epoch": 1.92, "learning_rate": 3.735758993980209e-09, "logits/chosen": -5.987639427185059, "logits/rejected": -5.965971946716309, "logps/chosen": -341.61773681640625, "logps/rejected": -256.0211181640625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 5.895012378692627, "rewards/margins": 14.650274276733398, "rewards/rejected": -8.75526237487793, "step": 3466 }, { "epoch": 1.92, "learning_rate": 3.6810906004457465e-09, "logits/chosen": -6.0813093185424805, "logits/rejected": -5.99302339553833, "logps/chosen": -245.04165649414062, "logps/rejected": -160.6816864013672, "loss": 0.0727, "rewards/accuracies": 1.0, "rewards/chosen": 3.292470693588257, "rewards/margins": 12.365970611572266, "rewards/rejected": -9.07349967956543, "step": 3467 }, { "epoch": 1.93, "learning_rate": 3.6268236864039346e-09, "logits/chosen": -6.044856071472168, "logits/rejected": -6.095248222351074, "logps/chosen": -213.394287109375, "logps/rejected": -154.17076110839844, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 4.57177209854126, "rewards/margins": 10.213111877441406, "rewards/rejected": -5.641339302062988, "step": 3468 }, { "epoch": 1.93, "learning_rate": 3.5729582957520486e-09, "logits/chosen": -5.994097709655762, "logits/rejected": -6.0640411376953125, "logps/chosen": -281.2271728515625, "logps/rejected": -294.78863525390625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 3.49045729637146, "rewards/margins": 12.00459098815918, "rewards/rejected": -8.51413345336914, "step": 3469 }, { "epoch": 1.93, "learning_rate": 3.5194944720625675e-09, "logits/chosen": -6.1333909034729, "logits/rejected": -6.0885796546936035, "logps/chosen": -213.77976989746094, "logps/rejected": -287.43475341796875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 1.9802402257919312, "rewards/margins": 14.22973346710205, "rewards/rejected": -12.249492645263672, "step": 3470 }, { "epoch": 1.93, "learning_rate": 3.466432258583174e-09, "logits/chosen": -6.168510437011719, "logits/rejected": -6.047039985656738, "logps/chosen": -232.357666015625, "logps/rejected": -181.43276977539062, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": 5.794785499572754, "rewards/margins": 12.201013565063477, "rewards/rejected": -6.406228065490723, "step": 3471 }, { "epoch": 1.93, "learning_rate": 3.413771698236645e-09, "logits/chosen": -6.1955671310424805, "logits/rejected": -6.159633159637451, "logps/chosen": -285.417724609375, "logps/rejected": -315.93218994140625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 2.6952638626098633, "rewards/margins": 13.902626991271973, "rewards/rejected": -11.207364082336426, "step": 3472 }, { "epoch": 1.93, "learning_rate": 3.3615128336209053e-09, "logits/chosen": -6.016872406005859, "logits/rejected": -6.02901029586792, "logps/chosen": -305.50726318359375, "logps/rejected": -251.29672241210938, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 5.6003828048706055, "rewards/margins": 13.44202709197998, "rewards/rejected": -7.841643333435059, "step": 3473 }, { "epoch": 1.93, "learning_rate": 3.309655707008863e-09, "logits/chosen": -6.11295747756958, "logits/rejected": -6.081579685211182, "logps/chosen": -314.6492919921875, "logps/rejected": -187.45713806152344, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 7.965195178985596, "rewards/margins": 13.256105422973633, "rewards/rejected": -5.290910720825195, "step": 3474 }, { "epoch": 1.93, "learning_rate": 3.258200360348462e-09, "logits/chosen": -5.896622180938721, "logits/rejected": -5.961899280548096, "logps/chosen": -329.94830322265625, "logps/rejected": -391.4261474609375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 1.914290428161621, "rewards/margins": 11.565279960632324, "rewards/rejected": -9.650989532470703, "step": 3475 }, { "epoch": 1.93, "learning_rate": 3.2071468352627416e-09, "logits/chosen": -6.158810615539551, "logits/rejected": -6.041170120239258, "logps/chosen": -231.3763427734375, "logps/rejected": -112.5336685180664, "loss": 0.0926, "rewards/accuracies": 0.9375, "rewards/chosen": 3.6508798599243164, "rewards/margins": 7.823465347290039, "rewards/rejected": -4.172585487365723, "step": 3476 }, { "epoch": 1.93, "learning_rate": 3.1564951730495004e-09, "logits/chosen": -6.147416114807129, "logits/rejected": -6.136944770812988, "logps/chosen": -209.38934326171875, "logps/rejected": -167.55763244628906, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 3.5944488048553467, "rewards/margins": 11.804041862487793, "rewards/rejected": -8.209592819213867, "step": 3477 }, { "epoch": 1.93, "learning_rate": 3.1062454146817407e-09, "logits/chosen": -6.00371789932251, "logits/rejected": -5.992878437042236, "logps/chosen": -407.4418029785156, "logps/rejected": -228.92025756835938, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 5.5401177406311035, "rewards/margins": 14.150148391723633, "rewards/rejected": -8.610031127929688, "step": 3478 }, { "epoch": 1.93, "learning_rate": 3.0563976008071703e-09, "logits/chosen": -5.9166717529296875, "logits/rejected": -5.949739456176758, "logps/chosen": -243.4456787109375, "logps/rejected": -167.75518798828125, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 5.596009731292725, "rewards/margins": 11.60885238647461, "rewards/rejected": -6.012842178344727, "step": 3479 }, { "epoch": 1.93, "learning_rate": 3.006951771748423e-09, "logits/chosen": -5.977985858917236, "logits/rejected": -6.04531192779541, "logps/chosen": -249.88388061523438, "logps/rejected": -184.00343322753906, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": 4.810004234313965, "rewards/margins": 11.306212425231934, "rewards/rejected": -6.496208190917969, "step": 3480 }, { "epoch": 1.93, "learning_rate": 2.9579079675028373e-09, "logits/chosen": -6.020839214324951, "logits/rejected": -6.0742292404174805, "logps/chosen": -167.726318359375, "logps/rejected": -244.30148315429688, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": 2.2541985511779785, "rewards/margins": 10.113842010498047, "rewards/rejected": -7.859643459320068, "step": 3481 }, { "epoch": 1.93, "learning_rate": 2.9092662277427906e-09, "logits/chosen": -6.137455940246582, "logits/rejected": -6.088634014129639, "logps/chosen": -311.5147705078125, "logps/rejected": -201.6629180908203, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": 7.539711952209473, "rewards/margins": 12.505352973937988, "rewards/rejected": -4.965641021728516, "step": 3482 }, { "epoch": 1.93, "learning_rate": 2.861026591815141e-09, "logits/chosen": -6.101274490356445, "logits/rejected": -6.024771690368652, "logps/chosen": -395.7692565917969, "logps/rejected": -279.47882080078125, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": 3.364032506942749, "rewards/margins": 11.432245254516602, "rewards/rejected": -8.068212509155273, "step": 3483 }, { "epoch": 1.93, "learning_rate": 2.8131890987417305e-09, "logits/chosen": -6.098984718322754, "logits/rejected": -5.964202404022217, "logps/chosen": -181.3477020263672, "logps/rejected": -159.00070190429688, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 2.6328787803649902, "rewards/margins": 9.750799179077148, "rewards/rejected": -7.117921352386475, "step": 3484 }, { "epoch": 1.93, "learning_rate": 2.7657537872189386e-09, "logits/chosen": -6.032788276672363, "logits/rejected": -6.006532669067383, "logps/chosen": -296.8958435058594, "logps/rejected": -214.95716857910156, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": 4.878408908843994, "rewards/margins": 16.72049903869629, "rewards/rejected": -11.842090606689453, "step": 3485 }, { "epoch": 1.94, "learning_rate": 2.718720695617904e-09, "logits/chosen": -6.057178974151611, "logits/rejected": -5.979121208190918, "logps/chosen": -328.4596252441406, "logps/rejected": -238.0111083984375, "loss": 0.0712, "rewards/accuracies": 0.9375, "rewards/chosen": 4.475223541259766, "rewards/margins": 9.902961730957031, "rewards/rejected": -5.427738666534424, "step": 3486 }, { "epoch": 1.94, "learning_rate": 2.6720898619843614e-09, "logits/chosen": -5.903630256652832, "logits/rejected": -6.066414833068848, "logps/chosen": -335.0663757324219, "logps/rejected": -295.39739990234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 5.152307510375977, "rewards/margins": 13.977869033813477, "rewards/rejected": -8.8255615234375, "step": 3487 }, { "epoch": 1.94, "learning_rate": 2.625861324038692e-09, "logits/chosen": -6.1153483390808105, "logits/rejected": -6.036355495452881, "logps/chosen": -286.2695007324219, "logps/rejected": -164.70037841796875, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 3.780083656311035, "rewards/margins": 10.595297813415527, "rewards/rejected": -6.815213680267334, "step": 3488 }, { "epoch": 1.94, "learning_rate": 2.5800351191757607e-09, "logits/chosen": -5.939139366149902, "logits/rejected": -5.906280517578125, "logps/chosen": -245.23609924316406, "logps/rejected": -255.23428344726562, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 5.488326072692871, "rewards/margins": 11.060394287109375, "rewards/rejected": -5.572068214416504, "step": 3489 }, { "epoch": 1.94, "learning_rate": 2.5346112844650826e-09, "logits/chosen": -6.0147528648376465, "logits/rejected": -5.954885959625244, "logps/chosen": -251.94882202148438, "logps/rejected": -188.11016845703125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": 4.9308366775512695, "rewards/margins": 11.364032745361328, "rewards/rejected": -6.433196067810059, "step": 3490 }, { "epoch": 1.94, "learning_rate": 2.489589856650598e-09, "logits/chosen": -5.921786308288574, "logits/rejected": -6.060260772705078, "logps/chosen": -318.4637451171875, "logps/rejected": -288.5469970703125, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": 5.764228820800781, "rewards/margins": 12.817092895507812, "rewards/rejected": -7.052863121032715, "step": 3491 }, { "epoch": 1.94, "learning_rate": 2.444970872150842e-09, "logits/chosen": -5.938498497009277, "logits/rejected": -6.029436111450195, "logps/chosen": -218.88626098632812, "logps/rejected": -278.760009765625, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": 2.501951217651367, "rewards/margins": 14.10420036315918, "rewards/rejected": -11.602249145507812, "step": 3492 }, { "epoch": 1.94, "learning_rate": 2.400754367058777e-09, "logits/chosen": -6.130139350891113, "logits/rejected": -6.080806255340576, "logps/chosen": -231.42721557617188, "logps/rejected": -244.24526977539062, "loss": 0.021, "rewards/accuracies": 0.9375, "rewards/chosen": 2.3399405479431152, "rewards/margins": 12.540225982666016, "rewards/rejected": -10.200284957885742, "step": 3493 }, { "epoch": 1.94, "learning_rate": 2.3569403771416807e-09, "logits/chosen": -6.177823543548584, "logits/rejected": -6.208609580993652, "logps/chosen": -236.44705200195312, "logps/rejected": -246.9564971923828, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 4.692516326904297, "rewards/margins": 12.178943634033203, "rewards/rejected": -7.486428260803223, "step": 3494 }, { "epoch": 1.94, "learning_rate": 2.313528937841369e-09, "logits/chosen": -6.074655055999756, "logits/rejected": -5.939297676086426, "logps/chosen": -245.38555908203125, "logps/rejected": -131.9864959716797, "loss": 0.025, "rewards/accuracies": 0.9375, "rewards/chosen": 3.386793375015259, "rewards/margins": 9.070989608764648, "rewards/rejected": -5.684196949005127, "step": 3495 }, { "epoch": 1.94, "learning_rate": 2.2705200842740303e-09, "logits/chosen": -6.068406581878662, "logits/rejected": -6.082118034362793, "logps/chosen": -220.1829833984375, "logps/rejected": -274.6041259765625, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": 1.6616086959838867, "rewards/margins": 9.04934024810791, "rewards/rejected": -7.387731552124023, "step": 3496 }, { "epoch": 1.94, "learning_rate": 2.2279138512300564e-09, "logits/chosen": -6.078299522399902, "logits/rejected": -6.012942314147949, "logps/chosen": -300.67437744140625, "logps/rejected": -297.47418212890625, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": 3.389347791671753, "rewards/margins": 11.714865684509277, "rewards/rejected": -8.325517654418945, "step": 3497 }, { "epoch": 1.94, "learning_rate": 2.1857102731742684e-09, "logits/chosen": -6.078335762023926, "logits/rejected": -5.894580364227295, "logps/chosen": -223.31246948242188, "logps/rejected": -168.22747802734375, "loss": 0.0388, "rewards/accuracies": 0.875, "rewards/chosen": 2.2350850105285645, "rewards/margins": 11.819635391235352, "rewards/rejected": -9.584550857543945, "step": 3498 }, { "epoch": 1.94, "learning_rate": 2.1439093842457456e-09, "logits/chosen": -6.050555229187012, "logits/rejected": -6.004315376281738, "logps/chosen": -362.9958801269531, "logps/rejected": -181.855712890625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 4.832165718078613, "rewards/margins": 11.957684516906738, "rewards/rejected": -7.125518798828125, "step": 3499 }, { "epoch": 1.94, "learning_rate": 2.10251121825783e-09, "logits/chosen": -5.9947357177734375, "logits/rejected": -5.9619221687316895, "logps/chosen": -270.7366638183594, "logps/rejected": -174.01943969726562, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": 5.652723789215088, "rewards/margins": 12.241003036499023, "rewards/rejected": -6.5882792472839355, "step": 3500 }, { "epoch": 1.94, "learning_rate": 2.0615158086981222e-09, "logits/chosen": -6.095902442932129, "logits/rejected": -6.053652286529541, "logps/chosen": -230.7842559814453, "logps/rejected": -211.13275146484375, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 3.5434985160827637, "rewards/margins": 10.560678482055664, "rewards/rejected": -7.017180442810059, "step": 3501 }, { "epoch": 1.94, "learning_rate": 2.020923188728374e-09, "logits/chosen": -5.923033237457275, "logits/rejected": -5.985073089599609, "logps/chosen": -282.3372497558594, "logps/rejected": -300.7118835449219, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": 6.67247200012207, "rewards/margins": 15.841771125793457, "rewards/rejected": -9.169300079345703, "step": 3502 }, { "epoch": 1.94, "learning_rate": 1.980733391184486e-09, "logits/chosen": -5.924671649932861, "logits/rejected": -5.968496799468994, "logps/chosen": -185.1781005859375, "logps/rejected": -188.52642822265625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 5.093865394592285, "rewards/margins": 11.613109588623047, "rewards/rejected": -6.51924467086792, "step": 3503 }, { "epoch": 1.95, "learning_rate": 1.9409464485766746e-09, "logits/chosen": -6.084278583526611, "logits/rejected": -6.0434112548828125, "logps/chosen": -273.1724853515625, "logps/rejected": -207.9993896484375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 4.025784015655518, "rewards/margins": 13.206989288330078, "rewards/rejected": -9.181203842163086, "step": 3504 }, { "epoch": 1.95, "learning_rate": 1.9015623930890844e-09, "logits/chosen": -6.144705772399902, "logits/rejected": -6.061659336090088, "logps/chosen": -330.2744445800781, "logps/rejected": -180.1812744140625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 4.416274070739746, "rewards/margins": 11.65681266784668, "rewards/rejected": -7.240540027618408, "step": 3505 }, { "epoch": 1.95, "learning_rate": 1.8625812565800648e-09, "logits/chosen": -6.029099464416504, "logits/rejected": -6.0183210372924805, "logps/chosen": -223.85540771484375, "logps/rejected": -236.84750366210938, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 3.6126551628112793, "rewards/margins": 11.820542335510254, "rewards/rejected": -8.207887649536133, "step": 3506 }, { "epoch": 1.95, "learning_rate": 1.8240030705820032e-09, "logits/chosen": -6.065004348754883, "logits/rejected": -6.084295272827148, "logps/chosen": -174.91317749023438, "logps/rejected": -174.56643676757812, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 3.1390178203582764, "rewards/margins": 11.778066635131836, "rewards/rejected": -8.639049530029297, "step": 3507 }, { "epoch": 1.95, "learning_rate": 1.7858278663013815e-09, "logits/chosen": -5.965869903564453, "logits/rejected": -6.105196952819824, "logps/chosen": -290.0714416503906, "logps/rejected": -375.49810791015625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 3.6339755058288574, "rewards/margins": 15.842367172241211, "rewards/rejected": -12.208391189575195, "step": 3508 }, { "epoch": 1.95, "learning_rate": 1.7480556746186647e-09, "logits/chosen": -5.965773105621338, "logits/rejected": -6.096800804138184, "logps/chosen": -240.0678253173828, "logps/rejected": -328.9329833984375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 2.8568789958953857, "rewards/margins": 14.156309127807617, "rewards/rejected": -11.299430847167969, "step": 3509 }, { "epoch": 1.95, "learning_rate": 1.7106865260883008e-09, "logits/chosen": -6.061649322509766, "logits/rejected": -6.022239685058594, "logps/chosen": -337.3172912597656, "logps/rejected": -226.254638671875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 5.413673400878906, "rewards/margins": 12.326997756958008, "rewards/rejected": -6.913325309753418, "step": 3510 }, { "epoch": 1.95, "learning_rate": 1.6737204509387204e-09, "logits/chosen": -6.0975799560546875, "logits/rejected": -6.079135894775391, "logps/chosen": -290.9676818847656, "logps/rejected": -192.00393676757812, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": 3.8809971809387207, "rewards/margins": 13.206079483032227, "rewards/rejected": -9.325082778930664, "step": 3511 }, { "epoch": 1.95, "learning_rate": 1.6371574790723375e-09, "logits/chosen": -6.141584396362305, "logits/rejected": -6.0256667137146, "logps/chosen": -391.50799560546875, "logps/rejected": -164.58712768554688, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": 7.963701248168945, "rewards/margins": 12.90896224975586, "rewards/rejected": -4.9452619552612305, "step": 3512 }, { "epoch": 1.95, "learning_rate": 1.6009976400654935e-09, "logits/chosen": -6.002890586853027, "logits/rejected": -6.015721797943115, "logps/chosen": -244.33737182617188, "logps/rejected": -194.029052734375, "loss": 0.0438, "rewards/accuracies": 0.9375, "rewards/chosen": 4.851968765258789, "rewards/margins": 13.952129364013672, "rewards/rejected": -9.1001615524292, "step": 3513 }, { "epoch": 1.95, "learning_rate": 1.5652409631682906e-09, "logits/chosen": -6.025882720947266, "logits/rejected": -6.195011615753174, "logps/chosen": -222.95474243164062, "logps/rejected": -263.8756408691406, "loss": 0.0725, "rewards/accuracies": 0.9375, "rewards/chosen": 3.2877252101898193, "rewards/margins": 13.087403297424316, "rewards/rejected": -9.799677848815918, "step": 3514 }, { "epoch": 1.95, "learning_rate": 1.5298874773049808e-09, "logits/chosen": -6.00015926361084, "logits/rejected": -6.03995418548584, "logps/chosen": -293.0057678222656, "logps/rejected": -333.94171142578125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 5.007261276245117, "rewards/margins": 11.738612174987793, "rewards/rejected": -6.731350898742676, "step": 3515 }, { "epoch": 1.95, "learning_rate": 1.494937211073355e-09, "logits/chosen": -6.098292350769043, "logits/rejected": -6.065463066101074, "logps/chosen": -206.76644897460938, "logps/rejected": -228.3196258544922, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 1.229130744934082, "rewards/margins": 11.461238861083984, "rewards/rejected": -10.232107162475586, "step": 3516 }, { "epoch": 1.95, "learning_rate": 1.4603901927452977e-09, "logits/chosen": -5.996281623840332, "logits/rejected": -5.956751346588135, "logps/chosen": -453.9504089355469, "logps/rejected": -177.47470092773438, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 6.1926374435424805, "rewards/margins": 11.415544509887695, "rewards/rejected": -5.222907066345215, "step": 3517 }, { "epoch": 1.95, "learning_rate": 1.4262464502663441e-09, "logits/chosen": -6.018889427185059, "logits/rejected": -6.010988235473633, "logps/chosen": -281.2879943847656, "logps/rejected": -163.62667846679688, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 3.213575839996338, "rewards/margins": 13.193306922912598, "rewards/rejected": -9.979731559753418, "step": 3518 }, { "epoch": 1.95, "learning_rate": 1.3925060112557896e-09, "logits/chosen": -6.175334453582764, "logits/rejected": -6.013047695159912, "logps/chosen": -238.21292114257812, "logps/rejected": -150.87234497070312, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": 2.101940631866455, "rewards/margins": 10.631124496459961, "rewards/rejected": -8.529183387756348, "step": 3519 }, { "epoch": 1.95, "learning_rate": 1.3591689030068576e-09, "logits/chosen": -6.0203022956848145, "logits/rejected": -6.08599853515625, "logps/chosen": -279.2099609375, "logps/rejected": -240.13641357421875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 5.461568355560303, "rewards/margins": 15.36393928527832, "rewards/rejected": -9.90237045288086, "step": 3520 }, { "epoch": 1.95, "learning_rate": 1.3262351524864212e-09, "logits/chosen": -6.077332019805908, "logits/rejected": -6.019732475280762, "logps/chosen": -341.2027893066406, "logps/rejected": -218.31919860839844, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 4.754921913146973, "rewards/margins": 13.189979553222656, "rewards/rejected": -8.43505859375, "step": 3521 }, { "epoch": 1.96, "learning_rate": 1.2937047863350036e-09, "logits/chosen": -5.971331596374512, "logits/rejected": -5.98518705368042, "logps/chosen": -300.9031677246094, "logps/rejected": -230.57769775390625, "loss": 0.0676, "rewards/accuracies": 0.875, "rewards/chosen": 3.274529218673706, "rewards/margins": 6.615817070007324, "rewards/rejected": -3.34128737449646, "step": 3522 }, { "epoch": 1.96, "learning_rate": 1.261577830866889e-09, "logits/chosen": -6.143459320068359, "logits/rejected": -5.99881649017334, "logps/chosen": -274.5684814453125, "logps/rejected": -172.63955688476562, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 4.502195358276367, "rewards/margins": 14.012771606445312, "rewards/rejected": -9.510576248168945, "step": 3523 }, { "epoch": 1.96, "learning_rate": 1.229854312070011e-09, "logits/chosen": -5.960719108581543, "logits/rejected": -5.966241836547852, "logps/chosen": -180.95315551757812, "logps/rejected": -215.15249633789062, "loss": 0.0278, "rewards/accuracies": 0.9375, "rewards/chosen": 2.2788805961608887, "rewards/margins": 8.181004524230957, "rewards/rejected": -5.902123928070068, "step": 3524 }, { "epoch": 1.96, "learning_rate": 1.1985342556060651e-09, "logits/chosen": -6.097691535949707, "logits/rejected": -6.130370140075684, "logps/chosen": -354.7518005371094, "logps/rejected": -296.8546447753906, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 3.364924907684326, "rewards/margins": 11.6600341796875, "rewards/rejected": -8.295108795166016, "step": 3525 }, { "epoch": 1.96, "learning_rate": 1.1676176868102294e-09, "logits/chosen": -6.017766952514648, "logits/rejected": -6.008459091186523, "logps/chosen": -256.9984130859375, "logps/rejected": -326.2433166503906, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 2.149268388748169, "rewards/margins": 12.468599319458008, "rewards/rejected": -10.319332122802734, "step": 3526 }, { "epoch": 1.96, "learning_rate": 1.1371046306914435e-09, "logits/chosen": -6.1534037590026855, "logits/rejected": -5.991032600402832, "logps/chosen": -296.4430847167969, "logps/rejected": -218.27902221679688, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 4.880176544189453, "rewards/margins": 13.277362823486328, "rewards/rejected": -8.397186279296875, "step": 3527 }, { "epoch": 1.96, "learning_rate": 1.1069951119320186e-09, "logits/chosen": -5.970335960388184, "logits/rejected": -6.004726886749268, "logps/chosen": -304.30047607421875, "logps/rejected": -247.5570526123047, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 5.8889479637146, "rewards/margins": 11.799724578857422, "rewards/rejected": -5.910776615142822, "step": 3528 }, { "epoch": 1.96, "learning_rate": 1.077289154888028e-09, "logits/chosen": -6.018195152282715, "logits/rejected": -5.981030464172363, "logps/chosen": -258.4404296875, "logps/rejected": -396.00531005859375, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": 3.2187867164611816, "rewards/margins": 15.267446517944336, "rewards/rejected": -12.04865837097168, "step": 3529 }, { "epoch": 1.96, "learning_rate": 1.0479867835891387e-09, "logits/chosen": -6.020017623901367, "logits/rejected": -5.977746486663818, "logps/chosen": -207.3167724609375, "logps/rejected": -125.7259292602539, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 4.807233810424805, "rewards/margins": 11.543191909790039, "rewards/rejected": -6.735957622528076, "step": 3530 }, { "epoch": 1.96, "learning_rate": 1.0190880217383346e-09, "logits/chosen": -6.013931751251221, "logits/rejected": -6.0273237228393555, "logps/chosen": -370.1236572265625, "logps/rejected": -214.60623168945312, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": 5.098402500152588, "rewards/margins": 10.96194076538086, "rewards/rejected": -5.863537788391113, "step": 3531 }, { "epoch": 1.96, "learning_rate": 9.905928927123608e-10, "logits/chosen": -6.013705253601074, "logits/rejected": -6.09624719619751, "logps/chosen": -283.7756042480469, "logps/rejected": -235.82223510742188, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": 6.211178779602051, "rewards/margins": 12.279295921325684, "rewards/rejected": -6.068118095397949, "step": 3532 }, { "epoch": 1.96, "learning_rate": 9.625014195612235e-10, "logits/chosen": -5.974954605102539, "logits/rejected": -6.096587181091309, "logps/chosen": -226.7860107421875, "logps/rejected": -363.6392517089844, "loss": 0.0288, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7873508930206299, "rewards/margins": 11.993432998657227, "rewards/rejected": -11.20608139038086, "step": 3533 }, { "epoch": 1.96, "learning_rate": 9.348136250085792e-10, "logits/chosen": -6.054863929748535, "logits/rejected": -6.065037727355957, "logps/chosen": -226.00262451171875, "logps/rejected": -187.54794311523438, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": 5.150884628295898, "rewards/margins": 10.991024017333984, "rewards/rejected": -5.840140342712402, "step": 3534 }, { "epoch": 1.96, "learning_rate": 9.075295314514563e-10, "logits/chosen": -6.061322212219238, "logits/rejected": -5.944295406341553, "logps/chosen": -474.1943054199219, "logps/rejected": -401.8000793457031, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 4.989993095397949, "rewards/margins": 13.336891174316406, "rewards/rejected": -8.346898078918457, "step": 3535 }, { "epoch": 1.96, "learning_rate": 8.806491609603673e-10, "logits/chosen": -5.965478897094727, "logits/rejected": -5.9891676902771, "logps/chosen": -256.3458251953125, "logps/rejected": -199.5908203125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": 2.694380283355713, "rewards/margins": 9.441679000854492, "rewards/rejected": -6.7472991943359375, "step": 3536 }, { "epoch": 1.96, "learning_rate": 8.541725352791962e-10, "logits/chosen": -6.065642833709717, "logits/rejected": -6.003116607666016, "logps/chosen": -186.24749755859375, "logps/rejected": -288.3200988769531, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 0.5785998106002808, "rewards/margins": 12.663833618164062, "rewards/rejected": -12.085233688354492, "step": 3537 }, { "epoch": 1.96, "learning_rate": 8.280996758253111e-10, "logits/chosen": -6.111099720001221, "logits/rejected": -6.1074628829956055, "logps/chosen": -259.43115234375, "logps/rejected": -262.5167236328125, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 6.72438383102417, "rewards/margins": 15.326423645019531, "rewards/rejected": -8.602041244506836, "step": 3538 }, { "epoch": 1.96, "learning_rate": 8.024306036893968e-10, "logits/chosen": -5.961050510406494, "logits/rejected": -6.103092193603516, "logps/chosen": -239.19862365722656, "logps/rejected": -313.06591796875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": 3.0553317070007324, "rewards/margins": 11.274025917053223, "rewards/rejected": -8.218692779541016, "step": 3539 }, { "epoch": 1.97, "learning_rate": 7.77165339635566e-10, "logits/chosen": -5.9418792724609375, "logits/rejected": -5.979804039001465, "logps/chosen": -261.86279296875, "logps/rejected": -279.3342590332031, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": 5.917567729949951, "rewards/margins": 13.58758544921875, "rewards/rejected": -7.670018196105957, "step": 3540 }, { "epoch": 1.97, "learning_rate": 7.52303904101248e-10, "logits/chosen": -5.964398384094238, "logits/rejected": -6.05479097366333, "logps/chosen": -236.07855224609375, "logps/rejected": -272.08221435546875, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": 3.128342628479004, "rewards/margins": 14.435422897338867, "rewards/rejected": -11.307080268859863, "step": 3541 }, { "epoch": 1.97, "learning_rate": 7.278463171971338e-10, "logits/chosen": -5.9821014404296875, "logits/rejected": -6.018859386444092, "logps/chosen": -246.02197265625, "logps/rejected": -232.47021484375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 3.3519856929779053, "rewards/margins": 10.036417007446289, "rewards/rejected": -6.684431552886963, "step": 3542 }, { "epoch": 1.97, "learning_rate": 7.037925987074534e-10, "logits/chosen": -5.979703903198242, "logits/rejected": -6.012094020843506, "logps/chosen": -332.6609802246094, "logps/rejected": -201.7213592529297, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": 7.685070991516113, "rewards/margins": 12.020378112792969, "rewards/rejected": -4.335307598114014, "step": 3543 }, { "epoch": 1.97, "learning_rate": 6.801427680894755e-10, "logits/chosen": -6.048886299133301, "logits/rejected": -6.163972854614258, "logps/chosen": -266.3352966308594, "logps/rejected": -220.5289306640625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": 4.033107757568359, "rewards/margins": 9.091632843017578, "rewards/rejected": -5.058524131774902, "step": 3544 }, { "epoch": 1.97, "learning_rate": 6.568968444739531e-10, "logits/chosen": -6.044373512268066, "logits/rejected": -6.023413181304932, "logps/chosen": -291.1968078613281, "logps/rejected": -157.10362243652344, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": 6.121502876281738, "rewards/margins": 11.666852951049805, "rewards/rejected": -5.545351028442383, "step": 3545 }, { "epoch": 1.97, "learning_rate": 6.340548466648444e-10, "logits/chosen": -6.104365348815918, "logits/rejected": -6.064380168914795, "logps/chosen": -284.7043762207031, "logps/rejected": -249.49061584472656, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 4.290594100952148, "rewards/margins": 9.287200927734375, "rewards/rejected": -4.996606826782227, "step": 3546 }, { "epoch": 1.97, "learning_rate": 6.11616793139369e-10, "logits/chosen": -6.033968925476074, "logits/rejected": -6.093561172485352, "logps/chosen": -177.39662170410156, "logps/rejected": -203.09690856933594, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 1.540842056274414, "rewards/margins": 11.38966178894043, "rewards/rejected": -9.848819732666016, "step": 3547 }, { "epoch": 1.97, "learning_rate": 5.895827020479527e-10, "logits/chosen": -6.141101360321045, "logits/rejected": -5.9428391456604, "logps/chosen": -331.26129150390625, "logps/rejected": -233.45904541015625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": 5.529767990112305, "rewards/margins": 17.408493041992188, "rewards/rejected": -11.878725051879883, "step": 3548 }, { "epoch": 1.97, "learning_rate": 5.679525912143934e-10, "logits/chosen": -5.9795026779174805, "logits/rejected": -6.066957950592041, "logps/chosen": -291.28765869140625, "logps/rejected": -298.4857177734375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 4.868340492248535, "rewards/margins": 13.356557846069336, "rewards/rejected": -8.488216400146484, "step": 3549 }, { "epoch": 1.97, "learning_rate": 5.46726478135473e-10, "logits/chosen": -5.921519756317139, "logits/rejected": -6.067866802215576, "logps/chosen": -201.34982299804688, "logps/rejected": -208.77882385253906, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": 2.897801399230957, "rewards/margins": 11.770488739013672, "rewards/rejected": -8.872688293457031, "step": 3550 }, { "epoch": 1.97, "learning_rate": 5.259043799813456e-10, "logits/chosen": -6.025554180145264, "logits/rejected": -5.89502477645874, "logps/chosen": -270.8896484375, "logps/rejected": -102.4012451171875, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 4.2507548332214355, "rewards/margins": 10.731829643249512, "rewards/rejected": -6.481074810028076, "step": 3551 }, { "epoch": 1.97, "learning_rate": 5.054863135953158e-10, "logits/chosen": -6.016340255737305, "logits/rejected": -5.981693744659424, "logps/chosen": -196.25021362304688, "logps/rejected": -131.8714599609375, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": 4.825373649597168, "rewards/margins": 11.468606948852539, "rewards/rejected": -6.643232822418213, "step": 3552 }, { "epoch": 1.97, "learning_rate": 4.854722954938384e-10, "logits/chosen": -6.072154998779297, "logits/rejected": -5.955625534057617, "logps/chosen": -385.5458984375, "logps/rejected": -165.6158447265625, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 8.062511444091797, "rewards/margins": 15.004518508911133, "rewards/rejected": -6.9420061111450195, "step": 3553 }, { "epoch": 1.97, "learning_rate": 4.658623418665742e-10, "logits/chosen": -6.032590866088867, "logits/rejected": -5.863121032714844, "logps/chosen": -188.739013671875, "logps/rejected": -93.50192260742188, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 2.5712485313415527, "rewards/margins": 10.424799919128418, "rewards/rejected": -7.853551387786865, "step": 3554 }, { "epoch": 1.97, "learning_rate": 4.466564685762786e-10, "logits/chosen": -6.098807334899902, "logits/rejected": -5.913786888122559, "logps/chosen": -284.5642395019531, "logps/rejected": -123.04862213134766, "loss": 0.045, "rewards/accuracies": 0.9375, "rewards/chosen": 5.31949520111084, "rewards/margins": 12.266613960266113, "rewards/rejected": -6.947118759155273, "step": 3555 }, { "epoch": 1.97, "learning_rate": 4.278546911588021e-10, "logits/chosen": -5.944956302642822, "logits/rejected": -5.932396411895752, "logps/chosen": -463.0714111328125, "logps/rejected": -268.2464904785156, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 6.198742389678955, "rewards/margins": 11.458788871765137, "rewards/rejected": -5.260046005249023, "step": 3556 }, { "epoch": 1.97, "learning_rate": 4.094570248232565e-10, "logits/chosen": -6.14539909362793, "logits/rejected": -6.078002452850342, "logps/chosen": -341.1443786621094, "logps/rejected": -337.1946716308594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 7.835241317749023, "rewards/margins": 16.827346801757812, "rewards/rejected": -8.992105484008789, "step": 3557 }, { "epoch": 1.98, "learning_rate": 3.9146348445173727e-10, "logits/chosen": -6.024198532104492, "logits/rejected": -6.099930763244629, "logps/chosen": -291.9211120605469, "logps/rejected": -223.857421875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 5.070093154907227, "rewards/margins": 12.949602127075195, "rewards/rejected": -7.8795084953308105, "step": 3558 }, { "epoch": 1.98, "learning_rate": 3.7387408459949034e-10, "logits/chosen": -6.001679420471191, "logits/rejected": -5.916510581970215, "logps/chosen": -185.71258544921875, "logps/rejected": -176.8043975830078, "loss": 0.0644, "rewards/accuracies": 1.0, "rewards/chosen": 2.207879066467285, "rewards/margins": 9.019938468933105, "rewards/rejected": -6.81205940246582, "step": 3559 }, { "epoch": 1.98, "learning_rate": 3.566888394948009e-10, "logits/chosen": -5.944965362548828, "logits/rejected": -5.905450820922852, "logps/chosen": -251.40396118164062, "logps/rejected": -177.07540893554688, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 5.030368328094482, "rewards/margins": 12.884276390075684, "rewards/rejected": -7.853908538818359, "step": 3560 }, { "epoch": 1.98, "learning_rate": 3.399077630391045e-10, "logits/chosen": -6.005918502807617, "logits/rejected": -6.078629016876221, "logps/chosen": -225.1879425048828, "logps/rejected": -278.2947692871094, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": 3.16744065284729, "rewards/margins": 13.249370574951172, "rewards/rejected": -10.081929206848145, "step": 3561 }, { "epoch": 1.98, "learning_rate": 3.2353086880682057e-10, "logits/chosen": -5.942612648010254, "logits/rejected": -5.927872180938721, "logps/chosen": -455.047119140625, "logps/rejected": -286.54669189453125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 3.808717727661133, "rewards/margins": 13.28800106048584, "rewards/rejected": -9.479283332824707, "step": 3562 }, { "epoch": 1.98, "learning_rate": 3.0755817004546323e-10, "logits/chosen": -5.933013916015625, "logits/rejected": -6.153873443603516, "logps/chosen": -221.43234252929688, "logps/rejected": -285.06060791015625, "loss": 0.0322, "rewards/accuracies": 0.9375, "rewards/chosen": 3.049053430557251, "rewards/margins": 11.084407806396484, "rewards/rejected": -8.035354614257812, "step": 3563 }, { "epoch": 1.98, "learning_rate": 2.919896796755861e-10, "logits/chosen": -5.875999927520752, "logits/rejected": -5.9128289222717285, "logps/chosen": -300.5440673828125, "logps/rejected": -315.0715026855469, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 5.409344673156738, "rewards/margins": 11.833295822143555, "rewards/rejected": -6.423951148986816, "step": 3564 }, { "epoch": 1.98, "learning_rate": 2.768254102907819e-10, "logits/chosen": -5.9515910148620605, "logits/rejected": -5.951528072357178, "logps/chosen": -258.88946533203125, "logps/rejected": -179.6787872314453, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": 3.9421679973602295, "rewards/margins": 10.925610542297363, "rewards/rejected": -6.983443260192871, "step": 3565 }, { "epoch": 1.98, "learning_rate": 2.620653741576273e-10, "logits/chosen": -5.960559844970703, "logits/rejected": -5.915632247924805, "logps/chosen": -219.43606567382812, "logps/rejected": -180.13905334472656, "loss": 0.0324, "rewards/accuracies": 0.875, "rewards/chosen": 4.297087669372559, "rewards/margins": 12.500844955444336, "rewards/rejected": -8.203756332397461, "step": 3566 }, { "epoch": 1.98, "learning_rate": 2.477095832156828e-10, "logits/chosen": -5.8150410652160645, "logits/rejected": -5.950998783111572, "logps/chosen": -262.57293701171875, "logps/rejected": -184.63650512695312, "loss": 0.04, "rewards/accuracies": 0.9375, "rewards/chosen": 5.278433322906494, "rewards/margins": 11.590286254882812, "rewards/rejected": -6.311853408813477, "step": 3567 }, { "epoch": 1.98, "learning_rate": 2.3375804907765916e-10, "logits/chosen": -6.006584167480469, "logits/rejected": -5.920169830322266, "logps/chosen": -334.343505859375, "logps/rejected": -179.03456115722656, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": 6.5241827964782715, "rewards/margins": 12.733475685119629, "rewards/rejected": -6.209292888641357, "step": 3568 }, { "epoch": 1.98, "learning_rate": 2.2021078302902895e-10, "logits/chosen": -5.859490394592285, "logits/rejected": -5.886247158050537, "logps/chosen": -493.0328674316406, "logps/rejected": -250.95970153808594, "loss": 0.0278, "rewards/accuracies": 0.9375, "rewards/chosen": 9.58972454071045, "rewards/margins": 16.014036178588867, "rewards/rejected": -6.424311637878418, "step": 3569 }, { "epoch": 1.98, "learning_rate": 2.0706779602847058e-10, "logits/chosen": -5.857638359069824, "logits/rejected": -5.925533771514893, "logps/chosen": -285.5166015625, "logps/rejected": -264.1162414550781, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": 4.214808464050293, "rewards/margins": 12.631929397583008, "rewards/rejected": -8.417121887207031, "step": 3570 }, { "epoch": 1.98, "learning_rate": 1.9432909870747972e-10, "logits/chosen": -6.038965702056885, "logits/rejected": -6.185628414154053, "logps/chosen": -295.1720275878906, "logps/rejected": -330.3191833496094, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 4.0694756507873535, "rewards/margins": 14.840751647949219, "rewards/rejected": -10.771276473999023, "step": 3571 }, { "epoch": 1.98, "learning_rate": 1.8199470137059137e-10, "logits/chosen": -6.043792247772217, "logits/rejected": -6.051785945892334, "logps/chosen": -226.74754333496094, "logps/rejected": -328.1220397949219, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 2.806793451309204, "rewards/margins": 12.770744323730469, "rewards/rejected": -9.96395206451416, "step": 3572 }, { "epoch": 1.98, "learning_rate": 1.7006461399532435e-10, "logits/chosen": -5.926208019256592, "logits/rejected": -6.0798563957214355, "logps/chosen": -202.87771606445312, "logps/rejected": -305.42718505859375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": 3.2797608375549316, "rewards/margins": 13.107767105102539, "rewards/rejected": -9.82800579071045, "step": 3573 }, { "epoch": 1.98, "learning_rate": 1.5853884623195922e-10, "logits/chosen": -5.957151412963867, "logits/rejected": -5.868594169616699, "logps/chosen": -261.05120849609375, "logps/rejected": -126.42974090576172, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 4.615716934204102, "rewards/margins": 8.849286079406738, "rewards/rejected": -4.233569145202637, "step": 3574 }, { "epoch": 1.98, "learning_rate": 1.4741740740398245e-10, "logits/chosen": -5.919910430908203, "logits/rejected": -6.013812065124512, "logps/chosen": -497.9259033203125, "logps/rejected": -385.66632080078125, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": 2.1273746490478516, "rewards/margins": 11.899396896362305, "rewards/rejected": -9.772022247314453, "step": 3575 }, { "epoch": 1.99, "learning_rate": 1.367003065076422e-10, "logits/chosen": -6.010521411895752, "logits/rejected": -6.024466514587402, "logps/chosen": -302.58935546875, "logps/rejected": -222.10702514648438, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": 3.273439407348633, "rewards/margins": 11.649482727050781, "rewards/rejected": -8.376043319702148, "step": 3576 }, { "epoch": 1.99, "learning_rate": 1.2638755221217046e-10, "logits/chosen": -6.053005218505859, "logits/rejected": -6.161489963531494, "logps/chosen": -292.9205627441406, "logps/rejected": -296.7928466796875, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": 4.930507659912109, "rewards/margins": 12.800521850585938, "rewards/rejected": -7.870014667510986, "step": 3577 }, { "epoch": 1.99, "learning_rate": 1.1647915285967203e-10, "logits/chosen": -5.9797210693359375, "logits/rejected": -6.030831336975098, "logps/chosen": -304.6807861328125, "logps/rejected": -272.3958435058594, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": 4.688244819641113, "rewards/margins": 11.486315727233887, "rewards/rejected": -6.798071384429932, "step": 3578 }, { "epoch": 1.99, "learning_rate": 1.069751164652355e-10, "logits/chosen": -5.926929473876953, "logits/rejected": -5.9039130210876465, "logps/chosen": -385.7015686035156, "logps/rejected": -169.0665283203125, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": 5.2543182373046875, "rewards/margins": 12.910487174987793, "rewards/rejected": -7.656169891357422, "step": 3579 }, { "epoch": 1.99, "learning_rate": 9.787545071676672e-11, "logits/chosen": -5.9811015129089355, "logits/rejected": -5.960714817047119, "logps/chosen": -292.6483459472656, "logps/rejected": -299.02142333984375, "loss": 0.0145, "rewards/accuracies": 0.9375, "rewards/chosen": 3.5577504634857178, "rewards/margins": 14.374612808227539, "rewards/rejected": -10.816862106323242, "step": 3580 }, { "epoch": 1.99, "learning_rate": 8.91801629751554e-11, "logits/chosen": -5.910850524902344, "logits/rejected": -5.939173221588135, "logps/chosen": -252.8834228515625, "logps/rejected": -356.4087219238281, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 2.714782953262329, "rewards/margins": 14.637834548950195, "rewards/rejected": -11.923051834106445, "step": 3581 }, { "epoch": 1.99, "learning_rate": 8.088926027410848e-11, "logits/chosen": -6.079899787902832, "logits/rejected": -5.939654350280762, "logps/chosen": -276.74163818359375, "logps/rejected": -253.41952514648438, "loss": 0.2503, "rewards/accuracies": 1.0, "rewards/chosen": 2.8998613357543945, "rewards/margins": 14.276829719543457, "rewards/rejected": -11.376968383789062, "step": 3582 }, { "epoch": 1.99, "learning_rate": 7.30027493203167e-11, "logits/chosen": -6.014939308166504, "logits/rejected": -6.040170669555664, "logps/chosen": -287.20263671875, "logps/rejected": -248.0879364013672, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 4.344178199768066, "rewards/margins": 12.239800453186035, "rewards/rejected": -7.895623207092285, "step": 3583 }, { "epoch": 1.99, "learning_rate": 6.552063649323259e-11, "logits/chosen": -6.066287517547607, "logits/rejected": -6.028398513793945, "logps/chosen": -339.67620849609375, "logps/rejected": -136.8622589111328, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": 5.825006484985352, "rewards/margins": 11.648284912109375, "rewards/rejected": -5.823278427124023, "step": 3584 }, { "epoch": 1.99, "learning_rate": 5.844292784523697e-11, "logits/chosen": -6.013233184814453, "logits/rejected": -6.036991596221924, "logps/chosen": -255.2646484375, "logps/rejected": -339.7177429199219, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 2.5493838787078857, "rewards/margins": 14.59773063659668, "rewards/rejected": -12.048345565795898, "step": 3585 }, { "epoch": 1.99, "learning_rate": 5.1769629101638957e-11, "logits/chosen": -6.078779220581055, "logits/rejected": -6.0568718910217285, "logps/chosen": -247.25201416015625, "logps/rejected": -186.01931762695312, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 4.638570785522461, "rewards/margins": 12.223349571228027, "rewards/rejected": -7.584777355194092, "step": 3586 }, { "epoch": 1.99, "learning_rate": 4.5500745660509433e-11, "logits/chosen": -5.964955806732178, "logits/rejected": -6.212942600250244, "logps/chosen": -209.87884521484375, "logps/rejected": -315.80303955078125, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 1.0460031032562256, "rewards/margins": 11.44771671295166, "rewards/rejected": -10.401712417602539, "step": 3587 }, { "epoch": 1.99, "learning_rate": 3.963628259290308e-11, "logits/chosen": -6.029909133911133, "logits/rejected": -5.993049621582031, "logps/chosen": -251.8082733154297, "logps/rejected": -178.31658935546875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 7.006300926208496, "rewards/margins": 14.256328582763672, "rewards/rejected": -7.250027656555176, "step": 3588 }, { "epoch": 1.99, "learning_rate": 3.4176244642636355e-11, "logits/chosen": -6.156838893890381, "logits/rejected": -5.949793338775635, "logps/chosen": -317.69659423828125, "logps/rejected": -190.65170288085938, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 3.4959967136383057, "rewards/margins": 11.754593849182129, "rewards/rejected": -8.258596420288086, "step": 3589 }, { "epoch": 1.99, "learning_rate": 2.9120636226398485e-11, "logits/chosen": -5.970013618469238, "logits/rejected": -6.165875434875488, "logps/chosen": -382.0306091308594, "logps/rejected": -492.98126220703125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": 3.689065456390381, "rewards/margins": 15.17812728881836, "rewards/rejected": -11.48906135559082, "step": 3590 }, { "epoch": 1.99, "learning_rate": 2.4469461433751505e-11, "logits/chosen": -5.950429916381836, "logits/rejected": -6.03140926361084, "logps/chosen": -242.78414916992188, "logps/rejected": -265.2997131347656, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 2.1320526599884033, "rewards/margins": 12.140387535095215, "rewards/rejected": -10.00833511352539, "step": 3591 }, { "epoch": 1.99, "learning_rate": 2.02227240270747e-11, "logits/chosen": -5.979353427886963, "logits/rejected": -6.089395999908447, "logps/chosen": -173.2784423828125, "logps/rejected": -260.06658935546875, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 2.2980871200561523, "rewards/margins": 10.852725982666016, "rewards/rejected": -8.554638862609863, "step": 3592 }, { "epoch": 1.99, "learning_rate": 1.638042744173118e-11, "logits/chosen": -5.933542728424072, "logits/rejected": -5.951957702636719, "logps/chosen": -207.35975646972656, "logps/rejected": -256.01556396484375, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": 0.8803331851959229, "rewards/margins": 9.559795379638672, "rewards/rejected": -8.679462432861328, "step": 3593 }, { "epoch": 2.0, "learning_rate": 1.2942574785623772e-11, "logits/chosen": -6.109168529510498, "logits/rejected": -5.842073440551758, "logps/chosen": -294.6489562988281, "logps/rejected": -154.08721923828125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 4.781671047210693, "rewards/margins": 13.5001802444458, "rewards/rejected": -8.718509674072266, "step": 3594 }, { "epoch": 2.0, "learning_rate": 9.909168839861148e-12, "logits/chosen": -6.030223846435547, "logits/rejected": -5.959259033203125, "logps/chosen": -276.584228515625, "logps/rejected": -163.35198974609375, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": 6.291718482971191, "rewards/margins": 14.05348014831543, "rewards/rejected": -7.761762619018555, "step": 3595 }, { "epoch": 2.0, "learning_rate": 7.280212058091706e-12, "logits/chosen": -6.169898509979248, "logits/rejected": -6.025489807128906, "logps/chosen": -150.7490997314453, "logps/rejected": -160.59158325195312, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 2.1605167388916016, "rewards/margins": 10.656816482543945, "rewards/rejected": -8.496299743652344, "step": 3596 }, { "epoch": 2.0, "learning_rate": 5.0557065669476485e-12, "logits/chosen": -5.937626361846924, "logits/rejected": -5.9661946296691895, "logps/chosen": -361.18780517578125, "logps/rejected": -303.49151611328125, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 4.806102752685547, "rewards/margins": 12.339515686035156, "rewards/rejected": -7.533413410186768, "step": 3597 }, { "epoch": 2.0, "learning_rate": 3.2356541658784543e-12, "logits/chosen": -6.000273704528809, "logits/rejected": -5.978695869445801, "logps/chosen": -294.3773193359375, "logps/rejected": -239.13780212402344, "loss": 0.0151, "rewards/accuracies": 0.9375, "rewards/chosen": 6.876840591430664, "rewards/margins": 13.89879035949707, "rewards/rejected": -7.021950721740723, "step": 3598 }, { "epoch": 2.0, "learning_rate": 1.820056327150876e-12, "logits/chosen": -6.032283306121826, "logits/rejected": -6.127254962921143, "logps/chosen": -304.83489990234375, "logps/rejected": -320.72418212890625, "loss": 0.0139, "rewards/accuracies": 0.9375, "rewards/chosen": 5.130227088928223, "rewards/margins": 14.576658248901367, "rewards/rejected": -9.446431159973145, "step": 3599 }, { "epoch": 2.0, "learning_rate": 8.089141957934309e-13, "logits/chosen": -5.969815731048584, "logits/rejected": -6.009111404418945, "logps/chosen": -158.8939208984375, "logps/rejected": -226.98440551757812, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": 1.0355467796325684, "rewards/margins": 11.597301483154297, "rewards/rejected": -10.56175422668457, "step": 3600 }, { "epoch": 2.0, "learning_rate": 2.0222858987395396e-13, "logits/chosen": -5.966636657714844, "logits/rejected": -5.977855682373047, "logps/chosen": -265.6329345703125, "logps/rejected": -401.414794921875, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": 5.694124221801758, "rewards/margins": 15.441702842712402, "rewards/rejected": -9.747578620910645, "step": 3601 }, { "epoch": 2.0, "learning_rate": 0.0, "logits/chosen": -5.990757465362549, "logits/rejected": -5.996262550354004, "logps/chosen": -276.2472839355469, "logps/rejected": -175.681396484375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 4.243003845214844, "rewards/margins": 10.156950950622559, "rewards/rejected": -5.913947582244873, "step": 3602 } ], "logging_steps": 1.0, "max_steps": 3602, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3.240754998656631e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }