{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9630296090299362, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "logits/chosen": -2.586496591567993, "logits/rejected": -2.58866286277771, "logps/chosen": -140.71868896484375, "logps/rejected": -141.9235382080078, "loss": 0.6941, "rewards/accuracies": 0.4375, "rewards/chosen": -0.005378031637519598, "rewards/margins": -0.0017773156287148595, "rewards/rejected": -0.0036007165908813477, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -2.7250607013702393, "logits/rejected": -2.737359046936035, "logps/chosen": -239.8214111328125, "logps/rejected": -245.04498291015625, "loss": 0.69, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0041117193177342415, "rewards/margins": 0.006983352825045586, "rewards/rejected": -0.002871633041650057, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.5e-06, "logits/chosen": -2.407137393951416, "logits/rejected": -2.3473005294799805, "logps/chosen": -184.03599548339844, "logps/rejected": -155.21461486816406, "loss": 0.6886, "rewards/accuracies": 0.4375, "rewards/chosen": 0.003687595948576927, "rewards/margins": 0.009552741423249245, "rewards/rejected": -0.0058651454746723175, "step": 3 }, { "epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -2.7127842903137207, "logits/rejected": -2.777282953262329, "logps/chosen": -171.6352081298828, "logps/rejected": -191.40785217285156, "loss": 0.7021, "rewards/accuracies": 0.3125, "rewards/chosen": -0.004523229319602251, "rewards/margins": -0.01729598268866539, "rewards/rejected": 0.012772750109434128, "step": 4 }, { "epoch": 0.01, "learning_rate": 2.5e-06, "logits/chosen": -2.4465034008026123, "logits/rejected": -2.466231107711792, "logps/chosen": -254.792724609375, "logps/rejected": -245.39108276367188, "loss": 0.6988, "rewards/accuracies": 0.375, "rewards/chosen": -0.004072976298630238, "rewards/margins": -0.010413647629320621, "rewards/rejected": 0.006340669468045235, "step": 5 }, { "epoch": 0.01, "learning_rate": 3e-06, "logits/chosen": -2.2196829319000244, "logits/rejected": -2.2641539573669434, "logps/chosen": -179.39495849609375, "logps/rejected": -220.39300537109375, "loss": 0.6891, "rewards/accuracies": 0.5625, "rewards/chosen": 0.01630725897848606, "rewards/margins": 0.008536052890121937, "rewards/rejected": 0.00777120515704155, "step": 6 }, { "epoch": 0.01, "learning_rate": 3.5000000000000004e-06, "logits/chosen": -2.3294830322265625, "logits/rejected": -2.380467653274536, "logps/chosen": -194.7043914794922, "logps/rejected": -199.31422424316406, "loss": 0.6919, "rewards/accuracies": 0.5625, "rewards/chosen": 0.005030441097915173, "rewards/margins": 0.0030234563164412975, "rewards/rejected": 0.0020069843158125877, "step": 7 }, { "epoch": 0.01, "learning_rate": 4.000000000000001e-06, "logits/chosen": -2.590843677520752, "logits/rejected": -2.5107312202453613, "logps/chosen": -215.23556518554688, "logps/rejected": -189.38424682617188, "loss": 0.6995, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0025288108736276627, "rewards/margins": -0.012505888007581234, "rewards/rejected": 0.009977078065276146, "step": 8 }, { "epoch": 0.01, "learning_rate": 4.5e-06, "logits/chosen": -2.659663200378418, "logits/rejected": -2.6844048500061035, "logps/chosen": -229.0231170654297, "logps/rejected": -205.54054260253906, "loss": 0.7032, "rewards/accuracies": 0.4375, "rewards/chosen": -0.014649391174316406, "rewards/margins": -0.019562961533665657, "rewards/rejected": 0.0049135684967041016, "step": 9 }, { "epoch": 0.01, "learning_rate": 5e-06, "logits/chosen": -2.6927154064178467, "logits/rejected": -2.6597423553466797, "logps/chosen": -207.30467224121094, "logps/rejected": -197.54656982421875, "loss": 0.692, "rewards/accuracies": 0.625, "rewards/chosen": -0.0019803522154688835, "rewards/margins": 0.002554560313001275, "rewards/rejected": -0.004534911829978228, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.500000000000001e-06, "logits/chosen": -2.6474952697753906, "logits/rejected": -2.6725170612335205, "logps/chosen": -231.35108947753906, "logps/rejected": -274.9834289550781, "loss": 0.6924, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0055945878848433495, "rewards/margins": 0.0016048436518758535, "rewards/rejected": 0.003989744000136852, "step": 11 }, { "epoch": 0.02, "learning_rate": 6e-06, "logits/chosen": -2.2198939323425293, "logits/rejected": -2.2628731727600098, "logps/chosen": -198.29022216796875, "logps/rejected": -170.82257080078125, "loss": 0.688, "rewards/accuracies": 0.625, "rewards/chosen": -0.0031775482930243015, "rewards/margins": 0.010572671890258789, "rewards/rejected": -0.013750219717621803, "step": 12 }, { "epoch": 0.02, "learning_rate": 6.5000000000000004e-06, "logits/chosen": -2.7613415718078613, "logits/rejected": -2.8206562995910645, "logps/chosen": -239.75253295898438, "logps/rejected": -238.97320556640625, "loss": 0.6999, "rewards/accuracies": 0.375, "rewards/chosen": -0.0029205563478171825, "rewards/margins": -0.013042164966464043, "rewards/rejected": 0.010121609084308147, "step": 13 }, { "epoch": 0.02, "learning_rate": 7.000000000000001e-06, "logits/chosen": -2.0942578315734863, "logits/rejected": -2.017871856689453, "logps/chosen": -184.2721710205078, "logps/rejected": -175.73641967773438, "loss": 0.6872, "rewards/accuracies": 0.6875, "rewards/chosen": -0.004038786515593529, "rewards/margins": 0.01193075068295002, "rewards/rejected": -0.015969539061188698, "step": 14 }, { "epoch": 0.02, "learning_rate": 7.5e-06, "logits/chosen": -2.526791572570801, "logits/rejected": -2.5704824924468994, "logps/chosen": -200.25115966796875, "logps/rejected": -223.4153289794922, "loss": 0.6917, "rewards/accuracies": 0.4375, "rewards/chosen": 0.00700113782659173, "rewards/margins": 0.003126000752672553, "rewards/rejected": 0.003875136375427246, "step": 15 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-06, "logits/chosen": -2.5145041942596436, "logits/rejected": -2.478262424468994, "logps/chosen": -169.4720001220703, "logps/rejected": -179.3622283935547, "loss": 0.687, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00018756365170702338, "rewards/margins": 0.012972594238817692, "rewards/rejected": -0.013160157017409801, "step": 16 }, { "epoch": 0.02, "learning_rate": 8.500000000000002e-06, "logits/chosen": -2.1660895347595215, "logits/rejected": -2.1799678802490234, "logps/chosen": -158.1959686279297, "logps/rejected": -225.02365112304688, "loss": 0.687, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01399233564734459, "rewards/margins": 0.01266777515411377, "rewards/rejected": 0.001324558281339705, "step": 17 }, { "epoch": 0.02, "learning_rate": 9e-06, "logits/chosen": -2.417539596557617, "logits/rejected": -2.4626646041870117, "logps/chosen": -170.4958038330078, "logps/rejected": -178.34300231933594, "loss": 0.6877, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0049651628360152245, "rewards/margins": 0.011217641644179821, "rewards/rejected": -0.006252479739487171, "step": 18 }, { "epoch": 0.02, "learning_rate": 9.5e-06, "logits/chosen": -2.6470260620117188, "logits/rejected": -2.7213802337646484, "logps/chosen": -179.53497314453125, "logps/rejected": -200.94956970214844, "loss": 0.69, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0027004480361938477, "rewards/margins": 0.006518864538520575, "rewards/rejected": -0.00921931304037571, "step": 19 }, { "epoch": 0.03, "learning_rate": 1e-05, "logits/chosen": -2.4962029457092285, "logits/rejected": -2.5313973426818848, "logps/chosen": -208.752685546875, "logps/rejected": -244.1136474609375, "loss": 0.683, "rewards/accuracies": 0.625, "rewards/chosen": 0.009996438398957253, "rewards/margins": 0.020868420600891113, "rewards/rejected": -0.01087198406457901, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.05e-05, "logits/chosen": -2.531207799911499, "logits/rejected": -2.5359435081481934, "logps/chosen": -194.47694396972656, "logps/rejected": -185.80142211914062, "loss": 0.6895, "rewards/accuracies": 0.5625, "rewards/chosen": 0.002445125486701727, "rewards/margins": 0.0077151767909526825, "rewards/rejected": -0.005270051304250956, "step": 21 }, { "epoch": 0.03, "learning_rate": 1.1000000000000001e-05, "logits/chosen": -2.430760622024536, "logits/rejected": -2.395613431930542, "logps/chosen": -144.2230987548828, "logps/rejected": -148.8902587890625, "loss": 0.6942, "rewards/accuracies": 0.4375, "rewards/chosen": -0.00968785211443901, "rewards/margins": -0.001990032149478793, "rewards/rejected": -0.007697821129113436, "step": 22 }, { "epoch": 0.03, "learning_rate": 1.1500000000000002e-05, "logits/chosen": -2.4417223930358887, "logits/rejected": -2.441425323486328, "logps/chosen": -165.82875061035156, "logps/rejected": -178.82815551757812, "loss": 0.6886, "rewards/accuracies": 0.5, "rewards/chosen": 0.0060547590255737305, "rewards/margins": 0.009464193135499954, "rewards/rejected": -0.0034094336442649364, "step": 23 }, { "epoch": 0.03, "learning_rate": 1.2e-05, "logits/chosen": -2.3494763374328613, "logits/rejected": -2.40555477142334, "logps/chosen": -133.24130249023438, "logps/rejected": -151.85446166992188, "loss": 0.6986, "rewards/accuracies": 0.25, "rewards/chosen": -0.011995697394013405, "rewards/margins": -0.010638857260346413, "rewards/rejected": -0.0013568403664976358, "step": 24 }, { "epoch": 0.03, "learning_rate": 1.25e-05, "logits/chosen": -2.3450560569763184, "logits/rejected": -2.5124807357788086, "logps/chosen": -242.19821166992188, "logps/rejected": -259.1798095703125, "loss": 0.6912, "rewards/accuracies": 0.4375, "rewards/chosen": -0.013580609112977982, "rewards/margins": 0.004313135519623756, "rewards/rejected": -0.017893744632601738, "step": 25 }, { "epoch": 0.03, "learning_rate": 1.3000000000000001e-05, "logits/chosen": -2.5465288162231445, "logits/rejected": -2.5980730056762695, "logps/chosen": -202.898193359375, "logps/rejected": -248.03968811035156, "loss": 0.6854, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0018544672057032585, "rewards/margins": 0.016238166019320488, "rewards/rejected": -0.01438369695097208, "step": 26 }, { "epoch": 0.04, "learning_rate": 1.3500000000000001e-05, "logits/chosen": -2.733663320541382, "logits/rejected": -2.712355375289917, "logps/chosen": -205.80429077148438, "logps/rejected": -174.95797729492188, "loss": 0.6933, "rewards/accuracies": 0.4375, "rewards/chosen": -8.790497668087482e-05, "rewards/margins": 2.1266518160700798e-05, "rewards/rejected": -0.0001091718440875411, "step": 27 }, { "epoch": 0.04, "learning_rate": 1.4000000000000001e-05, "logits/chosen": -2.550997257232666, "logits/rejected": -2.4390013217926025, "logps/chosen": -171.59786987304688, "logps/rejected": -176.4530029296875, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": -0.02987699769437313, "rewards/margins": 0.0004779808223247528, "rewards/rejected": -0.030354974791407585, "step": 28 }, { "epoch": 0.04, "learning_rate": 1.45e-05, "logits/chosen": -2.4879837036132812, "logits/rejected": -2.480923652648926, "logps/chosen": -149.10079956054688, "logps/rejected": -162.999267578125, "loss": 0.6834, "rewards/accuracies": 0.6875, "rewards/chosen": -0.005112767685204744, "rewards/margins": 0.01993861421942711, "rewards/rejected": -0.025051379576325417, "step": 29 }, { "epoch": 0.04, "learning_rate": 1.5e-05, "logits/chosen": -2.3427562713623047, "logits/rejected": -2.5123889446258545, "logps/chosen": -160.62950134277344, "logps/rejected": -201.14747619628906, "loss": 0.6952, "rewards/accuracies": 0.5625, "rewards/chosen": -0.015586448833346367, "rewards/margins": -0.003298282390460372, "rewards/rejected": -0.012288165278732777, "step": 30 }, { "epoch": 0.04, "learning_rate": 1.55e-05, "logits/chosen": -2.2548415660858154, "logits/rejected": -2.3827996253967285, "logps/chosen": -126.6205825805664, "logps/rejected": -145.7274627685547, "loss": 0.6883, "rewards/accuracies": 0.625, "rewards/chosen": -0.02384941652417183, "rewards/margins": 0.010512137785553932, "rewards/rejected": -0.03436155617237091, "step": 31 }, { "epoch": 0.04, "learning_rate": 1.6000000000000003e-05, "logits/chosen": -2.5090818405151367, "logits/rejected": -2.560624122619629, "logps/chosen": -182.53944396972656, "logps/rejected": -208.08763122558594, "loss": 0.6877, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0014843230601400137, "rewards/margins": 0.011496281251311302, "rewards/rejected": -0.01298060454428196, "step": 32 }, { "epoch": 0.04, "learning_rate": 1.65e-05, "logits/chosen": -2.800107479095459, "logits/rejected": -2.7355594635009766, "logps/chosen": -207.99392700195312, "logps/rejected": -208.31610107421875, "loss": 0.6927, "rewards/accuracies": 0.375, "rewards/chosen": -0.014949416741728783, "rewards/margins": 0.0012244456447660923, "rewards/rejected": -0.016173863783478737, "step": 33 }, { "epoch": 0.04, "learning_rate": 1.7000000000000003e-05, "logits/chosen": -2.6325690746307373, "logits/rejected": -2.630256175994873, "logps/chosen": -196.8790283203125, "logps/rejected": -202.53794860839844, "loss": 0.7116, "rewards/accuracies": 0.25, "rewards/chosen": -0.041500113904476166, "rewards/margins": -0.03608906269073486, "rewards/rejected": -0.005411052145063877, "step": 34 }, { "epoch": 0.05, "learning_rate": 1.75e-05, "logits/chosen": -2.476334571838379, "logits/rejected": -2.473987579345703, "logps/chosen": -256.1812438964844, "logps/rejected": -250.76165771484375, "loss": 0.6882, "rewards/accuracies": 0.75, "rewards/chosen": -0.01104288175702095, "rewards/margins": 0.010350894182920456, "rewards/rejected": -0.021393775939941406, "step": 35 }, { "epoch": 0.05, "learning_rate": 1.8e-05, "logits/chosen": -2.342329502105713, "logits/rejected": -2.3805429935455322, "logps/chosen": -164.03692626953125, "logps/rejected": -217.39849853515625, "loss": 0.688, "rewards/accuracies": 0.5625, "rewards/chosen": -0.009070659056305885, "rewards/margins": 0.010929775424301624, "rewards/rejected": -0.020000435411930084, "step": 36 }, { "epoch": 0.05, "learning_rate": 1.85e-05, "logits/chosen": -2.4253318309783936, "logits/rejected": -2.4550042152404785, "logps/chosen": -146.70420837402344, "logps/rejected": -129.80674743652344, "loss": 0.6845, "rewards/accuracies": 0.5625, "rewards/chosen": -0.031095195561647415, "rewards/margins": 0.017679547891020775, "rewards/rejected": -0.04877474159002304, "step": 37 }, { "epoch": 0.05, "learning_rate": 1.9e-05, "logits/chosen": -2.4936635494232178, "logits/rejected": -2.5476386547088623, "logps/chosen": -164.6866455078125, "logps/rejected": -198.52139282226562, "loss": 0.6895, "rewards/accuracies": 0.6875, "rewards/chosen": -0.025130080059170723, "rewards/margins": 0.007643342949450016, "rewards/rejected": -0.03277342766523361, "step": 38 }, { "epoch": 0.05, "learning_rate": 1.9500000000000003e-05, "logits/chosen": -2.4614553451538086, "logits/rejected": -2.4693663120269775, "logps/chosen": -172.24966430664062, "logps/rejected": -166.52056884765625, "loss": 0.6975, "rewards/accuracies": 0.5, "rewards/chosen": -0.027715325355529785, "rewards/margins": -0.00841212272644043, "rewards/rejected": -0.019303202629089355, "step": 39 }, { "epoch": 0.05, "learning_rate": 2e-05, "logits/chosen": -2.3024024963378906, "logits/rejected": -2.2906301021575928, "logps/chosen": -243.98683166503906, "logps/rejected": -199.12408447265625, "loss": 0.6979, "rewards/accuracies": 0.4375, "rewards/chosen": -0.028235863894224167, "rewards/margins": -0.00847182422876358, "rewards/rejected": -0.019764041528105736, "step": 40 }, { "epoch": 0.05, "learning_rate": 2.05e-05, "logits/chosen": -2.660562753677368, "logits/rejected": -2.6754074096679688, "logps/chosen": -156.67514038085938, "logps/rejected": -147.5068359375, "loss": 0.6887, "rewards/accuracies": 0.625, "rewards/chosen": -0.03220677375793457, "rewards/margins": 0.009226083755493164, "rewards/rejected": -0.041432857513427734, "step": 41 }, { "epoch": 0.05, "learning_rate": 2.1e-05, "logits/chosen": -2.482787847518921, "logits/rejected": -2.4663233757019043, "logps/chosen": -207.6859588623047, "logps/rejected": -182.35931396484375, "loss": 0.6875, "rewards/accuracies": 0.375, "rewards/chosen": -0.014715791679918766, "rewards/margins": 0.012153576128184795, "rewards/rejected": -0.02686937153339386, "step": 42 }, { "epoch": 0.06, "learning_rate": 2.15e-05, "logits/chosen": -2.6544148921966553, "logits/rejected": -2.717411756515503, "logps/chosen": -197.96400451660156, "logps/rejected": -223.0219268798828, "loss": 0.691, "rewards/accuracies": 0.625, "rewards/chosen": -0.024796580895781517, "rewards/margins": 0.005184031091630459, "rewards/rejected": -0.0299806110560894, "step": 43 }, { "epoch": 0.06, "learning_rate": 2.2000000000000003e-05, "logits/chosen": -2.561187744140625, "logits/rejected": -2.6224913597106934, "logps/chosen": -194.60935974121094, "logps/rejected": -194.01577758789062, "loss": 0.6971, "rewards/accuracies": 0.4375, "rewards/chosen": -0.020246006548404694, "rewards/margins": -0.0076716188341379166, "rewards/rejected": -0.012574386782944202, "step": 44 }, { "epoch": 0.06, "learning_rate": 2.25e-05, "logits/chosen": -2.5130224227905273, "logits/rejected": -2.672990083694458, "logps/chosen": -154.0092010498047, "logps/rejected": -190.70748901367188, "loss": 0.6818, "rewards/accuracies": 0.75, "rewards/chosen": -0.018519926816225052, "rewards/margins": 0.023535681888461113, "rewards/rejected": -0.042055610567331314, "step": 45 }, { "epoch": 0.06, "learning_rate": 2.3000000000000003e-05, "logits/chosen": -2.738673686981201, "logits/rejected": -2.82279896736145, "logps/chosen": -174.6641387939453, "logps/rejected": -163.0157928466797, "loss": 0.6967, "rewards/accuracies": 0.4375, "rewards/chosen": -0.014652942307293415, "rewards/margins": -0.006221937946975231, "rewards/rejected": -0.008431006222963333, "step": 46 }, { "epoch": 0.06, "learning_rate": 2.35e-05, "logits/chosen": -2.581393241882324, "logits/rejected": -2.5734691619873047, "logps/chosen": -137.8702392578125, "logps/rejected": -154.74896240234375, "loss": 0.6887, "rewards/accuracies": 0.5, "rewards/chosen": -0.007426738273352385, "rewards/margins": 0.009324884042143822, "rewards/rejected": -0.016751624643802643, "step": 47 }, { "epoch": 0.06, "learning_rate": 2.4e-05, "logits/chosen": -2.544344425201416, "logits/rejected": -2.690079689025879, "logps/chosen": -185.8907012939453, "logps/rejected": -195.9602508544922, "loss": 0.6898, "rewards/accuracies": 0.5, "rewards/chosen": -0.02690129354596138, "rewards/margins": 0.007246016524732113, "rewards/rejected": -0.03414731100201607, "step": 48 }, { "epoch": 0.06, "learning_rate": 2.45e-05, "logits/chosen": -2.583885669708252, "logits/rejected": -2.609614849090576, "logps/chosen": -215.93264770507812, "logps/rejected": -234.49636840820312, "loss": 0.6858, "rewards/accuracies": 0.5, "rewards/chosen": -0.022774625569581985, "rewards/margins": 0.015723586082458496, "rewards/rejected": -0.03849821165204048, "step": 49 }, { "epoch": 0.07, "learning_rate": 2.5e-05, "logits/chosen": -2.6080195903778076, "logits/rejected": -2.655003070831299, "logps/chosen": -229.5977020263672, "logps/rejected": -211.47779846191406, "loss": 0.6925, "rewards/accuracies": 0.5625, "rewards/chosen": -0.026349734514951706, "rewards/margins": 0.0026414887979626656, "rewards/rejected": -0.028991222381591797, "step": 50 }, { "epoch": 0.07, "learning_rate": 2.5500000000000003e-05, "logits/chosen": -2.180418014526367, "logits/rejected": -2.210296869277954, "logps/chosen": -221.1846466064453, "logps/rejected": -185.85655212402344, "loss": 0.6891, "rewards/accuracies": 0.5, "rewards/chosen": -0.013596129603683949, "rewards/margins": 0.009130668826401234, "rewards/rejected": -0.022726796567440033, "step": 51 }, { "epoch": 0.07, "learning_rate": 2.6000000000000002e-05, "logits/chosen": -2.647798538208008, "logits/rejected": -2.715623617172241, "logps/chosen": -186.01138305664062, "logps/rejected": -180.89259338378906, "loss": 0.6785, "rewards/accuracies": 0.6875, "rewards/chosen": 0.019299650564789772, "rewards/margins": 0.030588869005441666, "rewards/rejected": -0.011289214715361595, "step": 52 }, { "epoch": 0.07, "learning_rate": 2.6500000000000004e-05, "logits/chosen": -2.570209503173828, "logits/rejected": -2.5728917121887207, "logps/chosen": -209.2962188720703, "logps/rejected": -216.60145568847656, "loss": 0.6824, "rewards/accuracies": 0.5625, "rewards/chosen": -0.005204535089433193, "rewards/margins": 0.021862652152776718, "rewards/rejected": -0.027067184448242188, "step": 53 }, { "epoch": 0.07, "learning_rate": 2.7000000000000002e-05, "logits/chosen": -2.672147512435913, "logits/rejected": -2.678018808364868, "logps/chosen": -167.4713134765625, "logps/rejected": -170.08120727539062, "loss": 0.691, "rewards/accuracies": 0.5, "rewards/chosen": -0.013661455363035202, "rewards/margins": 0.004836535546928644, "rewards/rejected": -0.018497992306947708, "step": 54 }, { "epoch": 0.07, "learning_rate": 2.7500000000000004e-05, "logits/chosen": -2.4589505195617676, "logits/rejected": -2.462191343307495, "logps/chosen": -200.73976135253906, "logps/rejected": -195.30276489257812, "loss": 0.704, "rewards/accuracies": 0.375, "rewards/chosen": -0.03748317062854767, "rewards/margins": -0.020142268389463425, "rewards/rejected": -0.017340898513793945, "step": 55 }, { "epoch": 0.07, "learning_rate": 2.8000000000000003e-05, "logits/chosen": -2.230989933013916, "logits/rejected": -2.2140636444091797, "logps/chosen": -143.33631896972656, "logps/rejected": -136.0796661376953, "loss": 0.6989, "rewards/accuracies": 0.25, "rewards/chosen": -0.020144915208220482, "rewards/margins": -0.01022801361978054, "rewards/rejected": -0.009916901588439941, "step": 56 }, { "epoch": 0.07, "learning_rate": 2.8499999999999998e-05, "logits/chosen": -2.5017194747924805, "logits/rejected": -2.58662486076355, "logps/chosen": -167.50990295410156, "logps/rejected": -208.83607482910156, "loss": 0.6808, "rewards/accuracies": 0.625, "rewards/chosen": -0.0027861359994858503, "rewards/margins": 0.02604994922876358, "rewards/rejected": -0.02883608266711235, "step": 57 }, { "epoch": 0.08, "learning_rate": 2.9e-05, "logits/chosen": -2.53066349029541, "logits/rejected": -2.608471393585205, "logps/chosen": -241.85113525390625, "logps/rejected": -260.0408630371094, "loss": 0.6939, "rewards/accuracies": 0.5, "rewards/chosen": -0.024016117677092552, "rewards/margins": -0.0005100721027702093, "rewards/rejected": -0.023506049066781998, "step": 58 }, { "epoch": 0.08, "learning_rate": 2.95e-05, "logits/chosen": -2.2705140113830566, "logits/rejected": -2.4387779235839844, "logps/chosen": -179.22540283203125, "logps/rejected": -182.5544891357422, "loss": 0.6775, "rewards/accuracies": 0.5625, "rewards/chosen": 0.00418963422998786, "rewards/margins": 0.033549048006534576, "rewards/rejected": -0.02935941144824028, "step": 59 }, { "epoch": 0.08, "learning_rate": 3e-05, "logits/chosen": -2.46372652053833, "logits/rejected": -2.513279438018799, "logps/chosen": -230.43133544921875, "logps/rejected": -227.0118865966797, "loss": 0.6761, "rewards/accuracies": 0.625, "rewards/chosen": -0.010919665917754173, "rewards/margins": 0.03570995107293129, "rewards/rejected": -0.04662961885333061, "step": 60 }, { "epoch": 0.08, "learning_rate": 3.05e-05, "logits/chosen": -2.530576467514038, "logits/rejected": -2.5415897369384766, "logps/chosen": -200.46568298339844, "logps/rejected": -220.82989501953125, "loss": 0.6794, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0384901762008667, "rewards/margins": 0.03156058490276337, "rewards/rejected": -0.07005076855421066, "step": 61 }, { "epoch": 0.08, "learning_rate": 3.1e-05, "logits/chosen": -2.5888161659240723, "logits/rejected": -2.639291286468506, "logps/chosen": -132.55116271972656, "logps/rejected": -167.082275390625, "loss": 0.7042, "rewards/accuracies": 0.4375, "rewards/chosen": -0.03156990930438042, "rewards/margins": -0.018804360181093216, "rewards/rejected": -0.0127655528485775, "step": 62 }, { "epoch": 0.08, "learning_rate": 3.15e-05, "logits/chosen": -2.46744704246521, "logits/rejected": -2.494837522506714, "logps/chosen": -210.9451141357422, "logps/rejected": -211.72433471679688, "loss": 0.6799, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0017502065747976303, "rewards/margins": 0.027423406019806862, "rewards/rejected": -0.02567320130765438, "step": 63 }, { "epoch": 0.08, "learning_rate": 3.2000000000000005e-05, "logits/chosen": -2.4783239364624023, "logits/rejected": -2.490576982498169, "logps/chosen": -197.71629333496094, "logps/rejected": -207.23048400878906, "loss": 0.6864, "rewards/accuracies": 0.625, "rewards/chosen": -0.01412363164126873, "rewards/margins": 0.01561451144516468, "rewards/rejected": -0.029738139361143112, "step": 64 }, { "epoch": 0.09, "learning_rate": 3.2500000000000004e-05, "logits/chosen": -2.3918569087982178, "logits/rejected": -2.4426636695861816, "logps/chosen": -164.30029296875, "logps/rejected": -201.85992431640625, "loss": 0.6908, "rewards/accuracies": 0.3125, "rewards/chosen": -0.025302361696958542, "rewards/margins": 0.009253643453121185, "rewards/rejected": -0.034556008875370026, "step": 65 }, { "epoch": 0.09, "learning_rate": 3.3e-05, "logits/chosen": -2.4934239387512207, "logits/rejected": -2.5188210010528564, "logps/chosen": -178.12030029296875, "logps/rejected": -211.23947143554688, "loss": 0.6908, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05651288107037544, "rewards/margins": 0.0065690516494214535, "rewards/rejected": -0.06308193504810333, "step": 66 }, { "epoch": 0.09, "learning_rate": 3.35e-05, "logits/chosen": -2.415985345840454, "logits/rejected": -2.3687984943389893, "logps/chosen": -211.91453552246094, "logps/rejected": -216.65591430664062, "loss": 0.6808, "rewards/accuracies": 0.5625, "rewards/chosen": -0.045519113540649414, "rewards/margins": 0.028419354930520058, "rewards/rejected": -0.07393846660852432, "step": 67 }, { "epoch": 0.09, "learning_rate": 3.4000000000000007e-05, "logits/chosen": -2.37481427192688, "logits/rejected": -2.4693892002105713, "logps/chosen": -172.92459106445312, "logps/rejected": -209.3857421875, "loss": 0.6801, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03707296401262283, "rewards/margins": 0.027949143201112747, "rewards/rejected": -0.06502211093902588, "step": 68 }, { "epoch": 0.09, "learning_rate": 3.45e-05, "logits/chosen": -2.3615283966064453, "logits/rejected": -2.3259239196777344, "logps/chosen": -154.25360107421875, "logps/rejected": -161.130126953125, "loss": 0.7022, "rewards/accuracies": 0.3125, "rewards/chosen": -0.04533124342560768, "rewards/margins": -0.016323519870638847, "rewards/rejected": -0.029007721692323685, "step": 69 }, { "epoch": 0.09, "learning_rate": 3.5e-05, "logits/chosen": -2.64551043510437, "logits/rejected": -2.7289645671844482, "logps/chosen": -200.1268768310547, "logps/rejected": -225.6541748046875, "loss": 0.6993, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0578455924987793, "rewards/margins": -0.008899472653865814, "rewards/rejected": -0.04894612357020378, "step": 70 }, { "epoch": 0.09, "learning_rate": 3.55e-05, "logits/chosen": -2.703721046447754, "logits/rejected": -2.669755220413208, "logps/chosen": -208.99668884277344, "logps/rejected": -164.8292694091797, "loss": 0.6756, "rewards/accuracies": 0.6875, "rewards/chosen": -0.022734597325325012, "rewards/margins": 0.03882308304309845, "rewards/rejected": -0.06155767664313316, "step": 71 }, { "epoch": 0.09, "learning_rate": 3.6e-05, "logits/chosen": -2.6480765342712402, "logits/rejected": -2.696770191192627, "logps/chosen": -172.99888610839844, "logps/rejected": -201.09979248046875, "loss": 0.7063, "rewards/accuracies": 0.375, "rewards/chosen": -0.053855180740356445, "rewards/margins": -0.024987507611513138, "rewards/rejected": -0.028867674991488457, "step": 72 }, { "epoch": 0.1, "learning_rate": 3.65e-05, "logits/chosen": -2.6311445236206055, "logits/rejected": -2.6575632095336914, "logps/chosen": -251.72789001464844, "logps/rejected": -271.0470886230469, "loss": 0.6867, "rewards/accuracies": 0.625, "rewards/chosen": -0.03289387375116348, "rewards/margins": 0.014702942222356796, "rewards/rejected": -0.04759681224822998, "step": 73 }, { "epoch": 0.1, "learning_rate": 3.7e-05, "logits/chosen": -2.3990683555603027, "logits/rejected": -2.502671480178833, "logps/chosen": -197.92665100097656, "logps/rejected": -211.00967407226562, "loss": 0.6841, "rewards/accuracies": 0.5, "rewards/chosen": -0.014272330328822136, "rewards/margins": 0.020740672945976257, "rewards/rejected": -0.03501300886273384, "step": 74 }, { "epoch": 0.1, "learning_rate": 3.7500000000000003e-05, "logits/chosen": -2.575502872467041, "logits/rejected": -2.527902364730835, "logps/chosen": -214.5215301513672, "logps/rejected": -201.05194091796875, "loss": 0.6651, "rewards/accuracies": 0.75, "rewards/chosen": -0.006246686447411776, "rewards/margins": 0.059338975697755814, "rewards/rejected": -0.065585657954216, "step": 75 }, { "epoch": 0.1, "learning_rate": 3.8e-05, "logits/chosen": -2.217294692993164, "logits/rejected": -2.223574638366699, "logps/chosen": -174.7819366455078, "logps/rejected": -178.09852600097656, "loss": 0.7013, "rewards/accuracies": 0.5, "rewards/chosen": -0.09115451574325562, "rewards/margins": -0.012327454052865505, "rewards/rejected": -0.07882705330848694, "step": 76 }, { "epoch": 0.1, "learning_rate": 3.85e-05, "logits/chosen": -2.518965721130371, "logits/rejected": -2.443446159362793, "logps/chosen": -201.1251220703125, "logps/rejected": -170.15533447265625, "loss": 0.7029, "rewards/accuracies": 0.5, "rewards/chosen": -0.04162323474884033, "rewards/margins": -0.015459035523235798, "rewards/rejected": -0.02616419643163681, "step": 77 }, { "epoch": 0.1, "learning_rate": 3.9000000000000006e-05, "logits/chosen": -2.521008253097534, "logits/rejected": -2.6866934299468994, "logps/chosen": -269.85284423828125, "logps/rejected": -240.82626342773438, "loss": 0.6928, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05473289266228676, "rewards/margins": 0.00423135980963707, "rewards/rejected": -0.05896425247192383, "step": 78 }, { "epoch": 0.1, "learning_rate": 3.9500000000000005e-05, "logits/chosen": -2.3015501499176025, "logits/rejected": -2.351773500442505, "logps/chosen": -225.17691040039062, "logps/rejected": -249.8846435546875, "loss": 0.6873, "rewards/accuracies": 0.4375, "rewards/chosen": -0.06839534640312195, "rewards/margins": 0.014748764224350452, "rewards/rejected": -0.08314411342144012, "step": 79 }, { "epoch": 0.1, "learning_rate": 4e-05, "logits/chosen": -2.6201720237731934, "logits/rejected": -2.680964708328247, "logps/chosen": -225.08645629882812, "logps/rejected": -232.00096130371094, "loss": 0.6739, "rewards/accuracies": 0.625, "rewards/chosen": -0.06486918777227402, "rewards/margins": 0.04111311584711075, "rewards/rejected": -0.10598230361938477, "step": 80 }, { "epoch": 0.11, "learning_rate": 4.05e-05, "logits/chosen": -2.5782110691070557, "logits/rejected": -2.5646634101867676, "logps/chosen": -249.90745544433594, "logps/rejected": -257.51275634765625, "loss": 0.6976, "rewards/accuracies": 0.5, "rewards/chosen": -0.08720846474170685, "rewards/margins": -0.0017095585353672504, "rewards/rejected": -0.08549890667200089, "step": 81 }, { "epoch": 0.11, "learning_rate": 4.1e-05, "logits/chosen": -2.5777688026428223, "logits/rejected": -2.665797472000122, "logps/chosen": -171.82347106933594, "logps/rejected": -211.63311767578125, "loss": 0.6883, "rewards/accuracies": 0.5, "rewards/chosen": -0.08461908996105194, "rewards/margins": 0.013853237964212894, "rewards/rejected": -0.09847234189510345, "step": 82 }, { "epoch": 0.11, "learning_rate": 4.15e-05, "logits/chosen": -2.3364946842193604, "logits/rejected": -2.3084681034088135, "logps/chosen": -193.4939727783203, "logps/rejected": -184.15200805664062, "loss": 0.717, "rewards/accuracies": 0.375, "rewards/chosen": -0.0981694683432579, "rewards/margins": -0.04480106756091118, "rewards/rejected": -0.053368404507637024, "step": 83 }, { "epoch": 0.11, "learning_rate": 4.2e-05, "logits/chosen": -2.445204257965088, "logits/rejected": -2.5341625213623047, "logps/chosen": -174.59225463867188, "logps/rejected": -210.94137573242188, "loss": 0.699, "rewards/accuracies": 0.375, "rewards/chosen": -0.09675168246030807, "rewards/margins": -0.00985276885330677, "rewards/rejected": -0.08689891546964645, "step": 84 }, { "epoch": 0.11, "learning_rate": 4.25e-05, "logits/chosen": -2.4798362255096436, "logits/rejected": -2.4760632514953613, "logps/chosen": -148.0768585205078, "logps/rejected": -176.31700134277344, "loss": 0.6945, "rewards/accuracies": 0.375, "rewards/chosen": -0.07728591561317444, "rewards/margins": 0.0020556673407554626, "rewards/rejected": -0.0793415829539299, "step": 85 }, { "epoch": 0.11, "learning_rate": 4.3e-05, "logits/chosen": -2.5911381244659424, "logits/rejected": -2.630659341812134, "logps/chosen": -203.1630859375, "logps/rejected": -183.4739227294922, "loss": 0.653, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04599933326244354, "rewards/margins": 0.08558819442987442, "rewards/rejected": -0.13158753514289856, "step": 86 }, { "epoch": 0.11, "learning_rate": 4.35e-05, "logits/chosen": -2.600797414779663, "logits/rejected": -2.6318492889404297, "logps/chosen": -156.8225860595703, "logps/rejected": -161.6145477294922, "loss": 0.6705, "rewards/accuracies": 0.5625, "rewards/chosen": -0.07066044956445694, "rewards/margins": 0.05500438064336777, "rewards/rejected": -0.1256648302078247, "step": 87 }, { "epoch": 0.12, "learning_rate": 4.4000000000000006e-05, "logits/chosen": -2.2124781608581543, "logits/rejected": -2.259019374847412, "logps/chosen": -142.0679473876953, "logps/rejected": -177.9369659423828, "loss": 0.6949, "rewards/accuracies": 0.375, "rewards/chosen": -0.13633377850055695, "rewards/margins": 0.000554969534277916, "rewards/rejected": -0.1368887573480606, "step": 88 }, { "epoch": 0.12, "learning_rate": 4.4500000000000004e-05, "logits/chosen": -2.396183729171753, "logits/rejected": -2.533904552459717, "logps/chosen": -171.38790893554688, "logps/rejected": -202.30979919433594, "loss": 0.6554, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09015192836523056, "rewards/margins": 0.08280421048402786, "rewards/rejected": -0.17295613884925842, "step": 89 }, { "epoch": 0.12, "learning_rate": 4.5e-05, "logits/chosen": -2.640425443649292, "logits/rejected": -2.674539804458618, "logps/chosen": -175.3508758544922, "logps/rejected": -180.97915649414062, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.1077599823474884, "rewards/margins": 0.03759467601776123, "rewards/rejected": -0.14535464346408844, "step": 90 }, { "epoch": 0.12, "learning_rate": 4.55e-05, "logits/chosen": -2.5857696533203125, "logits/rejected": -2.5822975635528564, "logps/chosen": -176.55982971191406, "logps/rejected": -196.5354461669922, "loss": 0.7, "rewards/accuracies": 0.375, "rewards/chosen": -0.09103889763355255, "rewards/margins": -0.009319041855633259, "rewards/rejected": -0.08171986043453217, "step": 91 }, { "epoch": 0.12, "learning_rate": 4.600000000000001e-05, "logits/chosen": -2.660163402557373, "logits/rejected": -2.5935940742492676, "logps/chosen": -233.04049682617188, "logps/rejected": -212.1655731201172, "loss": 0.6863, "rewards/accuracies": 0.5, "rewards/chosen": -0.118477463722229, "rewards/margins": 0.017601586878299713, "rewards/rejected": -0.13607905805110931, "step": 92 }, { "epoch": 0.12, "learning_rate": 4.6500000000000005e-05, "logits/chosen": -2.5211241245269775, "logits/rejected": -2.6064364910125732, "logps/chosen": -176.89476013183594, "logps/rejected": -187.39271545410156, "loss": 0.6736, "rewards/accuracies": 0.625, "rewards/chosen": -0.07700353115797043, "rewards/margins": 0.04530329257249832, "rewards/rejected": -0.12230683863162994, "step": 93 }, { "epoch": 0.12, "learning_rate": 4.7e-05, "logits/chosen": -2.6894078254699707, "logits/rejected": -2.6806745529174805, "logps/chosen": -244.7545623779297, "logps/rejected": -212.83453369140625, "loss": 0.6446, "rewards/accuracies": 0.75, "rewards/chosen": -0.10891007632017136, "rewards/margins": 0.10942523181438446, "rewards/rejected": -0.2183353304862976, "step": 94 }, { "epoch": 0.12, "learning_rate": 4.75e-05, "logits/chosen": -2.4758429527282715, "logits/rejected": -2.5564448833465576, "logps/chosen": -161.29554748535156, "logps/rejected": -185.48199462890625, "loss": 0.6991, "rewards/accuracies": 0.375, "rewards/chosen": -0.152861088514328, "rewards/margins": -0.0035788798704743385, "rewards/rejected": -0.14928221702575684, "step": 95 }, { "epoch": 0.13, "learning_rate": 4.8e-05, "logits/chosen": -2.47318959236145, "logits/rejected": -2.439746856689453, "logps/chosen": -241.09283447265625, "logps/rejected": -218.68580627441406, "loss": 0.7017, "rewards/accuracies": 0.375, "rewards/chosen": -0.1952984780073166, "rewards/margins": -0.01051153801381588, "rewards/rejected": -0.18478693068027496, "step": 96 }, { "epoch": 0.13, "learning_rate": 4.85e-05, "logits/chosen": -2.541466474533081, "logits/rejected": -2.56956148147583, "logps/chosen": -180.84725952148438, "logps/rejected": -211.02352905273438, "loss": 0.6627, "rewards/accuracies": 0.625, "rewards/chosen": -0.15894030034542084, "rewards/margins": 0.06507530808448792, "rewards/rejected": -0.22401559352874756, "step": 97 }, { "epoch": 0.13, "learning_rate": 4.9e-05, "logits/chosen": -2.226611852645874, "logits/rejected": -2.359558343887329, "logps/chosen": -145.781494140625, "logps/rejected": -181.21531677246094, "loss": 0.6689, "rewards/accuracies": 0.6875, "rewards/chosen": -0.177232027053833, "rewards/margins": 0.058251187205314636, "rewards/rejected": -0.23548321425914764, "step": 98 }, { "epoch": 0.13, "learning_rate": 4.9500000000000004e-05, "logits/chosen": -2.5605039596557617, "logits/rejected": -2.641690492630005, "logps/chosen": -156.43446350097656, "logps/rejected": -171.6468505859375, "loss": 0.6801, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23164159059524536, "rewards/margins": 0.03432049974799156, "rewards/rejected": -0.2659620940685272, "step": 99 }, { "epoch": 0.13, "learning_rate": 5e-05, "logits/chosen": -2.7840309143066406, "logits/rejected": -2.8028616905212402, "logps/chosen": -221.6836395263672, "logps/rejected": -247.433349609375, "loss": 0.647, "rewards/accuracies": 0.6875, "rewards/chosen": -0.14767608046531677, "rewards/margins": 0.1068597286939621, "rewards/rejected": -0.25453582406044006, "step": 100 }, { "epoch": 0.13, "learning_rate": 4.99999978299634e-05, "logits/chosen": -2.1325531005859375, "logits/rejected": -2.1438934803009033, "logps/chosen": -171.369140625, "logps/rejected": -184.96827697753906, "loss": 0.6417, "rewards/accuracies": 0.5625, "rewards/chosen": -0.18850459158420563, "rewards/margins": 0.12018898129463196, "rewards/rejected": -0.3086935877799988, "step": 101 }, { "epoch": 0.13, "learning_rate": 4.999999131985394e-05, "logits/chosen": -2.6781508922576904, "logits/rejected": -2.5359549522399902, "logps/chosen": -232.26742553710938, "logps/rejected": -214.5188446044922, "loss": 0.7372, "rewards/accuracies": 0.3125, "rewards/chosen": -0.32363808155059814, "rewards/margins": -0.07766950130462646, "rewards/rejected": -0.2459685504436493, "step": 102 }, { "epoch": 0.13, "learning_rate": 4.999998046967279e-05, "logits/chosen": -2.54486346244812, "logits/rejected": -2.6447086334228516, "logps/chosen": -179.6219482421875, "logps/rejected": -222.37010192871094, "loss": 0.6585, "rewards/accuracies": 0.625, "rewards/chosen": -0.23128211498260498, "rewards/margins": 0.08162947744131088, "rewards/rejected": -0.31291159987449646, "step": 103 }, { "epoch": 0.14, "learning_rate": 4.9999965279421804e-05, "logits/chosen": -2.4281625747680664, "logits/rejected": -2.5068893432617188, "logps/chosen": -163.3339385986328, "logps/rejected": -180.06222534179688, "loss": 0.6739, "rewards/accuracies": 0.625, "rewards/chosen": -0.22076301276683807, "rewards/margins": 0.05238480493426323, "rewards/rejected": -0.2731478214263916, "step": 104 }, { "epoch": 0.14, "learning_rate": 4.999994574910364e-05, "logits/chosen": -2.5497522354125977, "logits/rejected": -2.558523178100586, "logps/chosen": -214.64537048339844, "logps/rejected": -210.29678344726562, "loss": 0.6691, "rewards/accuracies": 0.625, "rewards/chosen": -0.19515390694141388, "rewards/margins": 0.06168302148580551, "rewards/rejected": -0.2568369209766388, "step": 105 }, { "epoch": 0.14, "learning_rate": 4.999992187872167e-05, "logits/chosen": -2.6358773708343506, "logits/rejected": -2.672025680541992, "logps/chosen": -235.18109130859375, "logps/rejected": -233.78045654296875, "loss": 0.7203, "rewards/accuracies": 0.5, "rewards/chosen": -0.25453630089759827, "rewards/margins": -0.03941688686609268, "rewards/rejected": -0.215119406580925, "step": 106 }, { "epoch": 0.14, "learning_rate": 4.9999893668280043e-05, "logits/chosen": -2.305128812789917, "logits/rejected": -2.384552478790283, "logps/chosen": -168.1253662109375, "logps/rejected": -210.5357666015625, "loss": 0.6558, "rewards/accuracies": 0.625, "rewards/chosen": -0.30615508556365967, "rewards/margins": 0.10809167474508286, "rewards/rejected": -0.4142467677593231, "step": 107 }, { "epoch": 0.14, "learning_rate": 4.999986111778367e-05, "logits/chosen": -2.4207632541656494, "logits/rejected": -2.4426565170288086, "logps/chosen": -187.8740234375, "logps/rejected": -188.6339569091797, "loss": 0.6383, "rewards/accuracies": 0.75, "rewards/chosen": -0.2796909809112549, "rewards/margins": 0.13547499477863312, "rewards/rejected": -0.4151660203933716, "step": 108 }, { "epoch": 0.14, "learning_rate": 4.999982422723818e-05, "logits/chosen": -2.505187749862671, "logits/rejected": -2.4738471508026123, "logps/chosen": -187.51710510253906, "logps/rejected": -227.13916015625, "loss": 0.7216, "rewards/accuracies": 0.375, "rewards/chosen": -0.5267550945281982, "rewards/margins": -0.03421187028288841, "rewards/rejected": -0.4925432801246643, "step": 109 }, { "epoch": 0.14, "learning_rate": 4.9999782996649994e-05, "logits/chosen": -2.4367451667785645, "logits/rejected": -2.4629135131835938, "logps/chosen": -221.5149688720703, "logps/rejected": -255.66635131835938, "loss": 0.6923, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4186938405036926, "rewards/margins": 0.03650692105293274, "rewards/rejected": -0.45520079135894775, "step": 110 }, { "epoch": 0.15, "learning_rate": 4.999973742602626e-05, "logits/chosen": -2.6166908740997314, "logits/rejected": -2.638493061065674, "logps/chosen": -212.32894897460938, "logps/rejected": -216.49720764160156, "loss": 0.6789, "rewards/accuracies": 0.4375, "rewards/chosen": -0.47340863943099976, "rewards/margins": 0.059217117726802826, "rewards/rejected": -0.5326257944107056, "step": 111 }, { "epoch": 0.15, "learning_rate": 4.999968751537489e-05, "logits/chosen": -2.549008846282959, "logits/rejected": -2.6095943450927734, "logps/chosen": -205.5695343017578, "logps/rejected": -196.99383544921875, "loss": 0.7338, "rewards/accuracies": 0.375, "rewards/chosen": -0.47608208656311035, "rewards/margins": -0.05748309940099716, "rewards/rejected": -0.4185989201068878, "step": 112 }, { "epoch": 0.15, "learning_rate": 4.9999633264704564e-05, "logits/chosen": -2.2947440147399902, "logits/rejected": -2.240459680557251, "logps/chosen": -203.31182861328125, "logps/rejected": -175.49354553222656, "loss": 0.7139, "rewards/accuracies": 0.5, "rewards/chosen": -0.39874717593193054, "rewards/margins": -0.014322709292173386, "rewards/rejected": -0.38442447781562805, "step": 113 }, { "epoch": 0.15, "learning_rate": 4.999957467402468e-05, "logits/chosen": -2.118520736694336, "logits/rejected": -2.0125582218170166, "logps/chosen": -159.09979248046875, "logps/rejected": -151.80679321289062, "loss": 0.6981, "rewards/accuracies": 0.5, "rewards/chosen": -0.43373599648475647, "rewards/margins": 0.0064653148874640465, "rewards/rejected": -0.4402012825012207, "step": 114 }, { "epoch": 0.15, "learning_rate": 4.9999511743345426e-05, "logits/chosen": -2.251662254333496, "logits/rejected": -2.343283176422119, "logps/chosen": -166.46377563476562, "logps/rejected": -213.02813720703125, "loss": 0.6281, "rewards/accuracies": 0.5625, "rewards/chosen": -0.44466906785964966, "rewards/margins": 0.18120086193084717, "rewards/rejected": -0.625869870185852, "step": 115 }, { "epoch": 0.15, "learning_rate": 4.999944447267771e-05, "logits/chosen": -2.347130298614502, "logits/rejected": -2.342409610748291, "logps/chosen": -216.30650329589844, "logps/rejected": -210.64273071289062, "loss": 0.7966, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6098219752311707, "rewards/margins": -0.12615808844566345, "rewards/rejected": -0.4836638867855072, "step": 116 }, { "epoch": 0.15, "learning_rate": 4.999937286203322e-05, "logits/chosen": -2.3265388011932373, "logits/rejected": -2.3534939289093018, "logps/chosen": -204.47706604003906, "logps/rejected": -186.42286682128906, "loss": 0.6674, "rewards/accuracies": 0.625, "rewards/chosen": -0.291274756193161, "rewards/margins": 0.0668107196688652, "rewards/rejected": -0.3580854535102844, "step": 117 }, { "epoch": 0.15, "learning_rate": 4.999929691142439e-05, "logits/chosen": -2.062469720840454, "logits/rejected": -2.2004528045654297, "logps/chosen": -144.79664611816406, "logps/rejected": -149.7763671875, "loss": 0.6225, "rewards/accuracies": 0.625, "rewards/chosen": -0.46431243419647217, "rewards/margins": 0.1997339278459549, "rewards/rejected": -0.6640464067459106, "step": 118 }, { "epoch": 0.16, "learning_rate": 4.99992166208644e-05, "logits/chosen": -2.2899487018585205, "logits/rejected": -2.2257750034332275, "logps/chosen": -208.61517333984375, "logps/rejected": -200.343505859375, "loss": 0.7233, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5118069052696228, "rewards/margins": -0.018164699897170067, "rewards/rejected": -0.4936422109603882, "step": 119 }, { "epoch": 0.16, "learning_rate": 4.999913199036719e-05, "logits/chosen": -2.520472288131714, "logits/rejected": -2.612544298171997, "logps/chosen": -160.17779541015625, "logps/rejected": -191.498291015625, "loss": 0.6421, "rewards/accuracies": 0.5, "rewards/chosen": -0.4993933439254761, "rewards/margins": 0.15375012159347534, "rewards/rejected": -0.6531434059143066, "step": 120 }, { "epoch": 0.16, "learning_rate": 4.9999043019947454e-05, "logits/chosen": -2.5590789318084717, "logits/rejected": -2.594666004180908, "logps/chosen": -219.09494018554688, "logps/rejected": -271.0511474609375, "loss": 0.6746, "rewards/accuracies": 0.5, "rewards/chosen": -0.37969574332237244, "rewards/margins": 0.06747917830944061, "rewards/rejected": -0.44717487692832947, "step": 121 }, { "epoch": 0.16, "learning_rate": 4.9998949709620636e-05, "logits/chosen": -2.292649269104004, "logits/rejected": -2.4197192192077637, "logps/chosen": -156.77658081054688, "logps/rejected": -181.56752014160156, "loss": 0.6224, "rewards/accuracies": 0.625, "rewards/chosen": -0.24466285109519958, "rewards/margins": 0.17808005213737488, "rewards/rejected": -0.4227428734302521, "step": 122 }, { "epoch": 0.16, "learning_rate": 4.999885205940293e-05, "logits/chosen": -2.526571750640869, "logits/rejected": -2.6078643798828125, "logps/chosen": -188.48431396484375, "logps/rejected": -209.88192749023438, "loss": 0.6138, "rewards/accuracies": 0.625, "rewards/chosen": -0.38001808524131775, "rewards/margins": 0.21444422006607056, "rewards/rejected": -0.5944622755050659, "step": 123 }, { "epoch": 0.16, "learning_rate": 4.9998750069311306e-05, "logits/chosen": -2.477735757827759, "logits/rejected": -2.524329900741577, "logps/chosen": -170.30780029296875, "logps/rejected": -167.05538940429688, "loss": 0.7105, "rewards/accuracies": 0.5, "rewards/chosen": -0.45474332571029663, "rewards/margins": -0.0021874159574508667, "rewards/rejected": -0.45255595445632935, "step": 124 }, { "epoch": 0.16, "learning_rate": 4.999864373936345e-05, "logits/chosen": -2.486284017562866, "logits/rejected": -2.511571168899536, "logps/chosen": -213.28150939941406, "logps/rejected": -220.744140625, "loss": 0.7154, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5180980563163757, "rewards/margins": -0.013586281798779964, "rewards/rejected": -0.5045117139816284, "step": 125 }, { "epoch": 0.16, "learning_rate": 4.999853306957783e-05, "logits/chosen": -2.149216651916504, "logits/rejected": -2.331148862838745, "logps/chosen": -212.50167846679688, "logps/rejected": -229.76211547851562, "loss": 0.6783, "rewards/accuracies": 0.5625, "rewards/chosen": -0.45535096526145935, "rewards/margins": 0.13961246609687805, "rewards/rejected": -0.5949634909629822, "step": 126 }, { "epoch": 0.17, "learning_rate": 4.9998418059973654e-05, "logits/chosen": -2.3633456230163574, "logits/rejected": -2.3958113193511963, "logps/chosen": -260.77191162109375, "logps/rejected": -269.7747497558594, "loss": 0.624, "rewards/accuracies": 0.75, "rewards/chosen": -0.3385522663593292, "rewards/margins": 0.154096320271492, "rewards/rejected": -0.4926486015319824, "step": 127 }, { "epoch": 0.17, "learning_rate": 4.99982987105709e-05, "logits/chosen": -2.4027421474456787, "logits/rejected": -2.47273588180542, "logps/chosen": -190.19549560546875, "logps/rejected": -199.79869079589844, "loss": 0.647, "rewards/accuracies": 0.625, "rewards/chosen": -0.45771288871765137, "rewards/margins": 0.16400352120399475, "rewards/rejected": -0.6217163801193237, "step": 128 }, { "epoch": 0.17, "learning_rate": 4.999817502139027e-05, "logits/chosen": -2.459967851638794, "logits/rejected": -2.5030205249786377, "logps/chosen": -152.8588104248047, "logps/rejected": -151.28167724609375, "loss": 0.682, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4902128279209137, "rewards/margins": 0.07620880752801895, "rewards/rejected": -0.566421627998352, "step": 129 }, { "epoch": 0.17, "learning_rate": 4.999804699245325e-05, "logits/chosen": -2.613358497619629, "logits/rejected": -2.6325273513793945, "logps/chosen": -240.51898193359375, "logps/rejected": -209.2674560546875, "loss": 0.6862, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5984245538711548, "rewards/margins": 0.08296503126621246, "rewards/rejected": -0.6813895106315613, "step": 130 }, { "epoch": 0.17, "learning_rate": 4.999791462378206e-05, "logits/chosen": -2.523002862930298, "logits/rejected": -2.544940948486328, "logps/chosen": -209.53164672851562, "logps/rejected": -211.6482391357422, "loss": 0.6617, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4731237292289734, "rewards/margins": 0.139340341091156, "rewards/rejected": -0.6124641299247742, "step": 131 }, { "epoch": 0.17, "learning_rate": 4.999777791539968e-05, "logits/chosen": -2.3467066287994385, "logits/rejected": -2.4002304077148438, "logps/chosen": -229.88079833984375, "logps/rejected": -236.3416290283203, "loss": 0.6545, "rewards/accuracies": 0.5, "rewards/chosen": -0.5287673473358154, "rewards/margins": 0.11849495768547058, "rewards/rejected": -0.6472623348236084, "step": 132 }, { "epoch": 0.17, "learning_rate": 4.9997636867329844e-05, "logits/chosen": -2.3951451778411865, "logits/rejected": -2.3899013996124268, "logps/chosen": -230.42471313476562, "logps/rejected": -204.70712280273438, "loss": 0.6886, "rewards/accuracies": 0.5, "rewards/chosen": -0.7095022797584534, "rewards/margins": 0.06169421225786209, "rewards/rejected": -0.7711963653564453, "step": 133 }, { "epoch": 0.18, "learning_rate": 4.999749147959703e-05, "logits/chosen": -2.4759864807128906, "logits/rejected": -2.461402177810669, "logps/chosen": -208.62255859375, "logps/rejected": -215.98049926757812, "loss": 0.6288, "rewards/accuracies": 0.6875, "rewards/chosen": -0.47989076375961304, "rewards/margins": 0.18254442512989044, "rewards/rejected": -0.6624351143836975, "step": 134 }, { "epoch": 0.18, "learning_rate": 4.99973417522265e-05, "logits/chosen": -2.6637802124023438, "logits/rejected": -2.6086812019348145, "logps/chosen": -201.55992126464844, "logps/rejected": -185.3202667236328, "loss": 0.7158, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5636375546455383, "rewards/margins": 0.09092222899198532, "rewards/rejected": -0.6545597910881042, "step": 135 }, { "epoch": 0.18, "learning_rate": 4.9997187685244234e-05, "logits/chosen": -2.386868953704834, "logits/rejected": -2.438833475112915, "logps/chosen": -149.93809509277344, "logps/rejected": -164.56199645996094, "loss": 0.7271, "rewards/accuracies": 0.3125, "rewards/chosen": -0.43254008889198303, "rewards/margins": -0.0416962131857872, "rewards/rejected": -0.39084386825561523, "step": 136 }, { "epoch": 0.18, "learning_rate": 4.999702927867698e-05, "logits/chosen": -2.382352113723755, "logits/rejected": -2.447472095489502, "logps/chosen": -195.46499633789062, "logps/rejected": -191.3668212890625, "loss": 0.7562, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5873669385910034, "rewards/margins": -0.07318583130836487, "rewards/rejected": -0.5141811370849609, "step": 137 }, { "epoch": 0.18, "learning_rate": 4.999686653255222e-05, "logits/chosen": -2.6838254928588867, "logits/rejected": -2.735358476638794, "logps/chosen": -232.84722900390625, "logps/rejected": -253.7335205078125, "loss": 0.5769, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5806477069854736, "rewards/margins": 0.33124226331710815, "rewards/rejected": -0.9118900299072266, "step": 138 }, { "epoch": 0.18, "learning_rate": 4.9996699446898235e-05, "logits/chosen": -2.3523752689361572, "logits/rejected": -2.455883026123047, "logps/chosen": -158.80831909179688, "logps/rejected": -215.85203552246094, "loss": 0.7447, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5600742697715759, "rewards/margins": 0.03419441357254982, "rewards/rejected": -0.5942687392234802, "step": 139 }, { "epoch": 0.18, "learning_rate": 4.999652802174402e-05, "logits/chosen": -2.5024056434631348, "logits/rejected": -2.4992480278015137, "logps/chosen": -233.17120361328125, "logps/rejected": -249.13568115234375, "loss": 0.6822, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7229613065719604, "rewards/margins": 0.06977183371782303, "rewards/rejected": -0.7927330732345581, "step": 140 }, { "epoch": 0.18, "learning_rate": 4.999635225711933e-05, "logits/chosen": -2.3752949237823486, "logits/rejected": -2.499027729034424, "logps/chosen": -165.4285888671875, "logps/rejected": -213.1377716064453, "loss": 0.6571, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4794481098651886, "rewards/margins": 0.1226089596748352, "rewards/rejected": -0.602057158946991, "step": 141 }, { "epoch": 0.19, "learning_rate": 4.999617215305468e-05, "logits/chosen": -2.578481674194336, "logits/rejected": -2.5905745029449463, "logps/chosen": -160.28704833984375, "logps/rejected": -186.60206604003906, "loss": 0.5771, "rewards/accuracies": 0.625, "rewards/chosen": -0.379621684551239, "rewards/margins": 0.28185874223709106, "rewards/rejected": -0.6614804863929749, "step": 142 }, { "epoch": 0.19, "learning_rate": 4.999598770958134e-05, "logits/chosen": -2.1814939975738525, "logits/rejected": -2.273460865020752, "logps/chosen": -178.5402069091797, "logps/rejected": -246.2064208984375, "loss": 0.6741, "rewards/accuracies": 0.5625, "rewards/chosen": -0.47223299741744995, "rewards/margins": 0.11296035349369049, "rewards/rejected": -0.5851933360099792, "step": 143 }, { "epoch": 0.19, "learning_rate": 4.999579892673133e-05, "logits/chosen": -2.6444742679595947, "logits/rejected": -2.651982545852661, "logps/chosen": -191.74351501464844, "logps/rejected": -191.7657928466797, "loss": 0.6952, "rewards/accuracies": 0.625, "rewards/chosen": -0.6643162965774536, "rewards/margins": 0.027530232444405556, "rewards/rejected": -0.691846489906311, "step": 144 }, { "epoch": 0.19, "learning_rate": 4.9995605804537426e-05, "logits/chosen": -2.4908974170684814, "logits/rejected": -2.550518274307251, "logps/chosen": -188.7380828857422, "logps/rejected": -211.574951171875, "loss": 0.5798, "rewards/accuracies": 0.75, "rewards/chosen": -0.3170633912086487, "rewards/margins": 0.3337726294994354, "rewards/rejected": -0.6508359313011169, "step": 145 }, { "epoch": 0.19, "learning_rate": 4.999540834303315e-05, "logits/chosen": -2.403130054473877, "logits/rejected": -2.5044126510620117, "logps/chosen": -216.05221557617188, "logps/rejected": -224.658203125, "loss": 0.5719, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5969533920288086, "rewards/margins": 0.4197196662425995, "rewards/rejected": -1.0166730880737305, "step": 146 }, { "epoch": 0.19, "learning_rate": 4.999520654225278e-05, "logits/chosen": -2.5251312255859375, "logits/rejected": -2.6076760292053223, "logps/chosen": -171.32049560546875, "logps/rejected": -189.77108764648438, "loss": 0.5929, "rewards/accuracies": 0.5, "rewards/chosen": -0.5219372510910034, "rewards/margins": 0.3483983874320984, "rewards/rejected": -0.8703356385231018, "step": 147 }, { "epoch": 0.19, "learning_rate": 4.9995000402231354e-05, "logits/chosen": -2.564474105834961, "logits/rejected": -2.510350227355957, "logps/chosen": -219.81732177734375, "logps/rejected": -202.83493041992188, "loss": 0.7793, "rewards/accuracies": 0.375, "rewards/chosen": -0.7294387817382812, "rewards/margins": -0.11600115895271301, "rewards/rejected": -0.6134375333786011, "step": 148 }, { "epoch": 0.19, "learning_rate": 4.999478992300466e-05, "logits/chosen": -2.5275182723999023, "logits/rejected": -2.6380527019500732, "logps/chosen": -165.94358825683594, "logps/rejected": -212.28086853027344, "loss": 0.565, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4685615599155426, "rewards/margins": 0.44902655482292175, "rewards/rejected": -0.9175881147384644, "step": 149 }, { "epoch": 0.2, "learning_rate": 4.999457510460923e-05, "logits/chosen": -2.407663106918335, "logits/rejected": -2.5699005126953125, "logps/chosen": -167.9168701171875, "logps/rejected": -214.4752197265625, "loss": 0.5801, "rewards/accuracies": 0.5625, "rewards/chosen": -0.48059117794036865, "rewards/margins": 0.3546959161758423, "rewards/rejected": -0.8352870345115662, "step": 150 }, { "epoch": 0.2, "learning_rate": 4.999435594708236e-05, "logits/chosen": -2.3764097690582275, "logits/rejected": -2.3675248622894287, "logps/chosen": -201.25814819335938, "logps/rejected": -229.6868133544922, "loss": 0.6714, "rewards/accuracies": 0.625, "rewards/chosen": -0.6616516709327698, "rewards/margins": 0.10846880823373795, "rewards/rejected": -0.7701205611228943, "step": 151 }, { "epoch": 0.2, "learning_rate": 4.999413245046211e-05, "logits/chosen": -2.5667271614074707, "logits/rejected": -2.5309338569641113, "logps/chosen": -210.30267333984375, "logps/rejected": -206.71014404296875, "loss": 0.5848, "rewards/accuracies": 0.625, "rewards/chosen": -0.572422444820404, "rewards/margins": 0.30542662739753723, "rewards/rejected": -0.8778490424156189, "step": 152 }, { "epoch": 0.2, "learning_rate": 4.9993904614787254e-05, "logits/chosen": -2.463524341583252, "logits/rejected": -2.530627965927124, "logps/chosen": -179.8057861328125, "logps/rejected": -200.56394958496094, "loss": 0.6231, "rewards/accuracies": 0.625, "rewards/chosen": -0.6925724148750305, "rewards/margins": 0.21750670671463013, "rewards/rejected": -0.9100791215896606, "step": 153 }, { "epoch": 0.2, "learning_rate": 4.999367244009736e-05, "logits/chosen": -2.4720635414123535, "logits/rejected": -2.5370302200317383, "logps/chosen": -194.29440307617188, "logps/rejected": -196.02944946289062, "loss": 0.8265, "rewards/accuracies": 0.4375, "rewards/chosen": -1.0657517910003662, "rewards/margins": -0.16590073704719543, "rewards/rejected": -0.8998512029647827, "step": 154 }, { "epoch": 0.2, "learning_rate": 4.999343592643274e-05, "logits/chosen": -2.3978195190429688, "logits/rejected": -2.397984266281128, "logps/chosen": -229.4518585205078, "logps/rejected": -217.56869506835938, "loss": 0.6598, "rewards/accuracies": 0.625, "rewards/chosen": -0.6397801637649536, "rewards/margins": 0.10495337843894958, "rewards/rejected": -0.7447335720062256, "step": 155 }, { "epoch": 0.2, "learning_rate": 4.999319507383444e-05, "logits/chosen": -2.3354411125183105, "logits/rejected": -2.4454195499420166, "logps/chosen": -149.3455810546875, "logps/rejected": -187.3743438720703, "loss": 0.5981, "rewards/accuracies": 0.625, "rewards/chosen": -0.7036041617393494, "rewards/margins": 0.3835078775882721, "rewards/rejected": -1.0871120691299438, "step": 156 }, { "epoch": 0.21, "learning_rate": 4.999294988234428e-05, "logits/chosen": -2.491671323776245, "logits/rejected": -2.550612449645996, "logps/chosen": -180.36758422851562, "logps/rejected": -251.5354461669922, "loss": 0.6038, "rewards/accuracies": 0.625, "rewards/chosen": -0.7723766565322876, "rewards/margins": 0.2752906382083893, "rewards/rejected": -1.047667384147644, "step": 157 }, { "epoch": 0.21, "learning_rate": 4.999270035200483e-05, "logits/chosen": -2.492790937423706, "logits/rejected": -2.365570306777954, "logps/chosen": -195.1597900390625, "logps/rejected": -185.96673583984375, "loss": 0.7169, "rewards/accuracies": 0.625, "rewards/chosen": -1.2230509519577026, "rewards/margins": 0.14210942387580872, "rewards/rejected": -1.3651604652404785, "step": 158 }, { "epoch": 0.21, "learning_rate": 4.99924464828594e-05, "logits/chosen": -2.412907838821411, "logits/rejected": -2.508164882659912, "logps/chosen": -196.51846313476562, "logps/rejected": -218.0734405517578, "loss": 0.5942, "rewards/accuracies": 0.6875, "rewards/chosen": -0.813502848148346, "rewards/margins": 0.3059760332107544, "rewards/rejected": -1.1194789409637451, "step": 159 }, { "epoch": 0.21, "learning_rate": 4.9992188274952064e-05, "logits/chosen": -2.4737277030944824, "logits/rejected": -2.5720887184143066, "logps/chosen": -155.16171264648438, "logps/rejected": -195.86997985839844, "loss": 0.7379, "rewards/accuracies": 0.5625, "rewards/chosen": -0.8073078989982605, "rewards/margins": 0.11351939290761948, "rewards/rejected": -0.920827329158783, "step": 160 }, { "epoch": 0.21, "learning_rate": 4.999192572832765e-05, "logits/chosen": -2.5303239822387695, "logits/rejected": -2.5628015995025635, "logps/chosen": -281.7694396972656, "logps/rejected": -303.6089782714844, "loss": 0.6735, "rewards/accuracies": 0.625, "rewards/chosen": -0.6562893986701965, "rewards/margins": 0.12363787740468979, "rewards/rejected": -0.7799273133277893, "step": 161 }, { "epoch": 0.21, "learning_rate": 4.999165884303174e-05, "logits/chosen": -2.481058359146118, "logits/rejected": -2.4526753425598145, "logps/chosen": -182.2716064453125, "logps/rejected": -242.58828735351562, "loss": 0.7317, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7687569260597229, "rewards/margins": -0.007903970777988434, "rewards/rejected": -0.7608529329299927, "step": 162 }, { "epoch": 0.21, "learning_rate": 4.999138761911066e-05, "logits/chosen": -2.459987163543701, "logits/rejected": -2.5180883407592773, "logps/chosen": -209.83102416992188, "logps/rejected": -216.1147918701172, "loss": 0.7265, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0292317867279053, "rewards/margins": 0.04909829795360565, "rewards/rejected": -1.0783300399780273, "step": 163 }, { "epoch": 0.21, "learning_rate": 4.99911120566115e-05, "logits/chosen": -2.5340957641601562, "logits/rejected": -2.654463052749634, "logps/chosen": -210.88929748535156, "logps/rejected": -198.64614868164062, "loss": 0.7475, "rewards/accuracies": 0.5625, "rewards/chosen": -1.2450902462005615, "rewards/margins": 0.012418277561664581, "rewards/rejected": -1.2575085163116455, "step": 164 }, { "epoch": 0.22, "learning_rate": 4.99908321555821e-05, "logits/chosen": -2.4791364669799805, "logits/rejected": -2.5890302658081055, "logps/chosen": -192.5817108154297, "logps/rejected": -224.9603271484375, "loss": 0.6089, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9010199308395386, "rewards/margins": 0.4015497863292694, "rewards/rejected": -1.3025696277618408, "step": 165 }, { "epoch": 0.22, "learning_rate": 4.999054791607105e-05, "logits/chosen": -2.428842067718506, "logits/rejected": -2.372598171234131, "logps/chosen": -221.89833068847656, "logps/rejected": -212.29595947265625, "loss": 0.7305, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0515369176864624, "rewards/margins": 0.017772257328033447, "rewards/rejected": -1.0693092346191406, "step": 166 }, { "epoch": 0.22, "learning_rate": 4.999025933812769e-05, "logits/chosen": -2.50471830368042, "logits/rejected": -2.5308945178985596, "logps/chosen": -198.7537384033203, "logps/rejected": -236.5478515625, "loss": 0.6487, "rewards/accuracies": 0.625, "rewards/chosen": -0.8205583095550537, "rewards/margins": 0.18831993639469147, "rewards/rejected": -1.008878231048584, "step": 167 }, { "epoch": 0.22, "learning_rate": 4.9989966421802114e-05, "logits/chosen": -2.410905122756958, "logits/rejected": -2.5401611328125, "logps/chosen": -202.42991638183594, "logps/rejected": -219.6848602294922, "loss": 0.6257, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9180124402046204, "rewards/margins": 0.24881067872047424, "rewards/rejected": -1.166823148727417, "step": 168 }, { "epoch": 0.22, "learning_rate": 4.998966916714519e-05, "logits/chosen": -2.4587323665618896, "logits/rejected": -2.5724194049835205, "logps/chosen": -154.88470458984375, "logps/rejected": -172.47268676757812, "loss": 0.618, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7627564072608948, "rewards/margins": 0.294120192527771, "rewards/rejected": -1.0568766593933105, "step": 169 }, { "epoch": 0.22, "learning_rate": 4.998936757420851e-05, "logits/chosen": -2.284227132797241, "logits/rejected": -2.380892515182495, "logps/chosen": -138.282958984375, "logps/rejected": -180.36441040039062, "loss": 0.6089, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7448742389678955, "rewards/margins": 0.4253446161746979, "rewards/rejected": -1.1702189445495605, "step": 170 }, { "epoch": 0.22, "learning_rate": 4.9989061643044434e-05, "logits/chosen": -2.5783276557922363, "logits/rejected": -2.5471560955047607, "logps/chosen": -186.851318359375, "logps/rejected": -193.73837280273438, "loss": 0.7108, "rewards/accuracies": 0.5, "rewards/chosen": -1.0091472864151, "rewards/margins": 0.033400412648916245, "rewards/rejected": -1.0425477027893066, "step": 171 }, { "epoch": 0.23, "learning_rate": 4.9988751373706075e-05, "logits/chosen": -2.398940086364746, "logits/rejected": -2.3498027324676514, "logps/chosen": -171.69107055664062, "logps/rejected": -209.19424438476562, "loss": 0.7045, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9662235975265503, "rewards/margins": 0.137020543217659, "rewards/rejected": -1.1032441854476929, "step": 172 }, { "epoch": 0.23, "learning_rate": 4.9988436766247284e-05, "logits/chosen": -2.415847063064575, "logits/rejected": -2.3855319023132324, "logps/chosen": -228.1761932373047, "logps/rejected": -195.01991271972656, "loss": 0.6696, "rewards/accuracies": 0.5, "rewards/chosen": -0.9869641661643982, "rewards/margins": 0.11401443928480148, "rewards/rejected": -1.1009787321090698, "step": 173 }, { "epoch": 0.23, "learning_rate": 4.9988117820722704e-05, "logits/chosen": -2.638780117034912, "logits/rejected": -2.607104539871216, "logps/chosen": -234.91183471679688, "logps/rejected": -203.67996215820312, "loss": 0.9279, "rewards/accuracies": 0.375, "rewards/chosen": -1.2375922203063965, "rewards/margins": -0.281325101852417, "rewards/rejected": -0.956267237663269, "step": 174 }, { "epoch": 0.23, "learning_rate": 4.998779453718768e-05, "logits/chosen": -2.254124879837036, "logits/rejected": -2.429471969604492, "logps/chosen": -235.76698303222656, "logps/rejected": -262.7959899902344, "loss": 0.6238, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0454847812652588, "rewards/margins": 0.3030804395675659, "rewards/rejected": -1.3485652208328247, "step": 175 }, { "epoch": 0.23, "learning_rate": 4.9987466915698346e-05, "logits/chosen": -2.276986598968506, "logits/rejected": -2.337780475616455, "logps/chosen": -138.3777313232422, "logps/rejected": -166.30181884765625, "loss": 0.5682, "rewards/accuracies": 0.75, "rewards/chosen": -0.5320202708244324, "rewards/margins": 0.315082311630249, "rewards/rejected": -0.8471025228500366, "step": 176 }, { "epoch": 0.23, "learning_rate": 4.998713495631156e-05, "logits/chosen": -2.420923948287964, "logits/rejected": -2.565662384033203, "logps/chosen": -180.71446228027344, "logps/rejected": -192.2035369873047, "loss": 0.647, "rewards/accuracies": 0.625, "rewards/chosen": -0.8985862731933594, "rewards/margins": 0.2061445415019989, "rewards/rejected": -1.1047309637069702, "step": 177 }, { "epoch": 0.23, "learning_rate": 4.998679865908499e-05, "logits/chosen": -2.5881500244140625, "logits/rejected": -2.6503522396087646, "logps/chosen": -183.07540893554688, "logps/rejected": -233.943603515625, "loss": 0.6889, "rewards/accuracies": 0.5625, "rewards/chosen": -1.1374621391296387, "rewards/margins": 0.19141921401023865, "rewards/rejected": -1.3288813829421997, "step": 178 }, { "epoch": 0.23, "learning_rate": 4.9986458024076984e-05, "logits/chosen": -2.4952712059020996, "logits/rejected": -2.50299072265625, "logps/chosen": -192.31314086914062, "logps/rejected": -197.241455078125, "loss": 0.7729, "rewards/accuracies": 0.4375, "rewards/chosen": -1.13700532913208, "rewards/margins": -0.10435198992490768, "rewards/rejected": -1.0326533317565918, "step": 179 }, { "epoch": 0.24, "learning_rate": 4.998611305134669e-05, "logits/chosen": -2.5952248573303223, "logits/rejected": -2.600719928741455, "logps/chosen": -191.4530487060547, "logps/rejected": -212.82955932617188, "loss": 0.674, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9059873819351196, "rewards/margins": 0.1599172055721283, "rewards/rejected": -1.0659046173095703, "step": 180 }, { "epoch": 0.24, "learning_rate": 4.9985763740954e-05, "logits/chosen": -2.4821391105651855, "logits/rejected": -2.4574878215789795, "logps/chosen": -264.071533203125, "logps/rejected": -256.51611328125, "loss": 0.7333, "rewards/accuracies": 0.375, "rewards/chosen": -1.0796548128128052, "rewards/margins": 0.06612614542245865, "rewards/rejected": -1.1457810401916504, "step": 181 }, { "epoch": 0.24, "learning_rate": 4.9985410092959553e-05, "logits/chosen": -2.4601786136627197, "logits/rejected": -2.519011974334717, "logps/chosen": -158.57687377929688, "logps/rejected": -173.29666137695312, "loss": 0.7693, "rewards/accuracies": 0.25, "rewards/chosen": -0.7817510366439819, "rewards/margins": -0.05254516005516052, "rewards/rejected": -0.729205846786499, "step": 182 }, { "epoch": 0.24, "learning_rate": 4.998505210742472e-05, "logits/chosen": -2.6099700927734375, "logits/rejected": -2.58197021484375, "logps/chosen": -195.2617950439453, "logps/rejected": -188.54165649414062, "loss": 0.9197, "rewards/accuracies": 0.25, "rewards/chosen": -1.1366631984710693, "rewards/margins": -0.32979804277420044, "rewards/rejected": -0.8068650960922241, "step": 183 }, { "epoch": 0.24, "learning_rate": 4.9984689784411686e-05, "logits/chosen": -2.679287910461426, "logits/rejected": -2.5679209232330322, "logps/chosen": -211.75323486328125, "logps/rejected": -245.02603149414062, "loss": 0.6702, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9691303372383118, "rewards/margins": 0.17891569435596466, "rewards/rejected": -1.1480460166931152, "step": 184 }, { "epoch": 0.24, "learning_rate": 4.9984323123983334e-05, "logits/chosen": -2.537733316421509, "logits/rejected": -2.6055424213409424, "logps/chosen": -151.74288940429688, "logps/rejected": -250.8179168701172, "loss": 0.6366, "rewards/accuracies": 0.6875, "rewards/chosen": -0.928146243095398, "rewards/margins": 0.24633805453777313, "rewards/rejected": -1.174484372138977, "step": 185 }, { "epoch": 0.24, "learning_rate": 4.998395212620332e-05, "logits/chosen": -2.4368271827697754, "logits/rejected": -2.529536247253418, "logps/chosen": -207.87677001953125, "logps/rejected": -229.0767822265625, "loss": 0.6199, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7506311535835266, "rewards/margins": 0.24917976558208466, "rewards/rejected": -0.9998109340667725, "step": 186 }, { "epoch": 0.24, "learning_rate": 4.998357679113603e-05, "logits/chosen": -2.5217807292938232, "logits/rejected": -2.6954245567321777, "logps/chosen": -192.20407104492188, "logps/rejected": -200.1387176513672, "loss": 0.6013, "rewards/accuracies": 0.625, "rewards/chosen": -0.6829215288162231, "rewards/margins": 0.2588457763195038, "rewards/rejected": -0.9417673349380493, "step": 187 }, { "epoch": 0.25, "learning_rate": 4.9983197118846655e-05, "logits/chosen": -2.4711546897888184, "logits/rejected": -2.4839439392089844, "logps/chosen": -160.9691925048828, "logps/rejected": -167.21469116210938, "loss": 0.7301, "rewards/accuracies": 0.5, "rewards/chosen": -0.7170782685279846, "rewards/margins": 0.04331670328974724, "rewards/rejected": -0.7603949904441833, "step": 188 }, { "epoch": 0.25, "learning_rate": 4.9982813109401096e-05, "logits/chosen": -2.3429412841796875, "logits/rejected": -2.3843770027160645, "logps/chosen": -221.22274780273438, "logps/rejected": -246.99005126953125, "loss": 0.7711, "rewards/accuracies": 0.375, "rewards/chosen": -1.114179015159607, "rewards/margins": -0.07230883836746216, "rewards/rejected": -1.0418701171875, "step": 189 }, { "epoch": 0.25, "learning_rate": 4.998242476286601e-05, "logits/chosen": -2.7864720821380615, "logits/rejected": -2.7745487689971924, "logps/chosen": -194.08201599121094, "logps/rejected": -226.28591918945312, "loss": 0.71, "rewards/accuracies": 0.3125, "rewards/chosen": -0.8893502354621887, "rewards/margins": 0.06053268164396286, "rewards/rejected": -0.949882984161377, "step": 190 }, { "epoch": 0.25, "learning_rate": 4.998203207930882e-05, "logits/chosen": -2.6679065227508545, "logits/rejected": -2.711076498031616, "logps/chosen": -190.53013610839844, "logps/rejected": -200.187744140625, "loss": 0.8201, "rewards/accuracies": 0.4375, "rewards/chosen": -1.0141409635543823, "rewards/margins": -0.17862781882286072, "rewards/rejected": -0.8355131149291992, "step": 191 }, { "epoch": 0.25, "learning_rate": 4.998163505879769e-05, "logits/chosen": -2.7404122352600098, "logits/rejected": -2.732025146484375, "logps/chosen": -178.48135375976562, "logps/rejected": -183.08074951171875, "loss": 0.7078, "rewards/accuracies": 0.5, "rewards/chosen": -0.8886659145355225, "rewards/margins": 0.043551601469516754, "rewards/rejected": -0.9322174787521362, "step": 192 }, { "epoch": 0.25, "learning_rate": 4.998123370140156e-05, "logits/chosen": -2.6989643573760986, "logits/rejected": -2.7124342918395996, "logps/chosen": -223.03314208984375, "logps/rejected": -221.12120056152344, "loss": 0.6559, "rewards/accuracies": 0.625, "rewards/chosen": -0.764815628528595, "rewards/margins": 0.1835068166255951, "rewards/rejected": -0.9483224153518677, "step": 193 }, { "epoch": 0.25, "learning_rate": 4.99808280071901e-05, "logits/chosen": -2.618727684020996, "logits/rejected": -2.6078970432281494, "logps/chosen": -176.1873779296875, "logps/rejected": -199.52859497070312, "loss": 0.7406, "rewards/accuracies": 0.5, "rewards/chosen": -0.8613646626472473, "rewards/margins": -0.05548467859625816, "rewards/rejected": -0.805880069732666, "step": 194 }, { "epoch": 0.26, "learning_rate": 4.9980417976233735e-05, "logits/chosen": -2.6192498207092285, "logits/rejected": -2.732285261154175, "logps/chosen": -211.3184814453125, "logps/rejected": -195.73056030273438, "loss": 0.7921, "rewards/accuracies": 0.4375, "rewards/chosen": -0.9030998945236206, "rewards/margins": -0.1349114179611206, "rewards/rejected": -0.7681884765625, "step": 195 }, { "epoch": 0.26, "learning_rate": 4.9980003608603656e-05, "logits/chosen": -2.5657694339752197, "logits/rejected": -2.5230712890625, "logps/chosen": -203.0819091796875, "logps/rejected": -226.5617218017578, "loss": 0.6618, "rewards/accuracies": 0.5625, "rewards/chosen": -0.609666109085083, "rewards/margins": 0.08627453446388245, "rewards/rejected": -0.6959406137466431, "step": 196 }, { "epoch": 0.26, "learning_rate": 4.997958490437178e-05, "logits/chosen": -2.641559362411499, "logits/rejected": -2.76076602935791, "logps/chosen": -169.1241455078125, "logps/rejected": -183.4690399169922, "loss": 0.7612, "rewards/accuracies": 0.5, "rewards/chosen": -0.7050959467887878, "rewards/margins": -0.03217136114835739, "rewards/rejected": -0.6729245781898499, "step": 197 }, { "epoch": 0.26, "learning_rate": 4.9979161863610816e-05, "logits/chosen": -2.441647529602051, "logits/rejected": -2.467510223388672, "logps/chosen": -143.13748168945312, "logps/rejected": -172.0884246826172, "loss": 0.6647, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5210601091384888, "rewards/margins": 0.14004816114902496, "rewards/rejected": -0.6611082553863525, "step": 198 }, { "epoch": 0.26, "learning_rate": 4.99787344863942e-05, "logits/chosen": -2.627023458480835, "logits/rejected": -2.5984585285186768, "logps/chosen": -203.31980895996094, "logps/rejected": -218.84060668945312, "loss": 0.6172, "rewards/accuracies": 0.625, "rewards/chosen": -0.5382375717163086, "rewards/margins": 0.20881405472755432, "rewards/rejected": -0.7470515966415405, "step": 199 }, { "epoch": 0.26, "learning_rate": 4.997830277279612e-05, "logits/chosen": -2.595003604888916, "logits/rejected": -2.722808361053467, "logps/chosen": -223.01495361328125, "logps/rejected": -240.90130615234375, "loss": 0.7333, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7480457425117493, "rewards/margins": -0.029575012624263763, "rewards/rejected": -0.7184706926345825, "step": 200 }, { "epoch": 0.26, "learning_rate": 4.997786672289152e-05, "logits/chosen": -2.6220602989196777, "logits/rejected": -2.6606202125549316, "logps/chosen": -204.98797607421875, "logps/rejected": -206.25839233398438, "loss": 0.7368, "rewards/accuracies": 0.375, "rewards/chosen": -0.6979295611381531, "rewards/margins": -0.07002773880958557, "rewards/rejected": -0.6279018521308899, "step": 201 }, { "epoch": 0.26, "learning_rate": 4.997742633675612e-05, "logits/chosen": -2.547811508178711, "logits/rejected": -2.551816463470459, "logps/chosen": -196.38748168945312, "logps/rejected": -222.4247589111328, "loss": 0.6402, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5409201383590698, "rewards/margins": 0.19667872786521912, "rewards/rejected": -0.7375988364219666, "step": 202 }, { "epoch": 0.27, "learning_rate": 4.9976981614466344e-05, "logits/chosen": -2.2461767196655273, "logits/rejected": -2.2968175411224365, "logps/chosen": -185.43954467773438, "logps/rejected": -196.11392211914062, "loss": 0.7743, "rewards/accuracies": 0.375, "rewards/chosen": -0.5852402448654175, "rewards/margins": -0.08380623161792755, "rewards/rejected": -0.5014340281486511, "step": 203 }, { "epoch": 0.27, "learning_rate": 4.997653255609942e-05, "logits/chosen": -2.4160966873168945, "logits/rejected": -2.389375925064087, "logps/chosen": -189.16510009765625, "logps/rejected": -202.54649353027344, "loss": 0.7306, "rewards/accuracies": 0.5625, "rewards/chosen": -0.729293167591095, "rewards/margins": -0.006243497133255005, "rewards/rejected": -0.7230496406555176, "step": 204 }, { "epoch": 0.27, "learning_rate": 4.997607916173329e-05, "logits/chosen": -2.6787378787994385, "logits/rejected": -2.6836445331573486, "logps/chosen": -177.60980224609375, "logps/rejected": -177.10487365722656, "loss": 0.765, "rewards/accuracies": 0.5, "rewards/chosen": -0.5183064341545105, "rewards/margins": -0.11038734763860703, "rewards/rejected": -0.4079190790653229, "step": 205 }, { "epoch": 0.27, "learning_rate": 4.997562143144668e-05, "logits/chosen": -2.559563398361206, "logits/rejected": -2.5205612182617188, "logps/chosen": -208.58892822265625, "logps/rejected": -248.25648498535156, "loss": 0.6524, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5725008845329285, "rewards/margins": 0.11152593046426773, "rewards/rejected": -0.6840267777442932, "step": 206 }, { "epoch": 0.27, "learning_rate": 4.997515936531903e-05, "logits/chosen": -2.6034438610076904, "logits/rejected": -2.528724193572998, "logps/chosen": -159.13522338867188, "logps/rejected": -135.77056884765625, "loss": 0.7413, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5185827016830444, "rewards/margins": -0.04235066473484039, "rewards/rejected": -0.47623202204704285, "step": 207 }, { "epoch": 0.27, "learning_rate": 4.9974692963430595e-05, "logits/chosen": -2.3184618949890137, "logits/rejected": -2.3163304328918457, "logps/chosen": -130.70643615722656, "logps/rejected": -142.0390625, "loss": 0.7362, "rewards/accuracies": 0.5, "rewards/chosen": -0.6057229042053223, "rewards/margins": -0.014669769443571568, "rewards/rejected": -0.5910531282424927, "step": 208 }, { "epoch": 0.27, "learning_rate": 4.99742222258623e-05, "logits/chosen": -2.660839796066284, "logits/rejected": -2.6470634937286377, "logps/chosen": -221.08424377441406, "logps/rejected": -213.25003051757812, "loss": 0.6922, "rewards/accuracies": 0.3125, "rewards/chosen": -0.5536531209945679, "rewards/margins": 0.022671688348054886, "rewards/rejected": -0.5763247609138489, "step": 209 }, { "epoch": 0.27, "learning_rate": 4.997374715269589e-05, "logits/chosen": -2.2223334312438965, "logits/rejected": -2.2852203845977783, "logps/chosen": -172.71421813964844, "logps/rejected": -206.3236541748047, "loss": 0.6722, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5476804375648499, "rewards/margins": 0.10758630931377411, "rewards/rejected": -0.6552667617797852, "step": 210 }, { "epoch": 0.28, "learning_rate": 4.997326774401383e-05, "logits/chosen": -2.262892961502075, "logits/rejected": -2.343332052230835, "logps/chosen": -152.0110626220703, "logps/rejected": -178.68345642089844, "loss": 0.694, "rewards/accuracies": 0.625, "rewards/chosen": -0.48357081413269043, "rewards/margins": 0.07582694292068481, "rewards/rejected": -0.55939781665802, "step": 211 }, { "epoch": 0.28, "learning_rate": 4.9972783999899366e-05, "logits/chosen": -2.2378625869750977, "logits/rejected": -2.2833354473114014, "logps/chosen": -154.71405029296875, "logps/rejected": -177.74502563476562, "loss": 0.709, "rewards/accuracies": 0.3125, "rewards/chosen": -0.5034060478210449, "rewards/margins": -0.0023919548839330673, "rewards/rejected": -0.5010141134262085, "step": 212 }, { "epoch": 0.28, "learning_rate": 4.997229592043645e-05, "logits/chosen": -2.4182140827178955, "logits/rejected": -2.477410078048706, "logps/chosen": -217.6938018798828, "logps/rejected": -236.60140991210938, "loss": 0.7474, "rewards/accuracies": 0.3125, "rewards/chosen": -0.42065930366516113, "rewards/margins": -0.08166539669036865, "rewards/rejected": -0.33899393677711487, "step": 213 }, { "epoch": 0.28, "learning_rate": 4.997180350570984e-05, "logits/chosen": -2.6971006393432617, "logits/rejected": -2.709580659866333, "logps/chosen": -231.80714416503906, "logps/rejected": -234.41290283203125, "loss": 0.6593, "rewards/accuracies": 0.625, "rewards/chosen": -0.4388115406036377, "rewards/margins": 0.1023733913898468, "rewards/rejected": -0.5411849617958069, "step": 214 }, { "epoch": 0.28, "learning_rate": 4.9971306755804995e-05, "logits/chosen": -2.600799083709717, "logits/rejected": -2.5639290809631348, "logps/chosen": -211.75051879882812, "logps/rejected": -185.4350128173828, "loss": 0.6654, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3958936929702759, "rewards/margins": 0.08459535241127014, "rewards/rejected": -0.4804890751838684, "step": 215 }, { "epoch": 0.28, "learning_rate": 4.997080567080817e-05, "logits/chosen": -2.2965052127838135, "logits/rejected": -2.3450510501861572, "logps/chosen": -170.9476318359375, "logps/rejected": -174.3068389892578, "loss": 0.6604, "rewards/accuracies": 0.625, "rewards/chosen": -0.5262787938117981, "rewards/margins": 0.09660908579826355, "rewards/rejected": -0.6228878498077393, "step": 216 }, { "epoch": 0.28, "learning_rate": 4.9970300250806346e-05, "logits/chosen": -2.5405097007751465, "logits/rejected": -2.5496726036071777, "logps/chosen": -266.0992736816406, "logps/rejected": -260.38275146484375, "loss": 0.7048, "rewards/accuracies": 0.5, "rewards/chosen": -0.5492247939109802, "rewards/margins": 0.042401984333992004, "rewards/rejected": -0.591626763343811, "step": 217 }, { "epoch": 0.29, "learning_rate": 4.996979049588727e-05, "logits/chosen": -2.502474308013916, "logits/rejected": -2.439131736755371, "logps/chosen": -227.49476623535156, "logps/rejected": -206.38299560546875, "loss": 0.711, "rewards/accuracies": 0.5, "rewards/chosen": -0.5280985236167908, "rewards/margins": 0.010388553142547607, "rewards/rejected": -0.5384870767593384, "step": 218 }, { "epoch": 0.29, "learning_rate": 4.996927640613944e-05, "logits/chosen": -2.3517448902130127, "logits/rejected": -2.5404696464538574, "logps/chosen": -162.60267639160156, "logps/rejected": -216.92562866210938, "loss": 0.6095, "rewards/accuracies": 0.75, "rewards/chosen": -0.2759331464767456, "rewards/margins": 0.19823111593723297, "rewards/rejected": -0.474164217710495, "step": 219 }, { "epoch": 0.29, "learning_rate": 4.99687579816521e-05, "logits/chosen": -2.411978006362915, "logits/rejected": -2.5019097328186035, "logps/chosen": -201.91445922851562, "logps/rejected": -211.31448364257812, "loss": 0.6797, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4115257263183594, "rewards/margins": 0.07280793786048889, "rewards/rejected": -0.48433366417884827, "step": 220 }, { "epoch": 0.29, "learning_rate": 4.9968235222515246e-05, "logits/chosen": -2.4315428733825684, "logits/rejected": -2.4550862312316895, "logps/chosen": -162.66555786132812, "logps/rejected": -156.19947814941406, "loss": 0.6482, "rewards/accuracies": 0.4375, "rewards/chosen": -0.36304140090942383, "rewards/margins": 0.12375527620315552, "rewards/rejected": -0.48679661750793457, "step": 221 }, { "epoch": 0.29, "learning_rate": 4.996770812881964e-05, "logits/chosen": -2.5614113807678223, "logits/rejected": -2.5391292572021484, "logps/chosen": -214.3634033203125, "logps/rejected": -209.9477996826172, "loss": 0.7018, "rewards/accuracies": 0.375, "rewards/chosen": -0.319223552942276, "rewards/margins": 0.018056731671094894, "rewards/rejected": -0.3372803032398224, "step": 222 }, { "epoch": 0.29, "learning_rate": 4.9967176700656776e-05, "logits/chosen": -2.438467264175415, "logits/rejected": -2.4638776779174805, "logps/chosen": -227.1153564453125, "logps/rejected": -219.0038299560547, "loss": 0.6209, "rewards/accuracies": 0.875, "rewards/chosen": -0.45765751600265503, "rewards/margins": 0.19506625831127167, "rewards/rejected": -0.6527237892150879, "step": 223 }, { "epoch": 0.29, "learning_rate": 4.996664093811892e-05, "logits/chosen": -2.5968563556671143, "logits/rejected": -2.5226142406463623, "logps/chosen": -184.9003143310547, "logps/rejected": -202.8119354248047, "loss": 0.5973, "rewards/accuracies": 0.8125, "rewards/chosen": -0.36458614468574524, "rewards/margins": 0.22884023189544678, "rewards/rejected": -0.5934264063835144, "step": 224 }, { "epoch": 0.29, "learning_rate": 4.996610084129908e-05, "logits/chosen": -2.356076955795288, "logits/rejected": -2.464617967605591, "logps/chosen": -233.56741333007812, "logps/rejected": -252.82614135742188, "loss": 0.7174, "rewards/accuracies": 0.375, "rewards/chosen": -0.475138783454895, "rewards/margins": 0.015569500625133514, "rewards/rejected": -0.49070829153060913, "step": 225 }, { "epoch": 0.3, "learning_rate": 4.996555641029101e-05, "logits/chosen": -2.4807751178741455, "logits/rejected": -2.5052688121795654, "logps/chosen": -218.75250244140625, "logps/rejected": -202.75149536132812, "loss": 0.7541, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5363876819610596, "rewards/margins": -0.06761842221021652, "rewards/rejected": -0.4687691926956177, "step": 226 }, { "epoch": 0.3, "learning_rate": 4.996500764518923e-05, "logits/chosen": -2.5997631549835205, "logits/rejected": -2.6119892597198486, "logps/chosen": -212.090087890625, "logps/rejected": -220.1112823486328, "loss": 0.7185, "rewards/accuracies": 0.3125, "rewards/chosen": -0.5663864016532898, "rewards/margins": -0.016858436167240143, "rewards/rejected": -0.5495280027389526, "step": 227 }, { "epoch": 0.3, "learning_rate": 4.9964454546089026e-05, "logits/chosen": -2.25441837310791, "logits/rejected": -2.251708507537842, "logps/chosen": -216.14996337890625, "logps/rejected": -218.1949005126953, "loss": 0.7617, "rewards/accuracies": 0.3125, "rewards/chosen": -0.46758705377578735, "rewards/margins": -0.09628176689147949, "rewards/rejected": -0.37130531668663025, "step": 228 }, { "epoch": 0.3, "learning_rate": 4.996389711308639e-05, "logits/chosen": -2.475829601287842, "logits/rejected": -2.4804282188415527, "logps/chosen": -197.95965576171875, "logps/rejected": -227.29605102539062, "loss": 0.6275, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2889898717403412, "rewards/margins": 0.18446122109889984, "rewards/rejected": -0.47345107793807983, "step": 229 }, { "epoch": 0.3, "learning_rate": 4.996333534627809e-05, "logits/chosen": -2.4503121376037598, "logits/rejected": -2.40040922164917, "logps/chosen": -190.1997833251953, "logps/rejected": -170.68679809570312, "loss": 0.7168, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4417100250720978, "rewards/margins": -0.0062090009450912476, "rewards/rejected": -0.4355010688304901, "step": 230 }, { "epoch": 0.3, "learning_rate": 4.996276924576169e-05, "logits/chosen": -2.5104312896728516, "logits/rejected": -2.5510809421539307, "logps/chosen": -134.1800079345703, "logps/rejected": -153.13365173339844, "loss": 0.7468, "rewards/accuracies": 0.4375, "rewards/chosen": -0.42858752608299255, "rewards/margins": -0.08554935455322266, "rewards/rejected": -0.3430381715297699, "step": 231 }, { "epoch": 0.3, "learning_rate": 4.996219881163543e-05, "logits/chosen": -2.456216335296631, "logits/rejected": -2.4285402297973633, "logps/chosen": -209.37985229492188, "logps/rejected": -195.06568908691406, "loss": 0.7159, "rewards/accuracies": 0.3125, "rewards/chosen": -0.46927523612976074, "rewards/margins": -0.0166518222540617, "rewards/rejected": -0.4526233971118927, "step": 232 }, { "epoch": 0.3, "learning_rate": 4.996162404399835e-05, "logits/chosen": -2.450845956802368, "logits/rejected": -2.4209864139556885, "logps/chosen": -198.9377899169922, "logps/rejected": -203.7089385986328, "loss": 0.6708, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4800047278404236, "rewards/margins": 0.09784451127052307, "rewards/rejected": -0.5778492093086243, "step": 233 }, { "epoch": 0.31, "learning_rate": 4.996104494295024e-05, "logits/chosen": -2.5744130611419678, "logits/rejected": -2.641494035720825, "logps/chosen": -182.59878540039062, "logps/rejected": -207.31797790527344, "loss": 0.6749, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4756154417991638, "rewards/margins": 0.08592241257429123, "rewards/rejected": -0.5615378618240356, "step": 234 }, { "epoch": 0.31, "learning_rate": 4.996046150859161e-05, "logits/chosen": -2.0956687927246094, "logits/rejected": -2.1018872261047363, "logps/chosen": -200.86215209960938, "logps/rejected": -221.92138671875, "loss": 0.6445, "rewards/accuracies": 0.5625, "rewards/chosen": -0.488430380821228, "rewards/margins": 0.1436658799648285, "rewards/rejected": -0.6320962905883789, "step": 235 }, { "epoch": 0.31, "learning_rate": 4.9959873741023774e-05, "logits/chosen": -2.3680927753448486, "logits/rejected": -2.415205240249634, "logps/chosen": -163.829833984375, "logps/rejected": -182.74481201171875, "loss": 0.6839, "rewards/accuracies": 0.5625, "rewards/chosen": -0.35495901107788086, "rewards/margins": 0.03828255832195282, "rewards/rejected": -0.3932415544986725, "step": 236 }, { "epoch": 0.31, "learning_rate": 4.995928164034876e-05, "logits/chosen": -2.5983476638793945, "logits/rejected": -2.7171671390533447, "logps/chosen": -189.3108673095703, "logps/rejected": -195.94888305664062, "loss": 0.6611, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4241011142730713, "rewards/margins": 0.0877021998167038, "rewards/rejected": -0.5118032693862915, "step": 237 }, { "epoch": 0.31, "learning_rate": 4.995868520666936e-05, "logits/chosen": -2.510660171508789, "logits/rejected": -2.5224432945251465, "logps/chosen": -227.51687622070312, "logps/rejected": -231.05897521972656, "loss": 0.7269, "rewards/accuracies": 0.5, "rewards/chosen": -0.40695255994796753, "rewards/margins": -0.046068765223026276, "rewards/rejected": -0.36088380217552185, "step": 238 }, { "epoch": 0.31, "learning_rate": 4.9958084440089095e-05, "logits/chosen": -2.256051540374756, "logits/rejected": -2.2608485221862793, "logps/chosen": -209.19252014160156, "logps/rejected": -206.2628631591797, "loss": 0.6338, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3002999424934387, "rewards/margins": 0.18500341475009918, "rewards/rejected": -0.4853033423423767, "step": 239 }, { "epoch": 0.31, "learning_rate": 4.995747934071229e-05, "logits/chosen": -2.8373260498046875, "logits/rejected": -2.8713366985321045, "logps/chosen": -186.55723571777344, "logps/rejected": -221.16917419433594, "loss": 0.6489, "rewards/accuracies": 0.625, "rewards/chosen": -0.3408931493759155, "rewards/margins": 0.18887601792812347, "rewards/rejected": -0.5297691822052002, "step": 240 }, { "epoch": 0.32, "learning_rate": 4.995686990864398e-05, "logits/chosen": -2.328822135925293, "logits/rejected": -2.4501209259033203, "logps/chosen": -164.64404296875, "logps/rejected": -164.53396606445312, "loss": 0.6872, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5059252977371216, "rewards/margins": 0.046310462057590485, "rewards/rejected": -0.5522357225418091, "step": 241 }, { "epoch": 0.32, "learning_rate": 4.995625614398996e-05, "logits/chosen": -2.254148006439209, "logits/rejected": -2.45564603805542, "logps/chosen": -215.61080932617188, "logps/rejected": -236.83526611328125, "loss": 0.618, "rewards/accuracies": 0.625, "rewards/chosen": -0.3230094313621521, "rewards/margins": 0.2154739797115326, "rewards/rejected": -0.5384833812713623, "step": 242 }, { "epoch": 0.32, "learning_rate": 4.995563804685678e-05, "logits/chosen": -2.4298973083496094, "logits/rejected": -2.6152379512786865, "logps/chosen": -171.45925903320312, "logps/rejected": -215.27342224121094, "loss": 0.6461, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5956869721412659, "rewards/margins": 0.1696903109550476, "rewards/rejected": -0.7653772830963135, "step": 243 }, { "epoch": 0.32, "learning_rate": 4.995501561735176e-05, "logits/chosen": -2.5064899921417236, "logits/rejected": -2.667478561401367, "logps/chosen": -201.28167724609375, "logps/rejected": -200.6697998046875, "loss": 0.7252, "rewards/accuracies": 0.375, "rewards/chosen": -0.50804603099823, "rewards/margins": -0.021207518875598907, "rewards/rejected": -0.48683851957321167, "step": 244 }, { "epoch": 0.32, "learning_rate": 4.995438885558294e-05, "logits/chosen": -2.575934886932373, "logits/rejected": -2.5915310382843018, "logps/chosen": -241.2596893310547, "logps/rejected": -236.16864013671875, "loss": 0.6113, "rewards/accuracies": 0.625, "rewards/chosen": -0.41618824005126953, "rewards/margins": 0.21841469407081604, "rewards/rejected": -0.6346028447151184, "step": 245 }, { "epoch": 0.32, "learning_rate": 4.995375776165913e-05, "logits/chosen": -2.6145882606506348, "logits/rejected": -2.6391665935516357, "logps/chosen": -230.53524780273438, "logps/rejected": -228.5028076171875, "loss": 0.7431, "rewards/accuracies": 0.5, "rewards/chosen": -0.5892909169197083, "rewards/margins": -0.044729601591825485, "rewards/rejected": -0.5445613265037537, "step": 246 }, { "epoch": 0.32, "learning_rate": 4.995312233568989e-05, "logits/chosen": -2.279015302658081, "logits/rejected": -2.4627904891967773, "logps/chosen": -205.08226013183594, "logps/rejected": -245.180908203125, "loss": 0.6492, "rewards/accuracies": 0.5, "rewards/chosen": -0.37771591544151306, "rewards/margins": 0.17377406358718872, "rewards/rejected": -0.5514900088310242, "step": 247 }, { "epoch": 0.32, "learning_rate": 4.9952482577785545e-05, "logits/chosen": -2.4535770416259766, "logits/rejected": -2.5108795166015625, "logps/chosen": -186.94288635253906, "logps/rejected": -191.04786682128906, "loss": 0.7335, "rewards/accuracies": 0.5, "rewards/chosen": -0.6674529314041138, "rewards/margins": -0.03079444169998169, "rewards/rejected": -0.6366585493087769, "step": 248 }, { "epoch": 0.33, "learning_rate": 4.9951838488057134e-05, "logits/chosen": -2.2177467346191406, "logits/rejected": -2.3985226154327393, "logps/chosen": -151.24977111816406, "logps/rejected": -185.8676300048828, "loss": 0.6516, "rewards/accuracies": 0.625, "rewards/chosen": -0.3900182843208313, "rewards/margins": 0.13504652678966522, "rewards/rejected": -0.5250648260116577, "step": 249 }, { "epoch": 0.33, "learning_rate": 4.9951190066616495e-05, "logits/chosen": -2.42427396774292, "logits/rejected": -2.4583323001861572, "logps/chosen": -255.53150939941406, "logps/rejected": -261.932373046875, "loss": 0.6442, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3976048231124878, "rewards/margins": 0.12972235679626465, "rewards/rejected": -0.5273271799087524, "step": 250 }, { "epoch": 0.33, "learning_rate": 4.995053731357618e-05, "logits/chosen": -2.462376832962036, "logits/rejected": -2.568171262741089, "logps/chosen": -197.1337432861328, "logps/rejected": -202.1964569091797, "loss": 0.6419, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6016325354576111, "rewards/margins": 0.15742458403110504, "rewards/rejected": -0.7590572237968445, "step": 251 }, { "epoch": 0.33, "learning_rate": 4.9949880229049526e-05, "logits/chosen": -2.4696521759033203, "logits/rejected": -2.4089298248291016, "logps/chosen": -187.97976684570312, "logps/rejected": -178.5655059814453, "loss": 0.7341, "rewards/accuracies": 0.5, "rewards/chosen": -0.43453776836395264, "rewards/margins": -0.025840837508440018, "rewards/rejected": -0.4086969494819641, "step": 252 }, { "epoch": 0.33, "learning_rate": 4.994921881315059e-05, "logits/chosen": -2.292558431625366, "logits/rejected": -2.199901580810547, "logps/chosen": -192.6279754638672, "logps/rejected": -203.34487915039062, "loss": 0.6863, "rewards/accuracies": 0.5625, "rewards/chosen": -0.48495134711265564, "rewards/margins": 0.07142490148544312, "rewards/rejected": -0.5563762784004211, "step": 253 }, { "epoch": 0.33, "learning_rate": 4.9948553065994197e-05, "logits/chosen": -2.231633424758911, "logits/rejected": -2.2734503746032715, "logps/chosen": -184.4136962890625, "logps/rejected": -194.4268341064453, "loss": 0.6465, "rewards/accuracies": 0.5, "rewards/chosen": -0.44051364064216614, "rewards/margins": 0.19739174842834473, "rewards/rejected": -0.6379053592681885, "step": 254 }, { "epoch": 0.33, "learning_rate": 4.994788298769593e-05, "logits/chosen": -2.5150344371795654, "logits/rejected": -2.4598844051361084, "logps/chosen": -238.438232421875, "logps/rejected": -217.13897705078125, "loss": 0.6945, "rewards/accuracies": 0.4375, "rewards/chosen": -0.43908870220184326, "rewards/margins": 0.03722059726715088, "rewards/rejected": -0.47630926966667175, "step": 255 }, { "epoch": 0.34, "learning_rate": 4.994720857837211e-05, "logits/chosen": -2.2310400009155273, "logits/rejected": -2.1882810592651367, "logps/chosen": -184.02981567382812, "logps/rejected": -208.0157470703125, "loss": 0.666, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5550325512886047, "rewards/margins": 0.08377734571695328, "rewards/rejected": -0.638809859752655, "step": 256 }, { "epoch": 0.34, "learning_rate": 4.994652983813982e-05, "logits/chosen": -2.4377846717834473, "logits/rejected": -2.5074000358581543, "logps/chosen": -187.0503387451172, "logps/rejected": -195.54049682617188, "loss": 0.5873, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5530596971511841, "rewards/margins": 0.2584923803806305, "rewards/rejected": -0.811552107334137, "step": 257 }, { "epoch": 0.34, "learning_rate": 4.994584676711689e-05, "logits/chosen": -2.464853286743164, "logits/rejected": -2.504178047180176, "logps/chosen": -186.5401611328125, "logps/rejected": -227.3590087890625, "loss": 0.7102, "rewards/accuracies": 0.625, "rewards/chosen": -0.5265690088272095, "rewards/margins": 0.009033482521772385, "rewards/rejected": -0.5356025695800781, "step": 258 }, { "epoch": 0.34, "learning_rate": 4.994515936542191e-05, "logits/chosen": -2.2838244438171387, "logits/rejected": -2.3256828784942627, "logps/chosen": -173.54270935058594, "logps/rejected": -166.37547302246094, "loss": 0.7262, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4617432951927185, "rewards/margins": -0.03625304624438286, "rewards/rejected": -0.42549026012420654, "step": 259 }, { "epoch": 0.34, "learning_rate": 4.99444676331742e-05, "logits/chosen": -2.407790422439575, "logits/rejected": -2.3871612548828125, "logps/chosen": -248.3179931640625, "logps/rejected": -235.79884338378906, "loss": 0.6761, "rewards/accuracies": 0.5625, "rewards/chosen": -0.458143413066864, "rewards/margins": 0.10666979104280472, "rewards/rejected": -0.5648132562637329, "step": 260 }, { "epoch": 0.34, "learning_rate": 4.9943771570493856e-05, "logits/chosen": -2.560073137283325, "logits/rejected": -2.51522159576416, "logps/chosen": -191.05734252929688, "logps/rejected": -195.287841796875, "loss": 0.7288, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6246404051780701, "rewards/margins": -0.01798657327890396, "rewards/rejected": -0.6066538095474243, "step": 261 }, { "epoch": 0.34, "learning_rate": 4.9943071177501724e-05, "logits/chosen": -2.364337682723999, "logits/rejected": -2.39058780670166, "logps/chosen": -200.97915649414062, "logps/rejected": -183.07151794433594, "loss": 0.6839, "rewards/accuracies": 0.75, "rewards/chosen": -0.5552209615707397, "rewards/margins": 0.09874990582466125, "rewards/rejected": -0.6539708375930786, "step": 262 }, { "epoch": 0.34, "learning_rate": 4.994236645431938e-05, "logits/chosen": -2.6555843353271484, "logits/rejected": -2.7670748233795166, "logps/chosen": -244.5951690673828, "logps/rejected": -246.04124450683594, "loss": 0.688, "rewards/accuracies": 0.375, "rewards/chosen": -0.45607852935791016, "rewards/margins": 0.08103760331869125, "rewards/rejected": -0.5371161103248596, "step": 263 }, { "epoch": 0.35, "learning_rate": 4.994165740106918e-05, "logits/chosen": -2.527970314025879, "logits/rejected": -2.6416518688201904, "logps/chosen": -231.80966186523438, "logps/rejected": -246.3621826171875, "loss": 0.6086, "rewards/accuracies": 0.75, "rewards/chosen": -0.5444769263267517, "rewards/margins": 0.21878674626350403, "rewards/rejected": -0.7632635831832886, "step": 264 }, { "epoch": 0.35, "learning_rate": 4.99409440178742e-05, "logits/chosen": -2.584487199783325, "logits/rejected": -2.5451977252960205, "logps/chosen": -184.02330017089844, "logps/rejected": -187.47161865234375, "loss": 0.7151, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6932749152183533, "rewards/margins": -0.005857221782207489, "rewards/rejected": -0.6874176263809204, "step": 265 }, { "epoch": 0.35, "learning_rate": 4.9940226304858296e-05, "logits/chosen": -2.556795597076416, "logits/rejected": -2.5516929626464844, "logps/chosen": -187.25588989257812, "logps/rejected": -206.25067138671875, "loss": 0.7035, "rewards/accuracies": 0.5, "rewards/chosen": -0.563089907169342, "rewards/margins": 0.009414192289113998, "rewards/rejected": -0.5725040435791016, "step": 266 }, { "epoch": 0.35, "learning_rate": 4.993950426214606e-05, "logits/chosen": -2.3932271003723145, "logits/rejected": -2.393653154373169, "logps/chosen": -218.82334899902344, "logps/rejected": -226.3861541748047, "loss": 0.6841, "rewards/accuracies": 0.5, "rewards/chosen": -0.6887062788009644, "rewards/margins": 0.031964417546987534, "rewards/rejected": -0.720670759677887, "step": 267 }, { "epoch": 0.35, "learning_rate": 4.993877788986285e-05, "logits/chosen": -2.3766613006591797, "logits/rejected": -2.507246971130371, "logps/chosen": -165.4411163330078, "logps/rejected": -199.56741333007812, "loss": 0.575, "rewards/accuracies": 0.6875, "rewards/chosen": -0.49406641721725464, "rewards/margins": 0.3596181869506836, "rewards/rejected": -0.853684663772583, "step": 268 }, { "epoch": 0.35, "learning_rate": 4.9938047188134776e-05, "logits/chosen": -2.4775924682617188, "logits/rejected": -2.4857354164123535, "logps/chosen": -195.7847442626953, "logps/rejected": -216.16220092773438, "loss": 0.6795, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5620791912078857, "rewards/margins": 0.07040975242853165, "rewards/rejected": -0.632489025592804, "step": 269 }, { "epoch": 0.35, "learning_rate": 4.993731215708866e-05, "logits/chosen": -2.637573719024658, "logits/rejected": -2.700226306915283, "logps/chosen": -200.697509765625, "logps/rejected": -214.4364471435547, "loss": 0.6027, "rewards/accuracies": 0.625, "rewards/chosen": -0.5127323269844055, "rewards/margins": 0.24725483357906342, "rewards/rejected": -0.7599871754646301, "step": 270 }, { "epoch": 0.35, "learning_rate": 4.993657279685212e-05, "logits/chosen": -2.4737141132354736, "logits/rejected": -2.576711654663086, "logps/chosen": -198.31393432617188, "logps/rejected": -219.9713592529297, "loss": 0.7162, "rewards/accuracies": 0.5, "rewards/chosen": -0.7182321548461914, "rewards/margins": 0.04057084769010544, "rewards/rejected": -0.7588030099868774, "step": 271 }, { "epoch": 0.36, "learning_rate": 4.9935829107553516e-05, "logits/chosen": -2.3163719177246094, "logits/rejected": -2.3124592304229736, "logps/chosen": -198.31076049804688, "logps/rejected": -210.8530731201172, "loss": 0.6581, "rewards/accuracies": 0.5, "rewards/chosen": -0.5568541288375854, "rewards/margins": 0.1273648738861084, "rewards/rejected": -0.6842190623283386, "step": 272 }, { "epoch": 0.36, "learning_rate": 4.993508108932195e-05, "logits/chosen": -2.4512782096862793, "logits/rejected": -2.508169651031494, "logps/chosen": -202.17613220214844, "logps/rejected": -203.27789306640625, "loss": 0.6612, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7900542616844177, "rewards/margins": 0.12749117612838745, "rewards/rejected": -0.91754549741745, "step": 273 }, { "epoch": 0.36, "learning_rate": 4.9934328742287285e-05, "logits/chosen": -2.391414165496826, "logits/rejected": -2.3899242877960205, "logps/chosen": -231.99913024902344, "logps/rejected": -232.77243041992188, "loss": 0.7863, "rewards/accuracies": 0.375, "rewards/chosen": -0.6857910752296448, "rewards/margins": -0.05944066494703293, "rewards/rejected": -0.626350462436676, "step": 274 }, { "epoch": 0.36, "learning_rate": 4.993357206658011e-05, "logits/chosen": -2.5768861770629883, "logits/rejected": -2.6738839149475098, "logps/chosen": -175.33010864257812, "logps/rejected": -185.26443481445312, "loss": 0.7157, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6001064777374268, "rewards/margins": 0.052334412932395935, "rewards/rejected": -0.6524409651756287, "step": 275 }, { "epoch": 0.36, "learning_rate": 4.993281106233182e-05, "logits/chosen": -2.3842387199401855, "logits/rejected": -2.425611972808838, "logps/chosen": -165.09873962402344, "logps/rejected": -185.34361267089844, "loss": 0.5746, "rewards/accuracies": 0.75, "rewards/chosen": -0.4550285339355469, "rewards/margins": 0.2833191156387329, "rewards/rejected": -0.738347589969635, "step": 276 }, { "epoch": 0.36, "learning_rate": 4.9932045729674505e-05, "logits/chosen": -2.370333671569824, "logits/rejected": -2.343536376953125, "logps/chosen": -211.58517456054688, "logps/rejected": -191.9432373046875, "loss": 0.7727, "rewards/accuracies": 0.625, "rewards/chosen": -0.7500544786453247, "rewards/margins": -0.05222831293940544, "rewards/rejected": -0.697826087474823, "step": 277 }, { "epoch": 0.36, "learning_rate": 4.993127606874104e-05, "logits/chosen": -2.3698928356170654, "logits/rejected": -2.459376573562622, "logps/chosen": -241.92343139648438, "logps/rejected": -265.6181945800781, "loss": 0.6437, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6877275705337524, "rewards/margins": 0.16285517811775208, "rewards/rejected": -0.8505828380584717, "step": 278 }, { "epoch": 0.37, "learning_rate": 4.9930502079665025e-05, "logits/chosen": -2.3997507095336914, "logits/rejected": -2.4078052043914795, "logps/chosen": -228.6896514892578, "logps/rejected": -221.53338623046875, "loss": 0.7984, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6329948306083679, "rewards/margins": -0.1108212098479271, "rewards/rejected": -0.5221735835075378, "step": 279 }, { "epoch": 0.37, "learning_rate": 4.9929723762580835e-05, "logits/chosen": -2.498244285583496, "logits/rejected": -2.571398973464966, "logps/chosen": -192.15635681152344, "logps/rejected": -176.19012451171875, "loss": 0.7497, "rewards/accuracies": 0.5, "rewards/chosen": -0.5479970574378967, "rewards/margins": -0.0442587286233902, "rewards/rejected": -0.5037383437156677, "step": 280 }, { "epoch": 0.37, "learning_rate": 4.9928941117623604e-05, "logits/chosen": -2.6329376697540283, "logits/rejected": -2.635847806930542, "logps/chosen": -207.86293029785156, "logps/rejected": -224.18490600585938, "loss": 0.6649, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5313466787338257, "rewards/margins": 0.07021217793226242, "rewards/rejected": -0.6015588045120239, "step": 281 }, { "epoch": 0.37, "learning_rate": 4.9928154144929175e-05, "logits/chosen": -2.5966103076934814, "logits/rejected": -2.571523666381836, "logps/chosen": -192.47384643554688, "logps/rejected": -180.02134704589844, "loss": 0.8799, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8716793656349182, "rewards/margins": -0.2290811538696289, "rewards/rejected": -0.6425981521606445, "step": 282 }, { "epoch": 0.37, "learning_rate": 4.9927362844634186e-05, "logits/chosen": -2.6513888835906982, "logits/rejected": -2.606253147125244, "logps/chosen": -208.56338500976562, "logps/rejected": -200.686279296875, "loss": 0.7609, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5643545389175415, "rewards/margins": -0.08666092902421951, "rewards/rejected": -0.4776935577392578, "step": 283 }, { "epoch": 0.37, "learning_rate": 4.9926567216876e-05, "logits/chosen": -2.5372819900512695, "logits/rejected": -2.5824756622314453, "logps/chosen": -187.18386840820312, "logps/rejected": -211.16427612304688, "loss": 0.6838, "rewards/accuracies": 0.625, "rewards/chosen": -0.5520645380020142, "rewards/margins": 0.10136236250400543, "rewards/rejected": -0.6534268856048584, "step": 284 }, { "epoch": 0.37, "learning_rate": 4.992576726179274e-05, "logits/chosen": -2.5787792205810547, "logits/rejected": -2.5688636302948, "logps/chosen": -199.59490966796875, "logps/rejected": -221.02023315429688, "loss": 0.7033, "rewards/accuracies": 0.5, "rewards/chosen": -0.5855346918106079, "rewards/margins": 0.010282933712005615, "rewards/rejected": -0.5958175659179688, "step": 285 }, { "epoch": 0.37, "learning_rate": 4.9924962979523296e-05, "logits/chosen": -2.588792085647583, "logits/rejected": -2.5663416385650635, "logps/chosen": -197.04592895507812, "logps/rejected": -184.49290466308594, "loss": 0.6537, "rewards/accuracies": 0.4375, "rewards/chosen": -0.46529656648635864, "rewards/margins": 0.1208263412117958, "rewards/rejected": -0.5861228704452515, "step": 286 }, { "epoch": 0.38, "learning_rate": 4.992415437020727e-05, "logits/chosen": -2.6553092002868652, "logits/rejected": -2.763751268386841, "logps/chosen": -217.5886993408203, "logps/rejected": -233.33489990234375, "loss": 0.6027, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4947764575481415, "rewards/margins": 0.2827147841453552, "rewards/rejected": -0.7774912118911743, "step": 287 }, { "epoch": 0.38, "learning_rate": 4.992334143398506e-05, "logits/chosen": -2.5335421562194824, "logits/rejected": -2.600069999694824, "logps/chosen": -204.98912048339844, "logps/rejected": -212.0712890625, "loss": 0.6079, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5413306355476379, "rewards/margins": 0.22219431400299072, "rewards/rejected": -0.7635249495506287, "step": 288 }, { "epoch": 0.38, "learning_rate": 4.992252417099778e-05, "logits/chosen": -2.638511896133423, "logits/rejected": -2.600456476211548, "logps/chosen": -172.59390258789062, "logps/rejected": -192.1689910888672, "loss": 0.7194, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5202851891517639, "rewards/margins": 0.017805464565753937, "rewards/rejected": -0.5380906462669373, "step": 289 }, { "epoch": 0.38, "learning_rate": 4.992170258138732e-05, "logits/chosen": -2.5361521244049072, "logits/rejected": -2.4191856384277344, "logps/chosen": -177.23558044433594, "logps/rejected": -169.40321350097656, "loss": 0.6089, "rewards/accuracies": 0.75, "rewards/chosen": -0.20310896635055542, "rewards/margins": 0.2397383451461792, "rewards/rejected": -0.4428473711013794, "step": 290 }, { "epoch": 0.38, "learning_rate": 4.99208766652963e-05, "logits/chosen": -2.727092742919922, "logits/rejected": -2.613562822341919, "logps/chosen": -228.9248504638672, "logps/rejected": -222.22886657714844, "loss": 0.6345, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5637404322624207, "rewards/margins": 0.21417354047298431, "rewards/rejected": -0.7779139280319214, "step": 291 }, { "epoch": 0.38, "learning_rate": 4.99200464228681e-05, "logits/chosen": -2.530290126800537, "logits/rejected": -2.49137282371521, "logps/chosen": -218.17156982421875, "logps/rejected": -225.41952514648438, "loss": 0.7465, "rewards/accuracies": 0.5, "rewards/chosen": -0.567755937576294, "rewards/margins": -0.07816261053085327, "rewards/rejected": -0.4895933270454407, "step": 292 }, { "epoch": 0.38, "learning_rate": 4.9919211854246874e-05, "logits/chosen": -2.683086633682251, "logits/rejected": -2.588696002960205, "logps/chosen": -268.5683898925781, "logps/rejected": -255.58445739746094, "loss": 0.7069, "rewards/accuracies": 0.5, "rewards/chosen": -0.5820844769477844, "rewards/margins": 0.0005566291511058807, "rewards/rejected": -0.582641065120697, "step": 293 }, { "epoch": 0.38, "learning_rate": 4.9918372959577486e-05, "logits/chosen": -2.7265396118164062, "logits/rejected": -2.7635843753814697, "logps/chosen": -206.09573364257812, "logps/rejected": -214.75784301757812, "loss": 0.6975, "rewards/accuracies": 0.625, "rewards/chosen": -0.6429769396781921, "rewards/margins": 0.1274566352367401, "rewards/rejected": -0.7704335451126099, "step": 294 }, { "epoch": 0.39, "learning_rate": 4.9917529739005574e-05, "logits/chosen": -2.507643938064575, "logits/rejected": -2.4392926692962646, "logps/chosen": -219.877197265625, "logps/rejected": -217.41537475585938, "loss": 0.729, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5904738306999207, "rewards/margins": -0.04813346266746521, "rewards/rejected": -0.5423403382301331, "step": 295 }, { "epoch": 0.39, "learning_rate": 4.991668219267752e-05, "logits/chosen": -2.613016128540039, "logits/rejected": -2.7239303588867188, "logps/chosen": -235.21534729003906, "logps/rejected": -266.6222839355469, "loss": 0.7173, "rewards/accuracies": 0.5, "rewards/chosen": -0.6593362092971802, "rewards/margins": 0.04644089192152023, "rewards/rejected": -0.7057771682739258, "step": 296 }, { "epoch": 0.39, "learning_rate": 4.991583032074047e-05, "logits/chosen": -2.774909257888794, "logits/rejected": -2.7659873962402344, "logps/chosen": -250.04261779785156, "logps/rejected": -242.6505584716797, "loss": 0.7062, "rewards/accuracies": 0.5, "rewards/chosen": -0.6024019718170166, "rewards/margins": 0.010418819263577461, "rewards/rejected": -0.6128207445144653, "step": 297 }, { "epoch": 0.39, "learning_rate": 4.99149741233423e-05, "logits/chosen": -2.447873592376709, "logits/rejected": -2.468458414077759, "logps/chosen": -214.55784606933594, "logps/rejected": -189.22238159179688, "loss": 0.6822, "rewards/accuracies": 0.5, "rewards/chosen": -0.6992676854133606, "rewards/margins": 0.09424494951963425, "rewards/rejected": -0.7935126423835754, "step": 298 }, { "epoch": 0.39, "learning_rate": 4.9914113600631665e-05, "logits/chosen": -2.5641584396362305, "logits/rejected": -2.602008104324341, "logps/chosen": -310.73077392578125, "logps/rejected": -290.17352294921875, "loss": 0.6765, "rewards/accuracies": 0.625, "rewards/chosen": -0.5222407579421997, "rewards/margins": 0.06505537033081055, "rewards/rejected": -0.587296187877655, "step": 299 }, { "epoch": 0.39, "learning_rate": 4.991324875275794e-05, "logits/chosen": -2.5343639850616455, "logits/rejected": -2.610719919204712, "logps/chosen": -199.84573364257812, "logps/rejected": -203.96478271484375, "loss": 0.718, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5667111873626709, "rewards/margins": -0.001658361405134201, "rewards/rejected": -0.5650528073310852, "step": 300 }, { "epoch": 0.39, "learning_rate": 4.991237957987127e-05, "logits/chosen": -2.379605293273926, "logits/rejected": -2.3678669929504395, "logps/chosen": -211.86761474609375, "logps/rejected": -213.41464233398438, "loss": 0.6325, "rewards/accuracies": 0.5625, "rewards/chosen": -0.46346691250801086, "rewards/margins": 0.20269399881362915, "rewards/rejected": -0.6661609411239624, "step": 301 }, { "epoch": 0.4, "learning_rate": 4.991150608212254e-05, "logits/chosen": -2.6828835010528564, "logits/rejected": -2.7126364707946777, "logps/chosen": -196.61216735839844, "logps/rejected": -223.1612548828125, "loss": 0.7503, "rewards/accuracies": 0.5, "rewards/chosen": -0.5514590740203857, "rewards/margins": -0.08642277121543884, "rewards/rejected": -0.4650362730026245, "step": 302 }, { "epoch": 0.4, "learning_rate": 4.9910628259663404e-05, "logits/chosen": -2.4960732460021973, "logits/rejected": -2.608139991760254, "logps/chosen": -197.00277709960938, "logps/rejected": -196.33804321289062, "loss": 0.564, "rewards/accuracies": 0.75, "rewards/chosen": -0.35482868552207947, "rewards/margins": 0.38187745213508606, "rewards/rejected": -0.7367061376571655, "step": 303 }, { "epoch": 0.4, "learning_rate": 4.990974611264625e-05, "logits/chosen": -2.3627309799194336, "logits/rejected": -2.3418447971343994, "logps/chosen": -232.52740478515625, "logps/rejected": -227.15753173828125, "loss": 0.6344, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6158957481384277, "rewards/margins": 0.18052013218402863, "rewards/rejected": -0.7964158654212952, "step": 304 }, { "epoch": 0.4, "learning_rate": 4.990885964122421e-05, "logits/chosen": -2.594547748565674, "logits/rejected": -2.6074790954589844, "logps/chosen": -170.09300231933594, "logps/rejected": -190.0100860595703, "loss": 0.7467, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5137202739715576, "rewards/margins": -0.07406175136566162, "rewards/rejected": -0.439658522605896, "step": 305 }, { "epoch": 0.4, "learning_rate": 4.990796884555119e-05, "logits/chosen": -2.632927417755127, "logits/rejected": -2.6694023609161377, "logps/chosen": -215.45489501953125, "logps/rejected": -203.8738555908203, "loss": 0.6641, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4411565661430359, "rewards/margins": 0.1269129067659378, "rewards/rejected": -0.5680694580078125, "step": 306 }, { "epoch": 0.4, "learning_rate": 4.9907073725781836e-05, "logits/chosen": -2.4353699684143066, "logits/rejected": -2.537163734436035, "logps/chosen": -279.5519714355469, "logps/rejected": -290.08135986328125, "loss": 0.6756, "rewards/accuracies": 0.5, "rewards/chosen": -0.32637274265289307, "rewards/margins": 0.053382109850645065, "rewards/rejected": -0.379754900932312, "step": 307 }, { "epoch": 0.4, "learning_rate": 4.9906174282071535e-05, "logits/chosen": -2.586729049682617, "logits/rejected": -2.583314895629883, "logps/chosen": -197.04298400878906, "logps/rejected": -195.334228515625, "loss": 0.6718, "rewards/accuracies": 0.625, "rewards/chosen": -0.515608549118042, "rewards/margins": 0.06315431743860245, "rewards/rejected": -0.5787628293037415, "step": 308 }, { "epoch": 0.4, "learning_rate": 4.990527051457644e-05, "logits/chosen": -2.45725154876709, "logits/rejected": -2.445600748062134, "logps/chosen": -204.52867126464844, "logps/rejected": -290.3397216796875, "loss": 0.6908, "rewards/accuracies": 0.5, "rewards/chosen": -0.6991719007492065, "rewards/margins": 0.08316923677921295, "rewards/rejected": -0.7823411226272583, "step": 309 }, { "epoch": 0.41, "learning_rate": 4.9904362423453446e-05, "logits/chosen": -2.604722738265991, "logits/rejected": -2.6120543479919434, "logps/chosen": -193.1224822998047, "logps/rejected": -189.89694213867188, "loss": 0.6076, "rewards/accuracies": 0.75, "rewards/chosen": -0.3250601589679718, "rewards/margins": 0.19382469356060028, "rewards/rejected": -0.5188848376274109, "step": 310 }, { "epoch": 0.41, "learning_rate": 4.990345000886019e-05, "logits/chosen": -2.6838831901550293, "logits/rejected": -2.6718058586120605, "logps/chosen": -177.02090454101562, "logps/rejected": -173.667724609375, "loss": 0.6993, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5827906131744385, "rewards/margins": 0.027324199676513672, "rewards/rejected": -0.6101148128509521, "step": 311 }, { "epoch": 0.41, "learning_rate": 4.990253327095509e-05, "logits/chosen": -2.60197377204895, "logits/rejected": -2.522291660308838, "logps/chosen": -173.03811645507812, "logps/rejected": -149.1508026123047, "loss": 0.7939, "rewards/accuracies": 0.375, "rewards/chosen": -0.4563007950782776, "rewards/margins": -0.13545696437358856, "rewards/rejected": -0.32084381580352783, "step": 312 }, { "epoch": 0.41, "learning_rate": 4.9901612209897275e-05, "logits/chosen": -2.643686532974243, "logits/rejected": -2.6443119049072266, "logps/chosen": -187.93931579589844, "logps/rejected": -208.4308624267578, "loss": 0.6686, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6419711709022522, "rewards/margins": 0.10658866912126541, "rewards/rejected": -0.748559832572937, "step": 313 }, { "epoch": 0.41, "learning_rate": 4.990068682584666e-05, "logits/chosen": -2.469681739807129, "logits/rejected": -2.4439382553100586, "logps/chosen": -186.7505340576172, "logps/rejected": -204.6378936767578, "loss": 0.7857, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5965012311935425, "rewards/margins": -0.12374071031808853, "rewards/rejected": -0.47276046872138977, "step": 314 }, { "epoch": 0.41, "learning_rate": 4.989975711896388e-05, "logits/chosen": -2.3346047401428223, "logits/rejected": -2.323143720626831, "logps/chosen": -223.70867919921875, "logps/rejected": -238.58311462402344, "loss": 0.7061, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5982979536056519, "rewards/margins": -0.005420597270131111, "rewards/rejected": -0.5928773283958435, "step": 315 }, { "epoch": 0.41, "learning_rate": 4.989882308941034e-05, "logits/chosen": -2.759549379348755, "logits/rejected": -2.7803878784179688, "logps/chosen": -239.48947143554688, "logps/rejected": -181.70718383789062, "loss": 0.6671, "rewards/accuracies": 0.5625, "rewards/chosen": -0.46423962712287903, "rewards/margins": 0.11677178740501404, "rewards/rejected": -0.5810113549232483, "step": 316 }, { "epoch": 0.41, "learning_rate": 4.9897884737348196e-05, "logits/chosen": -2.485264778137207, "logits/rejected": -2.5983753204345703, "logps/chosen": -175.43971252441406, "logps/rejected": -219.08517456054688, "loss": 0.7124, "rewards/accuracies": 0.5, "rewards/chosen": -0.5487624406814575, "rewards/margins": 0.0002348199486732483, "rewards/rejected": -0.5489972829818726, "step": 317 }, { "epoch": 0.42, "learning_rate": 4.989694206294035e-05, "logits/chosen": -2.6732518672943115, "logits/rejected": -2.6291115283966064, "logps/chosen": -193.59197998046875, "logps/rejected": -207.9779052734375, "loss": 0.6528, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4739716649055481, "rewards/margins": 0.12173515558242798, "rewards/rejected": -0.5957068204879761, "step": 318 }, { "epoch": 0.42, "learning_rate": 4.989599506635044e-05, "logits/chosen": -2.4193854331970215, "logits/rejected": -2.565293073654175, "logps/chosen": -263.6156311035156, "logps/rejected": -251.44192504882812, "loss": 0.6307, "rewards/accuracies": 0.625, "rewards/chosen": -0.47510460019111633, "rewards/margins": 0.25979098677635193, "rewards/rejected": -0.734895646572113, "step": 319 }, { "epoch": 0.42, "learning_rate": 4.989504374774288e-05, "logits/chosen": -2.3681142330169678, "logits/rejected": -2.4691953659057617, "logps/chosen": -163.5777587890625, "logps/rejected": -194.8612060546875, "loss": 0.6185, "rewards/accuracies": 0.625, "rewards/chosen": -0.4559204876422882, "rewards/margins": 0.19214320182800293, "rewards/rejected": -0.648063600063324, "step": 320 }, { "epoch": 0.42, "learning_rate": 4.989408810728281e-05, "logits/chosen": -2.477172613143921, "logits/rejected": -2.5207676887512207, "logps/chosen": -141.12867736816406, "logps/rejected": -146.79791259765625, "loss": 0.657, "rewards/accuracies": 0.5625, "rewards/chosen": -0.22462037205696106, "rewards/margins": 0.09705987572669983, "rewards/rejected": -0.3216802775859833, "step": 321 }, { "epoch": 0.42, "learning_rate": 4.989312814513614e-05, "logits/chosen": -2.534242630004883, "logits/rejected": -2.47320294380188, "logps/chosen": -223.2980499267578, "logps/rejected": -224.9616241455078, "loss": 0.7372, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5472955107688904, "rewards/margins": -0.037168219685554504, "rewards/rejected": -0.5101273059844971, "step": 322 }, { "epoch": 0.42, "learning_rate": 4.989216386146953e-05, "logits/chosen": -2.3438358306884766, "logits/rejected": -2.623857021331787, "logps/chosen": -190.59585571289062, "logps/rejected": -234.1730194091797, "loss": 0.6873, "rewards/accuracies": 0.625, "rewards/chosen": -0.3276049494743347, "rewards/margins": 0.06552110612392426, "rewards/rejected": -0.39312607049942017, "step": 323 }, { "epoch": 0.42, "learning_rate": 4.9891195256450366e-05, "logits/chosen": -2.210477828979492, "logits/rejected": -2.195955991744995, "logps/chosen": -185.43902587890625, "logps/rejected": -224.94488525390625, "loss": 0.678, "rewards/accuracies": 0.625, "rewards/chosen": -0.38528165221214294, "rewards/margins": 0.09291733056306839, "rewards/rejected": -0.47819897532463074, "step": 324 }, { "epoch": 0.43, "learning_rate": 4.989022233024681e-05, "logits/chosen": -2.7818360328674316, "logits/rejected": -2.764118194580078, "logps/chosen": -224.84347534179688, "logps/rejected": -205.856201171875, "loss": 0.6028, "rewards/accuracies": 0.75, "rewards/chosen": -0.3358863592147827, "rewards/margins": 0.23018960654735565, "rewards/rejected": -0.5660759806632996, "step": 325 }, { "epoch": 0.43, "learning_rate": 4.9889245083027755e-05, "logits/chosen": -2.3800809383392334, "logits/rejected": -2.3793954849243164, "logps/chosen": -168.0996551513672, "logps/rejected": -197.4810791015625, "loss": 0.7001, "rewards/accuracies": 0.5625, "rewards/chosen": -0.36885035037994385, "rewards/margins": 0.015535619109869003, "rewards/rejected": -0.3843860328197479, "step": 326 }, { "epoch": 0.43, "learning_rate": 4.988826351496287e-05, "logits/chosen": -2.5323662757873535, "logits/rejected": -2.5999341011047363, "logps/chosen": -215.6177978515625, "logps/rejected": -255.65231323242188, "loss": 0.6679, "rewards/accuracies": 0.5, "rewards/chosen": -0.6092446446418762, "rewards/margins": 0.09709976613521576, "rewards/rejected": -0.7063443660736084, "step": 327 }, { "epoch": 0.43, "learning_rate": 4.988727762622255e-05, "logits/chosen": -2.7337067127227783, "logits/rejected": -2.742793083190918, "logps/chosen": -214.25506591796875, "logps/rejected": -221.33468627929688, "loss": 0.6614, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6302060484886169, "rewards/margins": 0.12371520698070526, "rewards/rejected": -0.7539212107658386, "step": 328 }, { "epoch": 0.43, "learning_rate": 4.9886287416977936e-05, "logits/chosen": -2.510671377182007, "logits/rejected": -2.5760793685913086, "logps/chosen": -169.99656677246094, "logps/rejected": -181.1496124267578, "loss": 0.6957, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4545098841190338, "rewards/margins": 0.014530147425830364, "rewards/rejected": -0.46904003620147705, "step": 329 }, { "epoch": 0.43, "learning_rate": 4.988529288740096e-05, "logits/chosen": -2.6438777446746826, "logits/rejected": -2.6174240112304688, "logps/chosen": -187.69232177734375, "logps/rejected": -198.16409301757812, "loss": 0.6247, "rewards/accuracies": 0.5625, "rewards/chosen": -0.37409377098083496, "rewards/margins": 0.1910334676504135, "rewards/rejected": -0.5651272535324097, "step": 330 }, { "epoch": 0.43, "learning_rate": 4.9884294037664245e-05, "logits/chosen": -2.456332206726074, "logits/rejected": -2.4327914714813232, "logps/chosen": -221.3645782470703, "logps/rejected": -220.4224853515625, "loss": 0.726, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9011685848236084, "rewards/margins": 0.07650195062160492, "rewards/rejected": -0.9776705503463745, "step": 331 }, { "epoch": 0.43, "learning_rate": 4.988329086794122e-05, "logits/chosen": -2.463491439819336, "logits/rejected": -2.49444842338562, "logps/chosen": -197.96144104003906, "logps/rejected": -214.404296875, "loss": 0.6547, "rewards/accuracies": 0.5625, "rewards/chosen": -0.42488348484039307, "rewards/margins": 0.11507010459899902, "rewards/rejected": -0.5399536490440369, "step": 332 }, { "epoch": 0.44, "learning_rate": 4.9882283378406015e-05, "logits/chosen": -2.5106682777404785, "logits/rejected": -2.5157594680786133, "logps/chosen": -167.09947204589844, "logps/rejected": -174.6180419921875, "loss": 0.6179, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4473687410354614, "rewards/margins": 0.22422267496585846, "rewards/rejected": -0.6715914011001587, "step": 333 }, { "epoch": 0.44, "learning_rate": 4.988127156923355e-05, "logits/chosen": -2.4497499465942383, "logits/rejected": -2.491909980773926, "logps/chosen": -167.1006622314453, "logps/rejected": -165.177978515625, "loss": 0.6855, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5522390604019165, "rewards/margins": 0.06803999096155167, "rewards/rejected": -0.6202789545059204, "step": 334 }, { "epoch": 0.44, "learning_rate": 4.9880255440599476e-05, "logits/chosen": -2.445422649383545, "logits/rejected": -2.4025139808654785, "logps/chosen": -199.83444213867188, "logps/rejected": -203.16355895996094, "loss": 0.7404, "rewards/accuracies": 0.375, "rewards/chosen": -0.658203125, "rewards/margins": -0.06064491346478462, "rewards/rejected": -0.5975580811500549, "step": 335 }, { "epoch": 0.44, "learning_rate": 4.987923499268018e-05, "logits/chosen": -2.540513753890991, "logits/rejected": -2.6426820755004883, "logps/chosen": -181.93406677246094, "logps/rejected": -203.6474609375, "loss": 0.6465, "rewards/accuracies": 0.625, "rewards/chosen": -0.45222824811935425, "rewards/margins": 0.17588049173355103, "rewards/rejected": -0.6281087398529053, "step": 336 }, { "epoch": 0.44, "learning_rate": 4.987821022565284e-05, "logits/chosen": -2.608978509902954, "logits/rejected": -2.632282257080078, "logps/chosen": -195.6852569580078, "logps/rejected": -216.0919952392578, "loss": 0.6137, "rewards/accuracies": 0.625, "rewards/chosen": -0.4888995289802551, "rewards/margins": 0.3496631681919098, "rewards/rejected": -0.8385627269744873, "step": 337 }, { "epoch": 0.44, "learning_rate": 4.987718113969534e-05, "logits/chosen": -2.637908458709717, "logits/rejected": -2.6849193572998047, "logps/chosen": -307.9834289550781, "logps/rejected": -321.95867919921875, "loss": 0.5909, "rewards/accuracies": 0.75, "rewards/chosen": -0.4099658131599426, "rewards/margins": 0.3652358055114746, "rewards/rejected": -0.7752016186714172, "step": 338 }, { "epoch": 0.44, "learning_rate": 4.9876147734986335e-05, "logits/chosen": -2.6805038452148438, "logits/rejected": -2.659087657928467, "logps/chosen": -235.20855712890625, "logps/rejected": -222.38107299804688, "loss": 0.7633, "rewards/accuracies": 0.5, "rewards/chosen": -0.7779167294502258, "rewards/margins": -0.11177889257669449, "rewards/rejected": -0.6661379337310791, "step": 339 }, { "epoch": 0.44, "learning_rate": 4.987511001170523e-05, "logits/chosen": -2.4979588985443115, "logits/rejected": -2.509103775024414, "logps/chosen": -205.0665283203125, "logps/rejected": -197.5661163330078, "loss": 0.584, "rewards/accuracies": 0.5625, "rewards/chosen": -0.603629469871521, "rewards/margins": 0.3201472759246826, "rewards/rejected": -0.9237766861915588, "step": 340 }, { "epoch": 0.45, "learning_rate": 4.987406797003218e-05, "logits/chosen": -2.6034300327301025, "logits/rejected": -2.606010913848877, "logps/chosen": -212.59959411621094, "logps/rejected": -266.6811218261719, "loss": 0.7099, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7883586287498474, "rewards/margins": 0.047954261302948, "rewards/rejected": -0.8363128900527954, "step": 341 }, { "epoch": 0.45, "learning_rate": 4.987302161014808e-05, "logits/chosen": -2.26737380027771, "logits/rejected": -2.2702267169952393, "logps/chosen": -149.69259643554688, "logps/rejected": -207.59146118164062, "loss": 0.718, "rewards/accuracies": 0.5, "rewards/chosen": -0.6731799244880676, "rewards/margins": 0.02036801353096962, "rewards/rejected": -0.6935478448867798, "step": 342 }, { "epoch": 0.45, "learning_rate": 4.9871970932234586e-05, "logits/chosen": -2.3482635021209717, "logits/rejected": -2.363096237182617, "logps/chosen": -196.5614013671875, "logps/rejected": -206.21414184570312, "loss": 0.6163, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5754539966583252, "rewards/margins": 0.2127307951450348, "rewards/rejected": -0.7881847620010376, "step": 343 }, { "epoch": 0.45, "learning_rate": 4.9870915936474095e-05, "logits/chosen": -2.7459514141082764, "logits/rejected": -2.7396438121795654, "logps/chosen": -206.6090087890625, "logps/rejected": -211.6893768310547, "loss": 0.6763, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7424849271774292, "rewards/margins": 0.08226263523101807, "rewards/rejected": -0.8247475624084473, "step": 344 }, { "epoch": 0.45, "learning_rate": 4.986985662304976e-05, "logits/chosen": -2.769526243209839, "logits/rejected": -2.807554244995117, "logps/chosen": -184.0075225830078, "logps/rejected": -225.2920379638672, "loss": 0.6217, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7790610194206238, "rewards/margins": 0.24687500298023224, "rewards/rejected": -1.0259360074996948, "step": 345 }, { "epoch": 0.45, "learning_rate": 4.9868792992145484e-05, "logits/chosen": -2.6607320308685303, "logits/rejected": -2.6989316940307617, "logps/chosen": -218.40530395507812, "logps/rejected": -227.24240112304688, "loss": 0.7374, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6406981945037842, "rewards/margins": 0.009773064404726028, "rewards/rejected": -0.6504712104797363, "step": 346 }, { "epoch": 0.45, "learning_rate": 4.9867725043945904e-05, "logits/chosen": -2.5469489097595215, "logits/rejected": -2.539520740509033, "logps/chosen": -222.92864990234375, "logps/rejected": -236.92977905273438, "loss": 0.6992, "rewards/accuracies": 0.625, "rewards/chosen": -0.640974223613739, "rewards/margins": 0.09771876782178879, "rewards/rejected": -0.7386929988861084, "step": 347 }, { "epoch": 0.46, "learning_rate": 4.9866652778636436e-05, "logits/chosen": -2.545572280883789, "logits/rejected": -2.541029453277588, "logps/chosen": -182.341552734375, "logps/rejected": -197.535888671875, "loss": 0.7149, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7396100163459778, "rewards/margins": 0.06020259112119675, "rewards/rejected": -0.7998126149177551, "step": 348 }, { "epoch": 0.46, "learning_rate": 4.986557619640322e-05, "logits/chosen": -2.479189157485962, "logits/rejected": -2.504956007003784, "logps/chosen": -184.10472106933594, "logps/rejected": -184.99070739746094, "loss": 0.5709, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6914107799530029, "rewards/margins": 0.33003267645835876, "rewards/rejected": -1.021443486213684, "step": 349 }, { "epoch": 0.46, "learning_rate": 4.986449529743314e-05, "logits/chosen": -2.7267708778381348, "logits/rejected": -2.711848735809326, "logps/chosen": -225.98228454589844, "logps/rejected": -204.31724548339844, "loss": 0.7177, "rewards/accuracies": 0.4375, "rewards/chosen": -0.8359745740890503, "rewards/margins": 0.011185385286808014, "rewards/rejected": -0.8471599221229553, "step": 350 }, { "epoch": 0.46, "learning_rate": 4.9863410081913875e-05, "logits/chosen": -2.691619396209717, "logits/rejected": -2.669311285018921, "logps/chosen": -157.73721313476562, "logps/rejected": -161.97061157226562, "loss": 0.7285, "rewards/accuracies": 0.375, "rewards/chosen": -0.5839475393295288, "rewards/margins": -0.027979355305433273, "rewards/rejected": -0.555968165397644, "step": 351 }, { "epoch": 0.46, "learning_rate": 4.98623205500338e-05, "logits/chosen": -2.6664159297943115, "logits/rejected": -2.653743267059326, "logps/chosen": -155.03836059570312, "logps/rejected": -141.4946746826172, "loss": 0.7569, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5298622250556946, "rewards/margins": -0.08286149799823761, "rewards/rejected": -0.44700077176094055, "step": 352 }, { "epoch": 0.46, "learning_rate": 4.986122670198205e-05, "logits/chosen": -2.588876485824585, "logits/rejected": -2.706108808517456, "logps/chosen": -170.23751831054688, "logps/rejected": -172.2996826171875, "loss": 0.8284, "rewards/accuracies": 0.375, "rewards/chosen": -0.9628427624702454, "rewards/margins": -0.18341386318206787, "rewards/rejected": -0.779429018497467, "step": 353 }, { "epoch": 0.46, "learning_rate": 4.9860128537948555e-05, "logits/chosen": -2.8683338165283203, "logits/rejected": -2.814868211746216, "logps/chosen": -194.464599609375, "logps/rejected": -197.70077514648438, "loss": 0.7405, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6296394467353821, "rewards/margins": -0.0025831498205661774, "rewards/rejected": -0.6270563006401062, "step": 354 }, { "epoch": 0.46, "learning_rate": 4.9859026058123925e-05, "logits/chosen": -2.978065013885498, "logits/rejected": -2.9368367195129395, "logps/chosen": -217.73974609375, "logps/rejected": -220.19183349609375, "loss": 0.7014, "rewards/accuracies": 0.5, "rewards/chosen": -0.6989356279373169, "rewards/margins": 0.007979679852724075, "rewards/rejected": -0.7069153189659119, "step": 355 }, { "epoch": 0.47, "learning_rate": 4.985791926269958e-05, "logits/chosen": -2.666344165802002, "logits/rejected": -2.8283472061157227, "logps/chosen": -163.43722534179688, "logps/rejected": -190.4180908203125, "loss": 0.6778, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5998314023017883, "rewards/margins": 0.11102181673049927, "rewards/rejected": -0.7108532190322876, "step": 356 }, { "epoch": 0.47, "learning_rate": 4.985680815186764e-05, "logits/chosen": -2.6322436332702637, "logits/rejected": -2.6792352199554443, "logps/chosen": -225.80972290039062, "logps/rejected": -221.8551025390625, "loss": 0.6464, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6963925957679749, "rewards/margins": 0.1484571099281311, "rewards/rejected": -0.844849705696106, "step": 357 }, { "epoch": 0.47, "learning_rate": 4.985569272582101e-05, "logits/chosen": -2.829981803894043, "logits/rejected": -2.8077688217163086, "logps/chosen": -245.94029235839844, "logps/rejected": -279.1697082519531, "loss": 0.6446, "rewards/accuracies": 0.625, "rewards/chosen": -0.6325417757034302, "rewards/margins": 0.1735651046037674, "rewards/rejected": -0.8061069250106812, "step": 358 }, { "epoch": 0.47, "learning_rate": 4.9854572984753334e-05, "logits/chosen": -2.8024842739105225, "logits/rejected": -2.750588893890381, "logps/chosen": -161.65240478515625, "logps/rejected": -201.25054931640625, "loss": 0.6502, "rewards/accuracies": 0.5625, "rewards/chosen": -0.46800869703292847, "rewards/margins": 0.12990553677082062, "rewards/rejected": -0.5979142189025879, "step": 359 }, { "epoch": 0.47, "learning_rate": 4.985344892885899e-05, "logits/chosen": -2.347231149673462, "logits/rejected": -2.4064972400665283, "logps/chosen": -158.72801208496094, "logps/rejected": -167.15805053710938, "loss": 0.7264, "rewards/accuracies": 0.375, "rewards/chosen": -0.7969176173210144, "rewards/margins": 0.016100093722343445, "rewards/rejected": -0.8130176663398743, "step": 360 }, { "epoch": 0.47, "learning_rate": 4.985232055833313e-05, "logits/chosen": -2.603712320327759, "logits/rejected": -2.6721928119659424, "logps/chosen": -211.3256378173828, "logps/rejected": -189.5877685546875, "loss": 0.7658, "rewards/accuracies": 0.5, "rewards/chosen": -0.6097235679626465, "rewards/margins": -0.10059280693531036, "rewards/rejected": -0.5091307163238525, "step": 361 }, { "epoch": 0.47, "learning_rate": 4.985118787337164e-05, "logits/chosen": -2.6673433780670166, "logits/rejected": -2.766993284225464, "logps/chosen": -162.41415405273438, "logps/rejected": -218.15077209472656, "loss": 0.6236, "rewards/accuracies": 0.5, "rewards/chosen": -0.5241696238517761, "rewards/margins": 0.28995370864868164, "rewards/rejected": -0.8141233921051025, "step": 362 }, { "epoch": 0.48, "learning_rate": 4.985005087417115e-05, "logits/chosen": -2.4291069507598877, "logits/rejected": -2.382821798324585, "logps/chosen": -221.48736572265625, "logps/rejected": -205.37940979003906, "loss": 0.6503, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4502621591091156, "rewards/margins": 0.2195020467042923, "rewards/rejected": -0.6697642207145691, "step": 363 }, { "epoch": 0.48, "learning_rate": 4.984890956092905e-05, "logits/chosen": -2.5559186935424805, "logits/rejected": -2.496387481689453, "logps/chosen": -253.63047790527344, "logps/rejected": -244.56698608398438, "loss": 0.9484, "rewards/accuracies": 0.375, "rewards/chosen": -0.9435632228851318, "rewards/margins": -0.268410325050354, "rewards/rejected": -0.6751528978347778, "step": 364 }, { "epoch": 0.48, "learning_rate": 4.984776393384348e-05, "logits/chosen": -2.407367706298828, "logits/rejected": -2.3134310245513916, "logps/chosen": -173.61795043945312, "logps/rejected": -157.41207885742188, "loss": 0.7949, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5948637127876282, "rewards/margins": -0.14962351322174072, "rewards/rejected": -0.44524019956588745, "step": 365 }, { "epoch": 0.48, "learning_rate": 4.984661399311332e-05, "logits/chosen": -2.6404154300689697, "logits/rejected": -2.7036595344543457, "logps/chosen": -155.3403778076172, "logps/rejected": -141.01382446289062, "loss": 0.7475, "rewards/accuracies": 0.5, "rewards/chosen": -0.4795231521129608, "rewards/margins": -0.06228278949856758, "rewards/rejected": -0.4172403812408447, "step": 366 }, { "epoch": 0.48, "learning_rate": 4.9845459738938204e-05, "logits/chosen": -2.5403473377227783, "logits/rejected": -2.5674545764923096, "logps/chosen": -154.85910034179688, "logps/rejected": -143.22906494140625, "loss": 0.6505, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4790140390396118, "rewards/margins": 0.1586410105228424, "rewards/rejected": -0.6376550197601318, "step": 367 }, { "epoch": 0.48, "learning_rate": 4.9844301171518516e-05, "logits/chosen": -2.6081302165985107, "logits/rejected": -2.641979217529297, "logps/chosen": -185.13006591796875, "logps/rejected": -198.2803955078125, "loss": 0.6726, "rewards/accuracies": 0.5, "rewards/chosen": -0.3776107430458069, "rewards/margins": 0.08190791308879852, "rewards/rejected": -0.4595187306404114, "step": 368 }, { "epoch": 0.48, "learning_rate": 4.984313829105538e-05, "logits/chosen": -2.6662757396698, "logits/rejected": -2.7398993968963623, "logps/chosen": -201.8738555908203, "logps/rejected": -195.76390075683594, "loss": 0.5956, "rewards/accuracies": 0.625, "rewards/chosen": -0.38797736167907715, "rewards/margins": 0.2769092321395874, "rewards/rejected": -0.6648865342140198, "step": 369 }, { "epoch": 0.48, "learning_rate": 4.984197109775068e-05, "logits/chosen": -2.4933226108551025, "logits/rejected": -2.484591245651245, "logps/chosen": -202.7592315673828, "logps/rejected": -199.02105712890625, "loss": 0.6536, "rewards/accuracies": 0.4375, "rewards/chosen": -0.35886555910110474, "rewards/margins": 0.1531214416027069, "rewards/rejected": -0.511987030506134, "step": 370 }, { "epoch": 0.49, "learning_rate": 4.984079959180705e-05, "logits/chosen": -2.3560287952423096, "logits/rejected": -2.385936975479126, "logps/chosen": -198.06044006347656, "logps/rejected": -203.5518798828125, "loss": 0.7203, "rewards/accuracies": 0.375, "rewards/chosen": -0.35552191734313965, "rewards/margins": 0.03736201673746109, "rewards/rejected": -0.39288395643234253, "step": 371 }, { "epoch": 0.49, "learning_rate": 4.983962377342786e-05, "logits/chosen": -2.456765651702881, "logits/rejected": -2.522660970687866, "logps/chosen": -236.66744995117188, "logps/rejected": -241.58963012695312, "loss": 0.6049, "rewards/accuracies": 0.6875, "rewards/chosen": -0.551971435546875, "rewards/margins": 0.30021384358406067, "rewards/rejected": -0.8521853089332581, "step": 372 }, { "epoch": 0.49, "learning_rate": 4.983844364281723e-05, "logits/chosen": -2.5745279788970947, "logits/rejected": -2.5961902141571045, "logps/chosen": -186.73411560058594, "logps/rejected": -194.74974060058594, "loss": 0.6354, "rewards/accuracies": 0.5625, "rewards/chosen": -0.312610000371933, "rewards/margins": 0.14659735560417175, "rewards/rejected": -0.45920735597610474, "step": 373 }, { "epoch": 0.49, "learning_rate": 4.983725920018004e-05, "logits/chosen": -2.3747611045837402, "logits/rejected": -2.4170022010803223, "logps/chosen": -179.91162109375, "logps/rejected": -170.89920043945312, "loss": 0.6529, "rewards/accuracies": 0.5625, "rewards/chosen": -0.43839430809020996, "rewards/margins": 0.16247621178627014, "rewards/rejected": -0.6008704900741577, "step": 374 }, { "epoch": 0.49, "learning_rate": 4.9836070445721924e-05, "logits/chosen": -2.573714017868042, "logits/rejected": -2.567589282989502, "logps/chosen": -204.4001922607422, "logps/rejected": -197.39869689941406, "loss": 0.751, "rewards/accuracies": 0.375, "rewards/chosen": -0.350676029920578, "rewards/margins": -0.06606191396713257, "rewards/rejected": -0.28461411595344543, "step": 375 }, { "epoch": 0.49, "learning_rate": 4.983487737964924e-05, "logits/chosen": -2.398193597793579, "logits/rejected": -2.4593253135681152, "logps/chosen": -199.0366973876953, "logps/rejected": -199.51275634765625, "loss": 0.6772, "rewards/accuracies": 0.625, "rewards/chosen": -0.397927463054657, "rewards/margins": 0.08031069487333298, "rewards/rejected": -0.47823822498321533, "step": 376 }, { "epoch": 0.49, "learning_rate": 4.9833680002169105e-05, "logits/chosen": -2.5120034217834473, "logits/rejected": -2.553926706314087, "logps/chosen": -226.15040588378906, "logps/rejected": -249.55255126953125, "loss": 0.6996, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3695937395095825, "rewards/margins": 0.0394027940928936, "rewards/rejected": -0.4089965522289276, "step": 377 }, { "epoch": 0.49, "learning_rate": 4.983247831348939e-05, "logits/chosen": -2.289033889770508, "logits/rejected": -2.2501816749572754, "logps/chosen": -201.75392150878906, "logps/rejected": -217.8618621826172, "loss": 0.6645, "rewards/accuracies": 0.625, "rewards/chosen": -0.28367459774017334, "rewards/margins": 0.1500779092311859, "rewards/rejected": -0.43375247716903687, "step": 378 }, { "epoch": 0.5, "learning_rate": 4.9831272313818716e-05, "logits/chosen": -2.4449117183685303, "logits/rejected": -2.522752046585083, "logps/chosen": -201.63587951660156, "logps/rejected": -215.33270263671875, "loss": 0.6512, "rewards/accuracies": 0.625, "rewards/chosen": -0.23286518454551697, "rewards/margins": 0.127617746591568, "rewards/rejected": -0.36048293113708496, "step": 379 }, { "epoch": 0.5, "learning_rate": 4.983006200336645e-05, "logits/chosen": -2.4317235946655273, "logits/rejected": -2.3696205615997314, "logps/chosen": -243.03509521484375, "logps/rejected": -186.61427307128906, "loss": 0.7762, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5067302584648132, "rewards/margins": -0.11989758908748627, "rewards/rejected": -0.3868326246738434, "step": 380 }, { "epoch": 0.5, "learning_rate": 4.98288473823427e-05, "logits/chosen": -2.4131155014038086, "logits/rejected": -2.498098611831665, "logps/chosen": -190.73397827148438, "logps/rejected": -213.9364471435547, "loss": 0.6336, "rewards/accuracies": 0.625, "rewards/chosen": -0.24977169930934906, "rewards/margins": 0.1747249811887741, "rewards/rejected": -0.42449668049812317, "step": 381 }, { "epoch": 0.5, "learning_rate": 4.982762845095833e-05, "logits/chosen": -2.5675809383392334, "logits/rejected": -2.6289899349212646, "logps/chosen": -233.9481658935547, "logps/rejected": -245.3311767578125, "loss": 0.6879, "rewards/accuracies": 0.5, "rewards/chosen": -0.3384949564933777, "rewards/margins": 0.07099074125289917, "rewards/rejected": -0.40948566794395447, "step": 382 }, { "epoch": 0.5, "learning_rate": 4.982640520942494e-05, "logits/chosen": -2.3582189083099365, "logits/rejected": -2.405932903289795, "logps/chosen": -157.98828125, "logps/rejected": -179.5477294921875, "loss": 0.6256, "rewards/accuracies": 0.5625, "rewards/chosen": -0.30075955390930176, "rewards/margins": 0.2314756065607071, "rewards/rejected": -0.5322352051734924, "step": 383 }, { "epoch": 0.5, "learning_rate": 4.9825177657954914e-05, "logits/chosen": -2.6346352100372314, "logits/rejected": -2.7042407989501953, "logps/chosen": -174.1151123046875, "logps/rejected": -203.845458984375, "loss": 0.6727, "rewards/accuracies": 0.625, "rewards/chosen": -0.3591979742050171, "rewards/margins": 0.10004588216543198, "rewards/rejected": -0.45924389362335205, "step": 384 }, { "epoch": 0.5, "learning_rate": 4.982394579676133e-05, "logits/chosen": -2.3420321941375732, "logits/rejected": -2.3010876178741455, "logps/chosen": -186.43467712402344, "logps/rejected": -183.117431640625, "loss": 0.6836, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3466501235961914, "rewards/margins": 0.06071464717388153, "rewards/rejected": -0.40736478567123413, "step": 385 }, { "epoch": 0.51, "learning_rate": 4.982270962605806e-05, "logits/chosen": -2.4120569229125977, "logits/rejected": -2.449502944946289, "logps/chosen": -168.2112579345703, "logps/rejected": -227.885986328125, "loss": 0.5666, "rewards/accuracies": 0.625, "rewards/chosen": -0.22820252180099487, "rewards/margins": 0.35979852080345154, "rewards/rejected": -0.588001012802124, "step": 386 }, { "epoch": 0.51, "learning_rate": 4.9821469146059704e-05, "logits/chosen": -2.3874173164367676, "logits/rejected": -2.4257519245147705, "logps/chosen": -158.00424194335938, "logps/rejected": -176.4921417236328, "loss": 0.6581, "rewards/accuracies": 0.625, "rewards/chosen": -0.28017669916152954, "rewards/margins": 0.168703094124794, "rewards/rejected": -0.44887974858283997, "step": 387 }, { "epoch": 0.51, "learning_rate": 4.982022435698161e-05, "logits/chosen": -2.444685697555542, "logits/rejected": -2.653350591659546, "logps/chosen": -207.88034057617188, "logps/rejected": -231.86582946777344, "loss": 0.7048, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5877898931503296, "rewards/margins": 0.10215066373348236, "rewards/rejected": -0.6899405121803284, "step": 388 }, { "epoch": 0.51, "learning_rate": 4.981897525903988e-05, "logits/chosen": -2.5300025939941406, "logits/rejected": -2.4865880012512207, "logps/chosen": -155.8424835205078, "logps/rejected": -152.6171875, "loss": 0.8172, "rewards/accuracies": 0.3125, "rewards/chosen": -0.5343464612960815, "rewards/margins": -0.17394664883613586, "rewards/rejected": -0.3603998124599457, "step": 389 }, { "epoch": 0.51, "learning_rate": 4.981772185245135e-05, "logits/chosen": -2.61796236038208, "logits/rejected": -2.662163734436035, "logps/chosen": -198.32403564453125, "logps/rejected": -198.2169647216797, "loss": 0.751, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4620332419872284, "rewards/margins": -0.09868823736906052, "rewards/rejected": -0.3633449673652649, "step": 390 }, { "epoch": 0.51, "learning_rate": 4.981646413743363e-05, "logits/chosen": -2.386425733566284, "logits/rejected": -2.452526092529297, "logps/chosen": -187.49838256835938, "logps/rejected": -181.8330535888672, "loss": 0.606, "rewards/accuracies": 0.625, "rewards/chosen": -0.27474457025527954, "rewards/margins": 0.2754724621772766, "rewards/rejected": -0.5502170324325562, "step": 391 }, { "epoch": 0.51, "learning_rate": 4.981520211420506e-05, "logits/chosen": -2.4169254302978516, "logits/rejected": -2.4883086681365967, "logps/chosen": -175.3878173828125, "logps/rejected": -210.00119018554688, "loss": 0.7642, "rewards/accuracies": 0.4375, "rewards/chosen": -0.532840371131897, "rewards/margins": -0.09793217480182648, "rewards/rejected": -0.4349081814289093, "step": 392 }, { "epoch": 0.51, "learning_rate": 4.9813935782984724e-05, "logits/chosen": -2.528134346008301, "logits/rejected": -2.491497039794922, "logps/chosen": -239.28878784179688, "logps/rejected": -241.08883666992188, "loss": 0.7798, "rewards/accuracies": 0.5, "rewards/chosen": -0.6152268648147583, "rewards/margins": -0.10364706814289093, "rewards/rejected": -0.5115798711776733, "step": 393 }, { "epoch": 0.52, "learning_rate": 4.9812665143992466e-05, "logits/chosen": -2.5107626914978027, "logits/rejected": -2.591826915740967, "logps/chosen": -146.5967559814453, "logps/rejected": -154.36585998535156, "loss": 0.6751, "rewards/accuracies": 0.5, "rewards/chosen": -0.3082922697067261, "rewards/margins": 0.11212008446455002, "rewards/rejected": -0.4204123616218567, "step": 394 }, { "epoch": 0.52, "learning_rate": 4.981139019744887e-05, "logits/chosen": -2.5607235431671143, "logits/rejected": -2.562152862548828, "logps/chosen": -185.95343017578125, "logps/rejected": -194.59483337402344, "loss": 0.6469, "rewards/accuracies": 0.5, "rewards/chosen": -0.2080746442079544, "rewards/margins": 0.12636929750442505, "rewards/rejected": -0.33444392681121826, "step": 395 }, { "epoch": 0.52, "learning_rate": 4.981011094357527e-05, "logits/chosen": -2.4996774196624756, "logits/rejected": -2.5331335067749023, "logps/chosen": -215.7071075439453, "logps/rejected": -231.0633087158203, "loss": 0.7228, "rewards/accuracies": 0.5, "rewards/chosen": -0.4276806116104126, "rewards/margins": 0.03171852231025696, "rewards/rejected": -0.45939913392066956, "step": 396 }, { "epoch": 0.52, "learning_rate": 4.980882738259376e-05, "logits/chosen": -2.3114399909973145, "logits/rejected": -2.3782169818878174, "logps/chosen": -169.04246520996094, "logps/rejected": -188.5185089111328, "loss": 0.7661, "rewards/accuracies": 0.3125, "rewards/chosen": -0.31136631965637207, "rewards/margins": -0.09910126030445099, "rewards/rejected": -0.21226505935192108, "step": 397 }, { "epoch": 0.52, "learning_rate": 4.980753951472715e-05, "logits/chosen": -2.466881275177002, "logits/rejected": -2.5022642612457275, "logps/chosen": -168.63385009765625, "logps/rejected": -192.87521362304688, "loss": 0.6487, "rewards/accuracies": 0.5, "rewards/chosen": -0.3558157682418823, "rewards/margins": 0.13948221504688263, "rewards/rejected": -0.49529796838760376, "step": 398 }, { "epoch": 0.52, "learning_rate": 4.980624734019903e-05, "logits/chosen": -2.5671474933624268, "logits/rejected": -2.6410720348358154, "logps/chosen": -183.6251220703125, "logps/rejected": -214.81272888183594, "loss": 0.7078, "rewards/accuracies": 0.625, "rewards/chosen": -0.4311671555042267, "rewards/margins": 0.0398234948515892, "rewards/rejected": -0.4709906280040741, "step": 399 }, { "epoch": 0.52, "learning_rate": 4.980495085923372e-05, "logits/chosen": -2.602818012237549, "logits/rejected": -2.5820987224578857, "logps/chosen": -167.52012634277344, "logps/rejected": -180.94418334960938, "loss": 0.7179, "rewards/accuracies": 0.5625, "rewards/chosen": -0.41106754541397095, "rewards/margins": 0.013390736654400826, "rewards/rejected": -0.42445826530456543, "step": 400 }, { "epoch": 0.52, "learning_rate": 4.980365007205631e-05, "logits/chosen": -2.4487011432647705, "logits/rejected": -2.5343518257141113, "logps/chosen": -201.9692840576172, "logps/rejected": -244.95530700683594, "loss": 0.5837, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3986441195011139, "rewards/margins": 0.3018190264701843, "rewards/rejected": -0.700463056564331, "step": 401 }, { "epoch": 0.53, "learning_rate": 4.980234497889259e-05, "logits/chosen": -1.9015774726867676, "logits/rejected": -1.9911203384399414, "logps/chosen": -165.83837890625, "logps/rejected": -178.608154296875, "loss": 0.7531, "rewards/accuracies": 0.4375, "rewards/chosen": -0.47146371006965637, "rewards/margins": -0.03502827137708664, "rewards/rejected": -0.43643543124198914, "step": 402 }, { "epoch": 0.53, "learning_rate": 4.980103557996915e-05, "logits/chosen": -2.609720468521118, "logits/rejected": -2.631443738937378, "logps/chosen": -235.0199737548828, "logps/rejected": -234.86245727539062, "loss": 0.6376, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2306605726480484, "rewards/margins": 0.16340221464633942, "rewards/rejected": -0.3940627872943878, "step": 403 }, { "epoch": 0.53, "learning_rate": 4.9799721875513306e-05, "logits/chosen": -2.283406972885132, "logits/rejected": -2.386524200439453, "logps/chosen": -226.02059936523438, "logps/rejected": -220.51963806152344, "loss": 0.7208, "rewards/accuracies": 0.5, "rewards/chosen": -0.5439196228981018, "rewards/margins": 0.06111856549978256, "rewards/rejected": -0.6050382256507874, "step": 404 }, { "epoch": 0.53, "learning_rate": 4.979840386575311e-05, "logits/chosen": -2.2238452434539795, "logits/rejected": -2.3374826908111572, "logps/chosen": -169.82667541503906, "logps/rejected": -197.92726135253906, "loss": 0.7292, "rewards/accuracies": 0.4375, "rewards/chosen": -0.46263885498046875, "rewards/margins": 0.007239609956741333, "rewards/rejected": -0.46987849473953247, "step": 405 }, { "epoch": 0.53, "learning_rate": 4.979708155091737e-05, "logits/chosen": -2.381389856338501, "logits/rejected": -2.358883857727051, "logps/chosen": -153.38771057128906, "logps/rejected": -147.0846405029297, "loss": 0.6555, "rewards/accuracies": 0.75, "rewards/chosen": -0.2459157109260559, "rewards/margins": 0.1274775117635727, "rewards/rejected": -0.3733932375907898, "step": 406 }, { "epoch": 0.53, "learning_rate": 4.979575493123566e-05, "logits/chosen": -2.4942612648010254, "logits/rejected": -2.4526309967041016, "logps/chosen": -222.35780334472656, "logps/rejected": -205.19630432128906, "loss": 0.7278, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4296140670776367, "rewards/margins": -0.018014922738075256, "rewards/rejected": -0.41159915924072266, "step": 407 }, { "epoch": 0.53, "learning_rate": 4.979442400693827e-05, "logits/chosen": -2.4821577072143555, "logits/rejected": -2.4502267837524414, "logps/chosen": -157.32427978515625, "logps/rejected": -167.9297332763672, "loss": 0.7119, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4199235439300537, "rewards/margins": 0.024679476395249367, "rewards/rejected": -0.4446030259132385, "step": 408 }, { "epoch": 0.54, "learning_rate": 4.979308877825626e-05, "logits/chosen": -2.1102793216705322, "logits/rejected": -2.100888729095459, "logps/chosen": -235.02105712890625, "logps/rejected": -232.0230712890625, "loss": 0.747, "rewards/accuracies": 0.5, "rewards/chosen": -0.4274646043777466, "rewards/margins": 0.0070232609286904335, "rewards/rejected": -0.43448784947395325, "step": 409 }, { "epoch": 0.54, "learning_rate": 4.9791749245421434e-05, "logits/chosen": -2.4029147624969482, "logits/rejected": -2.436694622039795, "logps/chosen": -160.94296264648438, "logps/rejected": -219.91183471679688, "loss": 0.7659, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6857771873474121, "rewards/margins": -0.07571852207183838, "rewards/rejected": -0.6100587248802185, "step": 410 }, { "epoch": 0.54, "learning_rate": 4.979040540866632e-05, "logits/chosen": -2.423585891723633, "logits/rejected": -2.583218574523926, "logps/chosen": -152.8751220703125, "logps/rejected": -195.98130798339844, "loss": 0.6592, "rewards/accuracies": 0.5625, "rewards/chosen": -0.29784753918647766, "rewards/margins": 0.13703800737857819, "rewards/rejected": -0.43488556146621704, "step": 411 }, { "epoch": 0.54, "learning_rate": 4.9789057268224234e-05, "logits/chosen": -2.0688211917877197, "logits/rejected": -2.1250336170196533, "logps/chosen": -192.48826599121094, "logps/rejected": -219.7627716064453, "loss": 0.7074, "rewards/accuracies": 0.375, "rewards/chosen": -0.16141924262046814, "rewards/margins": 0.035948604345321655, "rewards/rejected": -0.197367861866951, "step": 412 }, { "epoch": 0.54, "learning_rate": 4.978770482432921e-05, "logits/chosen": -2.4140584468841553, "logits/rejected": -2.394195079803467, "logps/chosen": -177.2361297607422, "logps/rejected": -201.13475036621094, "loss": 0.7109, "rewards/accuracies": 0.5, "rewards/chosen": -0.41423746943473816, "rewards/margins": 0.01436915248632431, "rewards/rejected": -0.42860662937164307, "step": 413 }, { "epoch": 0.54, "learning_rate": 4.9786348077216024e-05, "logits/chosen": -2.399442672729492, "logits/rejected": -2.4223692417144775, "logps/chosen": -184.38839721679688, "logps/rejected": -176.9226531982422, "loss": 0.6572, "rewards/accuracies": 0.5, "rewards/chosen": -0.2739775776863098, "rewards/margins": 0.14736850559711456, "rewards/rejected": -0.4213460683822632, "step": 414 }, { "epoch": 0.54, "learning_rate": 4.9784987027120236e-05, "logits/chosen": -2.331202745437622, "logits/rejected": -2.3324198722839355, "logps/chosen": -199.08740234375, "logps/rejected": -171.50718688964844, "loss": 0.7564, "rewards/accuracies": 0.5, "rewards/chosen": -0.4433014988899231, "rewards/margins": -0.06983453035354614, "rewards/rejected": -0.37346696853637695, "step": 415 }, { "epoch": 0.54, "learning_rate": 4.9783621674278104e-05, "logits/chosen": -2.402174949645996, "logits/rejected": -2.5383033752441406, "logps/chosen": -174.37954711914062, "logps/rejected": -165.9345703125, "loss": 0.6405, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5206565856933594, "rewards/margins": 0.13473454117774963, "rewards/rejected": -0.6553912162780762, "step": 416 }, { "epoch": 0.55, "learning_rate": 4.978225201892667e-05, "logits/chosen": -2.4111886024475098, "logits/rejected": -2.3553833961486816, "logps/chosen": -220.5755615234375, "logps/rejected": -195.2405242919922, "loss": 0.7531, "rewards/accuracies": 0.3125, "rewards/chosen": -0.4778462052345276, "rewards/margins": -0.041694507002830505, "rewards/rejected": -0.4361516833305359, "step": 417 }, { "epoch": 0.55, "learning_rate": 4.97808780613037e-05, "logits/chosen": -2.307465076446533, "logits/rejected": -2.3099443912506104, "logps/chosen": -171.25372314453125, "logps/rejected": -200.85305786132812, "loss": 0.6326, "rewards/accuracies": 0.5, "rewards/chosen": -0.445933997631073, "rewards/margins": 0.2173961102962494, "rewards/rejected": -0.663330078125, "step": 418 }, { "epoch": 0.55, "learning_rate": 4.977949980164773e-05, "logits/chosen": -2.3666648864746094, "logits/rejected": -2.433722496032715, "logps/chosen": -217.6551513671875, "logps/rejected": -234.6415557861328, "loss": 0.5846, "rewards/accuracies": 0.625, "rewards/chosen": -0.3358496427536011, "rewards/margins": 0.309315949678421, "rewards/rejected": -0.6451655626296997, "step": 419 }, { "epoch": 0.55, "learning_rate": 4.977811724019802e-05, "logits/chosen": -2.336599111557007, "logits/rejected": -2.409275770187378, "logps/chosen": -191.2588653564453, "logps/rejected": -208.126953125, "loss": 0.6455, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4646199941635132, "rewards/margins": 0.13836784660816193, "rewards/rejected": -0.6029877662658691, "step": 420 }, { "epoch": 0.55, "learning_rate": 4.9776730377194596e-05, "logits/chosen": -2.1639509201049805, "logits/rejected": -2.2069835662841797, "logps/chosen": -203.13632202148438, "logps/rejected": -169.3188018798828, "loss": 0.7455, "rewards/accuracies": 0.625, "rewards/chosen": -0.23145976662635803, "rewards/margins": 0.013234168291091919, "rewards/rejected": -0.24469394981861115, "step": 421 }, { "epoch": 0.55, "learning_rate": 4.9775339212878215e-05, "logits/chosen": -2.1670339107513428, "logits/rejected": -2.1488430500030518, "logps/chosen": -166.50318908691406, "logps/rejected": -169.4437713623047, "loss": 0.698, "rewards/accuracies": 0.4375, "rewards/chosen": -0.2539255619049072, "rewards/margins": 0.030521919950842857, "rewards/rejected": -0.28444746136665344, "step": 422 }, { "epoch": 0.55, "learning_rate": 4.977394374749039e-05, "logits/chosen": -2.336040735244751, "logits/rejected": -2.3590757846832275, "logps/chosen": -150.48399353027344, "logps/rejected": -154.54652404785156, "loss": 0.667, "rewards/accuracies": 0.625, "rewards/chosen": -0.30479297041893005, "rewards/margins": 0.10228873789310455, "rewards/rejected": -0.4070816934108734, "step": 423 }, { "epoch": 0.55, "learning_rate": 4.9772543981273374e-05, "logits/chosen": -2.25268292427063, "logits/rejected": -2.267082691192627, "logps/chosen": -246.07342529296875, "logps/rejected": -230.28424072265625, "loss": 0.7204, "rewards/accuracies": 0.375, "rewards/chosen": -0.42524397373199463, "rewards/margins": -0.008991291746497154, "rewards/rejected": -0.4162527322769165, "step": 424 }, { "epoch": 0.56, "learning_rate": 4.977113991447017e-05, "logits/chosen": -2.35402250289917, "logits/rejected": -2.3599345684051514, "logps/chosen": -163.17568969726562, "logps/rejected": -175.94029235839844, "loss": 0.7346, "rewards/accuracies": 0.375, "rewards/chosen": -0.4037836790084839, "rewards/margins": 0.02548382803797722, "rewards/rejected": -0.4292675256729126, "step": 425 }, { "epoch": 0.56, "learning_rate": 4.976973154732454e-05, "logits/chosen": -2.3611769676208496, "logits/rejected": -2.3299450874328613, "logps/chosen": -205.6555633544922, "logps/rejected": -213.8665771484375, "loss": 0.7124, "rewards/accuracies": 0.4375, "rewards/chosen": -0.22228708863258362, "rewards/margins": 0.031078480184078217, "rewards/rejected": -0.25336551666259766, "step": 426 }, { "epoch": 0.56, "learning_rate": 4.976831888008096e-05, "logits/chosen": -2.316866636276245, "logits/rejected": -2.299907684326172, "logps/chosen": -168.08087158203125, "logps/rejected": -160.0249786376953, "loss": 0.6877, "rewards/accuracies": 0.5625, "rewards/chosen": -0.13129538297653198, "rewards/margins": 0.04759233817458153, "rewards/rejected": -0.17888770997524261, "step": 427 }, { "epoch": 0.56, "learning_rate": 4.976690191298469e-05, "logits/chosen": -2.1294949054718018, "logits/rejected": -2.1100010871887207, "logps/chosen": -176.1876678466797, "logps/rejected": -175.78619384765625, "loss": 0.7597, "rewards/accuracies": 0.3125, "rewards/chosen": -0.16827335953712463, "rewards/margins": -0.110628642141819, "rewards/rejected": -0.05764475464820862, "step": 428 }, { "epoch": 0.56, "learning_rate": 4.9765480646281716e-05, "logits/chosen": -2.120499849319458, "logits/rejected": -2.2186028957366943, "logps/chosen": -178.4534912109375, "logps/rejected": -200.6439971923828, "loss": 0.6143, "rewards/accuracies": 0.625, "rewards/chosen": -0.11943019181489944, "rewards/margins": 0.28324708342552185, "rewards/rejected": -0.4026772677898407, "step": 429 }, { "epoch": 0.56, "learning_rate": 4.976405508021877e-05, "logits/chosen": -2.430680274963379, "logits/rejected": -2.5126051902770996, "logps/chosen": -201.0321502685547, "logps/rejected": -231.42811584472656, "loss": 0.7632, "rewards/accuracies": 0.375, "rewards/chosen": -0.47548148036003113, "rewards/margins": -0.046219632029533386, "rewards/rejected": -0.42926183342933655, "step": 430 }, { "epoch": 0.56, "learning_rate": 4.9762625215043334e-05, "logits/chosen": -2.396533966064453, "logits/rejected": -2.4125595092773438, "logps/chosen": -236.99049377441406, "logps/rejected": -253.93577575683594, "loss": 0.7165, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4821574091911316, "rewards/margins": -0.007029315456748009, "rewards/rejected": -0.4751281142234802, "step": 431 }, { "epoch": 0.57, "learning_rate": 4.9761191051003644e-05, "logits/chosen": -2.1031394004821777, "logits/rejected": -2.143960475921631, "logps/chosen": -168.68551635742188, "logps/rejected": -209.39537048339844, "loss": 0.6219, "rewards/accuracies": 0.625, "rewards/chosen": -0.1691710501909256, "rewards/margins": 0.2633129060268402, "rewards/rejected": -0.4324839413166046, "step": 432 }, { "epoch": 0.57, "learning_rate": 4.975975258834867e-05, "logits/chosen": -2.451735496520996, "logits/rejected": -2.410836696624756, "logps/chosen": -219.5807342529297, "logps/rejected": -238.2914276123047, "loss": 0.6939, "rewards/accuracies": 0.5, "rewards/chosen": -0.26019036769866943, "rewards/margins": 0.09737718105316162, "rewards/rejected": -0.35756757855415344, "step": 433 }, { "epoch": 0.57, "learning_rate": 4.9758309827328134e-05, "logits/chosen": -2.3229422569274902, "logits/rejected": -2.3514108657836914, "logps/chosen": -181.05836486816406, "logps/rejected": -211.49786376953125, "loss": 0.7734, "rewards/accuracies": 0.5, "rewards/chosen": -0.2722405195236206, "rewards/margins": -0.07979361712932587, "rewards/rejected": -0.19244688749313354, "step": 434 }, { "epoch": 0.57, "learning_rate": 4.9756862768192504e-05, "logits/chosen": -2.3667490482330322, "logits/rejected": -2.328806161880493, "logps/chosen": -201.50511169433594, "logps/rejected": -179.98643493652344, "loss": 0.7353, "rewards/accuracies": 0.3125, "rewards/chosen": -0.3051126301288605, "rewards/margins": -0.007734470069408417, "rewards/rejected": -0.29737818241119385, "step": 435 }, { "epoch": 0.57, "learning_rate": 4.9755411411192996e-05, "logits/chosen": -2.4068377017974854, "logits/rejected": -2.532482862472534, "logps/chosen": -186.65342712402344, "logps/rejected": -205.83154296875, "loss": 0.7144, "rewards/accuracies": 0.4375, "rewards/chosen": -0.17129601538181305, "rewards/margins": -0.002290443517267704, "rewards/rejected": -0.1690055876970291, "step": 436 }, { "epoch": 0.57, "learning_rate": 4.975395575658156e-05, "logits/chosen": -2.382935047149658, "logits/rejected": -2.4527859687805176, "logps/chosen": -200.10215759277344, "logps/rejected": -207.49302673339844, "loss": 0.7113, "rewards/accuracies": 0.5625, "rewards/chosen": -0.33162015676498413, "rewards/margins": 0.029873518273234367, "rewards/rejected": -0.36149367690086365, "step": 437 }, { "epoch": 0.57, "learning_rate": 4.9752495804610916e-05, "logits/chosen": -2.2459821701049805, "logits/rejected": -2.2536497116088867, "logps/chosen": -249.44638061523438, "logps/rejected": -216.59483337402344, "loss": 0.8425, "rewards/accuracies": 0.3125, "rewards/chosen": -0.3279973268508911, "rewards/margins": -0.22765566408634186, "rewards/rejected": -0.10034167766571045, "step": 438 }, { "epoch": 0.57, "learning_rate": 4.9751031555534504e-05, "logits/chosen": -2.416114330291748, "logits/rejected": -2.5028560161590576, "logps/chosen": -186.53961181640625, "logps/rejected": -160.81617736816406, "loss": 0.674, "rewards/accuracies": 0.4375, "rewards/chosen": -0.1654774248600006, "rewards/margins": 0.07480952888727188, "rewards/rejected": -0.2402869611978531, "step": 439 }, { "epoch": 0.58, "learning_rate": 4.9749563009606534e-05, "logits/chosen": -2.4489054679870605, "logits/rejected": -2.527320623397827, "logps/chosen": -202.46507263183594, "logps/rejected": -185.72853088378906, "loss": 0.6135, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22387953102588654, "rewards/margins": 0.19476865231990814, "rewards/rejected": -0.4186481833457947, "step": 440 }, { "epoch": 0.58, "learning_rate": 4.9748090167081936e-05, "logits/chosen": -2.147719144821167, "logits/rejected": -2.249868154525757, "logps/chosen": -152.69786071777344, "logps/rejected": -190.48863220214844, "loss": 0.6568, "rewards/accuracies": 0.5, "rewards/chosen": -0.29554668068885803, "rewards/margins": 0.11645788699388504, "rewards/rejected": -0.4120045602321625, "step": 441 }, { "epoch": 0.58, "learning_rate": 4.974661302821641e-05, "logits/chosen": -2.142338275909424, "logits/rejected": -2.2330148220062256, "logps/chosen": -157.3348388671875, "logps/rejected": -194.91302490234375, "loss": 0.7378, "rewards/accuracies": 0.4375, "rewards/chosen": -0.13629794120788574, "rewards/margins": -0.030040191486477852, "rewards/rejected": -0.10625775158405304, "step": 442 }, { "epoch": 0.58, "learning_rate": 4.974513159326638e-05, "logits/chosen": -2.551227569580078, "logits/rejected": -2.5135083198547363, "logps/chosen": -201.37612915039062, "logps/rejected": -167.43287658691406, "loss": 0.6417, "rewards/accuracies": 0.625, "rewards/chosen": -0.25598540902137756, "rewards/margins": 0.18629388511180878, "rewards/rejected": -0.4422793388366699, "step": 443 }, { "epoch": 0.58, "learning_rate": 4.974364586248904e-05, "logits/chosen": -2.174506425857544, "logits/rejected": -2.1649577617645264, "logps/chosen": -181.4037628173828, "logps/rejected": -171.9210968017578, "loss": 0.8572, "rewards/accuracies": 0.5, "rewards/chosen": -0.3619764745235443, "rewards/margins": -0.19866357743740082, "rewards/rejected": -0.1633128821849823, "step": 444 }, { "epoch": 0.58, "learning_rate": 4.974215583614232e-05, "logits/chosen": -2.3266444206237793, "logits/rejected": -2.418710470199585, "logps/chosen": -207.11448669433594, "logps/rejected": -287.95574951171875, "loss": 0.7347, "rewards/accuracies": 0.4375, "rewards/chosen": -0.41178515553474426, "rewards/margins": -0.002230718731880188, "rewards/rejected": -0.40955445170402527, "step": 445 }, { "epoch": 0.58, "learning_rate": 4.974066151448488e-05, "logits/chosen": -2.5598104000091553, "logits/rejected": -2.559662342071533, "logps/chosen": -231.59133911132812, "logps/rejected": -239.7825469970703, "loss": 0.7079, "rewards/accuracies": 0.4375, "rewards/chosen": -0.42168766260147095, "rewards/margins": 0.02927880734205246, "rewards/rejected": -0.4509664475917816, "step": 446 }, { "epoch": 0.58, "learning_rate": 4.9739162897776146e-05, "logits/chosen": -2.3569555282592773, "logits/rejected": -2.3967788219451904, "logps/chosen": -185.6054229736328, "logps/rejected": -192.92169189453125, "loss": 0.7437, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4934594929218292, "rewards/margins": -0.05449381470680237, "rewards/rejected": -0.43896567821502686, "step": 447 }, { "epoch": 0.59, "learning_rate": 4.973765998627628e-05, "logits/chosen": -2.5715792179107666, "logits/rejected": -2.5525949001312256, "logps/chosen": -252.23037719726562, "logps/rejected": -268.3929443359375, "loss": 0.7203, "rewards/accuracies": 0.5, "rewards/chosen": -0.3784821331501007, "rewards/margins": -0.01087496429681778, "rewards/rejected": -0.3676071763038635, "step": 448 }, { "epoch": 0.59, "learning_rate": 4.973615278024619e-05, "logits/chosen": -2.441962718963623, "logits/rejected": -2.481163501739502, "logps/chosen": -161.66839599609375, "logps/rejected": -192.90371704101562, "loss": 0.6712, "rewards/accuracies": 0.5, "rewards/chosen": -0.3044736981391907, "rewards/margins": 0.08197857439517975, "rewards/rejected": -0.38645222783088684, "step": 449 }, { "epoch": 0.59, "learning_rate": 4.9734641279947535e-05, "logits/chosen": -2.500675916671753, "logits/rejected": -2.487545967102051, "logps/chosen": -215.66336059570312, "logps/rejected": -232.06967163085938, "loss": 0.6796, "rewards/accuracies": 0.5, "rewards/chosen": -0.4935031235218048, "rewards/margins": 0.12285958230495453, "rewards/rejected": -0.6163626909255981, "step": 450 }, { "epoch": 0.59, "learning_rate": 4.973312548564272e-05, "logits/chosen": -2.4314112663269043, "logits/rejected": -2.4294910430908203, "logps/chosen": -170.31471252441406, "logps/rejected": -159.71585083007812, "loss": 0.7036, "rewards/accuracies": 0.4375, "rewards/chosen": -0.32274457812309265, "rewards/margins": 0.005533523857593536, "rewards/rejected": -0.328278124332428, "step": 451 }, { "epoch": 0.59, "learning_rate": 4.9731605397594884e-05, "logits/chosen": -2.4075756072998047, "logits/rejected": -2.32165789604187, "logps/chosen": -186.2813720703125, "logps/rejected": -199.41989135742188, "loss": 0.655, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3598267138004303, "rewards/margins": 0.11941798776388168, "rewards/rejected": -0.4792447090148926, "step": 452 }, { "epoch": 0.59, "learning_rate": 4.973008101606792e-05, "logits/chosen": -2.422717571258545, "logits/rejected": -2.5152597427368164, "logps/chosen": -173.51361083984375, "logps/rejected": -203.63848876953125, "loss": 0.7204, "rewards/accuracies": 0.625, "rewards/chosen": -0.25253090262413025, "rewards/margins": 0.029171746224164963, "rewards/rejected": -0.2817026376724243, "step": 453 }, { "epoch": 0.59, "learning_rate": 4.972855234132646e-05, "logits/chosen": -2.517770767211914, "logits/rejected": -2.501011848449707, "logps/chosen": -202.05300903320312, "logps/rejected": -191.65887451171875, "loss": 0.7912, "rewards/accuracies": 0.375, "rewards/chosen": -0.42360594868659973, "rewards/margins": -0.1400626003742218, "rewards/rejected": -0.28354334831237793, "step": 454 }, { "epoch": 0.6, "learning_rate": 4.9727019373635895e-05, "logits/chosen": -2.612367630004883, "logits/rejected": -2.617859125137329, "logps/chosen": -210.05746459960938, "logps/rejected": -199.4877166748047, "loss": 0.621, "rewards/accuracies": 0.75, "rewards/chosen": -0.12031766027212143, "rewards/margins": 0.193304181098938, "rewards/rejected": -0.3136218190193176, "step": 455 }, { "epoch": 0.6, "learning_rate": 4.972548211326235e-05, "logits/chosen": -2.5540568828582764, "logits/rejected": -2.587779998779297, "logps/chosen": -183.94273376464844, "logps/rejected": -219.0413818359375, "loss": 0.6669, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1985998898744583, "rewards/margins": 0.12929418683052063, "rewards/rejected": -0.32789406180381775, "step": 456 }, { "epoch": 0.6, "learning_rate": 4.9723940560472705e-05, "logits/chosen": -2.573051691055298, "logits/rejected": -2.6791186332702637, "logps/chosen": -193.5257568359375, "logps/rejected": -189.68743896484375, "loss": 0.7393, "rewards/accuracies": 0.4375, "rewards/chosen": -0.31839922070503235, "rewards/margins": -0.04755366966128349, "rewards/rejected": -0.27084553241729736, "step": 457 }, { "epoch": 0.6, "learning_rate": 4.972239471553457e-05, "logits/chosen": -2.127181053161621, "logits/rejected": -2.1366987228393555, "logps/chosen": -180.3720703125, "logps/rejected": -206.37628173828125, "loss": 0.6858, "rewards/accuracies": 0.4375, "rewards/chosen": -0.14177769422531128, "rewards/margins": 0.04955483227968216, "rewards/rejected": -0.19133250415325165, "step": 458 }, { "epoch": 0.6, "learning_rate": 4.97208445787163e-05, "logits/chosen": -2.4841840267181396, "logits/rejected": -2.576199531555176, "logps/chosen": -175.8242645263672, "logps/rejected": -205.9224090576172, "loss": 0.7229, "rewards/accuracies": 0.5, "rewards/chosen": -0.39802610874176025, "rewards/margins": -0.00046849995851516724, "rewards/rejected": -0.3975576162338257, "step": 459 }, { "epoch": 0.6, "learning_rate": 4.9719290150287026e-05, "logits/chosen": -2.4974851608276367, "logits/rejected": -2.552859306335449, "logps/chosen": -204.64480590820312, "logps/rejected": -229.373291015625, "loss": 0.7028, "rewards/accuracies": 0.5625, "rewards/chosen": -0.09486077725887299, "rewards/margins": 0.017538849264383316, "rewards/rejected": -0.1123996153473854, "step": 460 }, { "epoch": 0.6, "learning_rate": 4.9717731430516576e-05, "logits/chosen": -2.5486257076263428, "logits/rejected": -2.505126953125, "logps/chosen": -216.0121307373047, "logps/rejected": -204.5452880859375, "loss": 0.6996, "rewards/accuracies": 0.625, "rewards/chosen": -0.1612711399793625, "rewards/margins": 0.06856641173362732, "rewards/rejected": -0.229837566614151, "step": 461 }, { "epoch": 0.6, "learning_rate": 4.9716168419675555e-05, "logits/chosen": -2.5866286754608154, "logits/rejected": -2.667116403579712, "logps/chosen": -198.39340209960938, "logps/rejected": -250.51675415039062, "loss": 0.6783, "rewards/accuracies": 0.625, "rewards/chosen": -0.3453162908554077, "rewards/margins": 0.07628250867128372, "rewards/rejected": -0.42159876227378845, "step": 462 }, { "epoch": 0.61, "learning_rate": 4.9714601118035325e-05, "logits/chosen": -2.339556932449341, "logits/rejected": -2.2843379974365234, "logps/chosen": -202.150634765625, "logps/rejected": -192.74267578125, "loss": 0.674, "rewards/accuracies": 0.625, "rewards/chosen": -0.1799849569797516, "rewards/margins": 0.09412900358438492, "rewards/rejected": -0.2741139531135559, "step": 463 }, { "epoch": 0.61, "learning_rate": 4.971302952586796e-05, "logits/chosen": -2.3514528274536133, "logits/rejected": -2.313333749771118, "logps/chosen": -216.48309326171875, "logps/rejected": -205.1566925048828, "loss": 0.7625, "rewards/accuracies": 0.375, "rewards/chosen": -0.3306673467159271, "rewards/margins": -0.07343533635139465, "rewards/rejected": -0.25723204016685486, "step": 464 }, { "epoch": 0.61, "learning_rate": 4.971145364344628e-05, "logits/chosen": -2.475745916366577, "logits/rejected": -2.5058016777038574, "logps/chosen": -198.23568725585938, "logps/rejected": -226.68544006347656, "loss": 0.6412, "rewards/accuracies": 0.5, "rewards/chosen": -0.38944318890571594, "rewards/margins": 0.15158149600028992, "rewards/rejected": -0.5410246849060059, "step": 465 }, { "epoch": 0.61, "learning_rate": 4.970987347104389e-05, "logits/chosen": -2.490628242492676, "logits/rejected": -2.5108678340911865, "logps/chosen": -180.23779296875, "logps/rejected": -232.67515563964844, "loss": 0.6863, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3212130665779114, "rewards/margins": 0.09368535876274109, "rewards/rejected": -0.41489848494529724, "step": 466 }, { "epoch": 0.61, "learning_rate": 4.9708289008935096e-05, "logits/chosen": -2.6224091053009033, "logits/rejected": -2.6361618041992188, "logps/chosen": -254.05960083007812, "logps/rejected": -247.78634643554688, "loss": 0.6909, "rewards/accuracies": 0.625, "rewards/chosen": -0.40414828062057495, "rewards/margins": 0.051116712391376495, "rewards/rejected": -0.4552650451660156, "step": 467 }, { "epoch": 0.61, "learning_rate": 4.9706700257394966e-05, "logits/chosen": -2.6673390865325928, "logits/rejected": -2.6409029960632324, "logps/chosen": -160.94146728515625, "logps/rejected": -228.09808349609375, "loss": 0.7103, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4117045998573303, "rewards/margins": 0.047504693269729614, "rewards/rejected": -0.45920926332473755, "step": 468 }, { "epoch": 0.61, "learning_rate": 4.970510721669932e-05, "logits/chosen": -2.12448787689209, "logits/rejected": -2.209928512573242, "logps/chosen": -247.7496337890625, "logps/rejected": -256.54254150390625, "loss": 0.602, "rewards/accuracies": 0.6875, "rewards/chosen": -0.13797114789485931, "rewards/margins": 0.25247055292129517, "rewards/rejected": -0.3904416561126709, "step": 469 }, { "epoch": 0.62, "learning_rate": 4.97035098871247e-05, "logits/chosen": -2.531524896621704, "logits/rejected": -2.5506911277770996, "logps/chosen": -239.89340209960938, "logps/rejected": -232.53662109375, "loss": 0.6477, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2409316897392273, "rewards/margins": 0.11869845539331436, "rewards/rejected": -0.35963016748428345, "step": 470 }, { "epoch": 0.62, "learning_rate": 4.970190826894842e-05, "logits/chosen": -2.4936788082122803, "logits/rejected": -2.6132445335388184, "logps/chosen": -193.3863067626953, "logps/rejected": -196.8754119873047, "loss": 0.6012, "rewards/accuracies": 0.75, "rewards/chosen": -0.33218735456466675, "rewards/margins": 0.244198739528656, "rewards/rejected": -0.5763860940933228, "step": 471 }, { "epoch": 0.62, "learning_rate": 4.9700302362448517e-05, "logits/chosen": -2.7358896732330322, "logits/rejected": -2.860752582550049, "logps/chosen": -193.43789672851562, "logps/rejected": -240.47451782226562, "loss": 0.6757, "rewards/accuracies": 0.4375, "rewards/chosen": -0.2744450271129608, "rewards/margins": 0.07577859610319138, "rewards/rejected": -0.3502236306667328, "step": 472 }, { "epoch": 0.62, "learning_rate": 4.9698692167903794e-05, "logits/chosen": -2.581242799758911, "logits/rejected": -2.7139995098114014, "logps/chosen": -206.38645935058594, "logps/rejected": -210.1461639404297, "loss": 0.5954, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3665695786476135, "rewards/margins": 0.26593247056007385, "rewards/rejected": -0.632502019405365, "step": 473 }, { "epoch": 0.62, "learning_rate": 4.9697077685593766e-05, "logits/chosen": -2.6083261966705322, "logits/rejected": -2.6498053073883057, "logps/chosen": -131.37237548828125, "logps/rejected": -151.5311737060547, "loss": 0.649, "rewards/accuracies": 0.75, "rewards/chosen": -0.15027746558189392, "rewards/margins": 0.126561239361763, "rewards/rejected": -0.2768386900424957, "step": 474 }, { "epoch": 0.62, "learning_rate": 4.969545891579873e-05, "logits/chosen": -2.359971046447754, "logits/rejected": -2.3982698917388916, "logps/chosen": -157.39181518554688, "logps/rejected": -158.3177032470703, "loss": 0.6222, "rewards/accuracies": 0.5625, "rewards/chosen": -0.10072011500597, "rewards/margins": 0.24248063564300537, "rewards/rejected": -0.34320077300071716, "step": 475 }, { "epoch": 0.62, "learning_rate": 4.9693835858799696e-05, "logits/chosen": -2.408668279647827, "logits/rejected": -2.4540035724639893, "logps/chosen": -190.36312866210938, "logps/rejected": -204.20359802246094, "loss": 0.6997, "rewards/accuracies": 0.4375, "rewards/chosen": -0.18467962741851807, "rewards/margins": 0.015133664011955261, "rewards/rejected": -0.19981329143047333, "step": 476 }, { "epoch": 0.62, "learning_rate": 4.9692208514878444e-05, "logits/chosen": -2.270054340362549, "logits/rejected": -2.177553653717041, "logps/chosen": -197.311767578125, "logps/rejected": -205.99948120117188, "loss": 0.7255, "rewards/accuracies": 0.4375, "rewards/chosen": -0.2719980776309967, "rewards/margins": -0.018820375204086304, "rewards/rejected": -0.2531776428222656, "step": 477 }, { "epoch": 0.63, "learning_rate": 4.969057688431748e-05, "logits/chosen": -2.4876270294189453, "logits/rejected": -2.516055107116699, "logps/chosen": -150.5498046875, "logps/rejected": -173.20225524902344, "loss": 0.6482, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2806074023246765, "rewards/margins": 0.18431711196899414, "rewards/rejected": -0.46492451429367065, "step": 478 }, { "epoch": 0.63, "learning_rate": 4.968894096740006e-05, "logits/chosen": -2.699005365371704, "logits/rejected": -2.706855297088623, "logps/chosen": -208.8563232421875, "logps/rejected": -229.45355224609375, "loss": 0.6569, "rewards/accuracies": 0.625, "rewards/chosen": -0.3221612572669983, "rewards/margins": 0.1469050496816635, "rewards/rejected": -0.4690663516521454, "step": 479 }, { "epoch": 0.63, "learning_rate": 4.968730076441017e-05, "logits/chosen": -2.752363681793213, "logits/rejected": -2.8258612155914307, "logps/chosen": -233.5675811767578, "logps/rejected": -261.7062683105469, "loss": 0.7128, "rewards/accuracies": 0.625, "rewards/chosen": -0.604475200176239, "rewards/margins": 0.022365085780620575, "rewards/rejected": -0.6268402338027954, "step": 480 }, { "epoch": 0.63, "learning_rate": 4.9685656275632575e-05, "logits/chosen": -2.5177550315856934, "logits/rejected": -2.510917901992798, "logps/chosen": -217.02195739746094, "logps/rejected": -189.55174255371094, "loss": 0.6434, "rewards/accuracies": 0.625, "rewards/chosen": -0.3391067683696747, "rewards/margins": 0.13497701287269592, "rewards/rejected": -0.4740837812423706, "step": 481 }, { "epoch": 0.63, "learning_rate": 4.968400750135276e-05, "logits/chosen": -2.540860652923584, "logits/rejected": -2.582563877105713, "logps/chosen": -194.17721557617188, "logps/rejected": -198.79684448242188, "loss": 0.6679, "rewards/accuracies": 0.5625, "rewards/chosen": -0.574460506439209, "rewards/margins": 0.10229263454675674, "rewards/rejected": -0.6767531633377075, "step": 482 }, { "epoch": 0.63, "learning_rate": 4.968235444185695e-05, "logits/chosen": -2.7020184993743896, "logits/rejected": -2.7175564765930176, "logps/chosen": -211.08863830566406, "logps/rejected": -235.41836547851562, "loss": 0.7235, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5369959473609924, "rewards/margins": 0.011504769325256348, "rewards/rejected": -0.5485007762908936, "step": 483 }, { "epoch": 0.63, "learning_rate": 4.968069709743212e-05, "logits/chosen": -2.4362404346466064, "logits/rejected": -2.5247411727905273, "logps/chosen": -149.7013397216797, "logps/rejected": -185.23251342773438, "loss": 0.5556, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1017133966088295, "rewards/margins": 0.3292151391506195, "rewards/rejected": -0.4309285283088684, "step": 484 }, { "epoch": 0.63, "learning_rate": 4.9679035468365986e-05, "logits/chosen": -2.3711907863616943, "logits/rejected": -2.412506580352783, "logps/chosen": -202.7202911376953, "logps/rejected": -219.67977905273438, "loss": 0.6658, "rewards/accuracies": 0.5, "rewards/chosen": -0.3317970931529999, "rewards/margins": 0.12470696866512299, "rewards/rejected": -0.45650404691696167, "step": 485 }, { "epoch": 0.64, "learning_rate": 4.967736955494703e-05, "logits/chosen": -2.204392910003662, "logits/rejected": -2.2046332359313965, "logps/chosen": -195.41537475585938, "logps/rejected": -196.8472442626953, "loss": 0.7358, "rewards/accuracies": 0.5, "rewards/chosen": -0.43655478954315186, "rewards/margins": -0.000651337206363678, "rewards/rejected": -0.4359034597873688, "step": 486 }, { "epoch": 0.64, "learning_rate": 4.9675699357464445e-05, "logits/chosen": -2.198075771331787, "logits/rejected": -2.1894662380218506, "logps/chosen": -248.62149047851562, "logps/rejected": -267.9733581542969, "loss": 0.5947, "rewards/accuracies": 0.625, "rewards/chosen": -0.14261871576309204, "rewards/margins": 0.31586068868637085, "rewards/rejected": -0.4584794044494629, "step": 487 }, { "epoch": 0.64, "learning_rate": 4.967402487620818e-05, "logits/chosen": -2.5536584854125977, "logits/rejected": -2.577322006225586, "logps/chosen": -224.8876190185547, "logps/rejected": -227.71383666992188, "loss": 0.663, "rewards/accuracies": 0.625, "rewards/chosen": -0.40421125292778015, "rewards/margins": 0.11385571211576462, "rewards/rejected": -0.5180670022964478, "step": 488 }, { "epoch": 0.64, "learning_rate": 4.9672346111468934e-05, "logits/chosen": -2.508915424346924, "logits/rejected": -2.624011993408203, "logps/chosen": -181.42379760742188, "logps/rejected": -231.30458068847656, "loss": 0.6309, "rewards/accuracies": 0.5625, "rewards/chosen": -0.31741198897361755, "rewards/margins": 0.24159038066864014, "rewards/rejected": -0.5590023994445801, "step": 489 }, { "epoch": 0.64, "learning_rate": 4.967066306353816e-05, "logits/chosen": -2.4504952430725098, "logits/rejected": -2.4388234615325928, "logps/chosen": -229.2549591064453, "logps/rejected": -238.2850341796875, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": -0.47322726249694824, "rewards/margins": 0.07009989768266678, "rewards/rejected": -0.5433271527290344, "step": 490 }, { "epoch": 0.64, "learning_rate": 4.966897573270801e-05, "logits/chosen": -2.555962562561035, "logits/rejected": -2.5629987716674805, "logps/chosen": -172.14407348632812, "logps/rejected": -168.1295166015625, "loss": 0.7069, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2661295533180237, "rewards/margins": 0.07032840698957443, "rewards/rejected": -0.3364579379558563, "step": 491 }, { "epoch": 0.64, "learning_rate": 4.966728411927144e-05, "logits/chosen": -2.4634742736816406, "logits/rejected": -2.4717013835906982, "logps/chosen": -206.756591796875, "logps/rejected": -202.884033203125, "loss": 0.7013, "rewards/accuracies": 0.375, "rewards/chosen": -0.5119624733924866, "rewards/margins": 0.043263014405965805, "rewards/rejected": -0.5552253723144531, "step": 492 }, { "epoch": 0.65, "learning_rate": 4.9665588223522096e-05, "logits/chosen": -2.333207607269287, "logits/rejected": -2.3922958374023438, "logps/chosen": -210.04153442382812, "logps/rejected": -239.76979064941406, "loss": 0.7059, "rewards/accuracies": 0.4375, "rewards/chosen": -0.32687515020370483, "rewards/margins": 0.09048546105623245, "rewards/rejected": -0.41736066341400146, "step": 493 }, { "epoch": 0.65, "learning_rate": 4.96638880457544e-05, "logits/chosen": -2.414531707763672, "logits/rejected": -2.4198668003082275, "logps/chosen": -225.68203735351562, "logps/rejected": -252.04661560058594, "loss": 0.6259, "rewards/accuracies": 0.625, "rewards/chosen": -0.44954052567481995, "rewards/margins": 0.19488830864429474, "rewards/rejected": -0.6444287896156311, "step": 494 }, { "epoch": 0.65, "learning_rate": 4.9662183586263514e-05, "logits/chosen": -2.523651361465454, "logits/rejected": -2.4278194904327393, "logps/chosen": -222.92274475097656, "logps/rejected": -203.31668090820312, "loss": 0.8255, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6954134106636047, "rewards/margins": -0.12540754675865173, "rewards/rejected": -0.5700058937072754, "step": 495 }, { "epoch": 0.65, "learning_rate": 4.966047484534533e-05, "logits/chosen": -2.465467929840088, "logits/rejected": -2.4773805141448975, "logps/chosen": -215.38780212402344, "logps/rejected": -222.23593139648438, "loss": 0.6348, "rewards/accuracies": 0.5625, "rewards/chosen": -0.43675118684768677, "rewards/margins": 0.19925275444984436, "rewards/rejected": -0.6360039710998535, "step": 496 }, { "epoch": 0.65, "learning_rate": 4.965876182329648e-05, "logits/chosen": -2.4241979122161865, "logits/rejected": -2.4904754161834717, "logps/chosen": -176.40281677246094, "logps/rejected": -189.89398193359375, "loss": 0.7316, "rewards/accuracies": 0.4375, "rewards/chosen": -0.35698071122169495, "rewards/margins": 0.12830819189548492, "rewards/rejected": -0.48528894782066345, "step": 497 }, { "epoch": 0.65, "learning_rate": 4.965704452041437e-05, "logits/chosen": -2.4424967765808105, "logits/rejected": -2.447554588317871, "logps/chosen": -186.7346954345703, "logps/rejected": -202.56729125976562, "loss": 0.6988, "rewards/accuracies": 0.5625, "rewards/chosen": -0.379626601934433, "rewards/margins": 0.05914265662431717, "rewards/rejected": -0.43876922130584717, "step": 498 }, { "epoch": 0.65, "learning_rate": 4.9655322936997115e-05, "logits/chosen": -2.384314775466919, "logits/rejected": -2.6753270626068115, "logps/chosen": -168.5001220703125, "logps/rejected": -201.58462524414062, "loss": 0.5711, "rewards/accuracies": 0.6875, "rewards/chosen": -0.30074769258499146, "rewards/margins": 0.31708773970603943, "rewards/rejected": -0.6178354024887085, "step": 499 }, { "epoch": 0.65, "learning_rate": 4.9653597073343594e-05, "logits/chosen": -2.44107723236084, "logits/rejected": -2.518571615219116, "logps/chosen": -190.8749542236328, "logps/rejected": -195.59471130371094, "loss": 0.6465, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3541903495788574, "rewards/margins": 0.2186165750026703, "rewards/rejected": -0.5728069543838501, "step": 500 }, { "epoch": 0.66, "learning_rate": 4.9651866929753424e-05, "logits/chosen": -2.580932140350342, "logits/rejected": -2.6375184059143066, "logps/chosen": -201.61231994628906, "logps/rejected": -195.1832275390625, "loss": 0.6788, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4448980689048767, "rewards/margins": 0.1541019082069397, "rewards/rejected": -0.5990000367164612, "step": 501 }, { "epoch": 0.66, "learning_rate": 4.9650132506526944e-05, "logits/chosen": -2.601297378540039, "logits/rejected": -2.5584542751312256, "logps/chosen": -188.46102905273438, "logps/rejected": -176.1941680908203, "loss": 0.6884, "rewards/accuracies": 0.5625, "rewards/chosen": -0.32628124952316284, "rewards/margins": 0.053992465138435364, "rewards/rejected": -0.3802737295627594, "step": 502 }, { "epoch": 0.66, "learning_rate": 4.9648393803965284e-05, "logits/chosen": -2.594449281692505, "logits/rejected": -2.6243650913238525, "logps/chosen": -180.65838623046875, "logps/rejected": -173.06399536132812, "loss": 0.608, "rewards/accuracies": 0.75, "rewards/chosen": -0.3709149658679962, "rewards/margins": 0.24527978897094727, "rewards/rejected": -0.6161947846412659, "step": 503 }, { "epoch": 0.66, "learning_rate": 4.9646650822370265e-05, "logits/chosen": -2.623140811920166, "logits/rejected": -2.6087071895599365, "logps/chosen": -175.10757446289062, "logps/rejected": -198.5271759033203, "loss": 0.6623, "rewards/accuracies": 0.75, "rewards/chosen": -0.34849774837493896, "rewards/margins": 0.2720443308353424, "rewards/rejected": -0.6205421090126038, "step": 504 }, { "epoch": 0.66, "learning_rate": 4.964490356204449e-05, "logits/chosen": -2.3962643146514893, "logits/rejected": -2.4296982288360596, "logps/chosen": -171.71841430664062, "logps/rejected": -189.31715393066406, "loss": 0.7731, "rewards/accuracies": 0.5, "rewards/chosen": -0.5484548211097717, "rewards/margins": 0.05871535837650299, "rewards/rejected": -0.6071702241897583, "step": 505 }, { "epoch": 0.66, "learning_rate": 4.964315202329127e-05, "logits/chosen": -2.1054232120513916, "logits/rejected": -2.0613820552825928, "logps/chosen": -196.4398956298828, "logps/rejected": -197.6771240234375, "loss": 0.6993, "rewards/accuracies": 0.625, "rewards/chosen": -0.4299621880054474, "rewards/margins": 0.10146878659725189, "rewards/rejected": -0.5314310193061829, "step": 506 }, { "epoch": 0.66, "learning_rate": 4.96413962064147e-05, "logits/chosen": -2.4437365531921387, "logits/rejected": -2.6009879112243652, "logps/chosen": -218.52403259277344, "logps/rejected": -253.1461639404297, "loss": 0.5733, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4768885374069214, "rewards/margins": 0.29149848222732544, "rewards/rejected": -0.7683870196342468, "step": 507 }, { "epoch": 0.66, "learning_rate": 4.963963611171957e-05, "logits/chosen": -2.737818479537964, "logits/rejected": -2.8296992778778076, "logps/chosen": -194.77084350585938, "logps/rejected": -196.74485778808594, "loss": 0.6388, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4071928560733795, "rewards/margins": 0.17611998319625854, "rewards/rejected": -0.5833128690719604, "step": 508 }, { "epoch": 0.67, "learning_rate": 4.9637871739511454e-05, "logits/chosen": -2.5671932697296143, "logits/rejected": -2.4917585849761963, "logps/chosen": -208.9228057861328, "logps/rejected": -204.2896270751953, "loss": 0.7151, "rewards/accuracies": 0.4375, "rewards/chosen": -0.38905590772628784, "rewards/margins": 0.02341422438621521, "rewards/rejected": -0.41247016191482544, "step": 509 }, { "epoch": 0.67, "learning_rate": 4.963610309009665e-05, "logits/chosen": -2.3518598079681396, "logits/rejected": -2.3721611499786377, "logps/chosen": -154.779296875, "logps/rejected": -190.75613403320312, "loss": 0.6552, "rewards/accuracies": 0.625, "rewards/chosen": -0.35764750838279724, "rewards/margins": 0.1814635694026947, "rewards/rejected": -0.5391110777854919, "step": 510 }, { "epoch": 0.67, "learning_rate": 4.9634330163782205e-05, "logits/chosen": -2.432107448577881, "logits/rejected": -2.495102882385254, "logps/chosen": -248.51341247558594, "logps/rejected": -233.74266052246094, "loss": 0.7034, "rewards/accuracies": 0.5, "rewards/chosen": -0.45434871315956116, "rewards/margins": 0.08803940564393997, "rewards/rejected": -0.5423881411552429, "step": 511 }, { "epoch": 0.67, "learning_rate": 4.963255296087589e-05, "logits/chosen": -2.4448506832122803, "logits/rejected": -2.462013006210327, "logps/chosen": -143.73773193359375, "logps/rejected": -182.57113647460938, "loss": 0.6068, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3137112855911255, "rewards/margins": 0.21627672016620636, "rewards/rejected": -0.5299879908561707, "step": 512 }, { "epoch": 0.67, "learning_rate": 4.9630771481686244e-05, "logits/chosen": -2.4147915840148926, "logits/rejected": -2.368267297744751, "logps/chosen": -189.82333374023438, "logps/rejected": -175.15501403808594, "loss": 0.6099, "rewards/accuracies": 0.75, "rewards/chosen": -0.34441184997558594, "rewards/margins": 0.29255932569503784, "rewards/rejected": -0.636971116065979, "step": 513 }, { "epoch": 0.67, "learning_rate": 4.9628985726522535e-05, "logits/chosen": -2.424818992614746, "logits/rejected": -2.4481663703918457, "logps/chosen": -204.38375854492188, "logps/rejected": -204.50376892089844, "loss": 0.6846, "rewards/accuracies": 0.5, "rewards/chosen": -0.4307776689529419, "rewards/margins": 0.06390251964330673, "rewards/rejected": -0.4946801960468292, "step": 514 }, { "epoch": 0.67, "learning_rate": 4.9627195695694774e-05, "logits/chosen": -2.523521661758423, "logits/rejected": -2.5060675144195557, "logps/chosen": -163.70565795898438, "logps/rejected": -207.99964904785156, "loss": 0.6256, "rewards/accuracies": 0.5625, "rewards/chosen": -0.32005366683006287, "rewards/margins": 0.24324071407318115, "rewards/rejected": -0.5632944107055664, "step": 515 }, { "epoch": 0.68, "learning_rate": 4.9625401389513715e-05, "logits/chosen": -2.4181556701660156, "logits/rejected": -2.369680881500244, "logps/chosen": -205.77340698242188, "logps/rejected": -155.3488311767578, "loss": 0.8924, "rewards/accuracies": 0.25, "rewards/chosen": -0.7356570363044739, "rewards/margins": -0.26000097393989563, "rewards/rejected": -0.47565609216690063, "step": 516 }, { "epoch": 0.68, "learning_rate": 4.9623602808290855e-05, "logits/chosen": -2.544684410095215, "logits/rejected": -2.5888850688934326, "logps/chosen": -229.81246948242188, "logps/rejected": -251.07574462890625, "loss": 0.6031, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4079195559024811, "rewards/margins": 0.29860368371009827, "rewards/rejected": -0.7065231800079346, "step": 517 }, { "epoch": 0.68, "learning_rate": 4.962179995233843e-05, "logits/chosen": -2.397979259490967, "logits/rejected": -2.5106217861175537, "logps/chosen": -157.80418395996094, "logps/rejected": -175.9936065673828, "loss": 0.6093, "rewards/accuracies": 0.5, "rewards/chosen": -0.44883906841278076, "rewards/margins": 0.32764846086502075, "rewards/rejected": -0.7764875888824463, "step": 518 }, { "epoch": 0.68, "learning_rate": 4.961999282196943e-05, "logits/chosen": -2.4970529079437256, "logits/rejected": -2.582210063934326, "logps/chosen": -194.96694946289062, "logps/rejected": -198.24681091308594, "loss": 0.6883, "rewards/accuracies": 0.6875, "rewards/chosen": -0.37289613485336304, "rewards/margins": 0.0580718033015728, "rewards/rejected": -0.4309679865837097, "step": 519 }, { "epoch": 0.68, "learning_rate": 4.9618181417497566e-05, "logits/chosen": -2.469606399536133, "logits/rejected": -2.54701566696167, "logps/chosen": -215.56979370117188, "logps/rejected": -233.58209228515625, "loss": 0.691, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5863683819770813, "rewards/margins": 0.03697293624281883, "rewards/rejected": -0.6233413815498352, "step": 520 }, { "epoch": 0.68, "learning_rate": 4.961636573923732e-05, "logits/chosen": -2.4047884941101074, "logits/rejected": -2.414797782897949, "logps/chosen": -226.51625061035156, "logps/rejected": -212.8037872314453, "loss": 0.6559, "rewards/accuracies": 0.5, "rewards/chosen": -0.4074936807155609, "rewards/margins": 0.16430170834064484, "rewards/rejected": -0.5717953443527222, "step": 521 }, { "epoch": 0.68, "learning_rate": 4.9614545787503886e-05, "logits/chosen": -2.4261462688446045, "logits/rejected": -2.482429027557373, "logps/chosen": -197.58526611328125, "logps/rejected": -215.66322326660156, "loss": 0.6091, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4302670359611511, "rewards/margins": 0.2678428888320923, "rewards/rejected": -0.6981098651885986, "step": 522 }, { "epoch": 0.68, "learning_rate": 4.961272156261321e-05, "logits/chosen": -2.22928786277771, "logits/rejected": -2.3709988594055176, "logps/chosen": -157.08120727539062, "logps/rejected": -186.32533264160156, "loss": 0.5613, "rewards/accuracies": 0.625, "rewards/chosen": -0.32108116149902344, "rewards/margins": 0.476359486579895, "rewards/rejected": -0.7974405884742737, "step": 523 }, { "epoch": 0.69, "learning_rate": 4.961089306488199e-05, "logits/chosen": -2.3764328956604004, "logits/rejected": -2.423450231552124, "logps/chosen": -225.89686584472656, "logps/rejected": -250.8079376220703, "loss": 0.6351, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4355972707271576, "rewards/margins": 0.2791078984737396, "rewards/rejected": -0.7147052884101868, "step": 524 }, { "epoch": 0.69, "learning_rate": 4.960906029462766e-05, "logits/chosen": -2.338782548904419, "logits/rejected": -2.423982858657837, "logps/chosen": -210.87510681152344, "logps/rejected": -207.5828857421875, "loss": 0.7054, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5041165351867676, "rewards/margins": 0.092525914311409, "rewards/rejected": -0.5966423749923706, "step": 525 }, { "epoch": 0.69, "learning_rate": 4.96072232521684e-05, "logits/chosen": -2.4865126609802246, "logits/rejected": -2.55895733833313, "logps/chosen": -290.5992431640625, "logps/rejected": -275.2317810058594, "loss": 0.6608, "rewards/accuracies": 0.5, "rewards/chosen": -0.5548466444015503, "rewards/margins": 0.17232416570186615, "rewards/rejected": -0.7271708250045776, "step": 526 }, { "epoch": 0.69, "learning_rate": 4.96053819378231e-05, "logits/chosen": -2.6567628383636475, "logits/rejected": -2.5933961868286133, "logps/chosen": -183.54367065429688, "logps/rejected": -175.34194946289062, "loss": 0.7362, "rewards/accuracies": 0.3125, "rewards/chosen": -0.7040901780128479, "rewards/margins": -0.04544142633676529, "rewards/rejected": -0.6586487889289856, "step": 527 }, { "epoch": 0.69, "learning_rate": 4.960353635191145e-05, "logits/chosen": -2.514308452606201, "logits/rejected": -2.5751612186431885, "logps/chosen": -171.15457153320312, "logps/rejected": -163.67311096191406, "loss": 0.606, "rewards/accuracies": 0.625, "rewards/chosen": -0.340698778629303, "rewards/margins": 0.42078471183776855, "rewards/rejected": -0.7614834904670715, "step": 528 }, { "epoch": 0.69, "learning_rate": 4.9601686494753826e-05, "logits/chosen": -2.318474292755127, "logits/rejected": -2.2866580486297607, "logps/chosen": -155.76071166992188, "logps/rejected": -156.44679260253906, "loss": 0.7173, "rewards/accuracies": 0.5, "rewards/chosen": -0.30408087372779846, "rewards/margins": -0.002507832832634449, "rewards/rejected": -0.3015730381011963, "step": 529 }, { "epoch": 0.69, "learning_rate": 4.959983236667138e-05, "logits/chosen": -2.577028274536133, "logits/rejected": -2.6590182781219482, "logps/chosen": -142.8551483154297, "logps/rejected": -166.59149169921875, "loss": 0.6867, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5840120315551758, "rewards/margins": 0.07422871142625809, "rewards/rejected": -0.6582406759262085, "step": 530 }, { "epoch": 0.69, "learning_rate": 4.959797396798599e-05, "logits/chosen": -2.409472703933716, "logits/rejected": -2.4960474967956543, "logps/chosen": -150.38006591796875, "logps/rejected": -176.5684814453125, "loss": 0.6631, "rewards/accuracies": 0.625, "rewards/chosen": -0.5444418787956238, "rewards/margins": 0.12680965662002563, "rewards/rejected": -0.6712515354156494, "step": 531 }, { "epoch": 0.7, "learning_rate": 4.9596111299020284e-05, "logits/chosen": -2.336719512939453, "logits/rejected": -2.3440167903900146, "logps/chosen": -184.50473022460938, "logps/rejected": -187.92713928222656, "loss": 0.627, "rewards/accuracies": 0.6875, "rewards/chosen": -0.33718907833099365, "rewards/margins": 0.19500240683555603, "rewards/rejected": -0.5321914553642273, "step": 532 }, { "epoch": 0.7, "learning_rate": 4.959424436009762e-05, "logits/chosen": -2.1330745220184326, "logits/rejected": -2.2025442123413086, "logps/chosen": -255.2789306640625, "logps/rejected": -287.85736083984375, "loss": 0.5781, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6389483213424683, "rewards/margins": 0.5605921149253845, "rewards/rejected": -1.199540376663208, "step": 533 }, { "epoch": 0.7, "learning_rate": 4.9592373151542105e-05, "logits/chosen": -2.783942699432373, "logits/rejected": -2.766120672225952, "logps/chosen": -185.41404724121094, "logps/rejected": -203.95669555664062, "loss": 0.7158, "rewards/accuracies": 0.5, "rewards/chosen": -0.4609447717666626, "rewards/margins": 0.003200806677341461, "rewards/rejected": -0.46414560079574585, "step": 534 }, { "epoch": 0.7, "learning_rate": 4.959049767367859e-05, "logits/chosen": -2.4105114936828613, "logits/rejected": -2.6107630729675293, "logps/chosen": -172.59347534179688, "logps/rejected": -212.30836486816406, "loss": 0.6269, "rewards/accuracies": 0.625, "rewards/chosen": -0.5956838726997375, "rewards/margins": 0.2615227699279785, "rewards/rejected": -0.8572065830230713, "step": 535 }, { "epoch": 0.7, "learning_rate": 4.958861792683266e-05, "logits/chosen": -2.1016757488250732, "logits/rejected": -2.1774649620056152, "logps/chosen": -204.3402862548828, "logps/rejected": -215.18890380859375, "loss": 0.6164, "rewards/accuracies": 0.5625, "rewards/chosen": -0.49696028232574463, "rewards/margins": 0.2144584059715271, "rewards/rejected": -0.7114187479019165, "step": 536 }, { "epoch": 0.7, "learning_rate": 4.958673391133065e-05, "logits/chosen": -2.6260104179382324, "logits/rejected": -2.648331880569458, "logps/chosen": -180.17063903808594, "logps/rejected": -175.27981567382812, "loss": 0.7315, "rewards/accuracies": 0.5, "rewards/chosen": -0.7630718946456909, "rewards/margins": 0.05031493306159973, "rewards/rejected": -0.813386857509613, "step": 537 }, { "epoch": 0.7, "learning_rate": 4.958484562749963e-05, "logits/chosen": -2.3528480529785156, "logits/rejected": -2.35026216506958, "logps/chosen": -240.7522735595703, "logps/rejected": -284.01177978515625, "loss": 0.6558, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5610861778259277, "rewards/margins": 0.1676333099603653, "rewards/rejected": -0.7287194132804871, "step": 538 }, { "epoch": 0.71, "learning_rate": 4.9582953075667406e-05, "logits/chosen": -2.7893500328063965, "logits/rejected": -2.695694923400879, "logps/chosen": -213.7672119140625, "logps/rejected": -197.13304138183594, "loss": 0.7034, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7025653719902039, "rewards/margins": 0.08315228670835495, "rewards/rejected": -0.7857176661491394, "step": 539 }, { "epoch": 0.71, "learning_rate": 4.958105625616253e-05, "logits/chosen": -2.6460516452789307, "logits/rejected": -2.6226911544799805, "logps/chosen": -186.30870056152344, "logps/rejected": -169.23074340820312, "loss": 0.684, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4503140449523926, "rewards/margins": 0.0906258150935173, "rewards/rejected": -0.5409399271011353, "step": 540 }, { "epoch": 0.71, "learning_rate": 4.95791551693143e-05, "logits/chosen": -2.6068363189697266, "logits/rejected": -2.645115852355957, "logps/chosen": -225.66354370117188, "logps/rejected": -248.78900146484375, "loss": 0.7087, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7523027062416077, "rewards/margins": 0.06183048337697983, "rewards/rejected": -0.8141331672668457, "step": 541 }, { "epoch": 0.71, "learning_rate": 4.957724981545275e-05, "logits/chosen": -2.472942352294922, "logits/rejected": -2.5449466705322266, "logps/chosen": -165.9869384765625, "logps/rejected": -195.7375946044922, "loss": 0.6983, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5627427697181702, "rewards/margins": 0.17754918336868286, "rewards/rejected": -0.7402920126914978, "step": 542 }, { "epoch": 0.71, "learning_rate": 4.957534019490865e-05, "logits/chosen": -2.6267778873443604, "logits/rejected": -2.8627214431762695, "logps/chosen": -208.47406005859375, "logps/rejected": -230.64390563964844, "loss": 0.5822, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6202707290649414, "rewards/margins": 0.35299497842788696, "rewards/rejected": -0.9732657074928284, "step": 543 }, { "epoch": 0.71, "learning_rate": 4.957342630801353e-05, "logits/chosen": -2.302680730819702, "logits/rejected": -2.4394922256469727, "logps/chosen": -169.56790161132812, "logps/rejected": -198.37738037109375, "loss": 0.6269, "rewards/accuracies": 0.625, "rewards/chosen": -0.44470933079719543, "rewards/margins": 0.24785885214805603, "rewards/rejected": -0.6925681233406067, "step": 544 }, { "epoch": 0.71, "learning_rate": 4.957150815509963e-05, "logits/chosen": -2.454188823699951, "logits/rejected": -2.548893451690674, "logps/chosen": -170.51040649414062, "logps/rejected": -232.72727966308594, "loss": 0.7024, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3639673590660095, "rewards/margins": 0.021631799638271332, "rewards/rejected": -0.38559919595718384, "step": 545 }, { "epoch": 0.71, "learning_rate": 4.9569585736499945e-05, "logits/chosen": -2.337388515472412, "logits/rejected": -2.4017117023468018, "logps/chosen": -177.1454620361328, "logps/rejected": -191.3271484375, "loss": 0.5661, "rewards/accuracies": 0.625, "rewards/chosen": -0.3925960958003998, "rewards/margins": 0.34767282009124756, "rewards/rejected": -0.740268886089325, "step": 546 }, { "epoch": 0.72, "learning_rate": 4.9567659052548234e-05, "logits/chosen": -2.653782367706299, "logits/rejected": -2.6659772396087646, "logps/chosen": -198.43539428710938, "logps/rejected": -176.4965362548828, "loss": 0.7005, "rewards/accuracies": 0.625, "rewards/chosen": -0.5790706872940063, "rewards/margins": 0.11143729835748672, "rewards/rejected": -0.6905080080032349, "step": 547 }, { "epoch": 0.72, "learning_rate": 4.956572810357896e-05, "logits/chosen": -2.5503158569335938, "logits/rejected": -2.520476818084717, "logps/chosen": -190.98855590820312, "logps/rejected": -173.04501342773438, "loss": 0.7238, "rewards/accuracies": 0.5, "rewards/chosen": -0.5375619530677795, "rewards/margins": 0.01934710703790188, "rewards/rejected": -0.5569091439247131, "step": 548 }, { "epoch": 0.72, "learning_rate": 4.956379288992733e-05, "logits/chosen": -2.2868709564208984, "logits/rejected": -2.358032464981079, "logps/chosen": -163.09713745117188, "logps/rejected": -187.16030883789062, "loss": 0.5837, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3862106502056122, "rewards/margins": 0.4660491943359375, "rewards/rejected": -0.8522598147392273, "step": 549 }, { "epoch": 0.72, "learning_rate": 4.956185341192933e-05, "logits/chosen": -2.522181987762451, "logits/rejected": -2.5097920894622803, "logps/chosen": -167.14633178710938, "logps/rejected": -205.06365966796875, "loss": 0.7648, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5081343650817871, "rewards/margins": -0.012615025043487549, "rewards/rejected": -0.4955193102359772, "step": 550 }, { "epoch": 0.72, "learning_rate": 4.9559909669921635e-05, "logits/chosen": -2.3636226654052734, "logits/rejected": -2.438326120376587, "logps/chosen": -197.59310913085938, "logps/rejected": -186.26112365722656, "loss": 0.7066, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5027388334274292, "rewards/margins": 0.07569919526576996, "rewards/rejected": -0.5784379839897156, "step": 551 }, { "epoch": 0.72, "learning_rate": 4.9557961664241694e-05, "logits/chosen": -2.425126552581787, "logits/rejected": -2.5314579010009766, "logps/chosen": -181.5507049560547, "logps/rejected": -211.9708251953125, "loss": 0.7133, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6780929565429688, "rewards/margins": 0.19112253189086914, "rewards/rejected": -0.8692154884338379, "step": 552 }, { "epoch": 0.72, "learning_rate": 4.955600939522769e-05, "logits/chosen": -2.52880597114563, "logits/rejected": -2.5222525596618652, "logps/chosen": -178.28521728515625, "logps/rejected": -210.90565490722656, "loss": 0.587, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5996692180633545, "rewards/margins": 0.25250738859176636, "rewards/rejected": -0.8521764874458313, "step": 553 }, { "epoch": 0.73, "learning_rate": 4.955405286321854e-05, "logits/chosen": -2.3732471466064453, "logits/rejected": -2.4644832611083984, "logps/chosen": -210.51051330566406, "logps/rejected": -208.96826171875, "loss": 0.5982, "rewards/accuracies": 0.75, "rewards/chosen": -0.5088058114051819, "rewards/margins": 0.2920345067977905, "rewards/rejected": -0.8008402585983276, "step": 554 }, { "epoch": 0.73, "learning_rate": 4.95520920685539e-05, "logits/chosen": -2.4812893867492676, "logits/rejected": -2.5562007427215576, "logps/chosen": -170.83322143554688, "logps/rejected": -192.9698486328125, "loss": 0.5716, "rewards/accuracies": 0.75, "rewards/chosen": -0.6304159164428711, "rewards/margins": 0.37207356095314026, "rewards/rejected": -1.002489447593689, "step": 555 }, { "epoch": 0.73, "learning_rate": 4.9550127011574176e-05, "logits/chosen": -2.281599760055542, "logits/rejected": -2.3443262577056885, "logps/chosen": -188.46249389648438, "logps/rejected": -179.5142364501953, "loss": 0.6338, "rewards/accuracies": 0.625, "rewards/chosen": -0.41307827830314636, "rewards/margins": 0.17765894532203674, "rewards/rejected": -0.5907372236251831, "step": 556 }, { "epoch": 0.73, "learning_rate": 4.95481576926205e-05, "logits/chosen": -2.584645986557007, "logits/rejected": -2.604243755340576, "logps/chosen": -181.7239990234375, "logps/rejected": -209.90145874023438, "loss": 0.5821, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7002069354057312, "rewards/margins": 0.43418359756469727, "rewards/rejected": -1.1343904733657837, "step": 557 }, { "epoch": 0.73, "learning_rate": 4.954618411203476e-05, "logits/chosen": -2.679819107055664, "logits/rejected": -2.7520828247070312, "logps/chosen": -217.54901123046875, "logps/rejected": -236.81210327148438, "loss": 0.6187, "rewards/accuracies": 0.5625, "rewards/chosen": -0.734368085861206, "rewards/margins": 0.29621487855911255, "rewards/rejected": -1.0305829048156738, "step": 558 }, { "epoch": 0.73, "learning_rate": 4.954420627015957e-05, "logits/chosen": -2.530925750732422, "logits/rejected": -2.5949485301971436, "logps/chosen": -190.8072509765625, "logps/rejected": -226.07455444335938, "loss": 0.5481, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5543414950370789, "rewards/margins": 0.42910295724868774, "rewards/rejected": -0.9834444522857666, "step": 559 }, { "epoch": 0.73, "learning_rate": 4.954222416733829e-05, "logits/chosen": -2.4444892406463623, "logits/rejected": -2.4189417362213135, "logps/chosen": -275.56109619140625, "logps/rejected": -248.05233764648438, "loss": 0.7178, "rewards/accuracies": 0.625, "rewards/chosen": -0.5780796408653259, "rewards/margins": 0.0615503191947937, "rewards/rejected": -0.6396299600601196, "step": 560 }, { "epoch": 0.73, "learning_rate": 4.954023780391501e-05, "logits/chosen": -2.360807180404663, "logits/rejected": -2.308104991912842, "logps/chosen": -172.66574096679688, "logps/rejected": -191.79412841796875, "loss": 0.5558, "rewards/accuracies": 0.75, "rewards/chosen": -0.709125816822052, "rewards/margins": 0.38629868626594543, "rewards/rejected": -1.0954244136810303, "step": 561 }, { "epoch": 0.74, "learning_rate": 4.953824718023459e-05, "logits/chosen": -2.140808582305908, "logits/rejected": -2.2450931072235107, "logps/chosen": -135.81964111328125, "logps/rejected": -176.7286376953125, "loss": 0.7815, "rewards/accuracies": 0.5, "rewards/chosen": -0.7152988910675049, "rewards/margins": 0.006897151470184326, "rewards/rejected": -0.722196102142334, "step": 562 }, { "epoch": 0.74, "learning_rate": 4.953625229664259e-05, "logits/chosen": -2.2745089530944824, "logits/rejected": -2.2524397373199463, "logps/chosen": -174.69879150390625, "logps/rejected": -193.57516479492188, "loss": 0.8268, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6095337867736816, "rewards/margins": -0.15611900389194489, "rewards/rejected": -0.45341476798057556, "step": 563 }, { "epoch": 0.74, "learning_rate": 4.953425315348534e-05, "logits/chosen": -2.4158096313476562, "logits/rejected": -2.5616989135742188, "logps/chosen": -161.41812133789062, "logps/rejected": -195.5721893310547, "loss": 0.5776, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4711029529571533, "rewards/margins": 0.4865836799144745, "rewards/rejected": -0.957686722278595, "step": 564 }, { "epoch": 0.74, "learning_rate": 4.953224975110988e-05, "logits/chosen": -2.3829874992370605, "logits/rejected": -2.3937342166900635, "logps/chosen": -168.71534729003906, "logps/rejected": -178.2794189453125, "loss": 0.7187, "rewards/accuracies": 0.375, "rewards/chosen": -0.5582124590873718, "rewards/margins": -0.010660316795110703, "rewards/rejected": -0.5475521087646484, "step": 565 }, { "epoch": 0.74, "learning_rate": 4.9530242089864026e-05, "logits/chosen": -2.3829197883605957, "logits/rejected": -2.462663173675537, "logps/chosen": -173.0697021484375, "logps/rejected": -207.88348388671875, "loss": 0.6547, "rewards/accuracies": 0.5, "rewards/chosen": -0.4435291290283203, "rewards/margins": 0.13191349804401398, "rewards/rejected": -0.5754426717758179, "step": 566 }, { "epoch": 0.74, "learning_rate": 4.9528230170096305e-05, "logits/chosen": -2.319032907485962, "logits/rejected": -2.3326690196990967, "logps/chosen": -169.28501892089844, "logps/rejected": -194.19927978515625, "loss": 0.7851, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7481305599212646, "rewards/margins": 0.09495264291763306, "rewards/rejected": -0.8430831432342529, "step": 567 }, { "epoch": 0.74, "learning_rate": 4.952621399215598e-05, "logits/chosen": -2.630171537399292, "logits/rejected": -2.617821455001831, "logps/chosen": -192.14907836914062, "logps/rejected": -193.33782958984375, "loss": 0.687, "rewards/accuracies": 0.5, "rewards/chosen": -0.47104740142822266, "rewards/margins": 0.11374848335981369, "rewards/rejected": -0.5847958922386169, "step": 568 }, { "epoch": 0.74, "learning_rate": 4.9524193556393083e-05, "logits/chosen": -2.3562679290771484, "logits/rejected": -2.388498306274414, "logps/chosen": -212.62057495117188, "logps/rejected": -230.94476318359375, "loss": 0.6921, "rewards/accuracies": 0.75, "rewards/chosen": -0.575402557849884, "rewards/margins": 0.19730643928050995, "rewards/rejected": -0.7727090120315552, "step": 569 }, { "epoch": 0.75, "learning_rate": 4.952216886315837e-05, "logits/chosen": -2.414254903793335, "logits/rejected": -2.4329581260681152, "logps/chosen": -175.03147888183594, "logps/rejected": -214.68109130859375, "loss": 0.6274, "rewards/accuracies": 0.625, "rewards/chosen": -0.6071965098381042, "rewards/margins": 0.29113632440567017, "rewards/rejected": -0.8983328342437744, "step": 570 }, { "epoch": 0.75, "learning_rate": 4.952013991280332e-05, "logits/chosen": -2.5487287044525146, "logits/rejected": -2.563288688659668, "logps/chosen": -226.3212890625, "logps/rejected": -214.44525146484375, "loss": 0.746, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6687456965446472, "rewards/margins": -0.017180969938635826, "rewards/rejected": -0.6515647172927856, "step": 571 }, { "epoch": 0.75, "learning_rate": 4.951810670568017e-05, "logits/chosen": -2.445035219192505, "logits/rejected": -2.3831865787506104, "logps/chosen": -214.5970001220703, "logps/rejected": -228.41064453125, "loss": 0.7696, "rewards/accuracies": 0.375, "rewards/chosen": -0.62672358751297, "rewards/margins": -0.040074095129966736, "rewards/rejected": -0.5866495370864868, "step": 572 }, { "epoch": 0.75, "learning_rate": 4.9516069242141885e-05, "logits/chosen": -2.321072578430176, "logits/rejected": -2.4207139015197754, "logps/chosen": -203.04151916503906, "logps/rejected": -206.2476806640625, "loss": 0.766, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6442697644233704, "rewards/margins": -0.051033951342105865, "rewards/rejected": -0.5932357907295227, "step": 573 }, { "epoch": 0.75, "learning_rate": 4.951402752254219e-05, "logits/chosen": -2.3598363399505615, "logits/rejected": -2.510725498199463, "logps/chosen": -160.54183959960938, "logps/rejected": -181.72743225097656, "loss": 0.613, "rewards/accuracies": 0.5625, "rewards/chosen": -0.45199206471443176, "rewards/margins": 0.4041283130645752, "rewards/rejected": -0.8561204075813293, "step": 574 }, { "epoch": 0.75, "learning_rate": 4.951198154723552e-05, "logits/chosen": -2.550727605819702, "logits/rejected": -2.5166475772857666, "logps/chosen": -150.58238220214844, "logps/rejected": -157.48434448242188, "loss": 0.6906, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3471527397632599, "rewards/margins": 0.06844709068536758, "rewards/rejected": -0.41559985280036926, "step": 575 }, { "epoch": 0.75, "learning_rate": 4.950993131657706e-05, "logits/chosen": -2.32822847366333, "logits/rejected": -2.3608272075653076, "logps/chosen": -183.2533721923828, "logps/rejected": -155.5178680419922, "loss": 0.8822, "rewards/accuracies": 0.375, "rewards/chosen": -0.7413718700408936, "rewards/margins": -0.2268693447113037, "rewards/rejected": -0.5145025849342346, "step": 576 }, { "epoch": 0.76, "learning_rate": 4.9507876830922755e-05, "logits/chosen": -2.426633358001709, "logits/rejected": -2.350067138671875, "logps/chosen": -159.07473754882812, "logps/rejected": -177.24722290039062, "loss": 0.6643, "rewards/accuracies": 0.625, "rewards/chosen": -0.4699227511882782, "rewards/margins": 0.37284964323043823, "rewards/rejected": -0.842772364616394, "step": 577 }, { "epoch": 0.76, "learning_rate": 4.950581809062925e-05, "logits/chosen": -2.3766255378723145, "logits/rejected": -2.411029815673828, "logps/chosen": -211.9445343017578, "logps/rejected": -224.93450927734375, "loss": 0.7549, "rewards/accuracies": 0.5, "rewards/chosen": -0.5890372395515442, "rewards/margins": 0.09359408915042877, "rewards/rejected": -0.6826313138008118, "step": 578 }, { "epoch": 0.76, "learning_rate": 4.950375509605396e-05, "logits/chosen": -2.3283326625823975, "logits/rejected": -2.1340157985687256, "logps/chosen": -159.19125366210938, "logps/rejected": -161.9340362548828, "loss": 0.7342, "rewards/accuracies": 0.5, "rewards/chosen": -0.4637477993965149, "rewards/margins": -0.048650771379470825, "rewards/rejected": -0.41509705781936646, "step": 579 }, { "epoch": 0.76, "learning_rate": 4.9501687847555016e-05, "logits/chosen": -2.4342098236083984, "logits/rejected": -2.4008848667144775, "logps/chosen": -214.58380126953125, "logps/rejected": -184.8182373046875, "loss": 0.7425, "rewards/accuracies": 0.4375, "rewards/chosen": -0.45641300082206726, "rewards/margins": -0.036872051656246185, "rewards/rejected": -0.4195409417152405, "step": 580 }, { "epoch": 0.76, "learning_rate": 4.949961634549131e-05, "logits/chosen": -2.4010324478149414, "logits/rejected": -2.397801160812378, "logps/chosen": -175.51223754882812, "logps/rejected": -165.90208435058594, "loss": 0.6852, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3178666830062866, "rewards/margins": 0.06419295072555542, "rewards/rejected": -0.38205963373184204, "step": 581 }, { "epoch": 0.76, "learning_rate": 4.949754059022246e-05, "logits/chosen": -2.494157075881958, "logits/rejected": -2.560157537460327, "logps/chosen": -172.84835815429688, "logps/rejected": -178.97540283203125, "loss": 0.6419, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2800545394420624, "rewards/margins": 0.1691555678844452, "rewards/rejected": -0.44921010732650757, "step": 582 }, { "epoch": 0.76, "learning_rate": 4.949546058210881e-05, "logits/chosen": -2.209298610687256, "logits/rejected": -2.293645143508911, "logps/chosen": -194.64381408691406, "logps/rejected": -237.14974975585938, "loss": 0.5891, "rewards/accuracies": 0.625, "rewards/chosen": -0.257599413394928, "rewards/margins": 0.3168815076351166, "rewards/rejected": -0.5744808912277222, "step": 583 }, { "epoch": 0.76, "learning_rate": 4.9493376321511464e-05, "logits/chosen": -2.446950912475586, "logits/rejected": -2.4006083011627197, "logps/chosen": -199.42282104492188, "logps/rejected": -196.51849365234375, "loss": 0.6841, "rewards/accuracies": 0.625, "rewards/chosen": -0.35894283652305603, "rewards/margins": 0.1623423993587494, "rewards/rejected": -0.5212852358818054, "step": 584 }, { "epoch": 0.77, "learning_rate": 4.9491287808792265e-05, "logits/chosen": -2.3545334339141846, "logits/rejected": -2.452188491821289, "logps/chosen": -198.55056762695312, "logps/rejected": -195.9398956298828, "loss": 0.6768, "rewards/accuracies": 0.5, "rewards/chosen": -0.4273378551006317, "rewards/margins": 0.11289644986391068, "rewards/rejected": -0.5402343273162842, "step": 585 }, { "epoch": 0.77, "learning_rate": 4.948919504431376e-05, "logits/chosen": -2.687852144241333, "logits/rejected": -2.6648173332214355, "logps/chosen": -215.23638916015625, "logps/rejected": -221.06588745117188, "loss": 0.6981, "rewards/accuracies": 0.625, "rewards/chosen": -0.30441591143608093, "rewards/margins": 0.08797214180231094, "rewards/rejected": -0.39238807559013367, "step": 586 }, { "epoch": 0.77, "learning_rate": 4.948709802843929e-05, "logits/chosen": -2.2244153022766113, "logits/rejected": -2.2272024154663086, "logps/chosen": -197.98904418945312, "logps/rejected": -188.39773559570312, "loss": 0.6626, "rewards/accuracies": 0.5, "rewards/chosen": -0.27749666571617126, "rewards/margins": 0.12206709384918213, "rewards/rejected": -0.3995637595653534, "step": 587 }, { "epoch": 0.77, "learning_rate": 4.9484996761532886e-05, "logits/chosen": -2.471318006515503, "logits/rejected": -2.550818920135498, "logps/chosen": -192.51039123535156, "logps/rejected": -192.6507568359375, "loss": 0.7235, "rewards/accuracies": 0.4375, "rewards/chosen": -0.27331510186195374, "rewards/margins": -0.019602246582508087, "rewards/rejected": -0.25371283292770386, "step": 588 }, { "epoch": 0.77, "learning_rate": 4.9482891243959335e-05, "logits/chosen": -2.4945497512817383, "logits/rejected": -2.554481029510498, "logps/chosen": -187.52774047851562, "logps/rejected": -227.6110076904297, "loss": 0.5713, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2509083151817322, "rewards/margins": 0.3469720482826233, "rewards/rejected": -0.5978803038597107, "step": 589 }, { "epoch": 0.77, "learning_rate": 4.948078147608416e-05, "logits/chosen": -2.5424046516418457, "logits/rejected": -2.58070707321167, "logps/chosen": -181.62966918945312, "logps/rejected": -219.62698364257812, "loss": 0.6805, "rewards/accuracies": 0.625, "rewards/chosen": -0.4028276205062866, "rewards/margins": 0.0835791528224945, "rewards/rejected": -0.48640677332878113, "step": 590 }, { "epoch": 0.77, "learning_rate": 4.9478667458273624e-05, "logits/chosen": -2.558363199234009, "logits/rejected": -2.533869743347168, "logps/chosen": -207.1812286376953, "logps/rejected": -201.0562286376953, "loss": 0.6612, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24153894186019897, "rewards/margins": 0.12561962008476257, "rewards/rejected": -0.36715856194496155, "step": 591 }, { "epoch": 0.77, "learning_rate": 4.9476549190894725e-05, "logits/chosen": -2.4393560886383057, "logits/rejected": -2.450369358062744, "logps/chosen": -153.7821044921875, "logps/rejected": -153.88775634765625, "loss": 0.686, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23368775844573975, "rewards/margins": 0.0638914704322815, "rewards/rejected": -0.29757922887802124, "step": 592 }, { "epoch": 0.78, "learning_rate": 4.947442667431522e-05, "logits/chosen": -2.6815226078033447, "logits/rejected": -2.506643056869507, "logps/chosen": -229.4060516357422, "logps/rejected": -240.65362548828125, "loss": 0.7512, "rewards/accuracies": 0.5, "rewards/chosen": -0.3724709153175354, "rewards/margins": -0.04746837168931961, "rewards/rejected": -0.325002521276474, "step": 593 }, { "epoch": 0.78, "learning_rate": 4.9472299908903555e-05, "logits/chosen": -2.3556230068206787, "logits/rejected": -2.4480998516082764, "logps/chosen": -128.82077026367188, "logps/rejected": -170.01109313964844, "loss": 0.6209, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1109066978096962, "rewards/margins": 0.2432006597518921, "rewards/rejected": -0.3541073799133301, "step": 594 }, { "epoch": 0.78, "learning_rate": 4.947016889502895e-05, "logits/chosen": -2.5540051460266113, "logits/rejected": -2.549333095550537, "logps/chosen": -178.5426788330078, "logps/rejected": -172.33694458007812, "loss": 0.7123, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3693382441997528, "rewards/margins": -0.0018721502274274826, "rewards/rejected": -0.36746612191200256, "step": 595 }, { "epoch": 0.78, "learning_rate": 4.946803363306137e-05, "logits/chosen": -2.076066255569458, "logits/rejected": -2.011690139770508, "logps/chosen": -194.8577880859375, "logps/rejected": -176.207763671875, "loss": 0.7698, "rewards/accuracies": 0.375, "rewards/chosen": -0.26399266719818115, "rewards/margins": -0.08310166001319885, "rewards/rejected": -0.1808909922838211, "step": 596 }, { "epoch": 0.78, "learning_rate": 4.946589412337149e-05, "logits/chosen": -2.2557950019836426, "logits/rejected": -2.3377816677093506, "logps/chosen": -148.5780029296875, "logps/rejected": -170.98593139648438, "loss": 0.6012, "rewards/accuracies": 0.5, "rewards/chosen": 0.07843692600727081, "rewards/margins": 0.26211532950401306, "rewards/rejected": -0.18367838859558105, "step": 597 }, { "epoch": 0.78, "learning_rate": 4.9463750366330734e-05, "logits/chosen": -2.2877843379974365, "logits/rejected": -2.359644889831543, "logps/chosen": -187.41412353515625, "logps/rejected": -194.01309204101562, "loss": 0.734, "rewards/accuracies": 0.375, "rewards/chosen": -0.3197644352912903, "rewards/margins": -0.021138954907655716, "rewards/rejected": -0.29862549901008606, "step": 598 }, { "epoch": 0.78, "learning_rate": 4.9461602362311275e-05, "logits/chosen": -2.493452548980713, "logits/rejected": -2.5441408157348633, "logps/chosen": -209.37081909179688, "logps/rejected": -173.10861206054688, "loss": 0.7112, "rewards/accuracies": 0.5625, "rewards/chosen": -0.324692964553833, "rewards/margins": 0.05840418487787247, "rewards/rejected": -0.3830971419811249, "step": 599 }, { "epoch": 0.79, "learning_rate": 4.9459450111686e-05, "logits/chosen": -2.4824540615081787, "logits/rejected": -2.477165937423706, "logps/chosen": -191.7369384765625, "logps/rejected": -181.0056915283203, "loss": 0.692, "rewards/accuracies": 0.5, "rewards/chosen": -0.38107597827911377, "rewards/margins": 0.05237661302089691, "rewards/rejected": -0.4334526062011719, "step": 600 }, { "epoch": 0.79, "learning_rate": 4.945729361482856e-05, "logits/chosen": -2.450261116027832, "logits/rejected": -2.500009536743164, "logps/chosen": -232.3407440185547, "logps/rejected": -212.10638427734375, "loss": 0.5732, "rewards/accuracies": 0.6875, "rewards/chosen": -0.19914206862449646, "rewards/margins": 0.3821813762187958, "rewards/rejected": -0.5813234448432922, "step": 601 }, { "epoch": 0.79, "learning_rate": 4.9455132872113316e-05, "logits/chosen": -2.3420450687408447, "logits/rejected": -2.3474817276000977, "logps/chosen": -216.38914489746094, "logps/rejected": -224.3979949951172, "loss": 0.6047, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1369827687740326, "rewards/margins": 0.2419712096452713, "rewards/rejected": -0.3789539933204651, "step": 602 }, { "epoch": 0.79, "learning_rate": 4.945296788391539e-05, "logits/chosen": -2.3396308422088623, "logits/rejected": -2.3148958683013916, "logps/chosen": -169.25946044921875, "logps/rejected": -197.1276397705078, "loss": 0.7964, "rewards/accuracies": 0.375, "rewards/chosen": -0.32238638401031494, "rewards/margins": -0.1470273733139038, "rewards/rejected": -0.17535902559757233, "step": 603 }, { "epoch": 0.79, "learning_rate": 4.945079865061062e-05, "logits/chosen": -2.275747537612915, "logits/rejected": -2.2709038257598877, "logps/chosen": -139.6877899169922, "logps/rejected": -148.5768585205078, "loss": 0.6796, "rewards/accuracies": 0.375, "rewards/chosen": -0.08058857917785645, "rewards/margins": 0.04861634969711304, "rewards/rejected": -0.12920492887496948, "step": 604 }, { "epoch": 0.79, "learning_rate": 4.94486251725756e-05, "logits/chosen": -2.3698954582214355, "logits/rejected": -2.453796625137329, "logps/chosen": -229.4521484375, "logps/rejected": -210.6240234375, "loss": 0.6212, "rewards/accuracies": 0.625, "rewards/chosen": -0.22786223888397217, "rewards/margins": 0.20501334965229034, "rewards/rejected": -0.4328756034374237, "step": 605 }, { "epoch": 0.79, "learning_rate": 4.944644745018765e-05, "logits/chosen": -2.4110870361328125, "logits/rejected": -2.423731565475464, "logps/chosen": -219.2958221435547, "logps/rejected": -202.20706176757812, "loss": 0.7773, "rewards/accuracies": 0.5, "rewards/chosen": -0.37566569447517395, "rewards/margins": -0.0947473868727684, "rewards/rejected": -0.28091832995414734, "step": 606 }, { "epoch": 0.79, "learning_rate": 4.944426548382482e-05, "logits/chosen": -2.4861879348754883, "logits/rejected": -2.5459907054901123, "logps/chosen": -237.8024444580078, "logps/rejected": -260.2302551269531, "loss": 0.7621, "rewards/accuracies": 0.4375, "rewards/chosen": -0.42855891585350037, "rewards/margins": -0.048416607081890106, "rewards/rejected": -0.38014233112335205, "step": 607 }, { "epoch": 0.8, "learning_rate": 4.944207927386591e-05, "logits/chosen": -2.478734254837036, "logits/rejected": -2.5266644954681396, "logps/chosen": -218.4932403564453, "logps/rejected": -225.3731689453125, "loss": 0.7075, "rewards/accuracies": 0.5, "rewards/chosen": -0.23466593027114868, "rewards/margins": 0.09529409557580948, "rewards/rejected": -0.3299599885940552, "step": 608 }, { "epoch": 0.8, "learning_rate": 4.9439888820690475e-05, "logits/chosen": -2.5482990741729736, "logits/rejected": -2.544922113418579, "logps/chosen": -186.0775909423828, "logps/rejected": -197.2762908935547, "loss": 0.5692, "rewards/accuracies": 0.875, "rewards/chosen": -0.01272206474095583, "rewards/margins": 0.301639199256897, "rewards/rejected": -0.3143612742424011, "step": 609 }, { "epoch": 0.8, "learning_rate": 4.943769412467875e-05, "logits/chosen": -2.2644476890563965, "logits/rejected": -2.3761229515075684, "logps/chosen": -215.5855712890625, "logps/rejected": -246.0447998046875, "loss": 0.5599, "rewards/accuracies": 0.75, "rewards/chosen": -0.19982381165027618, "rewards/margins": 0.4122087359428406, "rewards/rejected": -0.6120326519012451, "step": 610 }, { "epoch": 0.8, "learning_rate": 4.943549518621176e-05, "logits/chosen": -2.4406063556671143, "logits/rejected": -2.529265880584717, "logps/chosen": -175.3257598876953, "logps/rejected": -188.4786376953125, "loss": 0.6989, "rewards/accuracies": 0.5, "rewards/chosen": -0.19547346234321594, "rewards/margins": 0.05771362781524658, "rewards/rejected": -0.2531871199607849, "step": 611 }, { "epoch": 0.8, "learning_rate": 4.9433292005671236e-05, "logits/chosen": -2.4887635707855225, "logits/rejected": -2.48945951461792, "logps/chosen": -187.1964111328125, "logps/rejected": -231.7959747314453, "loss": 0.6501, "rewards/accuracies": 0.5, "rewards/chosen": -0.21680843830108643, "rewards/margins": 0.15266810357570648, "rewards/rejected": -0.3694765567779541, "step": 612 }, { "epoch": 0.8, "learning_rate": 4.943108458343968e-05, "logits/chosen": -2.4516611099243164, "logits/rejected": -2.4493842124938965, "logps/chosen": -175.4194793701172, "logps/rejected": -185.060546875, "loss": 0.6172, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2488647699356079, "rewards/margins": 0.22184765338897705, "rewards/rejected": -0.47071248292922974, "step": 613 }, { "epoch": 0.8, "learning_rate": 4.942887291990028e-05, "logits/chosen": -2.636701822280884, "logits/rejected": -2.5966298580169678, "logps/chosen": -194.91783142089844, "logps/rejected": -182.28858947753906, "loss": 0.6595, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04664868861436844, "rewards/margins": 0.11729076504707336, "rewards/rejected": -0.1639394611120224, "step": 614 }, { "epoch": 0.8, "learning_rate": 4.9426657015436994e-05, "logits/chosen": -2.303603172302246, "logits/rejected": -2.2422983646392822, "logps/chosen": -144.08590698242188, "logps/rejected": -148.51541137695312, "loss": 0.7636, "rewards/accuracies": 0.375, "rewards/chosen": -0.18376705050468445, "rewards/margins": -0.09818767011165619, "rewards/rejected": -0.08557939529418945, "step": 615 }, { "epoch": 0.81, "learning_rate": 4.9424436870434515e-05, "logits/chosen": -2.7395613193511963, "logits/rejected": -2.747368097305298, "logps/chosen": -226.32354736328125, "logps/rejected": -212.107421875, "loss": 0.7233, "rewards/accuracies": 0.4375, "rewards/chosen": -0.19879278540611267, "rewards/margins": 0.0006840275600552559, "rewards/rejected": -0.19947680830955505, "step": 616 }, { "epoch": 0.81, "learning_rate": 4.942221248527826e-05, "logits/chosen": -2.493027448654175, "logits/rejected": -2.686164140701294, "logps/chosen": -166.07200622558594, "logps/rejected": -189.38475036621094, "loss": 0.8368, "rewards/accuracies": 0.1875, "rewards/chosen": -0.28490307927131653, "rewards/margins": -0.22959473729133606, "rewards/rejected": -0.05530836060643196, "step": 617 }, { "epoch": 0.81, "learning_rate": 4.94199838603544e-05, "logits/chosen": -2.3346376419067383, "logits/rejected": -2.4675793647766113, "logps/chosen": -177.6221160888672, "logps/rejected": -186.67446899414062, "loss": 0.4898, "rewards/accuracies": 0.75, "rewards/chosen": 0.1148318499326706, "rewards/margins": 0.5469637513160706, "rewards/rejected": -0.4321318566799164, "step": 618 }, { "epoch": 0.81, "learning_rate": 4.941775099604983e-05, "logits/chosen": -2.5135788917541504, "logits/rejected": -2.6071126461029053, "logps/chosen": -186.59161376953125, "logps/rejected": -205.1910400390625, "loss": 0.6155, "rewards/accuracies": 0.5625, "rewards/chosen": -0.029235512018203735, "rewards/margins": 0.20693793892860413, "rewards/rejected": -0.23617346584796906, "step": 619 }, { "epoch": 0.81, "learning_rate": 4.941551389275217e-05, "logits/chosen": -2.49117374420166, "logits/rejected": -2.5229427814483643, "logps/chosen": -208.4278106689453, "logps/rejected": -191.97398376464844, "loss": 0.6074, "rewards/accuracies": 0.75, "rewards/chosen": -0.18853022158145905, "rewards/margins": 0.2418961077928543, "rewards/rejected": -0.43042635917663574, "step": 620 }, { "epoch": 0.81, "learning_rate": 4.941327255084979e-05, "logits/chosen": -2.4580295085906982, "logits/rejected": -2.4623186588287354, "logps/chosen": -191.52886962890625, "logps/rejected": -169.84417724609375, "loss": 0.7945, "rewards/accuracies": 0.375, "rewards/chosen": -0.2514897584915161, "rewards/margins": -0.04441142827272415, "rewards/rejected": -0.20707830786705017, "step": 621 }, { "epoch": 0.81, "learning_rate": 4.9411026970731805e-05, "logits/chosen": -2.5821356773376465, "logits/rejected": -2.619976758956909, "logps/chosen": -203.17149353027344, "logps/rejected": -217.1163330078125, "loss": 0.6965, "rewards/accuracies": 0.375, "rewards/chosen": -0.33491429686546326, "rewards/margins": 0.05494851991534233, "rewards/rejected": -0.3898628354072571, "step": 622 }, { "epoch": 0.82, "learning_rate": 4.9408777152788044e-05, "logits/chosen": -2.464843273162842, "logits/rejected": -2.5318896770477295, "logps/chosen": -169.26376342773438, "logps/rejected": -182.11395263671875, "loss": 0.6721, "rewards/accuracies": 0.5625, "rewards/chosen": -0.17076966166496277, "rewards/margins": 0.10583178699016571, "rewards/rejected": -0.2766014337539673, "step": 623 }, { "epoch": 0.82, "learning_rate": 4.9406523097409085e-05, "logits/chosen": -2.262134075164795, "logits/rejected": -2.329742670059204, "logps/chosen": -153.96200561523438, "logps/rejected": -157.85238647460938, "loss": 0.6468, "rewards/accuracies": 0.6875, "rewards/chosen": -0.13661958277225494, "rewards/margins": 0.20581209659576416, "rewards/rejected": -0.3424316644668579, "step": 624 }, { "epoch": 0.82, "learning_rate": 4.940426480498623e-05, "logits/chosen": -2.403775453567505, "logits/rejected": -2.459458351135254, "logps/chosen": -167.8171844482422, "logps/rejected": -221.13282775878906, "loss": 0.6038, "rewards/accuracies": 0.625, "rewards/chosen": -0.2118593454360962, "rewards/margins": 0.26167717576026917, "rewards/rejected": -0.47353655099868774, "step": 625 }, { "epoch": 0.82, "learning_rate": 4.940200227591154e-05, "logits/chosen": -1.9581964015960693, "logits/rejected": -2.0107665061950684, "logps/chosen": -124.81986236572266, "logps/rejected": -138.32220458984375, "loss": 0.6216, "rewards/accuracies": 0.625, "rewards/chosen": -0.05847623199224472, "rewards/margins": 0.22099967300891876, "rewards/rejected": -0.2794759273529053, "step": 626 }, { "epoch": 0.82, "learning_rate": 4.9399735510577796e-05, "logits/chosen": -2.2749149799346924, "logits/rejected": -2.283113718032837, "logps/chosen": -155.7340087890625, "logps/rejected": -150.0223388671875, "loss": 0.6522, "rewards/accuracies": 0.5, "rewards/chosen": -0.1051105409860611, "rewards/margins": 0.16768980026245117, "rewards/rejected": -0.27280035614967346, "step": 627 }, { "epoch": 0.82, "learning_rate": 4.93974645093785e-05, "logits/chosen": -2.4072506427764893, "logits/rejected": -2.4812541007995605, "logps/chosen": -195.17230224609375, "logps/rejected": -194.09622192382812, "loss": 0.6298, "rewards/accuracies": 0.6875, "rewards/chosen": -0.06678425520658493, "rewards/margins": 0.17194166779518127, "rewards/rejected": -0.2387259304523468, "step": 628 }, { "epoch": 0.82, "learning_rate": 4.9395189272707916e-05, "logits/chosen": -2.428178310394287, "logits/rejected": -2.5200040340423584, "logps/chosen": -194.95848083496094, "logps/rejected": -215.4308319091797, "loss": 0.699, "rewards/accuracies": 0.625, "rewards/chosen": -0.37522369623184204, "rewards/margins": 0.045931555330753326, "rewards/rejected": -0.4211552143096924, "step": 629 }, { "epoch": 0.82, "learning_rate": 4.939290980096103e-05, "logits/chosen": -2.243135690689087, "logits/rejected": -2.25071120262146, "logps/chosen": -188.89820861816406, "logps/rejected": -219.1158447265625, "loss": 0.7187, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2669258415699005, "rewards/margins": 0.03279988840222359, "rewards/rejected": -0.2997257113456726, "step": 630 }, { "epoch": 0.83, "learning_rate": 4.9390626094533565e-05, "logits/chosen": -2.496492385864258, "logits/rejected": -2.6048879623413086, "logps/chosen": -169.56320190429688, "logps/rejected": -184.27169799804688, "loss": 0.6584, "rewards/accuracies": 0.5, "rewards/chosen": -0.31219586730003357, "rewards/margins": 0.16650965809822083, "rewards/rejected": -0.4787055253982544, "step": 631 }, { "epoch": 0.83, "learning_rate": 4.9388338153821976e-05, "logits/chosen": -2.256747245788574, "logits/rejected": -2.2907259464263916, "logps/chosen": -163.09808349609375, "logps/rejected": -231.26492309570312, "loss": 0.5661, "rewards/accuracies": 0.75, "rewards/chosen": -0.15426135063171387, "rewards/margins": 0.48811718821525574, "rewards/rejected": -0.6423785090446472, "step": 632 }, { "epoch": 0.83, "learning_rate": 4.938604597922346e-05, "logits/chosen": -2.5338540077209473, "logits/rejected": -2.580803871154785, "logps/chosen": -221.49908447265625, "logps/rejected": -222.09754943847656, "loss": 0.7293, "rewards/accuracies": 0.4375, "rewards/chosen": -0.393398642539978, "rewards/margins": 0.04478640481829643, "rewards/rejected": -0.43818506598472595, "step": 633 }, { "epoch": 0.83, "learning_rate": 4.9383749571135946e-05, "logits/chosen": -2.4923906326293945, "logits/rejected": -2.552914619445801, "logps/chosen": -188.54759216308594, "logps/rejected": -201.86276245117188, "loss": 0.6803, "rewards/accuracies": 0.625, "rewards/chosen": -0.19801469147205353, "rewards/margins": 0.08076779544353485, "rewards/rejected": -0.2787824869155884, "step": 634 }, { "epoch": 0.83, "learning_rate": 4.938144892995809e-05, "logits/chosen": -2.6368439197540283, "logits/rejected": -2.681831121444702, "logps/chosen": -172.92823791503906, "logps/rejected": -153.6282501220703, "loss": 0.6554, "rewards/accuracies": 0.5, "rewards/chosen": -0.1948256492614746, "rewards/margins": 0.14521902799606323, "rewards/rejected": -0.34004467725753784, "step": 635 }, { "epoch": 0.83, "learning_rate": 4.93791440560893e-05, "logits/chosen": -2.5344183444976807, "logits/rejected": -2.5658974647521973, "logps/chosen": -198.16390991210938, "logps/rejected": -193.16055297851562, "loss": 0.6981, "rewards/accuracies": 0.3125, "rewards/chosen": -0.06259515881538391, "rewards/margins": 0.05056838318705559, "rewards/rejected": -0.11316356062889099, "step": 636 }, { "epoch": 0.83, "learning_rate": 4.93768349499297e-05, "logits/chosen": -2.6175780296325684, "logits/rejected": -2.561044216156006, "logps/chosen": -194.93130493164062, "logps/rejected": -171.37437438964844, "loss": 0.865, "rewards/accuracies": 0.25, "rewards/chosen": -0.44466546177864075, "rewards/margins": -0.2321683019399643, "rewards/rejected": -0.21249720454216003, "step": 637 }, { "epoch": 0.83, "learning_rate": 4.9374521611880166e-05, "logits/chosen": -2.552360773086548, "logits/rejected": -2.585489273071289, "logps/chosen": -157.82420349121094, "logps/rejected": -171.7198028564453, "loss": 0.6514, "rewards/accuracies": 0.5625, "rewards/chosen": -0.19169321656227112, "rewards/margins": 0.18479710817337036, "rewards/rejected": -0.3764903247356415, "step": 638 }, { "epoch": 0.84, "learning_rate": 4.9372204042342295e-05, "logits/chosen": -2.55067777633667, "logits/rejected": -2.6302952766418457, "logps/chosen": -186.92031860351562, "logps/rejected": -184.224365234375, "loss": 0.676, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2839874029159546, "rewards/margins": 0.20806796848773956, "rewards/rejected": -0.49205532670021057, "step": 639 }, { "epoch": 0.84, "learning_rate": 4.936988224171842e-05, "logits/chosen": -2.6061015129089355, "logits/rejected": -2.609809637069702, "logps/chosen": -209.86328125, "logps/rejected": -199.8352813720703, "loss": 0.6114, "rewards/accuracies": 0.625, "rewards/chosen": -0.38792091608047485, "rewards/margins": 0.29765814542770386, "rewards/rejected": -0.6855790615081787, "step": 640 }, { "epoch": 0.84, "learning_rate": 4.936755621041163e-05, "logits/chosen": -2.600673198699951, "logits/rejected": -2.6286747455596924, "logps/chosen": -217.86569213867188, "logps/rejected": -208.2232208251953, "loss": 0.5999, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3269140124320984, "rewards/margins": 0.2362305074930191, "rewards/rejected": -0.5631444454193115, "step": 641 }, { "epoch": 0.84, "learning_rate": 4.93652259488257e-05, "logits/chosen": -2.331010341644287, "logits/rejected": -2.406320571899414, "logps/chosen": -181.08682250976562, "logps/rejected": -231.1434326171875, "loss": 0.6473, "rewards/accuracies": 0.5, "rewards/chosen": -0.18573978543281555, "rewards/margins": 0.11375062167644501, "rewards/rejected": -0.29949042201042175, "step": 642 }, { "epoch": 0.84, "learning_rate": 4.9362891457365194e-05, "logits/chosen": -2.5639798641204834, "logits/rejected": -2.511385917663574, "logps/chosen": -167.39529418945312, "logps/rejected": -190.17266845703125, "loss": 0.6687, "rewards/accuracies": 0.5625, "rewards/chosen": -0.23445367813110352, "rewards/margins": 0.10250186920166016, "rewards/rejected": -0.33695557713508606, "step": 643 }, { "epoch": 0.84, "learning_rate": 4.9360552736435386e-05, "logits/chosen": -2.484229326248169, "logits/rejected": -2.5917751789093018, "logps/chosen": -160.66372680664062, "logps/rejected": -151.573974609375, "loss": 0.7071, "rewards/accuracies": 0.5, "rewards/chosen": -0.3852943181991577, "rewards/margins": 0.021035168319940567, "rewards/rejected": -0.40632951259613037, "step": 644 }, { "epoch": 0.84, "learning_rate": 4.935820978644228e-05, "logits/chosen": -2.4788904190063477, "logits/rejected": -2.4254837036132812, "logps/chosen": -161.3828887939453, "logps/rejected": -179.1514129638672, "loss": 0.7489, "rewards/accuracies": 0.5, "rewards/chosen": -0.27134445309638977, "rewards/margins": -0.01518234983086586, "rewards/rejected": -0.2561620771884918, "step": 645 }, { "epoch": 0.85, "learning_rate": 4.9355862607792614e-05, "logits/chosen": -2.3145663738250732, "logits/rejected": -2.3181071281433105, "logps/chosen": -175.7195281982422, "logps/rejected": -180.3649139404297, "loss": 0.7226, "rewards/accuracies": 0.5, "rewards/chosen": -0.2857952117919922, "rewards/margins": -0.0009603817015886307, "rewards/rejected": -0.2848348617553711, "step": 646 }, { "epoch": 0.85, "learning_rate": 4.9353511200893865e-05, "logits/chosen": -2.5312018394470215, "logits/rejected": -2.5117855072021484, "logps/chosen": -147.04232788085938, "logps/rejected": -153.8798065185547, "loss": 0.7482, "rewards/accuracies": 0.3125, "rewards/chosen": -0.2138766497373581, "rewards/margins": -0.0894002690911293, "rewards/rejected": -0.12447639554738998, "step": 647 }, { "epoch": 0.85, "learning_rate": 4.935115556615425e-05, "logits/chosen": -2.5504438877105713, "logits/rejected": -2.5526883602142334, "logps/chosen": -198.34768676757812, "logps/rejected": -190.35696411132812, "loss": 0.6935, "rewards/accuracies": 0.5, "rewards/chosen": -0.393574059009552, "rewards/margins": 0.1438981145620346, "rewards/rejected": -0.5374721884727478, "step": 648 }, { "epoch": 0.85, "learning_rate": 4.934879570398272e-05, "logits/chosen": -2.450056314468384, "logits/rejected": -2.440789222717285, "logps/chosen": -198.56884765625, "logps/rejected": -234.9272918701172, "loss": 0.7365, "rewards/accuracies": 0.25, "rewards/chosen": -0.5248376131057739, "rewards/margins": -0.04359120875597, "rewards/rejected": -0.4812464118003845, "step": 649 }, { "epoch": 0.85, "learning_rate": 4.9346431614788945e-05, "logits/chosen": -2.481201648712158, "logits/rejected": -2.540107250213623, "logps/chosen": -193.42530822753906, "logps/rejected": -196.03887939453125, "loss": 0.7004, "rewards/accuracies": 0.5, "rewards/chosen": -0.30591022968292236, "rewards/margins": 0.07436452805995941, "rewards/rejected": -0.3802747428417206, "step": 650 }, { "epoch": 0.85, "learning_rate": 4.934406329898334e-05, "logits/chosen": -2.4828920364379883, "logits/rejected": -2.4486544132232666, "logps/chosen": -174.25201416015625, "logps/rejected": -167.93795776367188, "loss": 0.8821, "rewards/accuracies": 0.25, "rewards/chosen": -0.4259773790836334, "rewards/margins": -0.30132219195365906, "rewards/rejected": -0.12465521693229675, "step": 651 }, { "epoch": 0.85, "learning_rate": 4.934169075697704e-05, "logits/chosen": -2.5756962299346924, "logits/rejected": -2.53938364982605, "logps/chosen": -261.4378356933594, "logps/rejected": -217.41473388671875, "loss": 0.7193, "rewards/accuracies": 0.4375, "rewards/chosen": -0.35280755162239075, "rewards/margins": 0.009605936706066132, "rewards/rejected": -0.36241352558135986, "step": 652 }, { "epoch": 0.85, "learning_rate": 4.9339313989181955e-05, "logits/chosen": -2.479517936706543, "logits/rejected": -2.4295461177825928, "logps/chosen": -168.84185791015625, "logps/rejected": -171.62991333007812, "loss": 0.7111, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4041588306427002, "rewards/margins": 0.014433953911066055, "rewards/rejected": -0.41859275102615356, "step": 653 }, { "epoch": 0.86, "learning_rate": 4.933693299601067e-05, "logits/chosen": -2.2451162338256836, "logits/rejected": -2.4006645679473877, "logps/chosen": -178.17141723632812, "logps/rejected": -210.85440063476562, "loss": 0.5213, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0021903254091739655, "rewards/margins": 0.5191294550895691, "rewards/rejected": -0.516939103603363, "step": 654 }, { "epoch": 0.86, "learning_rate": 4.933454777787654e-05, "logits/chosen": -2.6251702308654785, "logits/rejected": -2.6193838119506836, "logps/chosen": -261.71185302734375, "logps/rejected": -258.5350646972656, "loss": 0.6123, "rewards/accuracies": 0.625, "rewards/chosen": -0.3498530089855194, "rewards/margins": 0.23792549967765808, "rewards/rejected": -0.5877785086631775, "step": 655 }, { "epoch": 0.86, "learning_rate": 4.933215833519366e-05, "logits/chosen": -2.515723466873169, "logits/rejected": -2.4995017051696777, "logps/chosen": -170.08506774902344, "logps/rejected": -171.64215087890625, "loss": 0.6167, "rewards/accuracies": 0.5, "rewards/chosen": -0.03253903239965439, "rewards/margins": 0.2270340621471405, "rewards/rejected": -0.2595731019973755, "step": 656 }, { "epoch": 0.86, "learning_rate": 4.9329764668376824e-05, "logits/chosen": -2.3417258262634277, "logits/rejected": -2.5132431983947754, "logps/chosen": -182.23594665527344, "logps/rejected": -204.4223175048828, "loss": 0.5153, "rewards/accuracies": 0.75, "rewards/chosen": -0.08154846727848053, "rewards/margins": 0.5844264626502991, "rewards/rejected": -0.6659749746322632, "step": 657 }, { "epoch": 0.86, "learning_rate": 4.932736677784158e-05, "logits/chosen": -2.406738758087158, "logits/rejected": -2.4287314414978027, "logps/chosen": -175.0724639892578, "logps/rejected": -212.99569702148438, "loss": 0.6727, "rewards/accuracies": 0.625, "rewards/chosen": -0.5249559879302979, "rewards/margins": 0.10958628356456757, "rewards/rejected": -0.6345422863960266, "step": 658 }, { "epoch": 0.86, "learning_rate": 4.9324964664004226e-05, "logits/chosen": -2.4244885444641113, "logits/rejected": -2.511654853820801, "logps/chosen": -181.31353759765625, "logps/rejected": -189.31832885742188, "loss": 0.7071, "rewards/accuracies": 0.625, "rewards/chosen": -0.27095070481300354, "rewards/margins": -0.0028193406760692596, "rewards/rejected": -0.2681313753128052, "step": 659 }, { "epoch": 0.86, "learning_rate": 4.9322558327281773e-05, "logits/chosen": -2.5121233463287354, "logits/rejected": -2.6510791778564453, "logps/chosen": -230.39369201660156, "logps/rejected": -241.02325439453125, "loss": 0.4769, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02516031824052334, "rewards/margins": 0.551730751991272, "rewards/rejected": -0.5768911242485046, "step": 660 }, { "epoch": 0.87, "learning_rate": 4.9320147768091954e-05, "logits/chosen": -2.076624631881714, "logits/rejected": -2.117382287979126, "logps/chosen": -228.64761352539062, "logps/rejected": -211.5120849609375, "loss": 0.696, "rewards/accuracies": 0.4375, "rewards/chosen": -0.37209880352020264, "rewards/margins": 0.16731634736061096, "rewards/rejected": -0.5394151210784912, "step": 661 }, { "epoch": 0.87, "learning_rate": 4.931773298685326e-05, "logits/chosen": -2.3631484508514404, "logits/rejected": -2.424973249435425, "logps/chosen": -174.7392120361328, "logps/rejected": -198.92578125, "loss": 0.6354, "rewards/accuracies": 0.75, "rewards/chosen": -0.23427049815654755, "rewards/margins": 0.1624229997396469, "rewards/rejected": -0.39669349789619446, "step": 662 }, { "epoch": 0.87, "learning_rate": 4.93153139839849e-05, "logits/chosen": -2.271193742752075, "logits/rejected": -2.2948765754699707, "logps/chosen": -180.6439971923828, "logps/rejected": -192.5905303955078, "loss": 0.7982, "rewards/accuracies": 0.3125, "rewards/chosen": -0.23156273365020752, "rewards/margins": -0.15015332400798798, "rewards/rejected": -0.08140941709280014, "step": 663 }, { "epoch": 0.87, "learning_rate": 4.931289075990682e-05, "logits/chosen": -2.6266207695007324, "logits/rejected": -2.6377651691436768, "logps/chosen": -214.2593536376953, "logps/rejected": -202.11019897460938, "loss": 0.7092, "rewards/accuracies": 0.5, "rewards/chosen": -0.25713613629341125, "rewards/margins": 0.02131093665957451, "rewards/rejected": -0.2784470319747925, "step": 664 }, { "epoch": 0.87, "learning_rate": 4.93104633150397e-05, "logits/chosen": -2.558809995651245, "logits/rejected": -2.580552816390991, "logps/chosen": -200.12879943847656, "logps/rejected": -270.4982604980469, "loss": 0.5893, "rewards/accuracies": 0.5625, "rewards/chosen": -0.21998968720436096, "rewards/margins": 0.367271363735199, "rewards/rejected": -0.5872610807418823, "step": 665 }, { "epoch": 0.87, "learning_rate": 4.930803164980496e-05, "logits/chosen": -2.324084520339966, "logits/rejected": -2.4046638011932373, "logps/chosen": -199.2420196533203, "logps/rejected": -188.57501220703125, "loss": 0.651, "rewards/accuracies": 0.5625, "rewards/chosen": -0.24593767523765564, "rewards/margins": 0.17680732905864716, "rewards/rejected": -0.4227449893951416, "step": 666 }, { "epoch": 0.87, "learning_rate": 4.930559576462473e-05, "logits/chosen": -2.4203665256500244, "logits/rejected": -2.5350735187530518, "logps/chosen": -203.4146728515625, "logps/rejected": -220.17127990722656, "loss": 0.6676, "rewards/accuracies": 0.5625, "rewards/chosen": -0.36869651079177856, "rewards/margins": 0.1714363694190979, "rewards/rejected": -0.5401328802108765, "step": 667 }, { "epoch": 0.87, "learning_rate": 4.930315565992189e-05, "logits/chosen": -2.5649983882904053, "logits/rejected": -2.587256669998169, "logps/chosen": -211.79006958007812, "logps/rejected": -197.28616333007812, "loss": 0.6443, "rewards/accuracies": 0.625, "rewards/chosen": -0.41877490282058716, "rewards/margins": 0.1472373902797699, "rewards/rejected": -0.5660123229026794, "step": 668 }, { "epoch": 0.88, "learning_rate": 4.930071133612005e-05, "logits/chosen": -2.4042892456054688, "logits/rejected": -2.4121689796447754, "logps/chosen": -163.4969940185547, "logps/rejected": -214.19903564453125, "loss": 0.5355, "rewards/accuracies": 0.625, "rewards/chosen": -0.12853318452835083, "rewards/margins": 0.4976612329483032, "rewards/rejected": -0.626194417476654, "step": 669 }, { "epoch": 0.88, "learning_rate": 4.929826279364357e-05, "logits/chosen": -2.6725361347198486, "logits/rejected": -2.6962034702301025, "logps/chosen": -206.29966735839844, "logps/rejected": -225.02146911621094, "loss": 0.7439, "rewards/accuracies": 0.25, "rewards/chosen": -0.4318912625312805, "rewards/margins": -0.017002558335661888, "rewards/rejected": -0.4148887097835541, "step": 670 }, { "epoch": 0.88, "learning_rate": 4.9295810032917486e-05, "logits/chosen": -2.6312649250030518, "logits/rejected": -2.677178144454956, "logps/chosen": -233.12554931640625, "logps/rejected": -249.84649658203125, "loss": 0.6412, "rewards/accuracies": 0.625, "rewards/chosen": -0.3292105495929718, "rewards/margins": 0.18580739200115204, "rewards/rejected": -0.5150179266929626, "step": 671 }, { "epoch": 0.88, "learning_rate": 4.929335305436764e-05, "logits/chosen": -2.350733995437622, "logits/rejected": -2.4738504886627197, "logps/chosen": -159.27565002441406, "logps/rejected": -197.69430541992188, "loss": 0.5996, "rewards/accuracies": 0.75, "rewards/chosen": -0.3497101664543152, "rewards/margins": 0.35428282618522644, "rewards/rejected": -0.7039930820465088, "step": 672 }, { "epoch": 0.88, "learning_rate": 4.9290891858420554e-05, "logits/chosen": -2.341266632080078, "logits/rejected": -2.412942409515381, "logps/chosen": -221.41571044921875, "logps/rejected": -226.8486328125, "loss": 0.5985, "rewards/accuracies": 0.6875, "rewards/chosen": -0.34598851203918457, "rewards/margins": 0.2551480233669281, "rewards/rejected": -0.6011365652084351, "step": 673 }, { "epoch": 0.88, "learning_rate": 4.92884264455035e-05, "logits/chosen": -2.302943706512451, "logits/rejected": -2.3145551681518555, "logps/chosen": -149.34109497070312, "logps/rejected": -138.53506469726562, "loss": 0.644, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1495213508605957, "rewards/margins": 0.15739065408706665, "rewards/rejected": -0.30691200494766235, "step": 674 }, { "epoch": 0.88, "learning_rate": 4.9285956816044486e-05, "logits/chosen": -2.228523015975952, "logits/rejected": -2.173222064971924, "logps/chosen": -177.70559692382812, "logps/rejected": -210.69927978515625, "loss": 0.7657, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3950153589248657, "rewards/margins": -0.07809535413980484, "rewards/rejected": -0.3169199824333191, "step": 675 }, { "epoch": 0.88, "learning_rate": 4.9283482970472245e-05, "logits/chosen": -2.3849382400512695, "logits/rejected": -2.513273000717163, "logps/chosen": -185.79124450683594, "logps/rejected": -201.27066040039062, "loss": 0.6207, "rewards/accuracies": 0.5625, "rewards/chosen": -0.13350968062877655, "rewards/margins": 0.2174827605485916, "rewards/rejected": -0.35099244117736816, "step": 676 }, { "epoch": 0.89, "learning_rate": 4.928100490921625e-05, "logits/chosen": -2.458615779876709, "logits/rejected": -2.4962515830993652, "logps/chosen": -204.7447509765625, "logps/rejected": -199.8260955810547, "loss": 0.7519, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6041467189788818, "rewards/margins": 0.038420408964157104, "rewards/rejected": -0.6425671577453613, "step": 677 }, { "epoch": 0.89, "learning_rate": 4.927852263270667e-05, "logits/chosen": -2.5404415130615234, "logits/rejected": -2.5182065963745117, "logps/chosen": -208.4690399169922, "logps/rejected": -227.29690551757812, "loss": 0.6729, "rewards/accuracies": 0.6875, "rewards/chosen": -0.28778883814811707, "rewards/margins": 0.08314257860183716, "rewards/rejected": -0.3709314167499542, "step": 678 }, { "epoch": 0.89, "learning_rate": 4.9276036141374473e-05, "logits/chosen": -2.528585910797119, "logits/rejected": -2.5861098766326904, "logps/chosen": -211.7762908935547, "logps/rejected": -236.7048797607422, "loss": 0.7562, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4092783033847809, "rewards/margins": -0.03306754678487778, "rewards/rejected": -0.3762107491493225, "step": 679 }, { "epoch": 0.89, "learning_rate": 4.92735454356513e-05, "logits/chosen": -2.172372341156006, "logits/rejected": -2.1387619972229004, "logps/chosen": -230.44168090820312, "logps/rejected": -221.9505615234375, "loss": 0.6126, "rewards/accuracies": 0.5, "rewards/chosen": -0.0995088517665863, "rewards/margins": 0.24330440163612366, "rewards/rejected": -0.34281325340270996, "step": 680 }, { "epoch": 0.89, "learning_rate": 4.927105051596956e-05, "logits/chosen": -2.5018787384033203, "logits/rejected": -2.6133289337158203, "logps/chosen": -159.59141540527344, "logps/rejected": -165.89492797851562, "loss": 0.6031, "rewards/accuracies": 0.625, "rewards/chosen": -0.0614471361041069, "rewards/margins": 0.24630111455917358, "rewards/rejected": -0.3077481985092163, "step": 681 }, { "epoch": 0.89, "learning_rate": 4.926855138276236e-05, "logits/chosen": -2.532803773880005, "logits/rejected": -2.6221201419830322, "logps/chosen": -192.40725708007812, "logps/rejected": -176.5653839111328, "loss": 0.7419, "rewards/accuracies": 0.5, "rewards/chosen": -0.4705914258956909, "rewards/margins": 0.014578044414520264, "rewards/rejected": -0.48516952991485596, "step": 682 }, { "epoch": 0.89, "learning_rate": 4.9266048036463566e-05, "logits/chosen": -2.5754356384277344, "logits/rejected": -2.668449878692627, "logps/chosen": -223.462646484375, "logps/rejected": -249.08425903320312, "loss": 0.5664, "rewards/accuracies": 0.625, "rewards/chosen": -0.4348244071006775, "rewards/margins": 0.38006237149238586, "rewards/rejected": -0.8148868680000305, "step": 683 }, { "epoch": 0.9, "learning_rate": 4.926354047750777e-05, "logits/chosen": -2.148771047592163, "logits/rejected": -2.174349546432495, "logps/chosen": -191.0968780517578, "logps/rejected": -201.15272521972656, "loss": 0.7219, "rewards/accuracies": 0.5, "rewards/chosen": -0.25711679458618164, "rewards/margins": 0.0128050297498703, "rewards/rejected": -0.26992183923721313, "step": 684 }, { "epoch": 0.9, "learning_rate": 4.926102870633029e-05, "logits/chosen": -2.5497798919677734, "logits/rejected": -2.5441908836364746, "logps/chosen": -236.3572998046875, "logps/rejected": -223.45065307617188, "loss": 0.734, "rewards/accuracies": 0.5, "rewards/chosen": -0.43852365016937256, "rewards/margins": 0.028032146394252777, "rewards/rejected": -0.46655580401420593, "step": 685 }, { "epoch": 0.9, "learning_rate": 4.925851272336717e-05, "logits/chosen": -2.49284029006958, "logits/rejected": -2.4962539672851562, "logps/chosen": -187.33041381835938, "logps/rejected": -161.27676391601562, "loss": 0.7157, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4357530474662781, "rewards/margins": 0.01825621724128723, "rewards/rejected": -0.4540092647075653, "step": 686 }, { "epoch": 0.9, "learning_rate": 4.9255992529055195e-05, "logits/chosen": -2.4232540130615234, "logits/rejected": -2.4808590412139893, "logps/chosen": -177.44873046875, "logps/rejected": -173.36376953125, "loss": 0.5234, "rewards/accuracies": 0.8125, "rewards/chosen": -0.000440012663602829, "rewards/margins": 0.42676159739494324, "rewards/rejected": -0.4272015690803528, "step": 687 }, { "epoch": 0.9, "learning_rate": 4.925346812383188e-05, "logits/chosen": -2.5829415321350098, "logits/rejected": -2.539314031600952, "logps/chosen": -201.11561584472656, "logps/rejected": -157.01829528808594, "loss": 0.659, "rewards/accuracies": 0.5625, "rewards/chosen": -0.18970540165901184, "rewards/margins": 0.1848244071006775, "rewards/rejected": -0.37452977895736694, "step": 688 }, { "epoch": 0.9, "learning_rate": 4.925093950813547e-05, "logits/chosen": -2.1912455558776855, "logits/rejected": -2.235294818878174, "logps/chosen": -158.46246337890625, "logps/rejected": -182.1004180908203, "loss": 0.7104, "rewards/accuracies": 0.4375, "rewards/chosen": -0.2667257487773895, "rewards/margins": 0.0735367089509964, "rewards/rejected": -0.3402624726295471, "step": 689 }, { "epoch": 0.9, "learning_rate": 4.924840668240495e-05, "logits/chosen": -2.2375993728637695, "logits/rejected": -2.270219564437866, "logps/chosen": -206.09689331054688, "logps/rejected": -185.82130432128906, "loss": 0.8109, "rewards/accuracies": 0.625, "rewards/chosen": -0.5142911672592163, "rewards/margins": -0.055089227855205536, "rewards/rejected": -0.4592019319534302, "step": 690 }, { "epoch": 0.9, "learning_rate": 4.924586964708e-05, "logits/chosen": -2.683393716812134, "logits/rejected": -2.745443344116211, "logps/chosen": -211.4540252685547, "logps/rejected": -204.97247314453125, "loss": 0.6848, "rewards/accuracies": 0.5625, "rewards/chosen": -0.388435035943985, "rewards/margins": 0.09198732674121857, "rewards/rejected": -0.48042237758636475, "step": 691 }, { "epoch": 0.91, "learning_rate": 4.9243328402601075e-05, "logits/chosen": -2.4001381397247314, "logits/rejected": -2.446291208267212, "logps/chosen": -173.99537658691406, "logps/rejected": -170.11166381835938, "loss": 0.6884, "rewards/accuracies": 0.5, "rewards/chosen": -0.3871830105781555, "rewards/margins": 0.05776102468371391, "rewards/rejected": -0.44494396448135376, "step": 692 }, { "epoch": 0.91, "learning_rate": 4.924078294940935e-05, "logits/chosen": -2.3540093898773193, "logits/rejected": -2.4053566455841064, "logps/chosen": -238.71336364746094, "logps/rejected": -232.11026000976562, "loss": 0.6272, "rewards/accuracies": 0.5625, "rewards/chosen": -0.38184699416160583, "rewards/margins": 0.2840040624141693, "rewards/rejected": -0.6658511161804199, "step": 693 }, { "epoch": 0.91, "learning_rate": 4.92382332879467e-05, "logits/chosen": -2.5318589210510254, "logits/rejected": -2.570647954940796, "logps/chosen": -182.59814453125, "logps/rejected": -190.98745727539062, "loss": 0.6351, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4041218161582947, "rewards/margins": 0.2801179587841034, "rewards/rejected": -0.6842397451400757, "step": 694 }, { "epoch": 0.91, "learning_rate": 4.923567941865577e-05, "logits/chosen": -2.440756320953369, "logits/rejected": -2.4445791244506836, "logps/chosen": -174.62588500976562, "logps/rejected": -186.00222778320312, "loss": 0.6708, "rewards/accuracies": 0.5, "rewards/chosen": -0.3223167061805725, "rewards/margins": 0.1542733609676361, "rewards/rejected": -0.4765900671482086, "step": 695 }, { "epoch": 0.91, "learning_rate": 4.923312134197991e-05, "logits/chosen": -2.3524816036224365, "logits/rejected": -2.2868611812591553, "logps/chosen": -174.77328491210938, "logps/rejected": -193.611083984375, "loss": 0.8216, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4883018434047699, "rewards/margins": -0.1851535439491272, "rewards/rejected": -0.3031483292579651, "step": 696 }, { "epoch": 0.91, "learning_rate": 4.9230559058363223e-05, "logits/chosen": -2.2453904151916504, "logits/rejected": -2.2618868350982666, "logps/chosen": -204.50955200195312, "logps/rejected": -213.33969116210938, "loss": 0.6688, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2155238389968872, "rewards/margins": 0.14622408151626587, "rewards/rejected": -0.3617479205131531, "step": 697 }, { "epoch": 0.91, "learning_rate": 4.922799256825051e-05, "logits/chosen": -2.2087559700012207, "logits/rejected": -2.289468765258789, "logps/chosen": -223.2991943359375, "logps/rejected": -193.14785766601562, "loss": 0.8451, "rewards/accuracies": 0.25, "rewards/chosen": -0.45806586742401123, "rewards/margins": -0.23903487622737885, "rewards/rejected": -0.21903102099895477, "step": 698 }, { "epoch": 0.91, "learning_rate": 4.9225421872087344e-05, "logits/chosen": -2.6118435859680176, "logits/rejected": -2.5954737663269043, "logps/chosen": -218.72201538085938, "logps/rejected": -223.23770141601562, "loss": 0.6797, "rewards/accuracies": 0.5625, "rewards/chosen": -0.33901646733283997, "rewards/margins": 0.09220410138368607, "rewards/rejected": -0.43122053146362305, "step": 699 }, { "epoch": 0.92, "learning_rate": 4.922284697031999e-05, "logits/chosen": -2.532050371170044, "logits/rejected": -2.5437393188476562, "logps/chosen": -205.47463989257812, "logps/rejected": -194.44210815429688, "loss": 0.6153, "rewards/accuracies": 0.6875, "rewards/chosen": -0.293450266122818, "rewards/margins": 0.32601144909858704, "rewards/rejected": -0.619461715221405, "step": 700 }, { "epoch": 0.92, "learning_rate": 4.922026786339545e-05, "logits/chosen": -2.4742062091827393, "logits/rejected": -2.507223606109619, "logps/chosen": -249.90118408203125, "logps/rejected": -243.16616821289062, "loss": 0.6483, "rewards/accuracies": 0.4375, "rewards/chosen": -0.3557482063770294, "rewards/margins": 0.16462744772434235, "rewards/rejected": -0.5203756093978882, "step": 701 }, { "epoch": 0.92, "learning_rate": 4.921768455176149e-05, "logits/chosen": -2.313598155975342, "logits/rejected": -2.38657546043396, "logps/chosen": -218.81178283691406, "logps/rejected": -273.9117126464844, "loss": 0.5901, "rewards/accuracies": 0.75, "rewards/chosen": -0.16350752115249634, "rewards/margins": 0.2863968014717102, "rewards/rejected": -0.44990429282188416, "step": 702 }, { "epoch": 0.92, "learning_rate": 4.921509703586657e-05, "logits/chosen": -2.4725236892700195, "logits/rejected": -2.46490216255188, "logps/chosen": -204.388916015625, "logps/rejected": -202.4940948486328, "loss": 0.6274, "rewards/accuracies": 0.625, "rewards/chosen": -0.11353873461484909, "rewards/margins": 0.18249502778053284, "rewards/rejected": -0.2960337698459625, "step": 703 }, { "epoch": 0.92, "learning_rate": 4.9212505316159876e-05, "logits/chosen": -2.4417524337768555, "logits/rejected": -2.4327855110168457, "logps/chosen": -185.76559448242188, "logps/rejected": -185.82675170898438, "loss": 0.6754, "rewards/accuracies": 0.625, "rewards/chosen": -0.16719910502433777, "rewards/margins": 0.131285160779953, "rewards/rejected": -0.29848429560661316, "step": 704 }, { "epoch": 0.92, "learning_rate": 4.920990939309135e-05, "logits/chosen": -2.405703544616699, "logits/rejected": -2.4366328716278076, "logps/chosen": -182.18052673339844, "logps/rejected": -177.89529418945312, "loss": 0.5939, "rewards/accuracies": 0.75, "rewards/chosen": -0.0767878070473671, "rewards/margins": 0.26920846104621887, "rewards/rejected": -0.34599626064300537, "step": 705 }, { "epoch": 0.92, "learning_rate": 4.9207309267111654e-05, "logits/chosen": -2.267641067504883, "logits/rejected": -2.359164237976074, "logps/chosen": -167.9193115234375, "logps/rejected": -198.30853271484375, "loss": 0.5458, "rewards/accuracies": 0.6875, "rewards/chosen": -0.17353487014770508, "rewards/margins": 0.4786756932735443, "rewards/rejected": -0.6522104144096375, "step": 706 }, { "epoch": 0.93, "learning_rate": 4.920470493867217e-05, "logits/chosen": -2.3301897048950195, "logits/rejected": -2.287158489227295, "logps/chosen": -179.1868133544922, "logps/rejected": -173.25205993652344, "loss": 0.8096, "rewards/accuracies": 0.375, "rewards/chosen": -0.3885132968425751, "rewards/margins": -0.15448781847953796, "rewards/rejected": -0.2340254783630371, "step": 707 }, { "epoch": 0.93, "learning_rate": 4.920209640822503e-05, "logits/chosen": -2.3286001682281494, "logits/rejected": -2.372384786605835, "logps/chosen": -183.89163208007812, "logps/rejected": -171.33837890625, "loss": 0.6243, "rewards/accuracies": 0.5625, "rewards/chosen": -0.200511634349823, "rewards/margins": 0.1888035386800766, "rewards/rejected": -0.3893151581287384, "step": 708 }, { "epoch": 0.93, "learning_rate": 4.919948367622307e-05, "logits/chosen": -2.049145221710205, "logits/rejected": -2.1091737747192383, "logps/chosen": -165.6485595703125, "logps/rejected": -196.3398895263672, "loss": 0.6972, "rewards/accuracies": 0.625, "rewards/chosen": -0.21507728099822998, "rewards/margins": 0.1645798683166504, "rewards/rejected": -0.37965714931488037, "step": 709 }, { "epoch": 0.93, "learning_rate": 4.919686674311987e-05, "logits/chosen": -2.2241785526275635, "logits/rejected": -2.1966073513031006, "logps/chosen": -197.89120483398438, "logps/rejected": -222.09132385253906, "loss": 0.6355, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12498195469379425, "rewards/margins": 0.2979465425014496, "rewards/rejected": -0.42292848229408264, "step": 710 }, { "epoch": 0.93, "learning_rate": 4.9194245609369746e-05, "logits/chosen": -2.3444929122924805, "logits/rejected": -2.3332207202911377, "logps/chosen": -240.3154296875, "logps/rejected": -229.0690460205078, "loss": 0.6236, "rewards/accuracies": 0.625, "rewards/chosen": -0.17825375497341156, "rewards/margins": 0.23597893118858337, "rewards/rejected": -0.41423267126083374, "step": 711 }, { "epoch": 0.93, "learning_rate": 4.9191620275427725e-05, "logits/chosen": -2.472026824951172, "logits/rejected": -2.424983501434326, "logps/chosen": -212.58364868164062, "logps/rejected": -186.08990478515625, "loss": 0.684, "rewards/accuracies": 0.4375, "rewards/chosen": -0.1622922122478485, "rewards/margins": 0.10522370040416718, "rewards/rejected": -0.2675159275531769, "step": 712 }, { "epoch": 0.93, "learning_rate": 4.9188990741749576e-05, "logits/chosen": -2.481387138366699, "logits/rejected": -2.4912960529327393, "logps/chosen": -259.65753173828125, "logps/rejected": -245.88864135742188, "loss": 0.6385, "rewards/accuracies": 0.5, "rewards/chosen": -0.3589114546775818, "rewards/margins": 0.13590216636657715, "rewards/rejected": -0.49481362104415894, "step": 713 }, { "epoch": 0.93, "learning_rate": 4.918635700879179e-05, "logits/chosen": -2.4197511672973633, "logits/rejected": -2.442139148712158, "logps/chosen": -180.3101806640625, "logps/rejected": -201.78469848632812, "loss": 0.696, "rewards/accuracies": 0.5, "rewards/chosen": -0.27619680762290955, "rewards/margins": 0.04660003259778023, "rewards/rejected": -0.32279688119888306, "step": 714 }, { "epoch": 0.94, "learning_rate": 4.918371907701159e-05, "logits/chosen": -2.333615303039551, "logits/rejected": -2.3523952960968018, "logps/chosen": -176.4866485595703, "logps/rejected": -190.32089233398438, "loss": 0.755, "rewards/accuracies": 0.375, "rewards/chosen": -0.353998064994812, "rewards/margins": -0.033618077635765076, "rewards/rejected": -0.32037997245788574, "step": 715 }, { "epoch": 0.94, "learning_rate": 4.9181076946866944e-05, "logits/chosen": -2.45198655128479, "logits/rejected": -2.432290554046631, "logps/chosen": -178.83279418945312, "logps/rejected": -188.7766876220703, "loss": 0.7776, "rewards/accuracies": 0.375, "rewards/chosen": -0.4141733944416046, "rewards/margins": -0.06833554804325104, "rewards/rejected": -0.3458378314971924, "step": 716 }, { "epoch": 0.94, "learning_rate": 4.917843061881652e-05, "logits/chosen": -2.4485764503479004, "logits/rejected": -2.412081241607666, "logps/chosen": -229.4910430908203, "logps/rejected": -225.20999145507812, "loss": 0.7713, "rewards/accuracies": 0.5, "rewards/chosen": -0.4986688494682312, "rewards/margins": -0.1049906462430954, "rewards/rejected": -0.3936781883239746, "step": 717 }, { "epoch": 0.94, "learning_rate": 4.9175780093319723e-05, "logits/chosen": -2.619525194168091, "logits/rejected": -2.5976409912109375, "logps/chosen": -152.8652801513672, "logps/rejected": -190.03179931640625, "loss": 0.6384, "rewards/accuracies": 0.625, "rewards/chosen": -0.044289495795965195, "rewards/margins": 0.1848900467157364, "rewards/rejected": -0.2291795164346695, "step": 718 }, { "epoch": 0.94, "learning_rate": 4.91731253708367e-05, "logits/chosen": -2.2807188034057617, "logits/rejected": -2.338017463684082, "logps/chosen": -182.47259521484375, "logps/rejected": -196.39678955078125, "loss": 0.6927, "rewards/accuracies": 0.5625, "rewards/chosen": -0.26287615299224854, "rewards/margins": 0.050617627799510956, "rewards/rejected": -0.3134937882423401, "step": 719 }, { "epoch": 0.94, "learning_rate": 4.9170466451828326e-05, "logits/chosen": -2.2561631202697754, "logits/rejected": -2.3567490577697754, "logps/chosen": -242.33407592773438, "logps/rejected": -263.4303283691406, "loss": 0.7338, "rewards/accuracies": 0.375, "rewards/chosen": -0.2756967544555664, "rewards/margins": 0.13067740201950073, "rewards/rejected": -0.40637409687042236, "step": 720 }, { "epoch": 0.94, "learning_rate": 4.916780333675618e-05, "logits/chosen": -2.5038459300994873, "logits/rejected": -2.500086545944214, "logps/chosen": -187.3552703857422, "logps/rejected": -187.51268005371094, "loss": 0.6628, "rewards/accuracies": 0.4375, "rewards/chosen": -0.2900952398777008, "rewards/margins": 0.16025833785533905, "rewards/rejected": -0.4503535330295563, "step": 721 }, { "epoch": 0.94, "learning_rate": 4.9165136026082604e-05, "logits/chosen": -2.2002265453338623, "logits/rejected": -2.226296901702881, "logps/chosen": -200.2354278564453, "logps/rejected": -195.80328369140625, "loss": 0.6676, "rewards/accuracies": 0.5, "rewards/chosen": -0.4443664848804474, "rewards/margins": 0.14572381973266602, "rewards/rejected": -0.5900903344154358, "step": 722 }, { "epoch": 0.95, "learning_rate": 4.916246452027063e-05, "logits/chosen": -2.1249847412109375, "logits/rejected": -2.116081953048706, "logps/chosen": -214.81161499023438, "logps/rejected": -220.05429077148438, "loss": 0.664, "rewards/accuracies": 0.5, "rewards/chosen": -0.25689569115638733, "rewards/margins": 0.1331162005662918, "rewards/rejected": -0.39001190662384033, "step": 723 }, { "epoch": 0.95, "learning_rate": 4.9159788819784064e-05, "logits/chosen": -2.3239493370056152, "logits/rejected": -2.3413610458374023, "logps/chosen": -209.00692749023438, "logps/rejected": -198.1204376220703, "loss": 0.627, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2650049328804016, "rewards/margins": 0.20938965678215027, "rewards/rejected": -0.47439464926719666, "step": 724 }, { "epoch": 0.95, "learning_rate": 4.9157108925087405e-05, "logits/chosen": -2.420534610748291, "logits/rejected": -2.442646026611328, "logps/chosen": -211.38424682617188, "logps/rejected": -207.95278930664062, "loss": 0.5418, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22766509652137756, "rewards/margins": 0.3782108724117279, "rewards/rejected": -0.6058759689331055, "step": 725 }, { "epoch": 0.95, "learning_rate": 4.915442483664588e-05, "logits/chosen": -2.6422438621520996, "logits/rejected": -2.4694361686706543, "logps/chosen": -173.2271728515625, "logps/rejected": -145.02798461914062, "loss": 0.8003, "rewards/accuracies": 0.3125, "rewards/chosen": -0.25257498025894165, "rewards/margins": -0.11290948837995529, "rewards/rejected": -0.13966546952724457, "step": 726 }, { "epoch": 0.95, "learning_rate": 4.915173655492547e-05, "logits/chosen": -2.5392398834228516, "logits/rejected": -2.4417877197265625, "logps/chosen": -228.2000732421875, "logps/rejected": -199.39288330078125, "loss": 0.609, "rewards/accuracies": 0.6875, "rewards/chosen": -0.23776192963123322, "rewards/margins": 0.23510348796844482, "rewards/rejected": -0.47286540269851685, "step": 727 }, { "epoch": 0.95, "learning_rate": 4.914904408039286e-05, "logits/chosen": -2.4822921752929688, "logits/rejected": -2.5780792236328125, "logps/chosen": -194.31045532226562, "logps/rejected": -235.24607849121094, "loss": 0.5732, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3551061451435089, "rewards/margins": 0.39131835103034973, "rewards/rejected": -0.7464244961738586, "step": 728 }, { "epoch": 0.95, "learning_rate": 4.914634741351547e-05, "logits/chosen": -2.4005560874938965, "logits/rejected": -2.4636473655700684, "logps/chosen": -222.8491668701172, "logps/rejected": -229.69638061523438, "loss": 0.7059, "rewards/accuracies": 0.5, "rewards/chosen": -0.444613516330719, "rewards/margins": 0.05394814535975456, "rewards/rejected": -0.49856165051460266, "step": 729 }, { "epoch": 0.96, "learning_rate": 4.914364655476146e-05, "logits/chosen": -2.541283369064331, "logits/rejected": -2.4821250438690186, "logps/chosen": -199.74075317382812, "logps/rejected": -244.3222198486328, "loss": 0.5754, "rewards/accuracies": 0.625, "rewards/chosen": -0.24406713247299194, "rewards/margins": 0.44718241691589355, "rewards/rejected": -0.6912496089935303, "step": 730 }, { "epoch": 0.96, "learning_rate": 4.914094150459969e-05, "logits/chosen": -2.3721487522125244, "logits/rejected": -2.4787163734436035, "logps/chosen": -185.42625427246094, "logps/rejected": -214.8418426513672, "loss": 0.741, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5116318464279175, "rewards/margins": -0.03801153600215912, "rewards/rejected": -0.47362035512924194, "step": 731 }, { "epoch": 0.96, "learning_rate": 4.9138232263499784e-05, "logits/chosen": -2.5538036823272705, "logits/rejected": -2.467003583908081, "logps/chosen": -197.03834533691406, "logps/rejected": -214.0052947998047, "loss": 0.812, "rewards/accuracies": 0.375, "rewards/chosen": -0.5602855682373047, "rewards/margins": -0.166466623544693, "rewards/rejected": -0.3938189446926117, "step": 732 }, { "epoch": 0.96, "learning_rate": 4.913551883193206e-05, "logits/chosen": -2.4558639526367188, "logits/rejected": -2.5085341930389404, "logps/chosen": -225.220947265625, "logps/rejected": -227.97213745117188, "loss": 0.6471, "rewards/accuracies": 0.625, "rewards/chosen": -0.2800041437149048, "rewards/margins": 0.34519922733306885, "rewards/rejected": -0.6252033114433289, "step": 733 }, { "epoch": 0.96, "learning_rate": 4.9132801210367586e-05, "logits/chosen": -2.405069351196289, "logits/rejected": -2.4901866912841797, "logps/chosen": -152.56527709960938, "logps/rejected": -152.64671325683594, "loss": 0.6334, "rewards/accuracies": 0.5625, "rewards/chosen": -0.3601643145084381, "rewards/margins": 0.2201770395040512, "rewards/rejected": -0.5803413987159729, "step": 734 }, { "epoch": 0.96, "learning_rate": 4.913007939927814e-05, "logits/chosen": -2.3362531661987305, "logits/rejected": -2.3917181491851807, "logps/chosen": -193.826904296875, "logps/rejected": -231.73898315429688, "loss": 0.6171, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5505282282829285, "rewards/margins": 0.3396502137184143, "rewards/rejected": -0.890178382396698, "step": 735 }, { "epoch": 0.96, "learning_rate": 4.912735339913625e-05, "logits/chosen": -2.132725715637207, "logits/rejected": -2.2681026458740234, "logps/chosen": -198.4554443359375, "logps/rejected": -205.1267852783203, "loss": 0.6419, "rewards/accuracies": 0.5625, "rewards/chosen": -0.34412312507629395, "rewards/margins": 0.1572694331407547, "rewards/rejected": -0.5013926029205322, "step": 736 }, { "epoch": 0.96, "learning_rate": 4.912462321041513e-05, "logits/chosen": -2.352013349533081, "logits/rejected": -2.474015235900879, "logps/chosen": -216.76731872558594, "logps/rejected": -208.0941162109375, "loss": 0.7258, "rewards/accuracies": 0.4375, "rewards/chosen": -0.49095839262008667, "rewards/margins": -0.004371330142021179, "rewards/rejected": -0.4865871071815491, "step": 737 }, { "epoch": 0.97, "learning_rate": 4.9121888833588795e-05, "logits/chosen": -2.395151138305664, "logits/rejected": -2.40535306930542, "logps/chosen": -163.3991241455078, "logps/rejected": -184.1988525390625, "loss": 0.6083, "rewards/accuracies": 0.6875, "rewards/chosen": -0.35962533950805664, "rewards/margins": 0.20414502918720245, "rewards/rejected": -0.5637703537940979, "step": 738 }, { "epoch": 0.97, "learning_rate": 4.91191502691319e-05, "logits/chosen": -2.3076629638671875, "logits/rejected": -2.460707426071167, "logps/chosen": -163.59388732910156, "logps/rejected": -190.1605987548828, "loss": 0.6092, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2011553794145584, "rewards/margins": 0.24963317811489105, "rewards/rejected": -0.45078855752944946, "step": 739 }, { "epoch": 0.97, "learning_rate": 4.911640751751988e-05, "logits/chosen": -2.419591188430786, "logits/rejected": -2.537069797515869, "logps/chosen": -250.05996704101562, "logps/rejected": -225.0284881591797, "loss": 0.7242, "rewards/accuracies": 0.5, "rewards/chosen": -0.6081552505493164, "rewards/margins": 0.06761530041694641, "rewards/rejected": -0.6757705807685852, "step": 740 }, { "epoch": 0.97, "learning_rate": 4.9113660579228886e-05, "logits/chosen": -2.4080655574798584, "logits/rejected": -2.4866342544555664, "logps/chosen": -168.5004119873047, "logps/rejected": -216.32675170898438, "loss": 0.6338, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5959919691085815, "rewards/margins": 0.22804662585258484, "rewards/rejected": -0.8240386247634888, "step": 741 }, { "epoch": 0.97, "learning_rate": 4.91109094547358e-05, "logits/chosen": -2.2880287170410156, "logits/rejected": -2.4074809551239014, "logps/chosen": -184.07696533203125, "logps/rejected": -215.08717346191406, "loss": 0.6027, "rewards/accuracies": 0.6875, "rewards/chosen": -0.49041178822517395, "rewards/margins": 0.23776140809059143, "rewards/rejected": -0.7281732559204102, "step": 742 }, { "epoch": 0.97, "learning_rate": 4.910815414451821e-05, "logits/chosen": -2.4181840419769287, "logits/rejected": -2.4892990589141846, "logps/chosen": -215.46221923828125, "logps/rejected": -229.75799560546875, "loss": 0.608, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7831647992134094, "rewards/margins": 0.38174957036972046, "rewards/rejected": -1.1649143695831299, "step": 743 }, { "epoch": 0.97, "learning_rate": 4.910539464905447e-05, "logits/chosen": -2.2858383655548096, "logits/rejected": -2.3867931365966797, "logps/chosen": -257.6205749511719, "logps/rejected": -225.37481689453125, "loss": 0.6948, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5961012840270996, "rewards/margins": 0.15920449793338776, "rewards/rejected": -0.7553057670593262, "step": 744 }, { "epoch": 0.97, "learning_rate": 4.910263096882362e-05, "logits/chosen": -2.2314653396606445, "logits/rejected": -2.242586851119995, "logps/chosen": -157.20448303222656, "logps/rejected": -189.92567443847656, "loss": 0.7938, "rewards/accuracies": 0.375, "rewards/chosen": -0.5152949690818787, "rewards/margins": -0.07010670006275177, "rewards/rejected": -0.4451882541179657, "step": 745 }, { "epoch": 0.98, "learning_rate": 4.9099863104305446e-05, "logits/chosen": -2.34870982170105, "logits/rejected": -2.3678641319274902, "logps/chosen": -157.9035186767578, "logps/rejected": -194.49575805664062, "loss": 0.6431, "rewards/accuracies": 0.5625, "rewards/chosen": -0.39278292655944824, "rewards/margins": 0.1728055626153946, "rewards/rejected": -0.5655884742736816, "step": 746 }, { "epoch": 0.98, "learning_rate": 4.909709105598045e-05, "logits/chosen": -2.290536403656006, "logits/rejected": -2.316704034805298, "logps/chosen": -173.55892944335938, "logps/rejected": -190.94125366210938, "loss": 0.6078, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3803100883960724, "rewards/margins": 0.2535950839519501, "rewards/rejected": -0.6339051723480225, "step": 747 }, { "epoch": 0.98, "learning_rate": 4.9094314824329876e-05, "logits/chosen": -2.5316176414489746, "logits/rejected": -2.4976158142089844, "logps/chosen": -206.0629119873047, "logps/rejected": -219.75418090820312, "loss": 0.8139, "rewards/accuracies": 0.375, "rewards/chosen": -0.5901838541030884, "rewards/margins": -0.1481025069952011, "rewards/rejected": -0.4420813322067261, "step": 748 }, { "epoch": 0.98, "learning_rate": 4.9091534409835694e-05, "logits/chosen": -2.500584363937378, "logits/rejected": -2.5805132389068604, "logps/chosen": -174.99212646484375, "logps/rejected": -207.73011779785156, "loss": 0.574, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4852156937122345, "rewards/margins": 0.3404516577720642, "rewards/rejected": -0.8256674408912659, "step": 749 }, { "epoch": 0.98, "learning_rate": 4.908874981298057e-05, "logits/chosen": -2.5304508209228516, "logits/rejected": -2.6482512950897217, "logps/chosen": -160.60971069335938, "logps/rejected": -178.6311492919922, "loss": 0.7716, "rewards/accuracies": 0.4375, "rewards/chosen": -0.4607134461402893, "rewards/margins": -0.038665421307086945, "rewards/rejected": -0.42204800248146057, "step": 750 }, { "epoch": 0.98, "learning_rate": 4.908596103424794e-05, "logits/chosen": -2.247166633605957, "logits/rejected": -2.2175076007843018, "logps/chosen": -185.73583984375, "logps/rejected": -191.45596313476562, "loss": 0.8569, "rewards/accuracies": 0.25, "rewards/chosen": -0.6400306820869446, "rewards/margins": -0.2516973912715912, "rewards/rejected": -0.3883333206176758, "step": 751 }, { "epoch": 0.98, "learning_rate": 4.908316807412194e-05, "logits/chosen": -2.492708206176758, "logits/rejected": -2.585394859313965, "logps/chosen": -213.05731201171875, "logps/rejected": -260.92462158203125, "loss": 0.6273, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5991235971450806, "rewards/margins": 0.2069501429796219, "rewards/rejected": -0.806073784828186, "step": 752 }, { "epoch": 0.99, "learning_rate": 4.908037093308742e-05, "logits/chosen": -2.5198185443878174, "logits/rejected": -2.5527148246765137, "logps/chosen": -238.94021606445312, "logps/rejected": -262.2608337402344, "loss": 0.699, "rewards/accuracies": 0.5, "rewards/chosen": -0.38716763257980347, "rewards/margins": 0.033374398946762085, "rewards/rejected": -0.42054203152656555, "step": 753 }, { "epoch": 0.99, "learning_rate": 4.907756961162999e-05, "logits/chosen": -2.26259708404541, "logits/rejected": -2.2545177936553955, "logps/chosen": -150.26882934570312, "logps/rejected": -227.8256072998047, "loss": 0.579, "rewards/accuracies": 0.625, "rewards/chosen": -0.3753708302974701, "rewards/margins": 0.4033656418323517, "rewards/rejected": -0.7787365317344666, "step": 754 }, { "epoch": 0.99, "learning_rate": 4.907476411023596e-05, "logits/chosen": -2.3281123638153076, "logits/rejected": -2.330186128616333, "logps/chosen": -149.7628936767578, "logps/rejected": -193.10093688964844, "loss": 0.7339, "rewards/accuracies": 0.4375, "rewards/chosen": -0.6727653741836548, "rewards/margins": 0.06607560813426971, "rewards/rejected": -0.7388409376144409, "step": 755 }, { "epoch": 0.99, "learning_rate": 4.907195442939237e-05, "logits/chosen": -2.4469387531280518, "logits/rejected": -2.509931802749634, "logps/chosen": -174.05670166015625, "logps/rejected": -189.08465576171875, "loss": 0.701, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5176682472229004, "rewards/margins": 0.04994421452283859, "rewards/rejected": -0.5676124095916748, "step": 756 }, { "epoch": 0.99, "learning_rate": 4.906914056958699e-05, "logits/chosen": -2.6258902549743652, "logits/rejected": -2.6027467250823975, "logps/chosen": -208.80203247070312, "logps/rejected": -200.4151611328125, "loss": 0.7588, "rewards/accuracies": 0.625, "rewards/chosen": -0.6569511294364929, "rewards/margins": -0.044889748096466064, "rewards/rejected": -0.6120614409446716, "step": 757 }, { "epoch": 0.99, "learning_rate": 4.906632253130833e-05, "logits/chosen": -2.5387301445007324, "logits/rejected": -2.496183156967163, "logps/chosen": -193.3511199951172, "logps/rejected": -199.80130004882812, "loss": 0.7945, "rewards/accuracies": 0.5, "rewards/chosen": -0.6182142496109009, "rewards/margins": -0.13901755213737488, "rewards/rejected": -0.4791966676712036, "step": 758 }, { "epoch": 0.99, "learning_rate": 4.9063500315045586e-05, "logits/chosen": -2.5884640216827393, "logits/rejected": -2.641512393951416, "logps/chosen": -186.569091796875, "logps/rejected": -200.59988403320312, "loss": 0.7447, "rewards/accuracies": 0.5, "rewards/chosen": -0.6641149520874023, "rewards/margins": -0.048228248953819275, "rewards/rejected": -0.6158866882324219, "step": 759 }, { "epoch": 0.99, "learning_rate": 4.9060673921288716e-05, "logits/chosen": -2.633413791656494, "logits/rejected": -2.598926305770874, "logps/chosen": -161.99928283691406, "logps/rejected": -154.25979614257812, "loss": 0.7324, "rewards/accuracies": 0.5, "rewards/chosen": -0.37947404384613037, "rewards/margins": -0.021064819768071175, "rewards/rejected": -0.35840916633605957, "step": 760 }, { "epoch": 1.0, "learning_rate": 4.905784335052839e-05, "logits/chosen": -2.534295082092285, "logits/rejected": -2.5714004039764404, "logps/chosen": -213.0132598876953, "logps/rejected": -223.10958862304688, "loss": 0.685, "rewards/accuracies": 0.4375, "rewards/chosen": -0.7191438674926758, "rewards/margins": 0.12883953750133514, "rewards/rejected": -0.8479833602905273, "step": 761 }, { "epoch": 1.0, "learning_rate": 4.905500860325599e-05, "logits/chosen": -2.3805532455444336, "logits/rejected": -2.502260208129883, "logps/chosen": -187.92947387695312, "logps/rejected": -244.21273803710938, "loss": 0.5568, "rewards/accuracies": 0.6875, "rewards/chosen": -0.30298084020614624, "rewards/margins": 0.5082517862319946, "rewards/rejected": -0.8112326264381409, "step": 762 }, { "epoch": 1.0, "learning_rate": 4.905216967996367e-05, "logits/chosen": -2.494596242904663, "logits/rejected": -2.5522210597991943, "logps/chosen": -193.31663513183594, "logps/rejected": -253.28793334960938, "loss": 0.5326, "rewards/accuracies": 0.875, "rewards/chosen": -0.31040558218955994, "rewards/margins": 0.41559797525405884, "rewards/rejected": -0.7260035872459412, "step": 763 }, { "epoch": 1.0, "learning_rate": 4.904932658114423e-05, "logits/chosen": -2.4357709884643555, "logits/rejected": -2.402021884918213, "logps/chosen": -207.14736938476562, "logps/rejected": -224.48757934570312, "loss": 0.7383, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6394506692886353, "rewards/margins": 0.015030726790428162, "rewards/rejected": -0.6544813513755798, "step": 764 }, { "epoch": 1.0, "learning_rate": 4.904647930729128e-05, "logits/chosen": -2.492413282394409, "logits/rejected": -2.5191187858581543, "logps/chosen": -209.2412872314453, "logps/rejected": -204.59518432617188, "loss": 0.2419, "rewards/accuracies": 0.9375, "rewards/chosen": 0.42869845032691956, "rewards/margins": 2.3007590770721436, "rewards/rejected": -1.872060775756836, "step": 765 }, { "epoch": 1.0, "learning_rate": 4.9043627858899086e-05, "logits/chosen": -2.616361618041992, "logits/rejected": -2.640256404876709, "logps/chosen": -180.7271270751953, "logps/rejected": -231.54254150390625, "loss": 0.1455, "rewards/accuracies": 1.0, "rewards/chosen": 0.1313096433877945, "rewards/margins": 2.220848321914673, "rewards/rejected": -2.08953857421875, "step": 766 }, { "epoch": 1.0, "learning_rate": 4.9040772236462695e-05, "logits/chosen": -2.3850975036621094, "logits/rejected": -2.354506254196167, "logps/chosen": -176.04693603515625, "logps/rejected": -191.62985229492188, "loss": 0.2206, "rewards/accuracies": 1.0, "rewards/chosen": 0.09096850454807281, "rewards/margins": 1.8953343629837036, "rewards/rejected": -1.8043657541275024, "step": 767 }, { "epoch": 1.01, "learning_rate": 4.903791244047783e-05, "logits/chosen": -2.448336362838745, "logits/rejected": -2.4442625045776367, "logps/chosen": -159.71731567382812, "logps/rejected": -198.8458251953125, "loss": 0.2542, "rewards/accuracies": 0.9375, "rewards/chosen": 0.14124417304992676, "rewards/margins": 1.9044493436813354, "rewards/rejected": -1.7632049322128296, "step": 768 }, { "epoch": 1.01, "learning_rate": 4.9035048471440966e-05, "logits/chosen": -2.39510178565979, "logits/rejected": -2.3210549354553223, "logps/chosen": -146.8027801513672, "logps/rejected": -182.59580993652344, "loss": 0.2789, "rewards/accuracies": 0.9375, "rewards/chosen": 0.44124722480773926, "rewards/margins": 2.291222095489502, "rewards/rejected": -1.8499748706817627, "step": 769 }, { "epoch": 1.01, "learning_rate": 4.90321803298493e-05, "logits/chosen": -2.610016107559204, "logits/rejected": -2.5617635250091553, "logps/chosen": -191.46194458007812, "logps/rejected": -232.915283203125, "loss": 0.1781, "rewards/accuracies": 1.0, "rewards/chosen": 0.716643214225769, "rewards/margins": 2.4686214923858643, "rewards/rejected": -1.7519782781600952, "step": 770 }, { "epoch": 1.01, "learning_rate": 4.9029308016200735e-05, "logits/chosen": -2.512070655822754, "logits/rejected": -2.522047758102417, "logps/chosen": -228.5362548828125, "logps/rejected": -267.5350341796875, "loss": 0.1084, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7526074051856995, "rewards/margins": 3.0486981868743896, "rewards/rejected": -2.296090602874756, "step": 771 }, { "epoch": 1.01, "learning_rate": 4.902643153099394e-05, "logits/chosen": -2.7084665298461914, "logits/rejected": -2.702622413635254, "logps/chosen": -214.2321319580078, "logps/rejected": -234.52236938476562, "loss": 0.2319, "rewards/accuracies": 0.875, "rewards/chosen": 0.38715142011642456, "rewards/margins": 2.1679859161376953, "rewards/rejected": -1.780834436416626, "step": 772 }, { "epoch": 1.01, "learning_rate": 4.902355087472826e-05, "logits/chosen": -2.4830422401428223, "logits/rejected": -2.449504852294922, "logps/chosen": -157.030517578125, "logps/rejected": -182.37347412109375, "loss": 0.2089, "rewards/accuracies": 1.0, "rewards/chosen": 0.3868522644042969, "rewards/margins": 2.223161220550537, "rewards/rejected": -1.8363089561462402, "step": 773 }, { "epoch": 1.01, "learning_rate": 4.902066604790379e-05, "logits/chosen": -2.387758731842041, "logits/rejected": -2.3118813037872314, "logps/chosen": -216.08274841308594, "logps/rejected": -232.153076171875, "loss": 0.3762, "rewards/accuracies": 0.75, "rewards/chosen": 0.1578804850578308, "rewards/margins": 1.8973913192749023, "rewards/rejected": -1.7395107746124268, "step": 774 }, { "epoch": 1.01, "learning_rate": 4.901777705102135e-05, "logits/chosen": -2.4494662284851074, "logits/rejected": -2.5127110481262207, "logps/chosen": -173.77740478515625, "logps/rejected": -214.7391357421875, "loss": 0.1939, "rewards/accuracies": 1.0, "rewards/chosen": 0.14900563657283783, "rewards/margins": 2.133729934692383, "rewards/rejected": -1.9847242832183838, "step": 775 }, { "epoch": 1.02, "learning_rate": 4.901488388458247e-05, "logits/chosen": -2.4321305751800537, "logits/rejected": -2.449230432510376, "logps/chosen": -173.65748596191406, "logps/rejected": -176.95252990722656, "loss": 0.218, "rewards/accuracies": 0.875, "rewards/chosen": 0.25331249833106995, "rewards/margins": 2.11264967918396, "rewards/rejected": -1.8593370914459229, "step": 776 }, { "epoch": 1.02, "learning_rate": 4.901198654908942e-05, "logits/chosen": -2.3953897953033447, "logits/rejected": -2.4313783645629883, "logps/chosen": -207.082275390625, "logps/rejected": -261.56427001953125, "loss": 0.1348, "rewards/accuracies": 1.0, "rewards/chosen": 0.4747293293476105, "rewards/margins": 3.3515007495880127, "rewards/rejected": -2.8767709732055664, "step": 777 }, { "epoch": 1.02, "learning_rate": 4.9009085045045175e-05, "logits/chosen": -2.613166093826294, "logits/rejected": -2.6309146881103516, "logps/chosen": -218.4897003173828, "logps/rejected": -231.17636108398438, "loss": 0.2987, "rewards/accuracies": 0.875, "rewards/chosen": -0.09049064666032791, "rewards/margins": 1.798804521560669, "rewards/rejected": -1.8892953395843506, "step": 778 }, { "epoch": 1.02, "learning_rate": 4.9006179372953466e-05, "logits/chosen": -2.4088118076324463, "logits/rejected": -2.3893849849700928, "logps/chosen": -231.118408203125, "logps/rejected": -272.7298278808594, "loss": 0.1272, "rewards/accuracies": 0.9375, "rewards/chosen": 0.22063946723937988, "rewards/margins": 3.302457332611084, "rewards/rejected": -3.0818183422088623, "step": 779 }, { "epoch": 1.02, "learning_rate": 4.9003269533318704e-05, "logits/chosen": -2.716017961502075, "logits/rejected": -2.5699970722198486, "logps/chosen": -216.4443817138672, "logps/rejected": -191.3568115234375, "loss": 0.1168, "rewards/accuracies": 1.0, "rewards/chosen": 0.5278897285461426, "rewards/margins": 2.6199638843536377, "rewards/rejected": -2.092073917388916, "step": 780 }, { "epoch": 1.02, "learning_rate": 4.900035552664605e-05, "logits/chosen": -2.5695183277130127, "logits/rejected": -2.6082301139831543, "logps/chosen": -200.3180389404297, "logps/rejected": -233.2794189453125, "loss": 0.1978, "rewards/accuracies": 0.8125, "rewards/chosen": 0.10721993446350098, "rewards/margins": 2.7741177082061768, "rewards/rejected": -2.666897773742676, "step": 781 }, { "epoch": 1.02, "learning_rate": 4.899743735344139e-05, "logits/chosen": -2.1258223056793213, "logits/rejected": -2.2032220363616943, "logps/chosen": -201.60614013671875, "logps/rejected": -282.1663818359375, "loss": 0.1137, "rewards/accuracies": 0.9375, "rewards/chosen": 0.04111824184656143, "rewards/margins": 3.605041027069092, "rewards/rejected": -3.56392240524292, "step": 782 }, { "epoch": 1.02, "learning_rate": 4.899451501421133e-05, "logits/chosen": -2.280001163482666, "logits/rejected": -2.454638957977295, "logps/chosen": -185.27694702148438, "logps/rejected": -216.79037475585938, "loss": 0.181, "rewards/accuracies": 0.875, "rewards/chosen": -0.409013569355011, "rewards/margins": 2.352353572845459, "rewards/rejected": -2.7613673210144043, "step": 783 }, { "epoch": 1.03, "learning_rate": 4.899158850946319e-05, "logits/chosen": -2.4744930267333984, "logits/rejected": -2.5907466411590576, "logps/chosen": -216.58287048339844, "logps/rejected": -261.81353759765625, "loss": 0.1848, "rewards/accuracies": 1.0, "rewards/chosen": 0.11592866480350494, "rewards/margins": 2.6159138679504395, "rewards/rejected": -2.4999852180480957, "step": 784 }, { "epoch": 1.03, "learning_rate": 4.8988657839705024e-05, "logits/chosen": -2.6295738220214844, "logits/rejected": -2.565519332885742, "logps/chosen": -175.5509033203125, "logps/rejected": -192.17001342773438, "loss": 0.2344, "rewards/accuracies": 0.8125, "rewards/chosen": -0.13090485334396362, "rewards/margins": 2.3108062744140625, "rewards/rejected": -2.44171142578125, "step": 785 }, { "epoch": 1.03, "learning_rate": 4.89857230054456e-05, "logits/chosen": -2.4025466442108154, "logits/rejected": -2.478341817855835, "logps/chosen": -160.97872924804688, "logps/rejected": -221.78729248046875, "loss": 0.2185, "rewards/accuracies": 0.875, "rewards/chosen": -0.18785348534584045, "rewards/margins": 2.667365074157715, "rewards/rejected": -2.8552186489105225, "step": 786 }, { "epoch": 1.03, "learning_rate": 4.898278400719442e-05, "logits/chosen": -2.309408664703369, "logits/rejected": -2.336895227432251, "logps/chosen": -194.12857055664062, "logps/rejected": -229.64393615722656, "loss": 0.1647, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5826263427734375, "rewards/margins": 2.673534870147705, "rewards/rejected": -3.2561612129211426, "step": 787 }, { "epoch": 1.03, "learning_rate": 4.897984084546169e-05, "logits/chosen": -2.5822911262512207, "logits/rejected": -2.594147205352783, "logps/chosen": -189.2248992919922, "logps/rejected": -217.4370880126953, "loss": 0.1947, "rewards/accuracies": 0.875, "rewards/chosen": -0.18159398436546326, "rewards/margins": 2.9417152404785156, "rewards/rejected": -3.123309373855591, "step": 788 }, { "epoch": 1.03, "learning_rate": 4.897689352075837e-05, "logits/chosen": -2.395965576171875, "logits/rejected": -2.3766961097717285, "logps/chosen": -234.74583435058594, "logps/rejected": -283.2368469238281, "loss": 0.2304, "rewards/accuracies": 1.0, "rewards/chosen": -0.37718701362609863, "rewards/margins": 3.130066394805908, "rewards/rejected": -3.507253408432007, "step": 789 }, { "epoch": 1.03, "learning_rate": 4.897394203359611e-05, "logits/chosen": -2.223119020462036, "logits/rejected": -2.1850533485412598, "logps/chosen": -163.25404357910156, "logps/rejected": -190.954833984375, "loss": 0.1986, "rewards/accuracies": 0.875, "rewards/chosen": -0.48350968956947327, "rewards/margins": 2.4246490001678467, "rewards/rejected": -2.908158779144287, "step": 790 }, { "epoch": 1.04, "learning_rate": 4.897098638448731e-05, "logits/chosen": -2.617220878601074, "logits/rejected": -2.6898252964019775, "logps/chosen": -191.36790466308594, "logps/rejected": -270.26287841796875, "loss": 0.1789, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7169384360313416, "rewards/margins": 2.659780979156494, "rewards/rejected": -3.3767194747924805, "step": 791 }, { "epoch": 1.04, "learning_rate": 4.896802657394506e-05, "logits/chosen": -2.3435750007629395, "logits/rejected": -2.2688729763031006, "logps/chosen": -154.95065307617188, "logps/rejected": -174.447021484375, "loss": 0.2578, "rewards/accuracies": 0.875, "rewards/chosen": -0.7484995722770691, "rewards/margins": 2.27825927734375, "rewards/rejected": -3.0267586708068848, "step": 792 }, { "epoch": 1.04, "learning_rate": 4.8965062602483205e-05, "logits/chosen": -2.4353139400482178, "logits/rejected": -2.381152629852295, "logps/chosen": -270.47259521484375, "logps/rejected": -281.2034606933594, "loss": 0.16, "rewards/accuracies": 1.0, "rewards/chosen": -0.4860559105873108, "rewards/margins": 3.1953186988830566, "rewards/rejected": -3.6813745498657227, "step": 793 }, { "epoch": 1.04, "learning_rate": 4.89620944706163e-05, "logits/chosen": -2.391406536102295, "logits/rejected": -2.426130533218384, "logps/chosen": -167.80072021484375, "logps/rejected": -207.9481964111328, "loss": 0.1768, "rewards/accuracies": 0.9375, "rewards/chosen": -0.36457765102386475, "rewards/margins": 3.225442409515381, "rewards/rejected": -3.590019941329956, "step": 794 }, { "epoch": 1.04, "learning_rate": 4.8959122178859616e-05, "logits/chosen": -2.5002002716064453, "logits/rejected": -2.584379196166992, "logps/chosen": -194.40185546875, "logps/rejected": -240.38218688964844, "loss": 0.1416, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7205836176872253, "rewards/margins": 3.2785701751708984, "rewards/rejected": -3.9991536140441895, "step": 795 }, { "epoch": 1.04, "learning_rate": 4.8956145727729156e-05, "logits/chosen": -2.7242343425750732, "logits/rejected": -2.7420852184295654, "logps/chosen": -193.07855224609375, "logps/rejected": -213.01675415039062, "loss": 0.2132, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5593734979629517, "rewards/margins": 2.7595760822296143, "rewards/rejected": -3.3189496994018555, "step": 796 }, { "epoch": 1.04, "learning_rate": 4.895316511774164e-05, "logits/chosen": -2.5653488636016846, "logits/rejected": -2.6561145782470703, "logps/chosen": -187.71011352539062, "logps/rejected": -234.84353637695312, "loss": 0.2078, "rewards/accuracies": 0.875, "rewards/chosen": -0.685555100440979, "rewards/margins": 2.5933103561401367, "rewards/rejected": -3.278865337371826, "step": 797 }, { "epoch": 1.04, "learning_rate": 4.895018034941451e-05, "logits/chosen": -2.6024954319000244, "logits/rejected": -2.519937753677368, "logps/chosen": -168.91847229003906, "logps/rejected": -224.6294708251953, "loss": 0.1913, "rewards/accuracies": 0.9375, "rewards/chosen": -0.535056471824646, "rewards/margins": 3.0812528133392334, "rewards/rejected": -3.61630916595459, "step": 798 }, { "epoch": 1.05, "learning_rate": 4.894719142326593e-05, "logits/chosen": -2.21370005607605, "logits/rejected": -2.3001461029052734, "logps/chosen": -166.51718139648438, "logps/rejected": -224.1083526611328, "loss": 0.3661, "rewards/accuracies": 0.75, "rewards/chosen": -0.7483687996864319, "rewards/margins": 2.128541946411133, "rewards/rejected": -2.8769102096557617, "step": 799 }, { "epoch": 1.05, "learning_rate": 4.894419833981478e-05, "logits/chosen": -2.4892656803131104, "logits/rejected": -2.568337917327881, "logps/chosen": -247.6711883544922, "logps/rejected": -264.4457092285156, "loss": 0.1815, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5068062543869019, "rewards/margins": 3.0614163875579834, "rewards/rejected": -3.5682225227355957, "step": 800 }, { "epoch": 1.05, "learning_rate": 4.894120109958068e-05, "logits/chosen": -2.5446887016296387, "logits/rejected": -2.5595788955688477, "logps/chosen": -148.94400024414062, "logps/rejected": -197.58807373046875, "loss": 0.2073, "rewards/accuracies": 0.875, "rewards/chosen": -0.3782866895198822, "rewards/margins": 2.5527689456939697, "rewards/rejected": -2.931055784225464, "step": 801 }, { "epoch": 1.05, "learning_rate": 4.893819970308394e-05, "logits/chosen": -2.6223721504211426, "logits/rejected": -2.6008565425872803, "logps/chosen": -223.73875427246094, "logps/rejected": -270.4544982910156, "loss": 0.131, "rewards/accuracies": 1.0, "rewards/chosen": -0.582880973815918, "rewards/margins": 2.6789181232452393, "rewards/rejected": -3.2617993354797363, "step": 802 }, { "epoch": 1.05, "learning_rate": 4.893519415084564e-05, "logits/chosen": -2.576484203338623, "logits/rejected": -2.459780216217041, "logps/chosen": -176.4852294921875, "logps/rejected": -229.3186492919922, "loss": 0.1817, "rewards/accuracies": 1.0, "rewards/chosen": -0.5410618782043457, "rewards/margins": 2.697970390319824, "rewards/rejected": -3.239032030105591, "step": 803 }, { "epoch": 1.05, "learning_rate": 4.8932184443387536e-05, "logits/chosen": -2.32121205329895, "logits/rejected": -2.379164695739746, "logps/chosen": -186.08949279785156, "logps/rejected": -203.67755126953125, "loss": 0.3161, "rewards/accuracies": 0.8125, "rewards/chosen": -0.43141627311706543, "rewards/margins": 2.3219566345214844, "rewards/rejected": -2.7533726692199707, "step": 804 }, { "epoch": 1.05, "learning_rate": 4.892917058123212e-05, "logits/chosen": -2.4327099323272705, "logits/rejected": -2.4539988040924072, "logps/chosen": -223.7473602294922, "logps/rejected": -243.0752410888672, "loss": 0.2913, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5450294017791748, "rewards/margins": 1.9214046001434326, "rewards/rejected": -2.4664340019226074, "step": 805 }, { "epoch": 1.05, "learning_rate": 4.892615256490262e-05, "logits/chosen": -2.549992084503174, "logits/rejected": -2.67326283454895, "logps/chosen": -192.02581787109375, "logps/rejected": -212.2666778564453, "loss": 0.2081, "rewards/accuracies": 0.8125, "rewards/chosen": -0.1347290426492691, "rewards/margins": 2.5369441509246826, "rewards/rejected": -2.671673059463501, "step": 806 }, { "epoch": 1.06, "learning_rate": 4.8923130394922947e-05, "logits/chosen": -2.503438949584961, "logits/rejected": -2.5232760906219482, "logps/chosen": -202.92604064941406, "logps/rejected": -230.6805877685547, "loss": 0.2341, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3751166760921478, "rewards/margins": 2.6738672256469727, "rewards/rejected": -3.0489840507507324, "step": 807 }, { "epoch": 1.06, "learning_rate": 4.8920104071817786e-05, "logits/chosen": -2.3755786418914795, "logits/rejected": -2.5557618141174316, "logps/chosen": -137.71795654296875, "logps/rejected": -216.0382080078125, "loss": 0.1602, "rewards/accuracies": 0.875, "rewards/chosen": -0.13777978718280792, "rewards/margins": 3.1972692012786865, "rewards/rejected": -3.3350489139556885, "step": 808 }, { "epoch": 1.06, "learning_rate": 4.891707359611251e-05, "logits/chosen": -2.4458518028259277, "logits/rejected": -2.485722303390503, "logps/chosen": -232.0552520751953, "logps/rejected": -290.8924255371094, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": -0.5124866962432861, "rewards/margins": 3.1787357330322266, "rewards/rejected": -3.6912224292755127, "step": 809 }, { "epoch": 1.06, "learning_rate": 4.89140389683332e-05, "logits/chosen": -2.6487104892730713, "logits/rejected": -2.691654920578003, "logps/chosen": -200.54293823242188, "logps/rejected": -267.951416015625, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": -0.20664259791374207, "rewards/margins": 3.448357582092285, "rewards/rejected": -3.6550002098083496, "step": 810 }, { "epoch": 1.06, "learning_rate": 4.8911000189006694e-05, "logits/chosen": -2.47886323928833, "logits/rejected": -2.5354814529418945, "logps/chosen": -155.83856201171875, "logps/rejected": -193.6571502685547, "loss": 0.1333, "rewards/accuracies": 1.0, "rewards/chosen": 0.1021997407078743, "rewards/margins": 3.2138075828552246, "rewards/rejected": -3.111607789993286, "step": 811 }, { "epoch": 1.06, "learning_rate": 4.8907957258660534e-05, "logits/chosen": -2.288719654083252, "logits/rejected": -2.2621853351593018, "logps/chosen": -219.19277954101562, "logps/rejected": -233.79931640625, "loss": 0.291, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2972843647003174, "rewards/margins": 2.0650277137756348, "rewards/rejected": -2.3623123168945312, "step": 812 }, { "epoch": 1.06, "learning_rate": 4.890491017782296e-05, "logits/chosen": -2.5341038703918457, "logits/rejected": -2.636235475540161, "logps/chosen": -166.26109313964844, "logps/rejected": -217.66732788085938, "loss": 0.2438, "rewards/accuracies": 0.75, "rewards/chosen": -0.7079929709434509, "rewards/margins": 2.6824710369110107, "rewards/rejected": -3.3904640674591064, "step": 813 }, { "epoch": 1.07, "learning_rate": 4.890185894702298e-05, "logits/chosen": -2.5911717414855957, "logits/rejected": -2.6073203086853027, "logps/chosen": -208.44723510742188, "logps/rejected": -287.8471374511719, "loss": 0.0952, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16351568698883057, "rewards/margins": 3.8280396461486816, "rewards/rejected": -3.6645243167877197, "step": 814 }, { "epoch": 1.07, "learning_rate": 4.8898803566790296e-05, "logits/chosen": -2.3474974632263184, "logits/rejected": -2.4600653648376465, "logps/chosen": -225.6314697265625, "logps/rejected": -247.96067810058594, "loss": 0.1813, "rewards/accuracies": 0.875, "rewards/chosen": -0.11835913360118866, "rewards/margins": 3.3629634380340576, "rewards/rejected": -3.481323003768921, "step": 815 }, { "epoch": 1.07, "learning_rate": 4.889574403765531e-05, "logits/chosen": -2.314152717590332, "logits/rejected": -2.2856571674346924, "logps/chosen": -222.77545166015625, "logps/rejected": -240.05615234375, "loss": 0.2903, "rewards/accuracies": 0.875, "rewards/chosen": -0.5903599858283997, "rewards/margins": 2.8621745109558105, "rewards/rejected": -3.4525346755981445, "step": 816 }, { "epoch": 1.07, "learning_rate": 4.889268036014918e-05, "logits/chosen": -2.2042934894561768, "logits/rejected": -2.252229928970337, "logps/chosen": -168.29946899414062, "logps/rejected": -220.2432403564453, "loss": 0.181, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6510707139968872, "rewards/margins": 2.5488698482513428, "rewards/rejected": -3.1999404430389404, "step": 817 }, { "epoch": 1.07, "learning_rate": 4.888961253480376e-05, "logits/chosen": -2.330993890762329, "logits/rejected": -2.439598321914673, "logps/chosen": -191.20274353027344, "logps/rejected": -240.98287963867188, "loss": 0.0927, "rewards/accuracies": 0.9375, "rewards/chosen": -0.08680248260498047, "rewards/margins": 3.754706382751465, "rewards/rejected": -3.8415091037750244, "step": 818 }, { "epoch": 1.07, "learning_rate": 4.8886540562151653e-05, "logits/chosen": -2.60939884185791, "logits/rejected": -2.64247989654541, "logps/chosen": -186.75680541992188, "logps/rejected": -217.1247100830078, "loss": 0.1396, "rewards/accuracies": 1.0, "rewards/chosen": -0.13618728518486023, "rewards/margins": 2.6276023387908936, "rewards/rejected": -2.7637898921966553, "step": 819 }, { "epoch": 1.07, "learning_rate": 4.8883464442726146e-05, "logits/chosen": -2.451847791671753, "logits/rejected": -2.550415277481079, "logps/chosen": -194.68426513671875, "logps/rejected": -221.12335205078125, "loss": 0.2368, "rewards/accuracies": 0.8125, "rewards/chosen": -0.38820838928222656, "rewards/margins": 2.107210159301758, "rewards/rejected": -2.4954187870025635, "step": 820 }, { "epoch": 1.07, "learning_rate": 4.888038417706126e-05, "logits/chosen": -2.4475693702697754, "logits/rejected": -2.4619081020355225, "logps/chosen": -185.6713409423828, "logps/rejected": -246.03526306152344, "loss": 0.1785, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18588271737098694, "rewards/margins": 3.4940121173858643, "rewards/rejected": -3.6798949241638184, "step": 821 }, { "epoch": 1.08, "learning_rate": 4.887729976569174e-05, "logits/chosen": -2.6383659839630127, "logits/rejected": -2.641087293624878, "logps/chosen": -220.7265625, "logps/rejected": -246.0904083251953, "loss": 0.1677, "rewards/accuracies": 0.875, "rewards/chosen": -0.5302943587303162, "rewards/margins": 3.091730833053589, "rewards/rejected": -3.6220250129699707, "step": 822 }, { "epoch": 1.08, "learning_rate": 4.8874211209153066e-05, "logits/chosen": -2.7357144355773926, "logits/rejected": -2.756209373474121, "logps/chosen": -228.9771270751953, "logps/rejected": -244.65692138671875, "loss": 0.1871, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2874932289123535, "rewards/margins": 3.040170669555664, "rewards/rejected": -3.3276634216308594, "step": 823 }, { "epoch": 1.08, "learning_rate": 4.8871118507981396e-05, "logits/chosen": -2.541903257369995, "logits/rejected": -2.642256259918213, "logps/chosen": -172.7350311279297, "logps/rejected": -263.6834716796875, "loss": 0.1574, "rewards/accuracies": 0.9375, "rewards/chosen": -0.31187328696250916, "rewards/margins": 3.909829616546631, "rewards/rejected": -4.221703052520752, "step": 824 }, { "epoch": 1.08, "learning_rate": 4.886802166271364e-05, "logits/chosen": -2.5496389865875244, "logits/rejected": -2.632146120071411, "logps/chosen": -189.05809020996094, "logps/rejected": -210.13885498046875, "loss": 0.15, "rewards/accuracies": 1.0, "rewards/chosen": -0.08079969137907028, "rewards/margins": 3.155087947845459, "rewards/rejected": -3.2358880043029785, "step": 825 }, { "epoch": 1.08, "learning_rate": 4.8864920673887434e-05, "logits/chosen": -2.508222818374634, "logits/rejected": -2.495868682861328, "logps/chosen": -178.13548278808594, "logps/rejected": -204.01275634765625, "loss": 0.1071, "rewards/accuracies": 1.0, "rewards/chosen": -0.022821810096502304, "rewards/margins": 3.6802027225494385, "rewards/rejected": -3.703024387359619, "step": 826 }, { "epoch": 1.08, "learning_rate": 4.88618155420411e-05, "logits/chosen": -2.3088462352752686, "logits/rejected": -2.442979574203491, "logps/chosen": -207.34799194335938, "logps/rejected": -256.6688232421875, "loss": 0.1586, "rewards/accuracies": 0.875, "rewards/chosen": -0.63365638256073, "rewards/margins": 2.7970571517944336, "rewards/rejected": -3.430713415145874, "step": 827 }, { "epoch": 1.08, "learning_rate": 4.8858706267713704e-05, "logits/chosen": -2.532752275466919, "logits/rejected": -2.47641658782959, "logps/chosen": -181.96983337402344, "logps/rejected": -179.16371154785156, "loss": 0.2413, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7146714329719543, "rewards/margins": 2.151493787765503, "rewards/rejected": -2.8661651611328125, "step": 828 }, { "epoch": 1.08, "learning_rate": 4.885559285144503e-05, "logits/chosen": -2.4036006927490234, "logits/rejected": -2.451720952987671, "logps/chosen": -179.5876922607422, "logps/rejected": -236.59486389160156, "loss": 0.1633, "rewards/accuracies": 0.875, "rewards/chosen": -0.3670279383659363, "rewards/margins": 3.0567290782928467, "rewards/rejected": -3.4237565994262695, "step": 829 }, { "epoch": 1.09, "learning_rate": 4.885247529377557e-05, "logits/chosen": -2.4562742710113525, "logits/rejected": -2.516852855682373, "logps/chosen": -139.80506896972656, "logps/rejected": -169.3076171875, "loss": 0.2494, "rewards/accuracies": 0.8125, "rewards/chosen": -0.586427628993988, "rewards/margins": 1.9348812103271484, "rewards/rejected": -2.521308660507202, "step": 830 }, { "epoch": 1.09, "learning_rate": 4.884935359524655e-05, "logits/chosen": -2.433465003967285, "logits/rejected": -2.401857852935791, "logps/chosen": -181.75381469726562, "logps/rejected": -226.99884033203125, "loss": 0.1503, "rewards/accuracies": 0.9375, "rewards/chosen": -0.24370934069156647, "rewards/margins": 3.303581953048706, "rewards/rejected": -3.5472910404205322, "step": 831 }, { "epoch": 1.09, "learning_rate": 4.884622775639989e-05, "logits/chosen": -2.4640562534332275, "logits/rejected": -2.4529128074645996, "logps/chosen": -228.76239013671875, "logps/rejected": -277.97503662109375, "loss": 0.1871, "rewards/accuracies": 0.875, "rewards/chosen": -0.40687793493270874, "rewards/margins": 2.9247701168060303, "rewards/rejected": -3.331648111343384, "step": 832 }, { "epoch": 1.09, "learning_rate": 4.884309777777826e-05, "logits/chosen": -2.600520372390747, "logits/rejected": -2.6682722568511963, "logps/chosen": -239.098388671875, "logps/rejected": -298.6829528808594, "loss": 0.0985, "rewards/accuracies": 1.0, "rewards/chosen": -0.40839752554893494, "rewards/margins": 3.9475584030151367, "rewards/rejected": -4.355956077575684, "step": 833 }, { "epoch": 1.09, "learning_rate": 4.883996365992502e-05, "logits/chosen": -2.470278024673462, "logits/rejected": -2.446775197982788, "logps/chosen": -225.15782165527344, "logps/rejected": -273.9099426269531, "loss": 0.1943, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9289353489875793, "rewards/margins": 3.2997019290924072, "rewards/rejected": -4.228637218475342, "step": 834 }, { "epoch": 1.09, "learning_rate": 4.883682540338428e-05, "logits/chosen": -2.265939474105835, "logits/rejected": -2.3391735553741455, "logps/chosen": -203.70504760742188, "logps/rejected": -275.56427001953125, "loss": 0.1327, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4642513394355774, "rewards/margins": 4.042669296264648, "rewards/rejected": -4.506920337677002, "step": 835 }, { "epoch": 1.09, "learning_rate": 4.883368300870083e-05, "logits/chosen": -2.431640148162842, "logits/rejected": -2.453253984451294, "logps/chosen": -199.0352020263672, "logps/rejected": -255.3938751220703, "loss": 0.1519, "rewards/accuracies": 0.9375, "rewards/chosen": -0.729556679725647, "rewards/margins": 3.61323881149292, "rewards/rejected": -4.3427958488464355, "step": 836 }, { "epoch": 1.1, "learning_rate": 4.883053647642022e-05, "logits/chosen": -2.531667947769165, "logits/rejected": -2.473618507385254, "logps/chosen": -258.14215087890625, "logps/rejected": -253.9119110107422, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -0.14007891714572906, "rewards/margins": 4.125094890594482, "rewards/rejected": -4.26517391204834, "step": 837 }, { "epoch": 1.1, "learning_rate": 4.8827385807088674e-05, "logits/chosen": -2.368967056274414, "logits/rejected": -2.3205454349517822, "logps/chosen": -237.3386688232422, "logps/rejected": -306.61737060546875, "loss": 0.1212, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8273277282714844, "rewards/margins": 3.641183614730835, "rewards/rejected": -4.468511581420898, "step": 838 }, { "epoch": 1.1, "learning_rate": 4.8824231001253186e-05, "logits/chosen": -2.4554178714752197, "logits/rejected": -2.457733631134033, "logps/chosen": -219.16522216796875, "logps/rejected": -298.85003662109375, "loss": 0.2137, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3690099716186523, "rewards/margins": 2.852839231491089, "rewards/rejected": -4.221848964691162, "step": 839 }, { "epoch": 1.1, "learning_rate": 4.882107205946142e-05, "logits/chosen": -2.19034743309021, "logits/rejected": -2.1936280727386475, "logps/chosen": -196.35073852539062, "logps/rejected": -266.54638671875, "loss": 0.2458, "rewards/accuracies": 0.875, "rewards/chosen": -0.5001093149185181, "rewards/margins": 3.14082670211792, "rewards/rejected": -3.6409358978271484, "step": 840 }, { "epoch": 1.1, "learning_rate": 4.8817908982261765e-05, "logits/chosen": -2.3098065853118896, "logits/rejected": -2.4067697525024414, "logps/chosen": -197.4673614501953, "logps/rejected": -242.01669311523438, "loss": 0.1775, "rewards/accuracies": 1.0, "rewards/chosen": -0.7165354490280151, "rewards/margins": 3.300830841064453, "rewards/rejected": -4.017366409301758, "step": 841 }, { "epoch": 1.1, "learning_rate": 4.881474177020336e-05, "logits/chosen": -2.541229009628296, "logits/rejected": -2.5617356300354004, "logps/chosen": -219.83966064453125, "logps/rejected": -247.85601806640625, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -0.8507525324821472, "rewards/margins": 3.5395710468292236, "rewards/rejected": -4.390323638916016, "step": 842 }, { "epoch": 1.1, "learning_rate": 4.881157042383605e-05, "logits/chosen": -2.2904212474823, "logits/rejected": -2.3347108364105225, "logps/chosen": -231.440673828125, "logps/rejected": -303.37939453125, "loss": 0.2497, "rewards/accuracies": 0.875, "rewards/chosen": -1.1560659408569336, "rewards/margins": 2.958097457885742, "rewards/rejected": -4.114163398742676, "step": 843 }, { "epoch": 1.1, "learning_rate": 4.8808394943710376e-05, "logits/chosen": -2.279738187789917, "logits/rejected": -2.3323707580566406, "logps/chosen": -213.64846801757812, "logps/rejected": -239.2349395751953, "loss": 0.243, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1719560623168945, "rewards/margins": 2.3868768215179443, "rewards/rejected": -3.558833122253418, "step": 844 }, { "epoch": 1.11, "learning_rate": 4.880521533037762e-05, "logits/chosen": -2.3850831985473633, "logits/rejected": -2.4819254875183105, "logps/chosen": -211.81105041503906, "logps/rejected": -253.8992919921875, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -0.6735836267471313, "rewards/margins": 3.870424509048462, "rewards/rejected": -4.544007778167725, "step": 845 }, { "epoch": 1.11, "learning_rate": 4.880203158438975e-05, "logits/chosen": -2.4267005920410156, "logits/rejected": -2.4583656787872314, "logps/chosen": -185.1043243408203, "logps/rejected": -223.85244750976562, "loss": 0.1452, "rewards/accuracies": 1.0, "rewards/chosen": -0.911338746547699, "rewards/margins": 3.077333688735962, "rewards/rejected": -3.9886722564697266, "step": 846 }, { "epoch": 1.11, "learning_rate": 4.87988437062995e-05, "logits/chosen": -2.0992445945739746, "logits/rejected": -2.171211004257202, "logps/chosen": -159.3900909423828, "logps/rejected": -242.703369140625, "loss": 0.25, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7375649213790894, "rewards/margins": 3.8496501445770264, "rewards/rejected": -4.587214946746826, "step": 847 }, { "epoch": 1.11, "learning_rate": 4.879565169666028e-05, "logits/chosen": -2.2952377796173096, "logits/rejected": -2.3588685989379883, "logps/chosen": -201.49908447265625, "logps/rejected": -241.18902587890625, "loss": 0.2027, "rewards/accuracies": 0.875, "rewards/chosen": -0.5940706729888916, "rewards/margins": 3.3175528049468994, "rewards/rejected": -3.91162371635437, "step": 848 }, { "epoch": 1.11, "learning_rate": 4.879245555602624e-05, "logits/chosen": -1.9072455167770386, "logits/rejected": -2.0710573196411133, "logps/chosen": -167.665283203125, "logps/rejected": -231.74786376953125, "loss": 0.2365, "rewards/accuracies": 0.875, "rewards/chosen": -1.0657278299331665, "rewards/margins": 3.313453435897827, "rewards/rejected": -4.379181385040283, "step": 849 }, { "epoch": 1.11, "learning_rate": 4.878925528495223e-05, "logits/chosen": -2.076167106628418, "logits/rejected": -2.0585241317749023, "logps/chosen": -160.9062957763672, "logps/rejected": -198.1348419189453, "loss": 0.2318, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9388778209686279, "rewards/margins": 2.9717299938201904, "rewards/rejected": -3.9106080532073975, "step": 850 }, { "epoch": 1.11, "learning_rate": 4.878605088399384e-05, "logits/chosen": -2.1791696548461914, "logits/rejected": -2.2479426860809326, "logps/chosen": -191.9034881591797, "logps/rejected": -257.7882995605469, "loss": 0.0744, "rewards/accuracies": 1.0, "rewards/chosen": -0.5156456828117371, "rewards/margins": 3.969588041305542, "rewards/rejected": -4.485233783721924, "step": 851 }, { "epoch": 1.12, "learning_rate": 4.878284235370735e-05, "logits/chosen": -2.1952590942382812, "logits/rejected": -2.3678793907165527, "logps/chosen": -195.3539276123047, "logps/rejected": -240.06231689453125, "loss": 0.1083, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9977157115936279, "rewards/margins": 3.5119776725769043, "rewards/rejected": -4.509693622589111, "step": 852 }, { "epoch": 1.12, "learning_rate": 4.877962969464978e-05, "logits/chosen": -2.0996241569519043, "logits/rejected": -2.2190651893615723, "logps/chosen": -184.02914428710938, "logps/rejected": -213.2720489501953, "loss": 0.2316, "rewards/accuracies": 0.875, "rewards/chosen": -1.107424020767212, "rewards/margins": 2.9243013858795166, "rewards/rejected": -4.0317254066467285, "step": 853 }, { "epoch": 1.12, "learning_rate": 4.877641290737884e-05, "logits/chosen": -2.398710012435913, "logits/rejected": -2.455815315246582, "logps/chosen": -196.89927673339844, "logps/rejected": -228.26943969726562, "loss": 0.2229, "rewards/accuracies": 0.875, "rewards/chosen": -0.7392411828041077, "rewards/margins": 3.144235134124756, "rewards/rejected": -3.8834760189056396, "step": 854 }, { "epoch": 1.12, "learning_rate": 4.8773191992453e-05, "logits/chosen": -2.585550546646118, "logits/rejected": -2.6128385066986084, "logps/chosen": -221.81942749023438, "logps/rejected": -235.17910766601562, "loss": 0.1117, "rewards/accuracies": 1.0, "rewards/chosen": -0.5257879495620728, "rewards/margins": 3.225809335708618, "rewards/rejected": -3.7515969276428223, "step": 855 }, { "epoch": 1.12, "learning_rate": 4.87699669504314e-05, "logits/chosen": -2.3643717765808105, "logits/rejected": -2.397857427597046, "logps/chosen": -220.61892700195312, "logps/rejected": -270.6251220703125, "loss": 0.1944, "rewards/accuracies": 0.875, "rewards/chosen": -1.2166273593902588, "rewards/margins": 3.119513511657715, "rewards/rejected": -4.3361406326293945, "step": 856 }, { "epoch": 1.12, "learning_rate": 4.876673778187393e-05, "logits/chosen": -2.182027578353882, "logits/rejected": -2.232729434967041, "logps/chosen": -172.77215576171875, "logps/rejected": -192.00790405273438, "loss": 0.2663, "rewards/accuracies": 0.75, "rewards/chosen": -0.9920349717140198, "rewards/margins": 3.2848072052001953, "rewards/rejected": -4.2768425941467285, "step": 857 }, { "epoch": 1.12, "learning_rate": 4.876350448734117e-05, "logits/chosen": -2.317965507507324, "logits/rejected": -2.4213790893554688, "logps/chosen": -170.614990234375, "logps/rejected": -179.1256866455078, "loss": 0.3067, "rewards/accuracies": 0.8125, "rewards/chosen": -1.353677749633789, "rewards/margins": 1.795522928237915, "rewards/rejected": -3.149200916290283, "step": 858 }, { "epoch": 1.12, "learning_rate": 4.876026706739444e-05, "logits/chosen": -2.362269401550293, "logits/rejected": -2.385591506958008, "logps/chosen": -277.1317138671875, "logps/rejected": -302.71575927734375, "loss": 0.2337, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7664923667907715, "rewards/margins": 2.5697085857391357, "rewards/rejected": -4.336201190948486, "step": 859 }, { "epoch": 1.13, "learning_rate": 4.8757025522595756e-05, "logits/chosen": -2.5295486450195312, "logits/rejected": -2.5323615074157715, "logps/chosen": -205.54013061523438, "logps/rejected": -218.6345977783203, "loss": 0.1368, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2794644236564636, "rewards/margins": 2.9989418983459473, "rewards/rejected": -3.2784063816070557, "step": 860 }, { "epoch": 1.13, "learning_rate": 4.8753779853507874e-05, "logits/chosen": -2.117382764816284, "logits/rejected": -2.120049238204956, "logps/chosen": -196.75279235839844, "logps/rejected": -217.56846618652344, "loss": 0.1823, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5300887227058411, "rewards/margins": 3.763706922531128, "rewards/rejected": -4.293795585632324, "step": 861 }, { "epoch": 1.13, "learning_rate": 4.8750530060694236e-05, "logits/chosen": -1.7907865047454834, "logits/rejected": -1.8680351972579956, "logps/chosen": -134.9779510498047, "logps/rejected": -194.37901306152344, "loss": 0.1639, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8513069152832031, "rewards/margins": 2.7600948810577393, "rewards/rejected": -3.6114020347595215, "step": 862 }, { "epoch": 1.13, "learning_rate": 4.874727614471903e-05, "logits/chosen": -2.110868453979492, "logits/rejected": -2.1362719535827637, "logps/chosen": -194.04483032226562, "logps/rejected": -234.01123046875, "loss": 0.2074, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6307097673416138, "rewards/margins": 3.406496047973633, "rewards/rejected": -5.037205696105957, "step": 863 }, { "epoch": 1.13, "learning_rate": 4.8744018106147135e-05, "logits/chosen": -2.32252836227417, "logits/rejected": -2.344344139099121, "logps/chosen": -224.74435424804688, "logps/rejected": -252.48287963867188, "loss": 0.1992, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4851446151733398, "rewards/margins": 3.0550522804260254, "rewards/rejected": -4.540196418762207, "step": 864 }, { "epoch": 1.13, "learning_rate": 4.8740755945544156e-05, "logits/chosen": -2.2035303115844727, "logits/rejected": -2.198829174041748, "logps/chosen": -147.6013641357422, "logps/rejected": -178.75584411621094, "loss": 0.218, "rewards/accuracies": 0.75, "rewards/chosen": -1.2339822053909302, "rewards/margins": 2.6723711490631104, "rewards/rejected": -3.90635347366333, "step": 865 }, { "epoch": 1.13, "learning_rate": 4.873748966347642e-05, "logits/chosen": -2.1133739948272705, "logits/rejected": -2.088212251663208, "logps/chosen": -183.02053833007812, "logps/rejected": -214.74398803710938, "loss": 0.1746, "rewards/accuracies": 0.9375, "rewards/chosen": -1.064955234527588, "rewards/margins": 3.2688629627227783, "rewards/rejected": -4.333818435668945, "step": 866 }, { "epoch": 1.13, "learning_rate": 4.873421926051095e-05, "logits/chosen": -2.3783798217773438, "logits/rejected": -2.4469428062438965, "logps/chosen": -175.8081817626953, "logps/rejected": -228.7090301513672, "loss": 0.2164, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9884763956069946, "rewards/margins": 3.5400943756103516, "rewards/rejected": -4.528570652008057, "step": 867 }, { "epoch": 1.14, "learning_rate": 4.873094473721552e-05, "logits/chosen": -2.122225284576416, "logits/rejected": -2.229064464569092, "logps/chosen": -163.4876708984375, "logps/rejected": -226.0997314453125, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -1.584133267402649, "rewards/margins": 3.3204777240753174, "rewards/rejected": -4.904611110687256, "step": 868 }, { "epoch": 1.14, "learning_rate": 4.872766609415857e-05, "logits/chosen": -2.342576265335083, "logits/rejected": -2.389909029006958, "logps/chosen": -161.72445678710938, "logps/rejected": -186.76344299316406, "loss": 0.2182, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8539198040962219, "rewards/margins": 2.9423582553863525, "rewards/rejected": -3.7962779998779297, "step": 869 }, { "epoch": 1.14, "learning_rate": 4.872438333190931e-05, "logits/chosen": -2.137856960296631, "logits/rejected": -2.179835081100464, "logps/chosen": -191.9931182861328, "logps/rejected": -224.08697509765625, "loss": 0.1825, "rewards/accuracies": 0.9375, "rewards/chosen": -1.108015775680542, "rewards/margins": 3.190267562866211, "rewards/rejected": -4.298283100128174, "step": 870 }, { "epoch": 1.14, "learning_rate": 4.872109645103762e-05, "logits/chosen": -2.0209460258483887, "logits/rejected": -2.05436372756958, "logps/chosen": -242.2396697998047, "logps/rejected": -256.2543640136719, "loss": 0.233, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9021006226539612, "rewards/margins": 2.7395806312561035, "rewards/rejected": -3.64168119430542, "step": 871 }, { "epoch": 1.14, "learning_rate": 4.871780545211411e-05, "logits/chosen": -2.396819829940796, "logits/rejected": -2.3696892261505127, "logps/chosen": -262.4898376464844, "logps/rejected": -300.5354919433594, "loss": 0.3388, "rewards/accuracies": 0.75, "rewards/chosen": -0.824455976486206, "rewards/margins": 3.6746246814727783, "rewards/rejected": -4.499080657958984, "step": 872 }, { "epoch": 1.14, "learning_rate": 4.8714510335710114e-05, "logits/chosen": -2.2704551219940186, "logits/rejected": -2.289527416229248, "logps/chosen": -190.1211395263672, "logps/rejected": -210.47433471679688, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": -0.8054894208908081, "rewards/margins": 3.2580995559692383, "rewards/rejected": -4.063589096069336, "step": 873 }, { "epoch": 1.14, "learning_rate": 4.871121110239768e-05, "logits/chosen": -2.0636115074157715, "logits/rejected": -2.149376392364502, "logps/chosen": -178.03355407714844, "logps/rejected": -191.28359985351562, "loss": 0.1928, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7658480405807495, "rewards/margins": 3.3343939781188965, "rewards/rejected": -4.100241661071777, "step": 874 }, { "epoch": 1.15, "learning_rate": 4.870790775274955e-05, "logits/chosen": -2.3600223064422607, "logits/rejected": -2.313652276992798, "logps/chosen": -202.82212829589844, "logps/rejected": -241.97523498535156, "loss": 0.1404, "rewards/accuracies": 0.875, "rewards/chosen": -1.3490352630615234, "rewards/margins": 3.479907512664795, "rewards/rejected": -4.828942775726318, "step": 875 }, { "epoch": 1.15, "learning_rate": 4.8704600287339205e-05, "logits/chosen": -2.272904396057129, "logits/rejected": -2.3197202682495117, "logps/chosen": -195.68267822265625, "logps/rejected": -230.81982421875, "loss": 0.1807, "rewards/accuracies": 0.875, "rewards/chosen": -1.507330060005188, "rewards/margins": 3.294886827468872, "rewards/rejected": -4.802216529846191, "step": 876 }, { "epoch": 1.15, "learning_rate": 4.870128870674082e-05, "logits/chosen": -2.062432289123535, "logits/rejected": -2.095883846282959, "logps/chosen": -177.28150939941406, "logps/rejected": -238.36048889160156, "loss": 0.0998, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5119123458862305, "rewards/margins": 3.5114619731903076, "rewards/rejected": -5.023374080657959, "step": 877 }, { "epoch": 1.15, "learning_rate": 4.869797301152931e-05, "logits/chosen": -2.322573184967041, "logits/rejected": -2.352694511413574, "logps/chosen": -171.1867218017578, "logps/rejected": -209.19857788085938, "loss": 0.1745, "rewards/accuracies": 1.0, "rewards/chosen": -1.4840812683105469, "rewards/margins": 3.0589983463287354, "rewards/rejected": -4.543079376220703, "step": 878 }, { "epoch": 1.15, "learning_rate": 4.869465320228028e-05, "logits/chosen": -1.5731091499328613, "logits/rejected": -1.5635374784469604, "logps/chosen": -162.3439483642578, "logps/rejected": -218.9515380859375, "loss": 0.1648, "rewards/accuracies": 0.875, "rewards/chosen": -1.3453707695007324, "rewards/margins": 3.3265371322631836, "rewards/rejected": -4.671907901763916, "step": 879 }, { "epoch": 1.15, "learning_rate": 4.869132927957007e-05, "logits/chosen": -2.057164192199707, "logits/rejected": -2.169400930404663, "logps/chosen": -146.08116149902344, "logps/rejected": -217.79507446289062, "loss": 0.1879, "rewards/accuracies": 0.875, "rewards/chosen": -1.3128044605255127, "rewards/margins": 4.181766033172607, "rewards/rejected": -5.494570732116699, "step": 880 }, { "epoch": 1.15, "learning_rate": 4.86880012439757e-05, "logits/chosen": -2.09201979637146, "logits/rejected": -2.123706817626953, "logps/chosen": -205.8859100341797, "logps/rejected": -238.71531677246094, "loss": 0.2388, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5409486293792725, "rewards/margins": 2.8045778274536133, "rewards/rejected": -4.345526695251465, "step": 881 }, { "epoch": 1.15, "learning_rate": 4.868466909607494e-05, "logits/chosen": -1.9597307443618774, "logits/rejected": -2.0664074420928955, "logps/chosen": -208.81736755371094, "logps/rejected": -236.41000366210938, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": -1.3827075958251953, "rewards/margins": 3.953075647354126, "rewards/rejected": -5.3357834815979, "step": 882 }, { "epoch": 1.16, "learning_rate": 4.868133283644627e-05, "logits/chosen": -2.1879220008850098, "logits/rejected": -2.2641048431396484, "logps/chosen": -195.26947021484375, "logps/rejected": -233.18475341796875, "loss": 0.1049, "rewards/accuracies": 0.9375, "rewards/chosen": -1.485203742980957, "rewards/margins": 3.5149152278900146, "rewards/rejected": -5.000119209289551, "step": 883 }, { "epoch": 1.16, "learning_rate": 4.867799246566887e-05, "logits/chosen": -1.873902440071106, "logits/rejected": -1.9271022081375122, "logps/chosen": -218.30856323242188, "logps/rejected": -250.31251525878906, "loss": 0.1605, "rewards/accuracies": 0.875, "rewards/chosen": -1.570319652557373, "rewards/margins": 4.087669372558594, "rewards/rejected": -5.657989025115967, "step": 884 }, { "epoch": 1.16, "learning_rate": 4.867464798432262e-05, "logits/chosen": -2.293184995651245, "logits/rejected": -2.348799705505371, "logps/chosen": -204.55638122558594, "logps/rejected": -239.99803161621094, "loss": 0.1635, "rewards/accuracies": 0.875, "rewards/chosen": -1.4229328632354736, "rewards/margins": 4.076297283172607, "rewards/rejected": -5.499229907989502, "step": 885 }, { "epoch": 1.16, "learning_rate": 4.8671299392988146e-05, "logits/chosen": -1.9167914390563965, "logits/rejected": -1.835463285446167, "logps/chosen": -228.05908203125, "logps/rejected": -258.6400146484375, "loss": 0.2577, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6862237453460693, "rewards/margins": 2.3948185443878174, "rewards/rejected": -4.081042289733887, "step": 886 }, { "epoch": 1.16, "learning_rate": 4.866794669224678e-05, "logits/chosen": -2.263591766357422, "logits/rejected": -2.2648370265960693, "logps/chosen": -323.7992248535156, "logps/rejected": -340.5594177246094, "loss": 0.0843, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0585577487945557, "rewards/margins": 3.989744186401367, "rewards/rejected": -5.048301696777344, "step": 887 }, { "epoch": 1.16, "learning_rate": 4.8664589882680544e-05, "logits/chosen": -2.3918349742889404, "logits/rejected": -2.4805490970611572, "logps/chosen": -153.11036682128906, "logps/rejected": -231.31317138671875, "loss": 0.2822, "rewards/accuracies": 0.75, "rewards/chosen": -0.8393661975860596, "rewards/margins": 3.1571884155273438, "rewards/rejected": -3.9965548515319824, "step": 888 }, { "epoch": 1.16, "learning_rate": 4.866122896487221e-05, "logits/chosen": -2.1840920448303223, "logits/rejected": -2.304276466369629, "logps/chosen": -167.58294677734375, "logps/rejected": -200.28634643554688, "loss": 0.1857, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8503859043121338, "rewards/margins": 3.1673898696899414, "rewards/rejected": -4.017776012420654, "step": 889 }, { "epoch": 1.16, "learning_rate": 4.865786393940522e-05, "logits/chosen": -2.069397211074829, "logits/rejected": -2.1860339641571045, "logps/chosen": -179.6785125732422, "logps/rejected": -228.13262939453125, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": -1.1931277513504028, "rewards/margins": 3.2066426277160645, "rewards/rejected": -4.399770736694336, "step": 890 }, { "epoch": 1.17, "learning_rate": 4.8654494806863775e-05, "logits/chosen": -2.1783037185668945, "logits/rejected": -2.2106220722198486, "logps/chosen": -193.16891479492188, "logps/rejected": -243.27835083007812, "loss": 0.2, "rewards/accuracies": 0.875, "rewards/chosen": -1.0907084941864014, "rewards/margins": 3.496738910675049, "rewards/rejected": -4.587447643280029, "step": 891 }, { "epoch": 1.17, "learning_rate": 4.865112156783275e-05, "logits/chosen": -2.294567584991455, "logits/rejected": -2.1514720916748047, "logps/chosen": -214.3270263671875, "logps/rejected": -187.45822143554688, "loss": 0.214, "rewards/accuracies": 1.0, "rewards/chosen": -1.0721594095230103, "rewards/margins": 2.2023167610168457, "rewards/rejected": -3.2744765281677246, "step": 892 }, { "epoch": 1.17, "learning_rate": 4.864774422289776e-05, "logits/chosen": -2.0853824615478516, "logits/rejected": -2.1526715755462646, "logps/chosen": -204.90089416503906, "logps/rejected": -246.01055908203125, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": -0.11933372169733047, "rewards/margins": 4.104992866516113, "rewards/rejected": -4.2243266105651855, "step": 893 }, { "epoch": 1.17, "learning_rate": 4.864436277264511e-05, "logits/chosen": -2.1137301921844482, "logits/rejected": -2.1245357990264893, "logps/chosen": -159.5576171875, "logps/rejected": -167.99693298339844, "loss": 0.3612, "rewards/accuracies": 0.875, "rewards/chosen": -1.1939194202423096, "rewards/margins": 2.4540228843688965, "rewards/rejected": -3.647942543029785, "step": 894 }, { "epoch": 1.17, "learning_rate": 4.864097721766184e-05, "logits/chosen": -2.0749716758728027, "logits/rejected": -2.199066162109375, "logps/chosen": -196.0050048828125, "logps/rejected": -234.677001953125, "loss": 0.1027, "rewards/accuracies": 0.9375, "rewards/chosen": -0.997223973274231, "rewards/margins": 3.582221269607544, "rewards/rejected": -4.5794453620910645, "step": 895 }, { "epoch": 1.17, "learning_rate": 4.8637587558535695e-05, "logits/chosen": -2.2920432090759277, "logits/rejected": -2.3417181968688965, "logps/chosen": -236.25619506835938, "logps/rejected": -282.4396057128906, "loss": 0.2044, "rewards/accuracies": 0.8125, "rewards/chosen": -0.38354048132896423, "rewards/margins": 2.915595531463623, "rewards/rejected": -3.29913592338562, "step": 896 }, { "epoch": 1.17, "learning_rate": 4.863419379585512e-05, "logits/chosen": -2.2515666484832764, "logits/rejected": -2.2877609729766846, "logps/chosen": -189.43197631835938, "logps/rejected": -230.85414123535156, "loss": 0.1691, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3132818937301636, "rewards/margins": 3.6810238361358643, "rewards/rejected": -3.9943056106567383, "step": 897 }, { "epoch": 1.18, "learning_rate": 4.863079593020928e-05, "logits/chosen": -2.3358492851257324, "logits/rejected": -2.2418711185455322, "logps/chosen": -200.99757385253906, "logps/rejected": -208.28480529785156, "loss": 0.1435, "rewards/accuracies": 1.0, "rewards/chosen": -0.17593544721603394, "rewards/margins": 3.3050811290740967, "rewards/rejected": -3.4810166358947754, "step": 898 }, { "epoch": 1.18, "learning_rate": 4.8627393962188075e-05, "logits/chosen": -2.2615466117858887, "logits/rejected": -2.26615047454834, "logps/chosen": -265.6878662109375, "logps/rejected": -265.2508239746094, "loss": 0.2, "rewards/accuracies": 0.75, "rewards/chosen": -0.6857917904853821, "rewards/margins": 3.3371822834014893, "rewards/rejected": -4.022973537445068, "step": 899 }, { "epoch": 1.18, "learning_rate": 4.8623987892382066e-05, "logits/chosen": -2.3570547103881836, "logits/rejected": -2.3362598419189453, "logps/chosen": -199.10662841796875, "logps/rejected": -217.95907592773438, "loss": 0.111, "rewards/accuracies": 1.0, "rewards/chosen": -0.27905502915382385, "rewards/margins": 3.4710071086883545, "rewards/rejected": -3.7500622272491455, "step": 900 }, { "epoch": 1.18, "learning_rate": 4.862057772138258e-05, "logits/chosen": -2.2032244205474854, "logits/rejected": -2.2657198905944824, "logps/chosen": -213.52206420898438, "logps/rejected": -225.04454040527344, "loss": 0.1846, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8243767023086548, "rewards/margins": 3.046542167663574, "rewards/rejected": -3.8709182739257812, "step": 901 }, { "epoch": 1.18, "learning_rate": 4.861716344978162e-05, "logits/chosen": -2.394089698791504, "logits/rejected": -2.292032241821289, "logps/chosen": -234.7174835205078, "logps/rejected": -255.44989013671875, "loss": 0.1544, "rewards/accuracies": 0.875, "rewards/chosen": -0.5697602033615112, "rewards/margins": 3.2541327476501465, "rewards/rejected": -3.8238930702209473, "step": 902 }, { "epoch": 1.18, "learning_rate": 4.861374507817193e-05, "logits/chosen": -2.3428890705108643, "logits/rejected": -2.3085713386535645, "logps/chosen": -221.3970489501953, "logps/rejected": -264.0530090332031, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": -0.48718422651290894, "rewards/margins": 3.7521519660949707, "rewards/rejected": -4.239336013793945, "step": 903 }, { "epoch": 1.18, "learning_rate": 4.8610322607146926e-05, "logits/chosen": -2.417067766189575, "logits/rejected": -2.393913745880127, "logps/chosen": -178.41981506347656, "logps/rejected": -186.7463836669922, "loss": 0.277, "rewards/accuracies": 0.875, "rewards/chosen": -0.6322096586227417, "rewards/margins": 1.9191558361053467, "rewards/rejected": -2.551365375518799, "step": 904 }, { "epoch": 1.18, "learning_rate": 4.8606896037300776e-05, "logits/chosen": -2.2859747409820557, "logits/rejected": -2.3659892082214355, "logps/chosen": -210.5970458984375, "logps/rejected": -241.645263671875, "loss": 0.0721, "rewards/accuracies": 1.0, "rewards/chosen": -0.2545009255409241, "rewards/margins": 3.632350444793701, "rewards/rejected": -3.8868515491485596, "step": 905 }, { "epoch": 1.19, "learning_rate": 4.860346536922834e-05, "logits/chosen": -2.369377374649048, "logits/rejected": -2.4897830486297607, "logps/chosen": -254.58274841308594, "logps/rejected": -319.5621643066406, "loss": 0.1598, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3183284401893616, "rewards/margins": 3.658812999725342, "rewards/rejected": -3.9771416187286377, "step": 906 }, { "epoch": 1.19, "learning_rate": 4.860003060352518e-05, "logits/chosen": -2.3488306999206543, "logits/rejected": -2.3815548419952393, "logps/chosen": -171.2317352294922, "logps/rejected": -204.7757568359375, "loss": 0.1461, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7481203675270081, "rewards/margins": 2.8177552223205566, "rewards/rejected": -3.56587553024292, "step": 907 }, { "epoch": 1.19, "learning_rate": 4.85965917407876e-05, "logits/chosen": -2.2379674911499023, "logits/rejected": -2.3881618976593018, "logps/chosen": -165.37860107421875, "logps/rejected": -203.4194793701172, "loss": 0.2971, "rewards/accuracies": 0.875, "rewards/chosen": -0.6805337071418762, "rewards/margins": 2.6565561294555664, "rewards/rejected": -3.337089776992798, "step": 908 }, { "epoch": 1.19, "learning_rate": 4.8593148781612587e-05, "logits/chosen": -2.630882740020752, "logits/rejected": -2.544029951095581, "logps/chosen": -201.74972534179688, "logps/rejected": -221.31356811523438, "loss": 0.1897, "rewards/accuracies": 0.875, "rewards/chosen": -0.4801185429096222, "rewards/margins": 3.1227259635925293, "rewards/rejected": -3.602844715118408, "step": 909 }, { "epoch": 1.19, "learning_rate": 4.858970172659785e-05, "logits/chosen": -2.405344247817993, "logits/rejected": -2.471182107925415, "logps/chosen": -232.18885803222656, "logps/rejected": -307.023681640625, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": -0.4412127435207367, "rewards/margins": 3.6795432567596436, "rewards/rejected": -4.12075662612915, "step": 910 }, { "epoch": 1.19, "learning_rate": 4.85862505763418e-05, "logits/chosen": -1.9770020246505737, "logits/rejected": -1.8537812232971191, "logps/chosen": -236.51913452148438, "logps/rejected": -222.99220275878906, "loss": 0.2417, "rewards/accuracies": 0.875, "rewards/chosen": -0.4903448224067688, "rewards/margins": 2.971850872039795, "rewards/rejected": -3.46219539642334, "step": 911 }, { "epoch": 1.19, "learning_rate": 4.858279533144358e-05, "logits/chosen": -2.5417568683624268, "logits/rejected": -2.6035280227661133, "logps/chosen": -172.0741729736328, "logps/rejected": -214.9192657470703, "loss": 0.1949, "rewards/accuracies": 0.875, "rewards/chosen": -0.9094291925430298, "rewards/margins": 2.6431045532226562, "rewards/rejected": -3.5525336265563965, "step": 912 }, { "epoch": 1.19, "learning_rate": 4.857933599250303e-05, "logits/chosen": -2.342681407928467, "logits/rejected": -2.256180763244629, "logps/chosen": -196.65240478515625, "logps/rejected": -195.11648559570312, "loss": 0.1316, "rewards/accuracies": 1.0, "rewards/chosen": -0.5412741899490356, "rewards/margins": 3.1819822788238525, "rewards/rejected": -3.7232565879821777, "step": 913 }, { "epoch": 1.2, "learning_rate": 4.857587256012068e-05, "logits/chosen": -2.2213358879089355, "logits/rejected": -2.223132371902466, "logps/chosen": -217.80227661132812, "logps/rejected": -258.8191833496094, "loss": 0.2183, "rewards/accuracies": 0.9375, "rewards/chosen": -0.33522820472717285, "rewards/margins": 3.4243385791778564, "rewards/rejected": -3.759566307067871, "step": 914 }, { "epoch": 1.2, "learning_rate": 4.857240503489782e-05, "logits/chosen": -2.633331060409546, "logits/rejected": -2.5645124912261963, "logps/chosen": -200.99314880371094, "logps/rejected": -216.61944580078125, "loss": 0.0838, "rewards/accuracies": 1.0, "rewards/chosen": -0.03604121133685112, "rewards/margins": 3.366603136062622, "rewards/rejected": -3.402644157409668, "step": 915 }, { "epoch": 1.2, "learning_rate": 4.8568933417436416e-05, "logits/chosen": -2.60661244392395, "logits/rejected": -2.568995475769043, "logps/chosen": -229.74319458007812, "logps/rejected": -272.7928161621094, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -0.10647392272949219, "rewards/margins": 4.324003219604492, "rewards/rejected": -4.430477142333984, "step": 916 }, { "epoch": 1.2, "learning_rate": 4.856545770833913e-05, "logits/chosen": -2.1530046463012695, "logits/rejected": -2.075482130050659, "logps/chosen": -218.10240173339844, "logps/rejected": -234.6465301513672, "loss": 0.166, "rewards/accuracies": 0.875, "rewards/chosen": 0.3659510612487793, "rewards/margins": 4.201175212860107, "rewards/rejected": -3.8352243900299072, "step": 917 }, { "epoch": 1.2, "learning_rate": 4.8561977908209384e-05, "logits/chosen": -2.322742462158203, "logits/rejected": -2.2862117290496826, "logps/chosen": -188.68533325195312, "logps/rejected": -230.53916931152344, "loss": 0.1539, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2586498260498047, "rewards/margins": 3.89760684967041, "rewards/rejected": -4.156256198883057, "step": 918 }, { "epoch": 1.2, "learning_rate": 4.855849401765127e-05, "logits/chosen": -2.245067834854126, "logits/rejected": -2.264173984527588, "logps/chosen": -163.19442749023438, "logps/rejected": -244.39859008789062, "loss": 0.2118, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5528127551078796, "rewards/margins": 2.983473539352417, "rewards/rejected": -3.5362865924835205, "step": 919 }, { "epoch": 1.2, "learning_rate": 4.8555006037269594e-05, "logits/chosen": -2.4369189739227295, "logits/rejected": -2.4961891174316406, "logps/chosen": -199.59515380859375, "logps/rejected": -218.68148803710938, "loss": 0.2001, "rewards/accuracies": 0.9375, "rewards/chosen": -0.507201611995697, "rewards/margins": 2.782557487487793, "rewards/rejected": -3.2897589206695557, "step": 920 }, { "epoch": 1.21, "learning_rate": 4.855151396766988e-05, "logits/chosen": -2.557804584503174, "logits/rejected": -2.554647922515869, "logps/chosen": -195.2904815673828, "logps/rejected": -226.03387451171875, "loss": 0.1858, "rewards/accuracies": 0.9375, "rewards/chosen": 0.013599276542663574, "rewards/margins": 4.105103492736816, "rewards/rejected": -4.091504096984863, "step": 921 }, { "epoch": 1.21, "learning_rate": 4.854801780945837e-05, "logits/chosen": -1.9899704456329346, "logits/rejected": -1.9653894901275635, "logps/chosen": -201.0773468017578, "logps/rejected": -210.57366943359375, "loss": 0.247, "rewards/accuracies": 0.875, "rewards/chosen": -0.5728083252906799, "rewards/margins": 2.8485476970672607, "rewards/rejected": -3.421356201171875, "step": 922 }, { "epoch": 1.21, "learning_rate": 4.854451756324201e-05, "logits/chosen": -2.334390878677368, "logits/rejected": -2.2994754314422607, "logps/chosen": -231.74093627929688, "logps/rejected": -252.7148895263672, "loss": 0.2625, "rewards/accuracies": 0.875, "rewards/chosen": -0.6329984664916992, "rewards/margins": 2.753272533416748, "rewards/rejected": -3.386270523071289, "step": 923 }, { "epoch": 1.21, "learning_rate": 4.854101322962845e-05, "logits/chosen": -2.4350640773773193, "logits/rejected": -2.4160242080688477, "logps/chosen": -202.79510498046875, "logps/rejected": -196.56761169433594, "loss": 0.1783, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7683102488517761, "rewards/margins": 2.6973066329956055, "rewards/rejected": -3.4656169414520264, "step": 924 }, { "epoch": 1.21, "learning_rate": 4.853750480922604e-05, "logits/chosen": -2.4578471183776855, "logits/rejected": -2.4201016426086426, "logps/chosen": -236.7185516357422, "logps/rejected": -225.40760803222656, "loss": 0.1667, "rewards/accuracies": 0.875, "rewards/chosen": -0.3481481373310089, "rewards/margins": 3.050295352935791, "rewards/rejected": -3.3984434604644775, "step": 925 }, { "epoch": 1.21, "learning_rate": 4.853399230264387e-05, "logits/chosen": -2.5239510536193848, "logits/rejected": -2.5775818824768066, "logps/chosen": -139.86842346191406, "logps/rejected": -190.04673767089844, "loss": 0.3665, "rewards/accuracies": 0.75, "rewards/chosen": -0.7791432738304138, "rewards/margins": 1.9640579223632812, "rewards/rejected": -2.74320125579834, "step": 926 }, { "epoch": 1.21, "learning_rate": 4.853047571049171e-05, "logits/chosen": -2.3366544246673584, "logits/rejected": -2.2847678661346436, "logps/chosen": -184.1388702392578, "logps/rejected": -197.55262756347656, "loss": 0.1811, "rewards/accuracies": 0.875, "rewards/chosen": -0.3248026967048645, "rewards/margins": 2.802187919616699, "rewards/rejected": -3.126990795135498, "step": 927 }, { "epoch": 1.21, "learning_rate": 4.852695503338005e-05, "logits/chosen": -2.2322957515716553, "logits/rejected": -2.232264518737793, "logps/chosen": -218.7916259765625, "logps/rejected": -239.33642578125, "loss": 0.2817, "rewards/accuracies": 0.75, "rewards/chosen": -0.7828483581542969, "rewards/margins": 2.7246387004852295, "rewards/rejected": -3.5074868202209473, "step": 928 }, { "epoch": 1.22, "learning_rate": 4.85234302719201e-05, "logits/chosen": -2.3104605674743652, "logits/rejected": -2.292299509048462, "logps/chosen": -223.07472229003906, "logps/rejected": -220.38296508789062, "loss": 0.1399, "rewards/accuracies": 1.0, "rewards/chosen": -0.13662558794021606, "rewards/margins": 3.366118907928467, "rewards/rejected": -3.502744436264038, "step": 929 }, { "epoch": 1.22, "learning_rate": 4.851990142672376e-05, "logits/chosen": -2.27694034576416, "logits/rejected": -2.416929244995117, "logps/chosen": -163.55599975585938, "logps/rejected": -187.94540405273438, "loss": 0.1647, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6064982414245605, "rewards/margins": 2.823979616165161, "rewards/rejected": -3.4304776191711426, "step": 930 }, { "epoch": 1.22, "learning_rate": 4.8516368498403654e-05, "logits/chosen": -2.1894800662994385, "logits/rejected": -2.198641777038574, "logps/chosen": -192.32850646972656, "logps/rejected": -232.83926391601562, "loss": 0.2586, "rewards/accuracies": 0.875, "rewards/chosen": -0.7106565237045288, "rewards/margins": 2.401294231414795, "rewards/rejected": -3.1119508743286133, "step": 931 }, { "epoch": 1.22, "learning_rate": 4.85128314875731e-05, "logits/chosen": -2.2995123863220215, "logits/rejected": -2.458733320236206, "logps/chosen": -184.31101989746094, "logps/rejected": -263.00048828125, "loss": 0.0955, "rewards/accuracies": 0.9375, "rewards/chosen": -0.1651136875152588, "rewards/margins": 3.4938466548919678, "rewards/rejected": -3.6589601039886475, "step": 932 }, { "epoch": 1.22, "learning_rate": 4.850929039484614e-05, "logits/chosen": -2.3589913845062256, "logits/rejected": -2.4086785316467285, "logps/chosen": -147.282470703125, "logps/rejected": -214.50318908691406, "loss": 0.1216, "rewards/accuracies": 0.9375, "rewards/chosen": -0.45500433444976807, "rewards/margins": 3.236088275909424, "rewards/rejected": -3.6910927295684814, "step": 933 }, { "epoch": 1.22, "learning_rate": 4.850574522083753e-05, "logits/chosen": -2.222720146179199, "logits/rejected": -2.3172354698181152, "logps/chosen": -180.91482543945312, "logps/rejected": -231.47238159179688, "loss": 0.1583, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7558724284172058, "rewards/margins": 2.776618480682373, "rewards/rejected": -3.5324912071228027, "step": 934 }, { "epoch": 1.22, "learning_rate": 4.8502195966162694e-05, "logits/chosen": -2.328801393508911, "logits/rejected": -2.4272334575653076, "logps/chosen": -154.9918212890625, "logps/rejected": -246.27830505371094, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -0.6289770007133484, "rewards/margins": 3.6553382873535156, "rewards/rejected": -4.28431510925293, "step": 935 }, { "epoch": 1.22, "learning_rate": 4.849864263143781e-05, "logits/chosen": -2.335493564605713, "logits/rejected": -2.3662097454071045, "logps/chosen": -163.90878295898438, "logps/rejected": -192.3372802734375, "loss": 0.2689, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5199804306030273, "rewards/margins": 2.6915531158447266, "rewards/rejected": -3.211533546447754, "step": 936 }, { "epoch": 1.23, "learning_rate": 4.8495085217279755e-05, "logits/chosen": -2.3108789920806885, "logits/rejected": -2.411635637283325, "logps/chosen": -178.46237182617188, "logps/rejected": -252.6321563720703, "loss": 0.0912, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3370632529258728, "rewards/margins": 3.8149256706237793, "rewards/rejected": -4.151988506317139, "step": 937 }, { "epoch": 1.23, "learning_rate": 4.849152372430609e-05, "logits/chosen": -2.3515238761901855, "logits/rejected": -2.4710798263549805, "logps/chosen": -186.52601623535156, "logps/rejected": -236.93490600585938, "loss": 0.1544, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4036705791950226, "rewards/margins": 3.555159091949463, "rewards/rejected": -3.958829402923584, "step": 938 }, { "epoch": 1.23, "learning_rate": 4.848795815313511e-05, "logits/chosen": -2.0485763549804688, "logits/rejected": -2.0782063007354736, "logps/chosen": -190.405517578125, "logps/rejected": -239.02328491210938, "loss": 0.1048, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12277688086032867, "rewards/margins": 3.6550817489624023, "rewards/rejected": -3.7778589725494385, "step": 939 }, { "epoch": 1.23, "learning_rate": 4.8484388504385806e-05, "logits/chosen": -2.2379536628723145, "logits/rejected": -2.215625762939453, "logps/chosen": -169.50054931640625, "logps/rejected": -222.1637420654297, "loss": 0.2537, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6230670809745789, "rewards/margins": 2.660698413848877, "rewards/rejected": -3.2837653160095215, "step": 940 }, { "epoch": 1.23, "learning_rate": 4.8480814778677886e-05, "logits/chosen": -2.2735724449157715, "logits/rejected": -2.31823468208313, "logps/chosen": -181.47183227539062, "logps/rejected": -204.40936279296875, "loss": 0.2299, "rewards/accuracies": 0.875, "rewards/chosen": -0.45316997170448303, "rewards/margins": 3.5822412967681885, "rewards/rejected": -4.035411357879639, "step": 941 }, { "epoch": 1.23, "learning_rate": 4.847723697663175e-05, "logits/chosen": -2.504059076309204, "logits/rejected": -2.55910325050354, "logps/chosen": -228.25503540039062, "logps/rejected": -302.8750305175781, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -0.8190544843673706, "rewards/margins": 3.961775541305542, "rewards/rejected": -4.780829906463623, "step": 942 }, { "epoch": 1.23, "learning_rate": 4.847365509886851e-05, "logits/chosen": -2.3396451473236084, "logits/rejected": -2.4200096130371094, "logps/chosen": -202.72042846679688, "logps/rejected": -239.9745635986328, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": -0.4111427068710327, "rewards/margins": 3.3438093662261963, "rewards/rejected": -3.7549519538879395, "step": 943 }, { "epoch": 1.24, "learning_rate": 4.847006914601001e-05, "logits/chosen": -2.4212615489959717, "logits/rejected": -2.48210072517395, "logps/chosen": -213.84280395507812, "logps/rejected": -260.6656188964844, "loss": 0.1249, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8076397776603699, "rewards/margins": 3.792834997177124, "rewards/rejected": -4.600475311279297, "step": 944 }, { "epoch": 1.24, "learning_rate": 4.8466479118678766e-05, "logits/chosen": -2.2285969257354736, "logits/rejected": -2.315225124359131, "logps/chosen": -187.538330078125, "logps/rejected": -268.7898254394531, "loss": 0.2034, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2221934795379639, "rewards/margins": 3.516084671020508, "rewards/rejected": -4.738278388977051, "step": 945 }, { "epoch": 1.24, "learning_rate": 4.846288501749802e-05, "logits/chosen": -2.5156943798065186, "logits/rejected": -2.5410516262054443, "logps/chosen": -222.49359130859375, "logps/rejected": -282.3955993652344, "loss": 0.1176, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7577163577079773, "rewards/margins": 4.017005920410156, "rewards/rejected": -4.774722576141357, "step": 946 }, { "epoch": 1.24, "learning_rate": 4.8459286843091724e-05, "logits/chosen": -2.2465834617614746, "logits/rejected": -2.3958494663238525, "logps/chosen": -205.92015075683594, "logps/rejected": -250.27149963378906, "loss": 0.1785, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3019455671310425, "rewards/margins": 3.5620269775390625, "rewards/rejected": -4.8639726638793945, "step": 947 }, { "epoch": 1.24, "learning_rate": 4.845568459608453e-05, "logits/chosen": -2.2198760509490967, "logits/rejected": -2.3069236278533936, "logps/chosen": -164.4407196044922, "logps/rejected": -227.1414794921875, "loss": 0.19, "rewards/accuracies": 0.875, "rewards/chosen": -0.6457600593566895, "rewards/margins": 3.604132652282715, "rewards/rejected": -4.249892711639404, "step": 948 }, { "epoch": 1.24, "learning_rate": 4.8452078277101783e-05, "logits/chosen": -2.1330924034118652, "logits/rejected": -2.2624382972717285, "logps/chosen": -141.55337524414062, "logps/rejected": -193.1168670654297, "loss": 0.18, "rewards/accuracies": 0.875, "rewards/chosen": -0.4278246760368347, "rewards/margins": 3.105529308319092, "rewards/rejected": -3.5333540439605713, "step": 949 }, { "epoch": 1.24, "learning_rate": 4.8448467886769585e-05, "logits/chosen": -1.9669086933135986, "logits/rejected": -2.0849437713623047, "logps/chosen": -181.0906982421875, "logps/rejected": -237.8538055419922, "loss": 0.1504, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0563127994537354, "rewards/margins": 3.4143903255462646, "rewards/rejected": -4.470703125, "step": 950 }, { "epoch": 1.24, "learning_rate": 4.844485342571468e-05, "logits/chosen": -1.7133499383926392, "logits/rejected": -1.701385259628296, "logps/chosen": -169.70852661132812, "logps/rejected": -198.37237548828125, "loss": 0.4077, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4771201610565186, "rewards/margins": 2.8859434127807617, "rewards/rejected": -4.363063335418701, "step": 951 }, { "epoch": 1.25, "learning_rate": 4.844123489456456e-05, "logits/chosen": -2.262361526489258, "logits/rejected": -2.4248523712158203, "logps/chosen": -198.08021545410156, "logps/rejected": -303.7059631347656, "loss": 0.1379, "rewards/accuracies": 0.875, "rewards/chosen": -1.432203769683838, "rewards/margins": 3.657777786254883, "rewards/rejected": -5.089981555938721, "step": 952 }, { "epoch": 1.25, "learning_rate": 4.843761229394741e-05, "logits/chosen": -2.3017332553863525, "logits/rejected": -2.291661500930786, "logps/chosen": -189.6435089111328, "logps/rejected": -246.8826141357422, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -0.8683138489723206, "rewards/margins": 4.264334678649902, "rewards/rejected": -5.132648468017578, "step": 953 }, { "epoch": 1.25, "learning_rate": 4.843398562449212e-05, "logits/chosen": -1.9980626106262207, "logits/rejected": -1.996732473373413, "logps/chosen": -190.13116455078125, "logps/rejected": -222.26028442382812, "loss": 0.2367, "rewards/accuracies": 0.9375, "rewards/chosen": -1.423780918121338, "rewards/margins": 2.6461732387542725, "rewards/rejected": -4.069953918457031, "step": 954 }, { "epoch": 1.25, "learning_rate": 4.84303548868283e-05, "logits/chosen": -2.3841123580932617, "logits/rejected": -2.439812660217285, "logps/chosen": -231.7444610595703, "logps/rejected": -276.234619140625, "loss": 0.1215, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5399219989776611, "rewards/margins": 3.503429651260376, "rewards/rejected": -5.043351650238037, "step": 955 }, { "epoch": 1.25, "learning_rate": 4.842672008158625e-05, "logits/chosen": -2.028545379638672, "logits/rejected": -2.0214412212371826, "logps/chosen": -144.1702880859375, "logps/rejected": -193.11929321289062, "loss": 0.3515, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5417052507400513, "rewards/margins": 2.0481746196746826, "rewards/rejected": -3.5898799896240234, "step": 956 }, { "epoch": 1.25, "learning_rate": 4.842308120939699e-05, "logits/chosen": -2.440342426300049, "logits/rejected": -2.4507193565368652, "logps/chosen": -215.12884521484375, "logps/rejected": -226.1699981689453, "loss": 0.1973, "rewards/accuracies": 0.875, "rewards/chosen": -1.6147558689117432, "rewards/margins": 3.0818982124328613, "rewards/rejected": -4.696654319763184, "step": 957 }, { "epoch": 1.25, "learning_rate": 4.8419438270892226e-05, "logits/chosen": -2.179013252258301, "logits/rejected": -2.0757102966308594, "logps/chosen": -157.2231903076172, "logps/rejected": -176.61666870117188, "loss": 0.2506, "rewards/accuracies": 1.0, "rewards/chosen": -1.5769833326339722, "rewards/margins": 2.5216851234436035, "rewards/rejected": -4.098668098449707, "step": 958 }, { "epoch": 1.26, "learning_rate": 4.8415791266704404e-05, "logits/chosen": -2.1697206497192383, "logits/rejected": -2.1985995769500732, "logps/chosen": -197.36570739746094, "logps/rejected": -242.4941864013672, "loss": 0.217, "rewards/accuracies": 0.875, "rewards/chosen": -1.0622727870941162, "rewards/margins": 3.3413608074188232, "rewards/rejected": -4.4036335945129395, "step": 959 }, { "epoch": 1.26, "learning_rate": 4.841214019746663e-05, "logits/chosen": -2.132883310317993, "logits/rejected": -2.2006301879882812, "logps/chosen": -195.13247680664062, "logps/rejected": -226.83316040039062, "loss": 0.2752, "rewards/accuracies": 0.9375, "rewards/chosen": -1.512275218963623, "rewards/margins": 2.76702880859375, "rewards/rejected": -4.279304027557373, "step": 960 }, { "epoch": 1.26, "learning_rate": 4.840848506381276e-05, "logits/chosen": -2.286079168319702, "logits/rejected": -2.3662781715393066, "logps/chosen": -201.76133728027344, "logps/rejected": -226.46603393554688, "loss": 0.1871, "rewards/accuracies": 0.875, "rewards/chosen": -1.301385521888733, "rewards/margins": 3.0335183143615723, "rewards/rejected": -4.334904193878174, "step": 961 }, { "epoch": 1.26, "learning_rate": 4.840482586637732e-05, "logits/chosen": -2.354762554168701, "logits/rejected": -2.3351876735687256, "logps/chosen": -213.7999267578125, "logps/rejected": -268.6205139160156, "loss": 0.3769, "rewards/accuracies": 0.875, "rewards/chosen": -1.8948153257369995, "rewards/margins": 2.8223719596862793, "rewards/rejected": -4.71718692779541, "step": 962 }, { "epoch": 1.26, "learning_rate": 4.840116260579557e-05, "logits/chosen": -2.3253631591796875, "logits/rejected": -2.4139952659606934, "logps/chosen": -196.48281860351562, "logps/rejected": -258.4675598144531, "loss": 0.0903, "rewards/accuracies": 1.0, "rewards/chosen": -1.2855195999145508, "rewards/margins": 4.241164684295654, "rewards/rejected": -5.526684284210205, "step": 963 }, { "epoch": 1.26, "learning_rate": 4.839749528270345e-05, "logits/chosen": -2.1117494106292725, "logits/rejected": -2.233508586883545, "logps/chosen": -162.23635864257812, "logps/rejected": -216.20143127441406, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -1.412933349609375, "rewards/margins": 3.0781798362731934, "rewards/rejected": -4.49111270904541, "step": 964 }, { "epoch": 1.26, "learning_rate": 4.8393823897737634e-05, "logits/chosen": -1.8761091232299805, "logits/rejected": -1.8636384010314941, "logps/chosen": -202.1988525390625, "logps/rejected": -255.47442626953125, "loss": 0.1544, "rewards/accuracies": 0.875, "rewards/chosen": -1.0524611473083496, "rewards/margins": 3.7740285396575928, "rewards/rejected": -4.8264899253845215, "step": 965 }, { "epoch": 1.26, "learning_rate": 4.839014845153548e-05, "logits/chosen": -2.455805778503418, "logits/rejected": -2.4408648014068604, "logps/chosen": -265.5133056640625, "logps/rejected": -322.55694580078125, "loss": 0.1826, "rewards/accuracies": 0.875, "rewards/chosen": -1.1125422716140747, "rewards/margins": 3.8850979804992676, "rewards/rejected": -4.997640132904053, "step": 966 }, { "epoch": 1.27, "learning_rate": 4.838646894473505e-05, "logits/chosen": -1.7058061361312866, "logits/rejected": -1.7635576725006104, "logps/chosen": -178.57357788085938, "logps/rejected": -224.11141967773438, "loss": 0.2088, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7308789491653442, "rewards/margins": 2.972151279449463, "rewards/rejected": -4.703030586242676, "step": 967 }, { "epoch": 1.27, "learning_rate": 4.8382785377975116e-05, "logits/chosen": -2.187974452972412, "logits/rejected": -2.2810630798339844, "logps/chosen": -189.5592803955078, "logps/rejected": -282.3250732421875, "loss": 0.2646, "rewards/accuracies": 0.75, "rewards/chosen": -1.5101513862609863, "rewards/margins": 3.0083746910095215, "rewards/rejected": -4.518526077270508, "step": 968 }, { "epoch": 1.27, "learning_rate": 4.8379097751895166e-05, "logits/chosen": -2.3393805027008057, "logits/rejected": -2.3446669578552246, "logps/chosen": -280.5021667480469, "logps/rejected": -287.1352233886719, "loss": 0.1423, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8774653077125549, "rewards/margins": 3.9873814582824707, "rewards/rejected": -4.864847183227539, "step": 969 }, { "epoch": 1.27, "learning_rate": 4.837540606713538e-05, "logits/chosen": -2.3770768642425537, "logits/rejected": -2.3758389949798584, "logps/chosen": -198.17864990234375, "logps/rejected": -225.19105529785156, "loss": 0.1787, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3526650667190552, "rewards/margins": 2.9090161323547363, "rewards/rejected": -4.26168155670166, "step": 970 }, { "epoch": 1.27, "learning_rate": 4.837171032433663e-05, "logits/chosen": -2.226538896560669, "logits/rejected": -2.3716554641723633, "logps/chosen": -181.35006713867188, "logps/rejected": -279.17645263671875, "loss": 0.0983, "rewards/accuracies": 1.0, "rewards/chosen": -0.6791074275970459, "rewards/margins": 4.5800275802612305, "rewards/rejected": -5.259134292602539, "step": 971 }, { "epoch": 1.27, "learning_rate": 4.836801052414053e-05, "logits/chosen": -1.950883388519287, "logits/rejected": -2.080726146697998, "logps/chosen": -170.36874389648438, "logps/rejected": -215.67941284179688, "loss": 0.2373, "rewards/accuracies": 0.8125, "rewards/chosen": -1.109346866607666, "rewards/margins": 2.8225221633911133, "rewards/rejected": -3.9318690299987793, "step": 972 }, { "epoch": 1.27, "learning_rate": 4.8364306667189364e-05, "logits/chosen": -2.23186993598938, "logits/rejected": -2.1386542320251465, "logps/chosen": -180.2755126953125, "logps/rejected": -230.13412475585938, "loss": 0.112, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8742038011550903, "rewards/margins": 3.692150115966797, "rewards/rejected": -4.566353797912598, "step": 973 }, { "epoch": 1.27, "learning_rate": 4.836059875412613e-05, "logits/chosen": -2.152693748474121, "logits/rejected": -2.1664419174194336, "logps/chosen": -197.43896484375, "logps/rejected": -206.69210815429688, "loss": 0.2823, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0961377620697021, "rewards/margins": 3.136141538619995, "rewards/rejected": -4.2322797775268555, "step": 974 }, { "epoch": 1.28, "learning_rate": 4.8356886785594544e-05, "logits/chosen": -2.3830044269561768, "logits/rejected": -2.392028331756592, "logps/chosen": -191.5318603515625, "logps/rejected": -222.13458251953125, "loss": 0.082, "rewards/accuracies": 1.0, "rewards/chosen": -0.9227029085159302, "rewards/margins": 4.220012664794922, "rewards/rejected": -5.142715930938721, "step": 975 }, { "epoch": 1.28, "learning_rate": 4.835317076223901e-05, "logits/chosen": -2.298764228820801, "logits/rejected": -2.360914468765259, "logps/chosen": -238.4506378173828, "logps/rejected": -276.4215087890625, "loss": 0.1926, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0809506177902222, "rewards/margins": 3.495990037918091, "rewards/rejected": -4.576941013336182, "step": 976 }, { "epoch": 1.28, "learning_rate": 4.834945068470463e-05, "logits/chosen": -2.3851685523986816, "logits/rejected": -2.402043342590332, "logps/chosen": -215.42071533203125, "logps/rejected": -260.2996520996094, "loss": 0.1705, "rewards/accuracies": 0.875, "rewards/chosen": -0.8322378396987915, "rewards/margins": 3.276536703109741, "rewards/rejected": -4.1087751388549805, "step": 977 }, { "epoch": 1.28, "learning_rate": 4.834572655363723e-05, "logits/chosen": -2.3521459102630615, "logits/rejected": -2.359070301055908, "logps/chosen": -172.63397216796875, "logps/rejected": -249.02813720703125, "loss": 0.1009, "rewards/accuracies": 0.875, "rewards/chosen": -0.6295977830886841, "rewards/margins": 4.736910343170166, "rewards/rejected": -5.3665080070495605, "step": 978 }, { "epoch": 1.28, "learning_rate": 4.834199836968334e-05, "logits/chosen": -2.232576847076416, "logits/rejected": -2.2787835597991943, "logps/chosen": -212.65316772460938, "logps/rejected": -277.7938537597656, "loss": 0.1771, "rewards/accuracies": 0.875, "rewards/chosen": -0.9311662912368774, "rewards/margins": 4.264695644378662, "rewards/rejected": -5.19586181640625, "step": 979 }, { "epoch": 1.28, "learning_rate": 4.833826613349016e-05, "logits/chosen": -2.2517361640930176, "logits/rejected": -2.284072160720825, "logps/chosen": -220.1957550048828, "logps/rejected": -214.6461181640625, "loss": 0.2048, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8580664396286011, "rewards/margins": 3.004542589187622, "rewards/rejected": -3.8626086711883545, "step": 980 }, { "epoch": 1.28, "learning_rate": 4.8334529845705635e-05, "logits/chosen": -2.3734512329101562, "logits/rejected": -2.3852882385253906, "logps/chosen": -185.3246612548828, "logps/rejected": -235.97036743164062, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": -0.8764241337776184, "rewards/margins": 3.519378900527954, "rewards/rejected": -4.395802974700928, "step": 981 }, { "epoch": 1.29, "learning_rate": 4.833078950697839e-05, "logits/chosen": -2.318061113357544, "logits/rejected": -2.3952810764312744, "logps/chosen": -203.9101104736328, "logps/rejected": -264.8881530761719, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -1.5848926305770874, "rewards/margins": 3.398397207260132, "rewards/rejected": -4.98328971862793, "step": 982 }, { "epoch": 1.29, "learning_rate": 4.832704511795776e-05, "logits/chosen": -2.257204294204712, "logits/rejected": -2.243340015411377, "logps/chosen": -220.27609252929688, "logps/rejected": -302.30224609375, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -0.7187144756317139, "rewards/margins": 5.5636396408081055, "rewards/rejected": -6.282354354858398, "step": 983 }, { "epoch": 1.29, "learning_rate": 4.8323296679293774e-05, "logits/chosen": -2.0718610286712646, "logits/rejected": -2.163663148880005, "logps/chosen": -201.58529663085938, "logps/rejected": -284.6639099121094, "loss": 0.186, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4365129470825195, "rewards/margins": 3.119493007659912, "rewards/rejected": -4.55600643157959, "step": 984 }, { "epoch": 1.29, "learning_rate": 4.831954419163719e-05, "logits/chosen": -2.378993034362793, "logits/rejected": -2.4145779609680176, "logps/chosen": -194.3405303955078, "logps/rejected": -258.2578430175781, "loss": 0.1569, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0772855281829834, "rewards/margins": 3.5224766731262207, "rewards/rejected": -4.599761962890625, "step": 985 }, { "epoch": 1.29, "learning_rate": 4.8315787655639436e-05, "logits/chosen": -2.158259391784668, "logits/rejected": -2.242321729660034, "logps/chosen": -172.10662841796875, "logps/rejected": -238.54212951660156, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": -1.0470584630966187, "rewards/margins": 3.594759225845337, "rewards/rejected": -4.641817569732666, "step": 986 }, { "epoch": 1.29, "learning_rate": 4.8312027071952645e-05, "logits/chosen": -2.3166654109954834, "logits/rejected": -2.371119499206543, "logps/chosen": -201.59706115722656, "logps/rejected": -208.94171142578125, "loss": 0.1019, "rewards/accuracies": 1.0, "rewards/chosen": -1.161280632019043, "rewards/margins": 3.300464630126953, "rewards/rejected": -4.461745262145996, "step": 987 }, { "epoch": 1.29, "learning_rate": 4.8308262441229704e-05, "logits/chosen": -2.368316888809204, "logits/rejected": -2.464874029159546, "logps/chosen": -227.5435333251953, "logps/rejected": -269.635009765625, "loss": 0.1186, "rewards/accuracies": 1.0, "rewards/chosen": -1.2794337272644043, "rewards/margins": 3.7162654399871826, "rewards/rejected": -4.995699882507324, "step": 988 }, { "epoch": 1.29, "learning_rate": 4.830449376412413e-05, "logits/chosen": -2.4263885021209717, "logits/rejected": -2.330914258956909, "logps/chosen": -174.01873779296875, "logps/rejected": -176.23687744140625, "loss": 0.2376, "rewards/accuracies": 0.875, "rewards/chosen": -1.1324949264526367, "rewards/margins": 2.7193217277526855, "rewards/rejected": -3.8518168926239014, "step": 989 }, { "epoch": 1.3, "learning_rate": 4.8300721041290194e-05, "logits/chosen": -2.482351303100586, "logits/rejected": -2.511679172515869, "logps/chosen": -258.9258117675781, "logps/rejected": -289.4918212890625, "loss": 0.0891, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4783955812454224, "rewards/margins": 3.5242300033569336, "rewards/rejected": -5.002625465393066, "step": 990 }, { "epoch": 1.3, "learning_rate": 4.829694427338284e-05, "logits/chosen": -2.210787773132324, "logits/rejected": -2.2695932388305664, "logps/chosen": -242.7936553955078, "logps/rejected": -285.85845947265625, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -1.1135588884353638, "rewards/margins": 4.644796371459961, "rewards/rejected": -5.758354663848877, "step": 991 }, { "epoch": 1.3, "learning_rate": 4.829316346105773e-05, "logits/chosen": -2.5152206420898438, "logits/rejected": -2.598931312561035, "logps/chosen": -233.4760284423828, "logps/rejected": -257.0856628417969, "loss": 0.1722, "rewards/accuracies": 1.0, "rewards/chosen": -1.4175981283187866, "rewards/margins": 3.751023530960083, "rewards/rejected": -5.168622016906738, "step": 992 }, { "epoch": 1.3, "learning_rate": 4.828937860497124e-05, "logits/chosen": -1.9849010705947876, "logits/rejected": -2.02589750289917, "logps/chosen": -202.5492706298828, "logps/rejected": -240.67250061035156, "loss": 0.2435, "rewards/accuracies": 0.875, "rewards/chosen": -1.1498560905456543, "rewards/margins": 2.9630770683288574, "rewards/rejected": -4.112933158874512, "step": 993 }, { "epoch": 1.3, "learning_rate": 4.82855897057804e-05, "logits/chosen": -2.2394447326660156, "logits/rejected": -2.2644007205963135, "logps/chosen": -200.177978515625, "logps/rejected": -232.31613159179688, "loss": 0.1656, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2112644910812378, "rewards/margins": 3.5165750980377197, "rewards/rejected": -4.727839469909668, "step": 994 }, { "epoch": 1.3, "learning_rate": 4.8281796764143e-05, "logits/chosen": -2.1232497692108154, "logits/rejected": -2.1741788387298584, "logps/chosen": -187.11965942382812, "logps/rejected": -211.57391357421875, "loss": 0.0946, "rewards/accuracies": 1.0, "rewards/chosen": -0.9103628396987915, "rewards/margins": 3.7156426906585693, "rewards/rejected": -4.62600564956665, "step": 995 }, { "epoch": 1.3, "learning_rate": 4.8277999780717496e-05, "logits/chosen": -2.3636045455932617, "logits/rejected": -2.3993918895721436, "logps/chosen": -209.7058868408203, "logps/rejected": -273.6556396484375, "loss": 0.1253, "rewards/accuracies": 0.9375, "rewards/chosen": -1.197162389755249, "rewards/margins": 4.630073547363281, "rewards/rejected": -5.827235698699951, "step": 996 }, { "epoch": 1.3, "learning_rate": 4.827419875616306e-05, "logits/chosen": -2.4287328720092773, "logits/rejected": -2.444946765899658, "logps/chosen": -187.06781005859375, "logps/rejected": -227.0997314453125, "loss": 0.3696, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5417091846466064, "rewards/margins": 3.166154623031616, "rewards/rejected": -4.707863807678223, "step": 997 }, { "epoch": 1.31, "learning_rate": 4.827039369113956e-05, "logits/chosen": -2.3056039810180664, "logits/rejected": -2.3643782138824463, "logps/chosen": -218.290283203125, "logps/rejected": -289.4818420410156, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -1.1654748916625977, "rewards/margins": 4.145327091217041, "rewards/rejected": -5.310801982879639, "step": 998 }, { "epoch": 1.31, "learning_rate": 4.826658458630756e-05, "logits/chosen": -2.3559858798980713, "logits/rejected": -2.4298417568206787, "logps/chosen": -279.0400695800781, "logps/rejected": -276.64044189453125, "loss": 0.1911, "rewards/accuracies": 0.875, "rewards/chosen": -1.6909762620925903, "rewards/margins": 3.3049442768096924, "rewards/rejected": -4.995920658111572, "step": 999 }, { "epoch": 1.31, "learning_rate": 4.826277144232834e-05, "logits/chosen": -2.3466007709503174, "logits/rejected": -2.376455068588257, "logps/chosen": -215.84930419921875, "logps/rejected": -249.32769775390625, "loss": 0.1875, "rewards/accuracies": 0.875, "rewards/chosen": -1.2421891689300537, "rewards/margins": 4.225651264190674, "rewards/rejected": -5.467840194702148, "step": 1000 }, { "epoch": 1.31, "learning_rate": 4.825895425986386e-05, "logits/chosen": -2.164769172668457, "logits/rejected": -2.1837494373321533, "logps/chosen": -250.56695556640625, "logps/rejected": -270.6837463378906, "loss": 0.1681, "rewards/accuracies": 0.875, "rewards/chosen": -1.4974141120910645, "rewards/margins": 3.5974225997924805, "rewards/rejected": -5.094837188720703, "step": 1001 }, { "epoch": 1.31, "learning_rate": 4.82551330395768e-05, "logits/chosen": -2.248732328414917, "logits/rejected": -2.308931589126587, "logps/chosen": -226.01544189453125, "logps/rejected": -276.2228698730469, "loss": 0.1258, "rewards/accuracies": 0.875, "rewards/chosen": -1.646399736404419, "rewards/margins": 3.5327749252319336, "rewards/rejected": -5.179174423217773, "step": 1002 }, { "epoch": 1.31, "learning_rate": 4.825130778213055e-05, "logits/chosen": -1.9366941452026367, "logits/rejected": -1.9974042177200317, "logps/chosen": -248.90155029296875, "logps/rejected": -284.5340576171875, "loss": 0.1246, "rewards/accuracies": 1.0, "rewards/chosen": -1.5386114120483398, "rewards/margins": 4.703797340393066, "rewards/rejected": -6.242408752441406, "step": 1003 }, { "epoch": 1.31, "learning_rate": 4.8247478488189176e-05, "logits/chosen": -2.381505012512207, "logits/rejected": -2.374134063720703, "logps/chosen": -215.34832763671875, "logps/rejected": -241.09263610839844, "loss": 0.1462, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2310172319412231, "rewards/margins": 3.3694674968719482, "rewards/rejected": -4.600484848022461, "step": 1004 }, { "epoch": 1.32, "learning_rate": 4.824364515841745e-05, "logits/chosen": -2.39587664604187, "logits/rejected": -2.445971727371216, "logps/chosen": -199.75390625, "logps/rejected": -250.50318908691406, "loss": 0.1496, "rewards/accuracies": 0.875, "rewards/chosen": -1.2829456329345703, "rewards/margins": 3.5297467708587646, "rewards/rejected": -4.812692642211914, "step": 1005 }, { "epoch": 1.32, "learning_rate": 4.8239807793480845e-05, "logits/chosen": -2.064811944961548, "logits/rejected": -2.2144877910614014, "logps/chosen": -206.80836486816406, "logps/rejected": -274.880859375, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": -1.6409518718719482, "rewards/margins": 4.403507709503174, "rewards/rejected": -6.044458866119385, "step": 1006 }, { "epoch": 1.32, "learning_rate": 4.823596639404555e-05, "logits/chosen": -1.9515126943588257, "logits/rejected": -1.9221199750900269, "logps/chosen": -183.7147216796875, "logps/rejected": -199.12330627441406, "loss": 0.2176, "rewards/accuracies": 0.875, "rewards/chosen": -1.4174538850784302, "rewards/margins": 2.61428165435791, "rewards/rejected": -4.031735420227051, "step": 1007 }, { "epoch": 1.32, "learning_rate": 4.8232120960778444e-05, "logits/chosen": -2.1660711765289307, "logits/rejected": -2.1986541748046875, "logps/chosen": -219.20086669921875, "logps/rejected": -308.8476867675781, "loss": 0.1746, "rewards/accuracies": 1.0, "rewards/chosen": -1.5317251682281494, "rewards/margins": 4.233796119689941, "rewards/rejected": -5.765522003173828, "step": 1008 }, { "epoch": 1.32, "learning_rate": 4.822827149434709e-05, "logits/chosen": -2.3264455795288086, "logits/rejected": -2.340402126312256, "logps/chosen": -206.29624938964844, "logps/rejected": -227.3938446044922, "loss": 0.2069, "rewards/accuracies": 0.875, "rewards/chosen": -1.2022267580032349, "rewards/margins": 3.670454263687134, "rewards/rejected": -4.872681140899658, "step": 1009 }, { "epoch": 1.32, "learning_rate": 4.822441799541979e-05, "logits/chosen": -1.881190538406372, "logits/rejected": -1.9560871124267578, "logps/chosen": -176.16571044921875, "logps/rejected": -228.5875244140625, "loss": 0.3731, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7666243314743042, "rewards/margins": 2.6788949966430664, "rewards/rejected": -4.44551944732666, "step": 1010 }, { "epoch": 1.32, "learning_rate": 4.82205604646655e-05, "logits/chosen": -1.936165690422058, "logits/rejected": -1.9999370574951172, "logps/chosen": -175.9329833984375, "logps/rejected": -251.57110595703125, "loss": 0.1323, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2743474245071411, "rewards/margins": 4.712624549865723, "rewards/rejected": -5.986972332000732, "step": 1011 }, { "epoch": 1.32, "learning_rate": 4.8216698902753915e-05, "logits/chosen": -2.4014575481414795, "logits/rejected": -2.3882806301116943, "logps/chosen": -235.85289001464844, "logps/rejected": -276.8750915527344, "loss": 0.1432, "rewards/accuracies": 0.875, "rewards/chosen": -1.5504268407821655, "rewards/margins": 4.225334644317627, "rewards/rejected": -5.775761127471924, "step": 1012 }, { "epoch": 1.33, "learning_rate": 4.8212833310355397e-05, "logits/chosen": -2.2567152976989746, "logits/rejected": -2.307617425918579, "logps/chosen": -250.41036987304688, "logps/rejected": -298.62841796875, "loss": 0.1228, "rewards/accuracies": 0.875, "rewards/chosen": -1.641356110572815, "rewards/margins": 3.9862754344940186, "rewards/rejected": -5.627632141113281, "step": 1013 }, { "epoch": 1.33, "learning_rate": 4.820896368814104e-05, "logits/chosen": -2.222958564758301, "logits/rejected": -2.243933916091919, "logps/chosen": -144.96896362304688, "logps/rejected": -212.0235595703125, "loss": 0.115, "rewards/accuracies": 1.0, "rewards/chosen": -0.9734640121459961, "rewards/margins": 3.973369598388672, "rewards/rejected": -4.946833610534668, "step": 1014 }, { "epoch": 1.33, "learning_rate": 4.8205090036782626e-05, "logits/chosen": -2.0626208782196045, "logits/rejected": -2.0410847663879395, "logps/chosen": -171.70816040039062, "logps/rejected": -231.00027465820312, "loss": 0.1491, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3891879320144653, "rewards/margins": 3.029548406600952, "rewards/rejected": -4.418736457824707, "step": 1015 }, { "epoch": 1.33, "learning_rate": 4.820121235695261e-05, "logits/chosen": -2.0706686973571777, "logits/rejected": -2.0150628089904785, "logps/chosen": -160.81874084472656, "logps/rejected": -187.8358917236328, "loss": 0.2422, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6058461666107178, "rewards/margins": 2.6923718452453613, "rewards/rejected": -4.298218250274658, "step": 1016 }, { "epoch": 1.33, "learning_rate": 4.8197330649324184e-05, "logits/chosen": -2.411139965057373, "logits/rejected": -2.2218780517578125, "logps/chosen": -143.00469970703125, "logps/rejected": -175.34141540527344, "loss": 0.2557, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3263788223266602, "rewards/margins": 3.335325241088867, "rewards/rejected": -4.661704063415527, "step": 1017 }, { "epoch": 1.33, "learning_rate": 4.819344491457122e-05, "logits/chosen": -2.058603048324585, "logits/rejected": -2.1647167205810547, "logps/chosen": -168.77676391601562, "logps/rejected": -233.14918518066406, "loss": 0.1883, "rewards/accuracies": 0.875, "rewards/chosen": -2.0997314453125, "rewards/margins": 3.1378016471862793, "rewards/rejected": -5.2375335693359375, "step": 1018 }, { "epoch": 1.33, "learning_rate": 4.818955515336829e-05, "logits/chosen": -2.3335368633270264, "logits/rejected": -2.343855381011963, "logps/chosen": -220.887939453125, "logps/rejected": -275.09100341796875, "loss": 0.2941, "rewards/accuracies": 0.75, "rewards/chosen": -1.5219594240188599, "rewards/margins": 4.077702045440674, "rewards/rejected": -5.599661350250244, "step": 1019 }, { "epoch": 1.33, "learning_rate": 4.8185661366390676e-05, "logits/chosen": -2.0001189708709717, "logits/rejected": -2.10128116607666, "logps/chosen": -237.78350830078125, "logps/rejected": -302.6692810058594, "loss": 0.2217, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2568397521972656, "rewards/margins": 3.168138027191162, "rewards/rejected": -6.4249773025512695, "step": 1020 }, { "epoch": 1.34, "learning_rate": 4.8181763554314345e-05, "logits/chosen": -2.1002883911132812, "logits/rejected": -2.1318533420562744, "logps/chosen": -215.0624237060547, "logps/rejected": -272.5454406738281, "loss": 0.0886, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1878482103347778, "rewards/margins": 5.048128128051758, "rewards/rejected": -6.2359771728515625, "step": 1021 }, { "epoch": 1.34, "learning_rate": 4.8177861717815976e-05, "logits/chosen": -2.43477463722229, "logits/rejected": -2.367626905441284, "logps/chosen": -170.18011474609375, "logps/rejected": -194.78160095214844, "loss": 0.2638, "rewards/accuracies": 0.875, "rewards/chosen": -1.694594383239746, "rewards/margins": 2.889676094055176, "rewards/rejected": -4.584270000457764, "step": 1022 }, { "epoch": 1.34, "learning_rate": 4.8173955857572924e-05, "logits/chosen": -2.2585108280181885, "logits/rejected": -2.2841644287109375, "logps/chosen": -221.76345825195312, "logps/rejected": -282.0390930175781, "loss": 0.123, "rewards/accuracies": 0.875, "rewards/chosen": -1.552736759185791, "rewards/margins": 4.385633945465088, "rewards/rejected": -5.938370704650879, "step": 1023 }, { "epoch": 1.34, "learning_rate": 4.817004597426327e-05, "logits/chosen": -2.252422332763672, "logits/rejected": -2.369584083557129, "logps/chosen": -198.19515991210938, "logps/rejected": -256.17254638671875, "loss": 0.0797, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3669312000274658, "rewards/margins": 4.838632583618164, "rewards/rejected": -6.205563545227051, "step": 1024 }, { "epoch": 1.34, "learning_rate": 4.816613206856577e-05, "logits/chosen": -1.7100903987884521, "logits/rejected": -1.7708444595336914, "logps/chosen": -224.86050415039062, "logps/rejected": -296.8453674316406, "loss": 0.1638, "rewards/accuracies": 0.9375, "rewards/chosen": -1.458321452140808, "rewards/margins": 4.833914756774902, "rewards/rejected": -6.292235851287842, "step": 1025 }, { "epoch": 1.34, "learning_rate": 4.8162214141159914e-05, "logits/chosen": -2.167717933654785, "logits/rejected": -2.2409541606903076, "logps/chosen": -137.0130615234375, "logps/rejected": -195.80404663085938, "loss": 0.2602, "rewards/accuracies": 0.875, "rewards/chosen": -1.234320878982544, "rewards/margins": 3.5150794982910156, "rewards/rejected": -4.749400615692139, "step": 1026 }, { "epoch": 1.34, "learning_rate": 4.815829219272584e-05, "logits/chosen": -2.1251039505004883, "logits/rejected": -2.1560146808624268, "logps/chosen": -217.72801208496094, "logps/rejected": -259.56439208984375, "loss": 0.1545, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7046087980270386, "rewards/margins": 3.3975937366485596, "rewards/rejected": -5.102202415466309, "step": 1027 }, { "epoch": 1.35, "learning_rate": 4.815436622394441e-05, "logits/chosen": -2.1002719402313232, "logits/rejected": -2.108283042907715, "logps/chosen": -123.21249389648438, "logps/rejected": -177.59942626953125, "loss": 0.334, "rewards/accuracies": 0.8125, "rewards/chosen": -1.118762493133545, "rewards/margins": 2.3827548027038574, "rewards/rejected": -3.5015172958374023, "step": 1028 }, { "epoch": 1.35, "learning_rate": 4.815043623549721e-05, "logits/chosen": -1.9848045110702515, "logits/rejected": -2.0297982692718506, "logps/chosen": -191.30886840820312, "logps/rejected": -238.38487243652344, "loss": 0.1696, "rewards/accuracies": 0.875, "rewards/chosen": -1.3014191389083862, "rewards/margins": 3.1689629554748535, "rewards/rejected": -4.470382213592529, "step": 1029 }, { "epoch": 1.35, "learning_rate": 4.814650222806647e-05, "logits/chosen": -2.1499392986297607, "logits/rejected": -2.2093987464904785, "logps/chosen": -200.4322509765625, "logps/rejected": -266.98614501953125, "loss": 0.1951, "rewards/accuracies": 0.875, "rewards/chosen": -1.3251994848251343, "rewards/margins": 3.7016544342041016, "rewards/rejected": -5.026854038238525, "step": 1030 }, { "epoch": 1.35, "learning_rate": 4.8142564202335155e-05, "logits/chosen": -2.294783115386963, "logits/rejected": -2.1916940212249756, "logps/chosen": -168.14263916015625, "logps/rejected": -178.0428466796875, "loss": 0.2519, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3096660375595093, "rewards/margins": 2.79194974899292, "rewards/rejected": -4.101615905761719, "step": 1031 }, { "epoch": 1.35, "learning_rate": 4.813862215898692e-05, "logits/chosen": -2.161398410797119, "logits/rejected": -2.232086181640625, "logps/chosen": -197.67620849609375, "logps/rejected": -290.12420654296875, "loss": 0.1613, "rewards/accuracies": 0.9375, "rewards/chosen": -1.239977478981018, "rewards/margins": 3.1511924266815186, "rewards/rejected": -4.391169548034668, "step": 1032 }, { "epoch": 1.35, "learning_rate": 4.813467609870611e-05, "logits/chosen": -2.2221438884735107, "logits/rejected": -2.1995956897735596, "logps/chosen": -224.52113342285156, "logps/rejected": -237.9773712158203, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -1.0239717960357666, "rewards/margins": 4.245767116546631, "rewards/rejected": -5.269738674163818, "step": 1033 }, { "epoch": 1.35, "learning_rate": 4.813072602217778e-05, "logits/chosen": -2.0670764446258545, "logits/rejected": -2.113513469696045, "logps/chosen": -233.91201782226562, "logps/rejected": -259.1944885253906, "loss": 0.2214, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0860904455184937, "rewards/margins": 4.289690971374512, "rewards/rejected": -5.375781059265137, "step": 1034 }, { "epoch": 1.35, "learning_rate": 4.8126771930087674e-05, "logits/chosen": -2.3435795307159424, "logits/rejected": -2.508849620819092, "logps/chosen": -174.04713439941406, "logps/rejected": -249.47862243652344, "loss": 0.2356, "rewards/accuracies": 0.875, "rewards/chosen": -1.164699673652649, "rewards/margins": 4.200926303863525, "rewards/rejected": -5.365626335144043, "step": 1035 }, { "epoch": 1.36, "learning_rate": 4.8122813823122225e-05, "logits/chosen": -2.4261474609375, "logits/rejected": -2.459974527359009, "logps/chosen": -207.60198974609375, "logps/rejected": -263.3089294433594, "loss": 0.1271, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1393440961837769, "rewards/margins": 4.021913528442383, "rewards/rejected": -5.161257266998291, "step": 1036 }, { "epoch": 1.36, "learning_rate": 4.8118851701968584e-05, "logits/chosen": -2.2980728149414062, "logits/rejected": -2.3554952144622803, "logps/chosen": -187.2412872314453, "logps/rejected": -224.6619873046875, "loss": 0.126, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9625990390777588, "rewards/margins": 3.2137532234191895, "rewards/rejected": -4.176352500915527, "step": 1037 }, { "epoch": 1.36, "learning_rate": 4.811488556731457e-05, "logits/chosen": -1.995209813117981, "logits/rejected": -2.0629470348358154, "logps/chosen": -194.26153564453125, "logps/rejected": -252.84649658203125, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -0.7154987454414368, "rewards/margins": 4.5765485763549805, "rewards/rejected": -5.292047023773193, "step": 1038 }, { "epoch": 1.36, "learning_rate": 4.8110915419848734e-05, "logits/chosen": -2.278034210205078, "logits/rejected": -2.3754935264587402, "logps/chosen": -180.0498809814453, "logps/rejected": -240.5858154296875, "loss": 0.1379, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1113700866699219, "rewards/margins": 3.6900339126586914, "rewards/rejected": -4.801403999328613, "step": 1039 }, { "epoch": 1.36, "learning_rate": 4.8106941260260296e-05, "logits/chosen": -2.460465908050537, "logits/rejected": -2.4936022758483887, "logps/chosen": -239.6426544189453, "logps/rejected": -292.16949462890625, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -0.9402986764907837, "rewards/margins": 4.606878280639648, "rewards/rejected": -5.547176837921143, "step": 1040 }, { "epoch": 1.36, "learning_rate": 4.8102963089239185e-05, "logits/chosen": -2.076064348220825, "logits/rejected": -2.1251323223114014, "logps/chosen": -140.34951782226562, "logps/rejected": -206.3195343017578, "loss": 0.3204, "rewards/accuracies": 0.75, "rewards/chosen": -1.3504629135131836, "rewards/margins": 3.5889742374420166, "rewards/rejected": -4.939437389373779, "step": 1041 }, { "epoch": 1.36, "learning_rate": 4.809898090747601e-05, "logits/chosen": -2.289942502975464, "logits/rejected": -2.3287506103515625, "logps/chosen": -213.01470947265625, "logps/rejected": -265.6434020996094, "loss": 0.1479, "rewards/accuracies": 0.875, "rewards/chosen": -0.9047419428825378, "rewards/margins": 3.5847079753875732, "rewards/rejected": -4.489449977874756, "step": 1042 }, { "epoch": 1.36, "learning_rate": 4.809499471566211e-05, "logits/chosen": -2.0999417304992676, "logits/rejected": -2.199672222137451, "logps/chosen": -178.01663208007812, "logps/rejected": -248.11026000976562, "loss": 0.105, "rewards/accuracies": 1.0, "rewards/chosen": -0.8936255574226379, "rewards/margins": 4.213724613189697, "rewards/rejected": -5.107350826263428, "step": 1043 }, { "epoch": 1.37, "learning_rate": 4.809100451448949e-05, "logits/chosen": -2.2089450359344482, "logits/rejected": -2.1612417697906494, "logps/chosen": -157.15957641601562, "logps/rejected": -182.26841735839844, "loss": 0.2212, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1464567184448242, "rewards/margins": 3.457570791244507, "rewards/rejected": -4.604027271270752, "step": 1044 }, { "epoch": 1.37, "learning_rate": 4.8087010304650866e-05, "logits/chosen": -2.1104516983032227, "logits/rejected": -2.153797149658203, "logps/chosen": -222.3661651611328, "logps/rejected": -295.9107666015625, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -0.4407971203327179, "rewards/margins": 4.490683078765869, "rewards/rejected": -4.9314799308776855, "step": 1045 }, { "epoch": 1.37, "learning_rate": 4.808301208683963e-05, "logits/chosen": -2.568639039993286, "logits/rejected": -2.5642662048339844, "logps/chosen": -239.6685333251953, "logps/rejected": -248.938720703125, "loss": 0.1162, "rewards/accuracies": 1.0, "rewards/chosen": -1.3235012292861938, "rewards/margins": 4.261366844177246, "rewards/rejected": -5.58486795425415, "step": 1046 }, { "epoch": 1.37, "learning_rate": 4.8079009861749904e-05, "logits/chosen": -2.3855719566345215, "logits/rejected": -2.369443893432617, "logps/chosen": -236.78912353515625, "logps/rejected": -273.231689453125, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -1.4863171577453613, "rewards/margins": 4.7871551513671875, "rewards/rejected": -6.273472785949707, "step": 1047 }, { "epoch": 1.37, "learning_rate": 4.807500363007647e-05, "logits/chosen": -2.229086399078369, "logits/rejected": -2.2423181533813477, "logps/chosen": -165.6907958984375, "logps/rejected": -186.762451171875, "loss": 0.1962, "rewards/accuracies": 0.875, "rewards/chosen": -0.7180208563804626, "rewards/margins": 2.7720799446105957, "rewards/rejected": -3.490100860595703, "step": 1048 }, { "epoch": 1.37, "learning_rate": 4.8070993392514826e-05, "logits/chosen": -2.305532693862915, "logits/rejected": -2.322361469268799, "logps/chosen": -202.0149688720703, "logps/rejected": -243.12847900390625, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/chosen": -1.1048834323883057, "rewards/margins": 4.179681301116943, "rewards/rejected": -5.284564971923828, "step": 1049 }, { "epoch": 1.37, "learning_rate": 4.806697914976116e-05, "logits/chosen": -2.4315245151519775, "logits/rejected": -2.420684337615967, "logps/chosen": -206.34152221679688, "logps/rejected": -244.23727416992188, "loss": 0.171, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3189445734024048, "rewards/margins": 3.540530204772949, "rewards/rejected": -4.859475135803223, "step": 1050 }, { "epoch": 1.38, "learning_rate": 4.806296090251236e-05, "logits/chosen": -2.0065677165985107, "logits/rejected": -2.1579909324645996, "logps/chosen": -167.84841918945312, "logps/rejected": -246.53677368164062, "loss": 0.1576, "rewards/accuracies": 0.875, "rewards/chosen": -1.1663308143615723, "rewards/margins": 3.5337471961975098, "rewards/rejected": -4.70007848739624, "step": 1051 }, { "epoch": 1.38, "learning_rate": 4.805893865146601e-05, "logits/chosen": -2.1749401092529297, "logits/rejected": -2.2115371227264404, "logps/chosen": -235.0357208251953, "logps/rejected": -288.3175964355469, "loss": 0.1302, "rewards/accuracies": 0.875, "rewards/chosen": -1.5622204542160034, "rewards/margins": 3.5101852416992188, "rewards/rejected": -5.0724053382873535, "step": 1052 }, { "epoch": 1.38, "learning_rate": 4.805491239732037e-05, "logits/chosen": -2.3291127681732178, "logits/rejected": -2.414362668991089, "logps/chosen": -173.30560302734375, "logps/rejected": -301.0763854980469, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.7221314907073975, "rewards/margins": 6.185711860656738, "rewards/rejected": -7.907843589782715, "step": 1053 }, { "epoch": 1.38, "learning_rate": 4.8050882140774425e-05, "logits/chosen": -2.316331624984741, "logits/rejected": -2.383364677429199, "logps/chosen": -177.23760986328125, "logps/rejected": -202.95751953125, "loss": 0.2219, "rewards/accuracies": 0.8125, "rewards/chosen": -1.775866985321045, "rewards/margins": 2.098893404006958, "rewards/rejected": -3.874760150909424, "step": 1054 }, { "epoch": 1.38, "learning_rate": 4.8046847882527826e-05, "logits/chosen": -2.2674598693847656, "logits/rejected": -2.2072408199310303, "logps/chosen": -173.46591186523438, "logps/rejected": -221.97886657714844, "loss": 0.1288, "rewards/accuracies": 0.9375, "rewards/chosen": -1.116098403930664, "rewards/margins": 3.485488176345825, "rewards/rejected": -4.60158634185791, "step": 1055 }, { "epoch": 1.38, "learning_rate": 4.8042809623280946e-05, "logits/chosen": -2.2563393115997314, "logits/rejected": -2.3027093410491943, "logps/chosen": -178.65032958984375, "logps/rejected": -210.56443786621094, "loss": 0.2929, "rewards/accuracies": 0.875, "rewards/chosen": -1.6832330226898193, "rewards/margins": 3.0855979919433594, "rewards/rejected": -4.768831253051758, "step": 1056 }, { "epoch": 1.38, "learning_rate": 4.803876736373483e-05, "logits/chosen": -2.36661958694458, "logits/rejected": -2.373730421066284, "logps/chosen": -216.2982940673828, "logps/rejected": -270.1596374511719, "loss": 0.2312, "rewards/accuracies": 0.875, "rewards/chosen": -1.5078752040863037, "rewards/margins": 3.428542137145996, "rewards/rejected": -4.936417579650879, "step": 1057 }, { "epoch": 1.38, "learning_rate": 4.8034721104591215e-05, "logits/chosen": -2.025160074234009, "logits/rejected": -2.0265681743621826, "logps/chosen": -213.1846160888672, "logps/rejected": -244.3919677734375, "loss": 0.3887, "rewards/accuracies": 0.8125, "rewards/chosen": -1.756600022315979, "rewards/margins": 3.3049325942993164, "rewards/rejected": -5.061532497406006, "step": 1058 }, { "epoch": 1.39, "learning_rate": 4.803067084655257e-05, "logits/chosen": -2.212038278579712, "logits/rejected": -2.2199130058288574, "logps/chosen": -200.26730346679688, "logps/rejected": -226.02320861816406, "loss": 0.1888, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1899981498718262, "rewards/margins": 3.551909923553467, "rewards/rejected": -4.741908073425293, "step": 1059 }, { "epoch": 1.39, "learning_rate": 4.802661659032202e-05, "logits/chosen": -2.1756439208984375, "logits/rejected": -2.2646827697753906, "logps/chosen": -170.65829467773438, "logps/rejected": -227.00344848632812, "loss": 0.2532, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4983477592468262, "rewards/margins": 2.899458408355713, "rewards/rejected": -4.397806167602539, "step": 1060 }, { "epoch": 1.39, "learning_rate": 4.802255833660338e-05, "logits/chosen": -2.0890917778015137, "logits/rejected": -2.20941424369812, "logps/chosen": -194.01486206054688, "logps/rejected": -247.7682342529297, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": -1.1267577409744263, "rewards/margins": 4.959884166717529, "rewards/rejected": -6.086641788482666, "step": 1061 }, { "epoch": 1.39, "learning_rate": 4.8018496086101194e-05, "logits/chosen": -2.4077579975128174, "logits/rejected": -2.5162241458892822, "logps/chosen": -242.92034912109375, "logps/rejected": -266.2312316894531, "loss": 0.1722, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1819337606430054, "rewards/margins": 3.8653903007507324, "rewards/rejected": -5.047324180603027, "step": 1062 }, { "epoch": 1.39, "learning_rate": 4.801442983952067e-05, "logits/chosen": -2.206264019012451, "logits/rejected": -2.329880952835083, "logps/chosen": -178.778076171875, "logps/rejected": -243.94203186035156, "loss": 0.2205, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6328147649765015, "rewards/margins": 3.6644177436828613, "rewards/rejected": -5.297232627868652, "step": 1063 }, { "epoch": 1.39, "learning_rate": 4.8010359597567736e-05, "logits/chosen": -2.256436347961426, "logits/rejected": -2.3086161613464355, "logps/chosen": -194.04498291015625, "logps/rejected": -268.311279296875, "loss": 0.1495, "rewards/accuracies": 0.875, "rewards/chosen": -0.80282062292099, "rewards/margins": 4.107694625854492, "rewards/rejected": -4.910515308380127, "step": 1064 }, { "epoch": 1.39, "learning_rate": 4.8006285360948976e-05, "logits/chosen": -2.2415833473205566, "logits/rejected": -2.3424930572509766, "logps/chosen": -214.00341796875, "logps/rejected": -264.5972900390625, "loss": 0.1117, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2024343013763428, "rewards/margins": 3.891465425491333, "rewards/rejected": -5.093900203704834, "step": 1065 }, { "epoch": 1.4, "learning_rate": 4.8002207130371705e-05, "logits/chosen": -2.282310724258423, "logits/rejected": -2.3069472312927246, "logps/chosen": -175.22569274902344, "logps/rejected": -244.1259307861328, "loss": 0.1882, "rewards/accuracies": 0.875, "rewards/chosen": -1.2420991659164429, "rewards/margins": 3.709995985031128, "rewards/rejected": -4.9520955085754395, "step": 1066 }, { "epoch": 1.4, "learning_rate": 4.7998124906543906e-05, "logits/chosen": -2.2816646099090576, "logits/rejected": -2.3789501190185547, "logps/chosen": -186.17849731445312, "logps/rejected": -252.48399353027344, "loss": 0.226, "rewards/accuracies": 0.75, "rewards/chosen": -1.2462586164474487, "rewards/margins": 3.481846332550049, "rewards/rejected": -4.728104591369629, "step": 1067 }, { "epoch": 1.4, "learning_rate": 4.799403869017427e-05, "logits/chosen": -2.060042142868042, "logits/rejected": -2.231752395629883, "logps/chosen": -158.0439910888672, "logps/rejected": -194.29510498046875, "loss": 0.1857, "rewards/accuracies": 0.875, "rewards/chosen": -0.7000864744186401, "rewards/margins": 3.765867233276367, "rewards/rejected": -4.465953826904297, "step": 1068 }, { "epoch": 1.4, "learning_rate": 4.798994848197218e-05, "logits/chosen": -2.6301236152648926, "logits/rejected": -2.6716156005859375, "logps/chosen": -189.2099151611328, "logps/rejected": -236.8180694580078, "loss": 0.3166, "rewards/accuracies": 0.75, "rewards/chosen": -1.7804349660873413, "rewards/margins": 2.868950366973877, "rewards/rejected": -4.649385929107666, "step": 1069 }, { "epoch": 1.4, "learning_rate": 4.79858542826477e-05, "logits/chosen": -2.251466751098633, "logits/rejected": -2.363938808441162, "logps/chosen": -189.79736328125, "logps/rejected": -241.90504455566406, "loss": 0.2008, "rewards/accuracies": 0.875, "rewards/chosen": -1.2981109619140625, "rewards/margins": 3.1607394218444824, "rewards/rejected": -4.458850860595703, "step": 1070 }, { "epoch": 1.4, "learning_rate": 4.798175609291161e-05, "logits/chosen": -2.4197938442230225, "logits/rejected": -2.3917999267578125, "logps/chosen": -212.80386352539062, "logps/rejected": -250.47430419921875, "loss": 0.2364, "rewards/accuracies": 0.875, "rewards/chosen": -1.001808524131775, "rewards/margins": 3.6702208518981934, "rewards/rejected": -4.6720290184021, "step": 1071 }, { "epoch": 1.4, "learning_rate": 4.797765391347534e-05, "logits/chosen": -2.4398419857025146, "logits/rejected": -2.46700119972229, "logps/chosen": -183.11013793945312, "logps/rejected": -220.2843780517578, "loss": 0.1893, "rewards/accuracies": 0.875, "rewards/chosen": -0.9826763272285461, "rewards/margins": 3.509796142578125, "rewards/rejected": -4.4924726486206055, "step": 1072 }, { "epoch": 1.4, "learning_rate": 4.7973547745051074e-05, "logits/chosen": -2.4978957176208496, "logits/rejected": -2.5766334533691406, "logps/chosen": -219.82437133789062, "logps/rejected": -293.62359619140625, "loss": 0.146, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2996751070022583, "rewards/margins": 3.58370304107666, "rewards/rejected": -4.883378028869629, "step": 1073 }, { "epoch": 1.41, "learning_rate": 4.796943758835163e-05, "logits/chosen": -2.3775582313537598, "logits/rejected": -2.4787142276763916, "logps/chosen": -211.21597290039062, "logps/rejected": -254.6476287841797, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -0.7991434335708618, "rewards/margins": 4.452718734741211, "rewards/rejected": -5.251862049102783, "step": 1074 }, { "epoch": 1.41, "learning_rate": 4.796532344409055e-05, "logits/chosen": -2.4883899688720703, "logits/rejected": -2.5110130310058594, "logps/chosen": -183.7747802734375, "logps/rejected": -197.6341552734375, "loss": 0.1991, "rewards/accuracies": 1.0, "rewards/chosen": -1.0323280096054077, "rewards/margins": 2.9341001510620117, "rewards/rejected": -3.966428279876709, "step": 1075 }, { "epoch": 1.41, "learning_rate": 4.796120531298206e-05, "logits/chosen": -2.338197946548462, "logits/rejected": -2.2645106315612793, "logps/chosen": -201.51806640625, "logps/rejected": -266.6871032714844, "loss": 0.1477, "rewards/accuracies": 1.0, "rewards/chosen": -0.9344640374183655, "rewards/margins": 3.64399790763855, "rewards/rejected": -4.57846212387085, "step": 1076 }, { "epoch": 1.41, "learning_rate": 4.795708319574109e-05, "logits/chosen": -2.5567445755004883, "logits/rejected": -2.5801315307617188, "logps/chosen": -264.55853271484375, "logps/rejected": -273.15447998046875, "loss": 0.1021, "rewards/accuracies": 0.9375, "rewards/chosen": -0.982978343963623, "rewards/margins": 4.4724040031433105, "rewards/rejected": -5.455382823944092, "step": 1077 }, { "epoch": 1.41, "learning_rate": 4.795295709308324e-05, "logits/chosen": -2.221895694732666, "logits/rejected": -2.1924304962158203, "logps/chosen": -199.84429931640625, "logps/rejected": -275.3081359863281, "loss": 0.1031, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0770325660705566, "rewards/margins": 4.073465347290039, "rewards/rejected": -5.150498390197754, "step": 1078 }, { "epoch": 1.41, "learning_rate": 4.7948827005724816e-05, "logits/chosen": -2.3237662315368652, "logits/rejected": -2.366284132003784, "logps/chosen": -159.32749938964844, "logps/rejected": -198.11053466796875, "loss": 0.2565, "rewards/accuracies": 0.875, "rewards/chosen": -1.221225380897522, "rewards/margins": 2.5992751121520996, "rewards/rejected": -3.820500373840332, "step": 1079 }, { "epoch": 1.41, "learning_rate": 4.794469293438282e-05, "logits/chosen": -2.443016767501831, "logits/rejected": -2.443124771118164, "logps/chosen": -185.0653076171875, "logps/rejected": -246.856689453125, "loss": 0.1283, "rewards/accuracies": 1.0, "rewards/chosen": -0.7620292901992798, "rewards/margins": 3.877666711807251, "rewards/rejected": -4.639695644378662, "step": 1080 }, { "epoch": 1.41, "learning_rate": 4.7940554879774925e-05, "logits/chosen": -2.181255578994751, "logits/rejected": -2.305636167526245, "logps/chosen": -266.7353210449219, "logps/rejected": -283.93853759765625, "loss": 0.2479, "rewards/accuracies": 0.875, "rewards/chosen": -0.8880407810211182, "rewards/margins": 3.7701759338378906, "rewards/rejected": -4.65821647644043, "step": 1081 }, { "epoch": 1.42, "learning_rate": 4.793641284261953e-05, "logits/chosen": -2.369758367538452, "logits/rejected": -2.4988489151000977, "logps/chosen": -183.67445373535156, "logps/rejected": -233.878173828125, "loss": 0.1349, "rewards/accuracies": 0.875, "rewards/chosen": -1.2128649950027466, "rewards/margins": 3.4675493240356445, "rewards/rejected": -4.680413722991943, "step": 1082 }, { "epoch": 1.42, "learning_rate": 4.793226682363568e-05, "logits/chosen": -2.573646068572998, "logits/rejected": -2.576871395111084, "logps/chosen": -166.67950439453125, "logps/rejected": -206.69732666015625, "loss": 0.2356, "rewards/accuracies": 0.75, "rewards/chosen": -0.9300564527511597, "rewards/margins": 3.6901583671569824, "rewards/rejected": -4.620214939117432, "step": 1083 }, { "epoch": 1.42, "learning_rate": 4.7928116823543155e-05, "logits/chosen": -2.3677217960357666, "logits/rejected": -2.405235528945923, "logps/chosen": -187.89686584472656, "logps/rejected": -253.44918823242188, "loss": 0.1928, "rewards/accuracies": 0.875, "rewards/chosen": -1.2416791915893555, "rewards/margins": 3.303443670272827, "rewards/rejected": -4.545123100280762, "step": 1084 }, { "epoch": 1.42, "learning_rate": 4.79239628430624e-05, "logits/chosen": -2.4083261489868164, "logits/rejected": -2.4456753730773926, "logps/chosen": -216.77603149414062, "logps/rejected": -276.124755859375, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -1.0237876176834106, "rewards/margins": 4.667983531951904, "rewards/rejected": -5.691771030426025, "step": 1085 }, { "epoch": 1.42, "learning_rate": 4.791980488291456e-05, "logits/chosen": -2.082496404647827, "logits/rejected": -2.0862114429473877, "logps/chosen": -197.0124969482422, "logps/rejected": -200.89968872070312, "loss": 0.1189, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8787529468536377, "rewards/margins": 3.3207461833953857, "rewards/rejected": -4.199499130249023, "step": 1086 }, { "epoch": 1.42, "learning_rate": 4.791564294382147e-05, "logits/chosen": -2.251490831375122, "logits/rejected": -2.3277134895324707, "logps/chosen": -176.1337432861328, "logps/rejected": -240.8201904296875, "loss": 0.112, "rewards/accuracies": 1.0, "rewards/chosen": -0.4940316677093506, "rewards/margins": 4.046961784362793, "rewards/rejected": -4.5409932136535645, "step": 1087 }, { "epoch": 1.42, "learning_rate": 4.7911477026505654e-05, "logits/chosen": -2.273149251937866, "logits/rejected": -2.302190065383911, "logps/chosen": -198.4782257080078, "logps/rejected": -251.56442260742188, "loss": 0.1584, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0771201848983765, "rewards/margins": 4.303647994995117, "rewards/rejected": -5.380767822265625, "step": 1088 }, { "epoch": 1.43, "learning_rate": 4.790730713169033e-05, "logits/chosen": -2.35538649559021, "logits/rejected": -2.484128475189209, "logps/chosen": -200.436767578125, "logps/rejected": -264.001708984375, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": -1.3317238092422485, "rewards/margins": 3.762491226196289, "rewards/rejected": -5.094214916229248, "step": 1089 }, { "epoch": 1.43, "learning_rate": 4.7903133260099385e-05, "logits/chosen": -2.3187928199768066, "logits/rejected": -2.248053550720215, "logps/chosen": -269.09881591796875, "logps/rejected": -317.9813232421875, "loss": 0.1617, "rewards/accuracies": 0.875, "rewards/chosen": -1.5169427394866943, "rewards/margins": 4.865277290344238, "rewards/rejected": -6.382220268249512, "step": 1090 }, { "epoch": 1.43, "learning_rate": 4.789895541245745e-05, "logits/chosen": -2.2053849697113037, "logits/rejected": -2.249541759490967, "logps/chosen": -211.03778076171875, "logps/rejected": -254.56468200683594, "loss": 0.1818, "rewards/accuracies": 0.875, "rewards/chosen": -1.5010100603103638, "rewards/margins": 3.9072840213775635, "rewards/rejected": -5.408294200897217, "step": 1091 }, { "epoch": 1.43, "learning_rate": 4.7894773589489775e-05, "logits/chosen": -2.25164794921875, "logits/rejected": -2.364285945892334, "logps/chosen": -188.2033233642578, "logps/rejected": -213.60580444335938, "loss": 0.1675, "rewards/accuracies": 0.875, "rewards/chosen": -1.2163310050964355, "rewards/margins": 3.9235103130340576, "rewards/rejected": -5.139841079711914, "step": 1092 }, { "epoch": 1.43, "learning_rate": 4.7890587791922364e-05, "logits/chosen": -2.3585565090179443, "logits/rejected": -2.3559505939483643, "logps/chosen": -183.8409423828125, "logps/rejected": -233.60438537597656, "loss": 0.2356, "rewards/accuracies": 0.875, "rewards/chosen": -1.3878989219665527, "rewards/margins": 2.798985719680786, "rewards/rejected": -4.186884880065918, "step": 1093 }, { "epoch": 1.43, "learning_rate": 4.788639802048187e-05, "logits/chosen": -2.4514660835266113, "logits/rejected": -2.468566417694092, "logps/chosen": -189.787841796875, "logps/rejected": -232.9893798828125, "loss": 0.1471, "rewards/accuracies": 1.0, "rewards/chosen": -1.1803812980651855, "rewards/margins": 3.827017307281494, "rewards/rejected": -5.00739860534668, "step": 1094 }, { "epoch": 1.43, "learning_rate": 4.788220427589566e-05, "logits/chosen": -2.278841257095337, "logits/rejected": -2.3293325901031494, "logps/chosen": -177.0409393310547, "logps/rejected": -221.64451599121094, "loss": 0.2064, "rewards/accuracies": 0.875, "rewards/chosen": -0.7956516742706299, "rewards/margins": 4.03370475769043, "rewards/rejected": -4.8293561935424805, "step": 1095 }, { "epoch": 1.43, "learning_rate": 4.787800655889176e-05, "logits/chosen": -2.2903358936309814, "logits/rejected": -2.3189077377319336, "logps/chosen": -185.59381103515625, "logps/rejected": -257.0823059082031, "loss": 0.178, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8907773494720459, "rewards/margins": 4.005603790283203, "rewards/rejected": -4.896381378173828, "step": 1096 }, { "epoch": 1.44, "learning_rate": 4.787380487019893e-05, "logits/chosen": -2.3953933715820312, "logits/rejected": -2.450338363647461, "logps/chosen": -179.10519409179688, "logps/rejected": -219.1466522216797, "loss": 0.1598, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0823649168014526, "rewards/margins": 3.8283493518829346, "rewards/rejected": -4.910714149475098, "step": 1097 }, { "epoch": 1.44, "learning_rate": 4.786959921054659e-05, "logits/chosen": -2.3358683586120605, "logits/rejected": -2.2801101207733154, "logps/chosen": -209.03997802734375, "logps/rejected": -293.61419677734375, "loss": 0.1063, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2742486000061035, "rewards/margins": 3.9310548305511475, "rewards/rejected": -5.205303192138672, "step": 1098 }, { "epoch": 1.44, "learning_rate": 4.7865389580664844e-05, "logits/chosen": -2.279576301574707, "logits/rejected": -2.350735664367676, "logps/chosen": -146.66612243652344, "logps/rejected": -216.35707092285156, "loss": 0.1297, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8987219929695129, "rewards/margins": 3.8822243213653564, "rewards/rejected": -4.780946254730225, "step": 1099 }, { "epoch": 1.44, "learning_rate": 4.78611759812845e-05, "logits/chosen": -2.109327554702759, "logits/rejected": -2.0962979793548584, "logps/chosen": -180.84161376953125, "logps/rejected": -209.8188018798828, "loss": 0.3155, "rewards/accuracies": 0.875, "rewards/chosen": -0.6152057647705078, "rewards/margins": 3.317700147628784, "rewards/rejected": -3.932906150817871, "step": 1100 }, { "epoch": 1.44, "learning_rate": 4.785695841313706e-05, "logits/chosen": -2.3722269535064697, "logits/rejected": -2.3200840950012207, "logps/chosen": -201.63294982910156, "logps/rejected": -254.41258239746094, "loss": 0.1841, "rewards/accuracies": 0.8125, "rewards/chosen": -1.35667884349823, "rewards/margins": 4.864499092102051, "rewards/rejected": -6.2211785316467285, "step": 1101 }, { "epoch": 1.44, "learning_rate": 4.785273687695469e-05, "logits/chosen": -2.3536105155944824, "logits/rejected": -2.4441490173339844, "logps/chosen": -174.35108947753906, "logps/rejected": -237.75894165039062, "loss": 0.116, "rewards/accuracies": 0.9375, "rewards/chosen": -1.450251579284668, "rewards/margins": 4.218486785888672, "rewards/rejected": -5.66873836517334, "step": 1102 }, { "epoch": 1.44, "learning_rate": 4.784851137347028e-05, "logits/chosen": -2.1573073863983154, "logits/rejected": -2.2236359119415283, "logps/chosen": -198.5815887451172, "logps/rejected": -232.4602813720703, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0396865606307983, "rewards/margins": 3.5235390663146973, "rewards/rejected": -4.563225269317627, "step": 1103 }, { "epoch": 1.44, "learning_rate": 4.7844281903417376e-05, "logits/chosen": -2.0304176807403564, "logits/rejected": -2.100639820098877, "logps/chosen": -198.72247314453125, "logps/rejected": -221.755615234375, "loss": 0.2035, "rewards/accuracies": 0.875, "rewards/chosen": -1.3415790796279907, "rewards/margins": 2.7209715843200684, "rewards/rejected": -4.0625505447387695, "step": 1104 }, { "epoch": 1.45, "learning_rate": 4.784004846753023e-05, "logits/chosen": -2.074061393737793, "logits/rejected": -2.121853828430176, "logps/chosen": -166.87387084960938, "logps/rejected": -207.54183959960938, "loss": 0.2033, "rewards/accuracies": 0.875, "rewards/chosen": -0.6906567215919495, "rewards/margins": 4.180294513702393, "rewards/rejected": -4.870951175689697, "step": 1105 }, { "epoch": 1.45, "learning_rate": 4.783581106654378e-05, "logits/chosen": -2.173704147338867, "logits/rejected": -2.194322109222412, "logps/chosen": -160.7693328857422, "logps/rejected": -202.16580200195312, "loss": 0.2204, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3304364681243896, "rewards/margins": 2.7709617614746094, "rewards/rejected": -4.101398468017578, "step": 1106 }, { "epoch": 1.45, "learning_rate": 4.783156970119366e-05, "logits/chosen": -1.8855363130569458, "logits/rejected": -1.914825677871704, "logps/chosen": -188.28408813476562, "logps/rejected": -240.01547241210938, "loss": 0.2478, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0375070571899414, "rewards/margins": 3.5554230213165283, "rewards/rejected": -4.592930793762207, "step": 1107 }, { "epoch": 1.45, "learning_rate": 4.782732437221616e-05, "logits/chosen": -2.5382192134857178, "logits/rejected": -2.4349591732025146, "logps/chosen": -257.2680969238281, "logps/rejected": -267.1951904296875, "loss": 0.1059, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0496553182601929, "rewards/margins": 4.491069316864014, "rewards/rejected": -5.540725231170654, "step": 1108 }, { "epoch": 1.45, "learning_rate": 4.78230750803483e-05, "logits/chosen": -2.325007200241089, "logits/rejected": -2.4198029041290283, "logps/chosen": -162.60813903808594, "logps/rejected": -221.90960693359375, "loss": 0.1349, "rewards/accuracies": 0.9375, "rewards/chosen": -0.49094444513320923, "rewards/margins": 3.7173869609832764, "rewards/rejected": -4.208331108093262, "step": 1109 }, { "epoch": 1.45, "learning_rate": 4.781882182632776e-05, "logits/chosen": -2.475273370742798, "logits/rejected": -2.397214412689209, "logps/chosen": -206.89404296875, "logps/rejected": -258.7071838378906, "loss": 0.1856, "rewards/accuracies": 0.875, "rewards/chosen": -0.805166482925415, "rewards/margins": 4.610111713409424, "rewards/rejected": -5.415278434753418, "step": 1110 }, { "epoch": 1.45, "learning_rate": 4.781456461089294e-05, "logits/chosen": -2.5161237716674805, "logits/rejected": -2.437736749649048, "logps/chosen": -248.8204345703125, "logps/rejected": -275.9759216308594, "loss": 0.1836, "rewards/accuracies": 0.875, "rewards/chosen": -1.067987322807312, "rewards/margins": 4.009271144866943, "rewards/rejected": -5.0772576332092285, "step": 1111 }, { "epoch": 1.46, "learning_rate": 4.781030343478288e-05, "logits/chosen": -2.2793309688568115, "logits/rejected": -2.3616416454315186, "logps/chosen": -213.34039306640625, "logps/rejected": -262.52398681640625, "loss": 0.1304, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2378281354904175, "rewards/margins": 4.064054489135742, "rewards/rejected": -5.301882743835449, "step": 1112 }, { "epoch": 1.46, "learning_rate": 4.780603829873733e-05, "logits/chosen": -2.2147018909454346, "logits/rejected": -2.4757885932922363, "logps/chosen": -275.67681884765625, "logps/rejected": -306.8128662109375, "loss": 0.1641, "rewards/accuracies": 0.875, "rewards/chosen": -1.911482810974121, "rewards/margins": 4.229495048522949, "rewards/rejected": -6.14097785949707, "step": 1113 }, { "epoch": 1.46, "learning_rate": 4.780176920349675e-05, "logits/chosen": -2.2692694664001465, "logits/rejected": -2.4222545623779297, "logps/chosen": -181.1542510986328, "logps/rejected": -243.48069763183594, "loss": 0.1874, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4185149669647217, "rewards/margins": 2.8215103149414062, "rewards/rejected": -4.240025520324707, "step": 1114 }, { "epoch": 1.46, "learning_rate": 4.7797496149802256e-05, "logits/chosen": -2.2589826583862305, "logits/rejected": -2.2955431938171387, "logps/chosen": -163.7628173828125, "logps/rejected": -257.77764892578125, "loss": 0.1802, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0072280168533325, "rewards/margins": 4.691400527954102, "rewards/rejected": -5.6986284255981445, "step": 1115 }, { "epoch": 1.46, "learning_rate": 4.779321913839566e-05, "logits/chosen": -2.281262159347534, "logits/rejected": -2.2819738388061523, "logps/chosen": -233.2173309326172, "logps/rejected": -272.6235656738281, "loss": 0.1662, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1393500566482544, "rewards/margins": 3.8536875247955322, "rewards/rejected": -4.993037700653076, "step": 1116 }, { "epoch": 1.46, "learning_rate": 4.778893817001948e-05, "logits/chosen": -2.2208681106567383, "logits/rejected": -2.1590828895568848, "logps/chosen": -179.7089080810547, "logps/rejected": -216.29774475097656, "loss": 0.1497, "rewards/accuracies": 0.9375, "rewards/chosen": -1.060105323791504, "rewards/margins": 3.382988214492798, "rewards/rejected": -4.443093776702881, "step": 1117 }, { "epoch": 1.46, "learning_rate": 4.7784653245416875e-05, "logits/chosen": -2.1777501106262207, "logits/rejected": -2.2831733226776123, "logps/chosen": -191.6259307861328, "logps/rejected": -239.21493530273438, "loss": 0.1255, "rewards/accuracies": 0.9375, "rewards/chosen": -1.195249080657959, "rewards/margins": 3.476778507232666, "rewards/rejected": -4.672027587890625, "step": 1118 }, { "epoch": 1.46, "learning_rate": 4.778036436533176e-05, "logits/chosen": -2.345694065093994, "logits/rejected": -2.400123357772827, "logps/chosen": -150.64157104492188, "logps/rejected": -217.12583923339844, "loss": 0.1818, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1695704460144043, "rewards/margins": 3.341207504272461, "rewards/rejected": -4.510777950286865, "step": 1119 }, { "epoch": 1.47, "learning_rate": 4.777607153050866e-05, "logits/chosen": -2.27506422996521, "logits/rejected": -2.3480772972106934, "logps/chosen": -226.47450256347656, "logps/rejected": -255.78909301757812, "loss": 0.1828, "rewards/accuracies": 0.875, "rewards/chosen": -1.286346673965454, "rewards/margins": 3.472963333129883, "rewards/rejected": -4.759309768676758, "step": 1120 }, { "epoch": 1.47, "learning_rate": 4.7771774741692844e-05, "logits/chosen": -2.4806315898895264, "logits/rejected": -2.420682430267334, "logps/chosen": -182.59005737304688, "logps/rejected": -220.8089599609375, "loss": 0.1512, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9431337118148804, "rewards/margins": 3.128722906112671, "rewards/rejected": -4.071856498718262, "step": 1121 }, { "epoch": 1.47, "learning_rate": 4.776747399963024e-05, "logits/chosen": -2.4254984855651855, "logits/rejected": -2.4268956184387207, "logps/chosen": -181.3052215576172, "logps/rejected": -244.41795349121094, "loss": 0.1868, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8446401953697205, "rewards/margins": 3.5034520626068115, "rewards/rejected": -4.348092555999756, "step": 1122 }, { "epoch": 1.47, "learning_rate": 4.776316930506747e-05, "logits/chosen": -2.3528339862823486, "logits/rejected": -2.518993616104126, "logps/chosen": -204.74118041992188, "logps/rejected": -297.0326843261719, "loss": 0.0927, "rewards/accuracies": 1.0, "rewards/chosen": -0.9256747364997864, "rewards/margins": 4.606964111328125, "rewards/rejected": -5.5326385498046875, "step": 1123 }, { "epoch": 1.47, "learning_rate": 4.775886065875185e-05, "logits/chosen": -2.428316593170166, "logits/rejected": -2.4729697704315186, "logps/chosen": -192.39427185058594, "logps/rejected": -225.5529327392578, "loss": 0.1996, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1067038774490356, "rewards/margins": 2.738870620727539, "rewards/rejected": -3.845574378967285, "step": 1124 }, { "epoch": 1.47, "learning_rate": 4.775454806143137e-05, "logits/chosen": -2.3744990825653076, "logits/rejected": -2.2757740020751953, "logps/chosen": -174.80218505859375, "logps/rejected": -327.66351318359375, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": -0.8998466730117798, "rewards/margins": 4.191055774688721, "rewards/rejected": -5.090902805328369, "step": 1125 }, { "epoch": 1.47, "learning_rate": 4.775023151385469e-05, "logits/chosen": -2.5411629676818848, "logits/rejected": -2.5592217445373535, "logps/chosen": -221.6334228515625, "logps/rejected": -263.060546875, "loss": 0.1147, "rewards/accuracies": 0.9375, "rewards/chosen": -1.669825553894043, "rewards/margins": 3.485670804977417, "rewards/rejected": -5.155496597290039, "step": 1126 }, { "epoch": 1.47, "learning_rate": 4.774591101677121e-05, "logits/chosen": -2.322519540786743, "logits/rejected": -2.3820464611053467, "logps/chosen": -185.3223876953125, "logps/rejected": -212.63223266601562, "loss": 0.2022, "rewards/accuracies": 0.875, "rewards/chosen": -1.0957611799240112, "rewards/margins": 2.9892659187316895, "rewards/rejected": -4.085027694702148, "step": 1127 }, { "epoch": 1.48, "learning_rate": 4.774158657093095e-05, "logits/chosen": -2.4653425216674805, "logits/rejected": -2.5697944164276123, "logps/chosen": -164.1728973388672, "logps/rejected": -223.85023498535156, "loss": 0.1749, "rewards/accuracies": 0.875, "rewards/chosen": -1.4075347185134888, "rewards/margins": 3.1060991287231445, "rewards/rejected": -4.513633728027344, "step": 1128 }, { "epoch": 1.48, "learning_rate": 4.7737258177084665e-05, "logits/chosen": -2.2479147911071777, "logits/rejected": -2.271726369857788, "logps/chosen": -226.23324584960938, "logps/rejected": -316.6651306152344, "loss": 0.0813, "rewards/accuracies": 0.9375, "rewards/chosen": -0.880527138710022, "rewards/margins": 5.674856662750244, "rewards/rejected": -6.555383682250977, "step": 1129 }, { "epoch": 1.48, "learning_rate": 4.7732925835983775e-05, "logits/chosen": -2.1202504634857178, "logits/rejected": -2.141789436340332, "logps/chosen": -218.8267822265625, "logps/rejected": -285.7803039550781, "loss": 0.1359, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9516122341156006, "rewards/margins": 4.8950653076171875, "rewards/rejected": -5.846677780151367, "step": 1130 }, { "epoch": 1.48, "learning_rate": 4.772858954838038e-05, "logits/chosen": -2.4971561431884766, "logits/rejected": -2.5220043659210205, "logps/chosen": -226.0542755126953, "logps/rejected": -299.23779296875, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -0.871485710144043, "rewards/margins": 5.10019588470459, "rewards/rejected": -5.971682071685791, "step": 1131 }, { "epoch": 1.48, "learning_rate": 4.772424931502727e-05, "logits/chosen": -2.4149010181427, "logits/rejected": -2.431765556335449, "logps/chosen": -280.7022399902344, "logps/rejected": -361.54388427734375, "loss": 0.1054, "rewards/accuracies": 1.0, "rewards/chosen": -1.6145488023757935, "rewards/margins": 3.9257521629333496, "rewards/rejected": -5.5403008460998535, "step": 1132 }, { "epoch": 1.48, "learning_rate": 4.771990513667793e-05, "logits/chosen": -2.3434276580810547, "logits/rejected": -2.3973376750946045, "logps/chosen": -183.86436462402344, "logps/rejected": -243.69293212890625, "loss": 0.2449, "rewards/accuracies": 0.875, "rewards/chosen": -1.6164230108261108, "rewards/margins": 2.5519909858703613, "rewards/rejected": -4.168414115905762, "step": 1133 }, { "epoch": 1.48, "learning_rate": 4.771555701408652e-05, "logits/chosen": -2.5856552124023438, "logits/rejected": -2.4191064834594727, "logps/chosen": -237.94381713867188, "logps/rejected": -232.526611328125, "loss": 0.1426, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6252250671386719, "rewards/margins": 3.2301154136657715, "rewards/rejected": -3.8553402423858643, "step": 1134 }, { "epoch": 1.49, "learning_rate": 4.771120494800789e-05, "logits/chosen": -2.196472644805908, "logits/rejected": -2.1777446269989014, "logps/chosen": -198.78469848632812, "logps/rejected": -220.56103515625, "loss": 0.2213, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9573407173156738, "rewards/margins": 3.4439077377319336, "rewards/rejected": -4.401248455047607, "step": 1135 }, { "epoch": 1.49, "learning_rate": 4.7706848939197565e-05, "logits/chosen": -2.4557721614837646, "logits/rejected": -2.4789040088653564, "logps/chosen": -183.08238220214844, "logps/rejected": -270.4992370605469, "loss": 0.1262, "rewards/accuracies": 0.875, "rewards/chosen": -0.5791445970535278, "rewards/margins": 4.073019027709961, "rewards/rejected": -4.652163982391357, "step": 1136 }, { "epoch": 1.49, "learning_rate": 4.7702488988411765e-05, "logits/chosen": -2.4277684688568115, "logits/rejected": -2.4676928520202637, "logps/chosen": -206.0131378173828, "logps/rejected": -222.62411499023438, "loss": 0.2436, "rewards/accuracies": 0.875, "rewards/chosen": -0.7216240763664246, "rewards/margins": 2.8460915088653564, "rewards/rejected": -3.5677154064178467, "step": 1137 }, { "epoch": 1.49, "learning_rate": 4.7698125096407384e-05, "logits/chosen": -2.500568151473999, "logits/rejected": -2.5246546268463135, "logps/chosen": -237.37734985351562, "logps/rejected": -282.74267578125, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -0.6298863887786865, "rewards/margins": 4.078357219696045, "rewards/rejected": -4.7082438468933105, "step": 1138 }, { "epoch": 1.49, "learning_rate": 4.7693757263942015e-05, "logits/chosen": -2.5762946605682373, "logits/rejected": -2.543735980987549, "logps/chosen": -240.41571044921875, "logps/rejected": -255.56991577148438, "loss": 0.0945, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8246884346008301, "rewards/margins": 4.176553726196289, "rewards/rejected": -5.001242637634277, "step": 1139 }, { "epoch": 1.49, "learning_rate": 4.768938549177393e-05, "logits/chosen": -2.426650047302246, "logits/rejected": -2.5201663970947266, "logps/chosen": -195.04554748535156, "logps/rejected": -247.7749786376953, "loss": 0.0827, "rewards/accuracies": 1.0, "rewards/chosen": -0.9046878814697266, "rewards/margins": 3.637204170227051, "rewards/rejected": -4.541892051696777, "step": 1140 }, { "epoch": 1.49, "learning_rate": 4.7685009780662074e-05, "logits/chosen": -2.114713191986084, "logits/rejected": -2.0542666912078857, "logps/chosen": -184.0598602294922, "logps/rejected": -212.22396850585938, "loss": 0.1166, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9158424735069275, "rewards/margins": 3.6370954513549805, "rewards/rejected": -4.5529375076293945, "step": 1141 }, { "epoch": 1.49, "learning_rate": 4.768063013136607e-05, "logits/chosen": -2.494703769683838, "logits/rejected": -2.525118112564087, "logps/chosen": -232.44784545898438, "logps/rejected": -251.73089599609375, "loss": 0.1935, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5020594596862793, "rewards/margins": 2.5554111003875732, "rewards/rejected": -4.057470321655273, "step": 1142 }, { "epoch": 1.5, "learning_rate": 4.7676246544646266e-05, "logits/chosen": -2.445399522781372, "logits/rejected": -2.4102258682250977, "logps/chosen": -188.2194061279297, "logps/rejected": -230.07611083984375, "loss": 0.1288, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5976011753082275, "rewards/margins": 3.648184299468994, "rewards/rejected": -4.245785713195801, "step": 1143 }, { "epoch": 1.5, "learning_rate": 4.767185902126364e-05, "logits/chosen": -2.579176664352417, "logits/rejected": -2.610398292541504, "logps/chosen": -194.1246795654297, "logps/rejected": -236.80584716796875, "loss": 0.1657, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4905986785888672, "rewards/margins": 3.536527395248413, "rewards/rejected": -5.027125835418701, "step": 1144 }, { "epoch": 1.5, "learning_rate": 4.766746756197989e-05, "logits/chosen": -2.4602108001708984, "logits/rejected": -2.4495668411254883, "logps/chosen": -234.55178833007812, "logps/rejected": -269.4857177734375, "loss": 0.3061, "rewards/accuracies": 0.75, "rewards/chosen": -1.2820649147033691, "rewards/margins": 2.805851936340332, "rewards/rejected": -4.087916851043701, "step": 1145 }, { "epoch": 1.5, "learning_rate": 4.766307216755739e-05, "logits/chosen": -2.402568817138672, "logits/rejected": -2.4603004455566406, "logps/chosen": -184.5412139892578, "logps/rejected": -232.99713134765625, "loss": 0.1395, "rewards/accuracies": 0.875, "rewards/chosen": -1.6240134239196777, "rewards/margins": 3.225022077560425, "rewards/rejected": -4.849035263061523, "step": 1146 }, { "epoch": 1.5, "learning_rate": 4.765867283875919e-05, "logits/chosen": -2.3566904067993164, "logits/rejected": -2.315061092376709, "logps/chosen": -194.67453002929688, "logps/rejected": -208.98086547851562, "loss": 0.1716, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9168694019317627, "rewards/margins": 3.383730173110962, "rewards/rejected": -4.300599575042725, "step": 1147 }, { "epoch": 1.5, "learning_rate": 4.765426957634903e-05, "logits/chosen": -2.288248062133789, "logits/rejected": -2.3149735927581787, "logps/chosen": -248.4288330078125, "logps/rejected": -294.727294921875, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -0.7581281661987305, "rewards/margins": 4.977880954742432, "rewards/rejected": -5.73600959777832, "step": 1148 }, { "epoch": 1.5, "learning_rate": 4.7649862381091326e-05, "logits/chosen": -2.441120147705078, "logits/rejected": -2.507535934448242, "logps/chosen": -145.71258544921875, "logps/rejected": -208.50909423828125, "loss": 0.2425, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5849244594573975, "rewards/margins": 2.7225358486175537, "rewards/rejected": -4.307460308074951, "step": 1149 }, { "epoch": 1.5, "learning_rate": 4.764545125375117e-05, "logits/chosen": -2.0023226737976074, "logits/rejected": -2.0600125789642334, "logps/chosen": -152.4337615966797, "logps/rejected": -222.3336639404297, "loss": 0.1372, "rewards/accuracies": 0.875, "rewards/chosen": -1.2324447631835938, "rewards/margins": 3.39628267288208, "rewards/rejected": -4.628727436065674, "step": 1150 }, { "epoch": 1.51, "learning_rate": 4.764103619509436e-05, "logits/chosen": -2.4556169509887695, "logits/rejected": -2.412785530090332, "logps/chosen": -199.9222869873047, "logps/rejected": -212.75326538085938, "loss": 0.28, "rewards/accuracies": 0.75, "rewards/chosen": -1.5000001192092896, "rewards/margins": 3.5534515380859375, "rewards/rejected": -5.053452014923096, "step": 1151 }, { "epoch": 1.51, "learning_rate": 4.763661720588736e-05, "logits/chosen": -2.436502456665039, "logits/rejected": -2.4594924449920654, "logps/chosen": -209.86231994628906, "logps/rejected": -230.40859985351562, "loss": 0.1923, "rewards/accuracies": 0.875, "rewards/chosen": -1.5364642143249512, "rewards/margins": 3.644205093383789, "rewards/rejected": -5.18066930770874, "step": 1152 }, { "epoch": 1.51, "learning_rate": 4.7632194286897315e-05, "logits/chosen": -2.570007801055908, "logits/rejected": -2.5704219341278076, "logps/chosen": -221.38475036621094, "logps/rejected": -304.7647705078125, "loss": 0.1645, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2075974941253662, "rewards/margins": 5.129228591918945, "rewards/rejected": -6.336826324462891, "step": 1153 }, { "epoch": 1.51, "learning_rate": 4.762776743889207e-05, "logits/chosen": -2.6236629486083984, "logits/rejected": -2.678802967071533, "logps/chosen": -311.6545104980469, "logps/rejected": -333.4806823730469, "loss": 0.1045, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6849138736724854, "rewards/margins": 4.670904636383057, "rewards/rejected": -6.355818271636963, "step": 1154 }, { "epoch": 1.51, "learning_rate": 4.7623336662640116e-05, "logits/chosen": -1.7820888757705688, "logits/rejected": -1.8834837675094604, "logps/chosen": -162.43157958984375, "logps/rejected": -243.75802612304688, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": -2.156273365020752, "rewards/margins": 3.5598320960998535, "rewards/rejected": -5.716104984283447, "step": 1155 }, { "epoch": 1.51, "learning_rate": 4.761890195891067e-05, "logits/chosen": -2.580514907836914, "logits/rejected": -2.583678722381592, "logps/chosen": -227.94639587402344, "logps/rejected": -269.7225341796875, "loss": 0.0754, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5052528381347656, "rewards/margins": 4.785727024078369, "rewards/rejected": -6.290980815887451, "step": 1156 }, { "epoch": 1.51, "learning_rate": 4.76144633284736e-05, "logits/chosen": -2.2309510707855225, "logits/rejected": -2.2391128540039062, "logps/chosen": -204.0966033935547, "logps/rejected": -237.06265258789062, "loss": 0.2103, "rewards/accuracies": 0.9375, "rewards/chosen": -2.803736925125122, "rewards/margins": 2.9250359535217285, "rewards/rejected": -5.728772163391113, "step": 1157 }, { "epoch": 1.52, "learning_rate": 4.761002077209946e-05, "logits/chosen": -2.3017187118530273, "logits/rejected": -2.1956186294555664, "logps/chosen": -204.7859344482422, "logps/rejected": -202.06703186035156, "loss": 0.1633, "rewards/accuracies": 0.9375, "rewards/chosen": -1.668914794921875, "rewards/margins": 3.471302032470703, "rewards/rejected": -5.140216827392578, "step": 1158 }, { "epoch": 1.52, "learning_rate": 4.760557429055951e-05, "logits/chosen": -2.2529184818267822, "logits/rejected": -2.1692707538604736, "logps/chosen": -202.95797729492188, "logps/rejected": -222.89297485351562, "loss": 0.2165, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9888824224472046, "rewards/margins": 3.733574390411377, "rewards/rejected": -5.722457408905029, "step": 1159 }, { "epoch": 1.52, "learning_rate": 4.760112388462564e-05, "logits/chosen": -2.5051965713500977, "logits/rejected": -2.461354970932007, "logps/chosen": -207.01870727539062, "logps/rejected": -233.17376708984375, "loss": 0.2913, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4785971641540527, "rewards/margins": 3.71671199798584, "rewards/rejected": -6.195309638977051, "step": 1160 }, { "epoch": 1.52, "learning_rate": 4.759666955507049e-05, "logits/chosen": -2.3772454261779785, "logits/rejected": -2.3350820541381836, "logps/chosen": -194.571533203125, "logps/rejected": -220.87799072265625, "loss": 0.1533, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7106103897094727, "rewards/margins": 3.6144776344299316, "rewards/rejected": -5.325088024139404, "step": 1161 }, { "epoch": 1.52, "learning_rate": 4.759221130266732e-05, "logits/chosen": -2.2555718421936035, "logits/rejected": -2.2726223468780518, "logps/chosen": -175.2388153076172, "logps/rejected": -250.49134826660156, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -2.260037422180176, "rewards/margins": 4.190368175506592, "rewards/rejected": -6.450405120849609, "step": 1162 }, { "epoch": 1.52, "learning_rate": 4.758774912819011e-05, "logits/chosen": -2.5848538875579834, "logits/rejected": -2.6254186630249023, "logps/chosen": -295.5448913574219, "logps/rejected": -309.3749084472656, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -1.9205611944198608, "rewards/margins": 4.6831841468811035, "rewards/rejected": -6.603745460510254, "step": 1163 }, { "epoch": 1.52, "learning_rate": 4.758328303241349e-05, "logits/chosen": -2.445176124572754, "logits/rejected": -2.4309027194976807, "logps/chosen": -228.26736450195312, "logps/rejected": -228.83767700195312, "loss": 0.1921, "rewards/accuracies": 0.8125, "rewards/chosen": -2.568021297454834, "rewards/margins": 3.549888849258423, "rewards/rejected": -6.117909908294678, "step": 1164 }, { "epoch": 1.52, "learning_rate": 4.75788130161128e-05, "logits/chosen": -2.2904882431030273, "logits/rejected": -2.3325557708740234, "logps/chosen": -225.65245056152344, "logps/rejected": -264.3974304199219, "loss": 0.141, "rewards/accuracies": 0.875, "rewards/chosen": -2.110083818435669, "rewards/margins": 4.307819366455078, "rewards/rejected": -6.417903423309326, "step": 1165 }, { "epoch": 1.53, "learning_rate": 4.7574339080064044e-05, "logits/chosen": -2.248126268386841, "logits/rejected": -2.4054722785949707, "logps/chosen": -179.7675323486328, "logps/rejected": -227.87307739257812, "loss": 0.1646, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8558435440063477, "rewards/margins": 3.661419153213501, "rewards/rejected": -5.5172624588012695, "step": 1166 }, { "epoch": 1.53, "learning_rate": 4.756986122504392e-05, "logits/chosen": -2.5831997394561768, "logits/rejected": -2.652710199356079, "logps/chosen": -187.14601135253906, "logps/rejected": -252.51132202148438, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -1.8020074367523193, "rewards/margins": 4.59144401550293, "rewards/rejected": -6.393451690673828, "step": 1167 }, { "epoch": 1.53, "learning_rate": 4.756537945182978e-05, "logits/chosen": -2.3583719730377197, "logits/rejected": -2.4774858951568604, "logps/chosen": -182.31289672851562, "logps/rejected": -262.7928161621094, "loss": 0.0801, "rewards/accuracies": 0.9375, "rewards/chosen": -1.851448655128479, "rewards/margins": 4.432652950286865, "rewards/rejected": -6.284101486206055, "step": 1168 }, { "epoch": 1.53, "learning_rate": 4.7560893761199685e-05, "logits/chosen": -2.4317312240600586, "logits/rejected": -2.548471689224243, "logps/chosen": -224.8054656982422, "logps/rejected": -264.10919189453125, "loss": 0.2068, "rewards/accuracies": 0.8125, "rewards/chosen": -2.833303213119507, "rewards/margins": 3.181811809539795, "rewards/rejected": -6.015115261077881, "step": 1169 }, { "epoch": 1.53, "learning_rate": 4.7556404153932356e-05, "logits/chosen": -2.501443386077881, "logits/rejected": -2.562654495239258, "logps/chosen": -186.91287231445312, "logps/rejected": -231.56109619140625, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -1.9190934896469116, "rewards/margins": 4.339428424835205, "rewards/rejected": -6.258522033691406, "step": 1170 }, { "epoch": 1.53, "learning_rate": 4.755191063080721e-05, "logits/chosen": -2.152017116546631, "logits/rejected": -2.183455467224121, "logps/chosen": -195.06324768066406, "logps/rejected": -249.9405517578125, "loss": 0.1957, "rewards/accuracies": 0.875, "rewards/chosen": -1.957331657409668, "rewards/margins": 3.673862934112549, "rewards/rejected": -5.631194591522217, "step": 1171 }, { "epoch": 1.53, "learning_rate": 4.754741319260433e-05, "logits/chosen": -2.1391313076019287, "logits/rejected": -2.259127616882324, "logps/chosen": -218.9195098876953, "logps/rejected": -249.30020141601562, "loss": 0.1433, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8016879558563232, "rewards/margins": 4.185688018798828, "rewards/rejected": -6.9873762130737305, "step": 1172 }, { "epoch": 1.54, "learning_rate": 4.754291184010449e-05, "logits/chosen": -2.3584299087524414, "logits/rejected": -2.4219892024993896, "logps/chosen": -250.45458984375, "logps/rejected": -282.7275390625, "loss": 0.1347, "rewards/accuracies": 0.9375, "rewards/chosen": -2.450329303741455, "rewards/margins": 3.8380470275878906, "rewards/rejected": -6.288376808166504, "step": 1173 }, { "epoch": 1.54, "learning_rate": 4.753840657408913e-05, "logits/chosen": -2.546412706375122, "logits/rejected": -2.5415077209472656, "logps/chosen": -218.136474609375, "logps/rejected": -255.75633239746094, "loss": 0.0751, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4324724674224854, "rewards/margins": 4.358306884765625, "rewards/rejected": -5.790779113769531, "step": 1174 }, { "epoch": 1.54, "learning_rate": 4.7533897395340384e-05, "logits/chosen": -2.3359920978546143, "logits/rejected": -2.2689504623413086, "logps/chosen": -175.5142364501953, "logps/rejected": -207.70046997070312, "loss": 0.2638, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7872010469436646, "rewards/margins": 3.380298376083374, "rewards/rejected": -5.167499542236328, "step": 1175 }, { "epoch": 1.54, "learning_rate": 4.752938430464105e-05, "logits/chosen": -2.5895543098449707, "logits/rejected": -2.645479679107666, "logps/chosen": -250.36102294921875, "logps/rejected": -294.0989074707031, "loss": 0.0946, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3163928985595703, "rewards/margins": 5.06205940246582, "rewards/rejected": -6.378452301025391, "step": 1176 }, { "epoch": 1.54, "learning_rate": 4.752486730277463e-05, "logits/chosen": -1.9727551937103271, "logits/rejected": -2.0490756034851074, "logps/chosen": -183.3731231689453, "logps/rejected": -266.77679443359375, "loss": 0.2344, "rewards/accuracies": 0.875, "rewards/chosen": -1.9805545806884766, "rewards/margins": 3.784672975540161, "rewards/rejected": -5.765227317810059, "step": 1177 }, { "epoch": 1.54, "learning_rate": 4.752034639052527e-05, "logits/chosen": -2.3057782649993896, "logits/rejected": -2.363804817199707, "logps/chosen": -210.0028533935547, "logps/rejected": -274.5212707519531, "loss": 0.1186, "rewards/accuracies": 0.9375, "rewards/chosen": -1.943210244178772, "rewards/margins": 4.820004463195801, "rewards/rejected": -6.763214588165283, "step": 1178 }, { "epoch": 1.54, "learning_rate": 4.751582156867783e-05, "logits/chosen": -2.40464186668396, "logits/rejected": -2.3199591636657715, "logps/chosen": -238.06265258789062, "logps/rejected": -262.56134033203125, "loss": 0.2005, "rewards/accuracies": 0.875, "rewards/chosen": -1.6629893779754639, "rewards/margins": 3.4837026596069336, "rewards/rejected": -5.146691799163818, "step": 1179 }, { "epoch": 1.54, "learning_rate": 4.751129283801782e-05, "logits/chosen": -2.4328413009643555, "logits/rejected": -2.546642780303955, "logps/chosen": -248.2761993408203, "logps/rejected": -308.90130615234375, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -2.027224540710449, "rewards/margins": 5.0925612449646, "rewards/rejected": -7.119786262512207, "step": 1180 }, { "epoch": 1.55, "learning_rate": 4.7506760199331445e-05, "logits/chosen": -2.329103469848633, "logits/rejected": -2.442920207977295, "logps/chosen": -196.090087890625, "logps/rejected": -244.14735412597656, "loss": 0.133, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5378122329711914, "rewards/margins": 3.5418591499328613, "rewards/rejected": -6.079671859741211, "step": 1181 }, { "epoch": 1.55, "learning_rate": 4.750222365340559e-05, "logits/chosen": -2.4741384983062744, "logits/rejected": -2.5175294876098633, "logps/chosen": -194.5352325439453, "logps/rejected": -271.7839660644531, "loss": 0.1235, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9755550622940063, "rewards/margins": 4.500469207763672, "rewards/rejected": -6.476024627685547, "step": 1182 }, { "epoch": 1.55, "learning_rate": 4.749768320102781e-05, "logits/chosen": -2.535355806350708, "logits/rejected": -2.596473217010498, "logps/chosen": -246.87472534179688, "logps/rejected": -301.79296875, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -1.202014446258545, "rewards/margins": 4.9576096534729, "rewards/rejected": -6.159624099731445, "step": 1183 }, { "epoch": 1.55, "learning_rate": 4.749313884298633e-05, "logits/chosen": -2.47165846824646, "logits/rejected": -2.4970316886901855, "logps/chosen": -244.66204833984375, "logps/rejected": -291.098388671875, "loss": 0.213, "rewards/accuracies": 0.9375, "rewards/chosen": -1.790406584739685, "rewards/margins": 5.0259270668029785, "rewards/rejected": -6.816333770751953, "step": 1184 }, { "epoch": 1.55, "learning_rate": 4.7488590580070074e-05, "logits/chosen": -2.3476386070251465, "logits/rejected": -2.4502997398376465, "logps/chosen": -223.2727508544922, "logps/rejected": -293.4349365234375, "loss": 0.1489, "rewards/accuracies": 0.9375, "rewards/chosen": -2.024826765060425, "rewards/margins": 3.9169485569000244, "rewards/rejected": -5.941775321960449, "step": 1185 }, { "epoch": 1.55, "learning_rate": 4.748403841306863e-05, "logits/chosen": -2.0279314517974854, "logits/rejected": -2.0650389194488525, "logps/chosen": -163.14549255371094, "logps/rejected": -210.16258239746094, "loss": 0.4483, "rewards/accuracies": 0.75, "rewards/chosen": -2.0485494136810303, "rewards/margins": 2.879338264465332, "rewards/rejected": -4.927887916564941, "step": 1186 }, { "epoch": 1.55, "learning_rate": 4.747948234277228e-05, "logits/chosen": -2.5351710319519043, "logits/rejected": -2.5304994583129883, "logps/chosen": -267.7850036621094, "logps/rejected": -258.6395263671875, "loss": 0.1651, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5308005809783936, "rewards/margins": 4.033406734466553, "rewards/rejected": -5.564207077026367, "step": 1187 }, { "epoch": 1.55, "learning_rate": 4.747492236997195e-05, "logits/chosen": -2.3023691177368164, "logits/rejected": -2.3658742904663086, "logps/chosen": -192.9929656982422, "logps/rejected": -237.79534912109375, "loss": 0.2237, "rewards/accuracies": 0.875, "rewards/chosen": -1.313674807548523, "rewards/margins": 3.4801218509674072, "rewards/rejected": -4.793796539306641, "step": 1188 }, { "epoch": 1.56, "learning_rate": 4.747035849545928e-05, "logits/chosen": -2.5478949546813965, "logits/rejected": -2.51932954788208, "logps/chosen": -239.89755249023438, "logps/rejected": -269.83544921875, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -1.7208430767059326, "rewards/margins": 4.114684104919434, "rewards/rejected": -5.835526943206787, "step": 1189 }, { "epoch": 1.56, "learning_rate": 4.746579072002657e-05, "logits/chosen": -2.2504239082336426, "logits/rejected": -2.3190526962280273, "logps/chosen": -224.220703125, "logps/rejected": -260.7024841308594, "loss": 0.3915, "rewards/accuracies": 0.75, "rewards/chosen": -2.0131428241729736, "rewards/margins": 2.687302589416504, "rewards/rejected": -4.700445652008057, "step": 1190 }, { "epoch": 1.56, "learning_rate": 4.7461219044466795e-05, "logits/chosen": -2.1805527210235596, "logits/rejected": -2.193903684616089, "logps/chosen": -184.85508728027344, "logps/rejected": -234.97364807128906, "loss": 0.2377, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2396941184997559, "rewards/margins": 3.461569309234619, "rewards/rejected": -4.701263427734375, "step": 1191 }, { "epoch": 1.56, "learning_rate": 4.745664346957361e-05, "logits/chosen": -2.694643974304199, "logits/rejected": -2.705972194671631, "logps/chosen": -231.14852905273438, "logps/rejected": -266.03460693359375, "loss": 0.189, "rewards/accuracies": 0.9375, "rewards/chosen": -2.598564624786377, "rewards/margins": 3.681683301925659, "rewards/rejected": -6.280247688293457, "step": 1192 }, { "epoch": 1.56, "learning_rate": 4.745206399614135e-05, "logits/chosen": -2.131896734237671, "logits/rejected": -2.225205421447754, "logps/chosen": -222.63446044921875, "logps/rejected": -309.03814697265625, "loss": 0.1005, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2715827226638794, "rewards/margins": 4.563998222351074, "rewards/rejected": -5.835580825805664, "step": 1193 }, { "epoch": 1.56, "learning_rate": 4.744748062496503e-05, "logits/chosen": -2.4835543632507324, "logits/rejected": -2.552267551422119, "logps/chosen": -166.58905029296875, "logps/rejected": -213.53118896484375, "loss": 0.2019, "rewards/accuracies": 0.8125, "rewards/chosen": -1.377805233001709, "rewards/margins": 3.6189918518066406, "rewards/rejected": -4.99679708480835, "step": 1194 }, { "epoch": 1.56, "learning_rate": 4.744289335684034e-05, "logits/chosen": -2.522873640060425, "logits/rejected": -2.517055034637451, "logps/chosen": -203.4969482421875, "logps/rejected": -267.63873291015625, "loss": 0.1573, "rewards/accuracies": 0.9375, "rewards/chosen": -1.246862530708313, "rewards/margins": 4.703207492828369, "rewards/rejected": -5.950070381164551, "step": 1195 }, { "epoch": 1.57, "learning_rate": 4.7438302192563625e-05, "logits/chosen": -2.3815245628356934, "logits/rejected": -2.4553897380828857, "logps/chosen": -231.88101196289062, "logps/rejected": -279.2082824707031, "loss": 0.1052, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3264628648757935, "rewards/margins": 4.349924564361572, "rewards/rejected": -5.676387786865234, "step": 1196 }, { "epoch": 1.57, "learning_rate": 4.743370713293194e-05, "logits/chosen": -2.671739101409912, "logits/rejected": -2.6977474689483643, "logps/chosen": -245.71820068359375, "logps/rejected": -263.50811767578125, "loss": 0.2618, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2178151607513428, "rewards/margins": 3.918609142303467, "rewards/rejected": -6.1364240646362305, "step": 1197 }, { "epoch": 1.57, "learning_rate": 4.7429108178742985e-05, "logits/chosen": -2.3009557723999023, "logits/rejected": -2.2271933555603027, "logps/chosen": -181.82748413085938, "logps/rejected": -212.26087951660156, "loss": 0.5133, "rewards/accuracies": 0.75, "rewards/chosen": -1.7228325605392456, "rewards/margins": 3.406215190887451, "rewards/rejected": -5.129047870635986, "step": 1198 }, { "epoch": 1.57, "learning_rate": 4.742450533079518e-05, "logits/chosen": -2.5072877407073975, "logits/rejected": -2.4657020568847656, "logps/chosen": -183.00059509277344, "logps/rejected": -212.53353881835938, "loss": 0.3148, "rewards/accuracies": 0.875, "rewards/chosen": -1.878244161605835, "rewards/margins": 2.4889166355133057, "rewards/rejected": -4.367160797119141, "step": 1199 }, { "epoch": 1.57, "learning_rate": 4.7419898589887566e-05, "logits/chosen": -2.134070873260498, "logits/rejected": -2.256047487258911, "logps/chosen": -170.70077514648438, "logps/rejected": -231.8829345703125, "loss": 0.1302, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0568268299102783, "rewards/margins": 4.093639373779297, "rewards/rejected": -5.1504669189453125, "step": 1200 }, { "epoch": 1.57, "learning_rate": 4.74152879568199e-05, "logits/chosen": -2.6162269115448, "logits/rejected": -2.652150869369507, "logps/chosen": -233.90402221679688, "logps/rejected": -244.74334716796875, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -1.6784412860870361, "rewards/margins": 3.7588188648223877, "rewards/rejected": -5.437260627746582, "step": 1201 }, { "epoch": 1.57, "learning_rate": 4.7410673432392596e-05, "logits/chosen": -2.484144687652588, "logits/rejected": -2.5014195442199707, "logps/chosen": -250.83456420898438, "logps/rejected": -281.1622314453125, "loss": 0.2326, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0612598657608032, "rewards/margins": 3.5049850940704346, "rewards/rejected": -4.566245079040527, "step": 1202 }, { "epoch": 1.57, "learning_rate": 4.7406055017406754e-05, "logits/chosen": -2.6265833377838135, "logits/rejected": -2.7116756439208984, "logps/chosen": -236.57855224609375, "logps/rejected": -276.8167724609375, "loss": 0.1522, "rewards/accuracies": 0.875, "rewards/chosen": -1.490135669708252, "rewards/margins": 3.229520082473755, "rewards/rejected": -4.719655513763428, "step": 1203 }, { "epoch": 1.58, "learning_rate": 4.740143271266414e-05, "logits/chosen": -2.5222041606903076, "logits/rejected": -2.6354575157165527, "logps/chosen": -229.79931640625, "logps/rejected": -270.754638671875, "loss": 0.0646, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0310989618301392, "rewards/margins": 5.396121025085449, "rewards/rejected": -6.427220344543457, "step": 1204 }, { "epoch": 1.58, "learning_rate": 4.73968065189672e-05, "logits/chosen": -2.396852731704712, "logits/rejected": -2.4251794815063477, "logps/chosen": -192.14022827148438, "logps/rejected": -229.5162353515625, "loss": 0.3256, "rewards/accuracies": 0.6875, "rewards/chosen": -1.227713704109192, "rewards/margins": 2.811344861984253, "rewards/rejected": -4.039058685302734, "step": 1205 }, { "epoch": 1.58, "learning_rate": 4.739217643711906e-05, "logits/chosen": -2.287642478942871, "logits/rejected": -2.3326570987701416, "logps/chosen": -164.69529724121094, "logps/rejected": -231.8920135498047, "loss": 0.3453, "rewards/accuracies": 0.8125, "rewards/chosen": -1.245210886001587, "rewards/margins": 2.8619096279144287, "rewards/rejected": -4.107120513916016, "step": 1206 }, { "epoch": 1.58, "learning_rate": 4.7387542467923505e-05, "logits/chosen": -2.479602575302124, "logits/rejected": -2.5209507942199707, "logps/chosen": -244.21514892578125, "logps/rejected": -303.6318664550781, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -1.2694910764694214, "rewards/margins": 5.570847034454346, "rewards/rejected": -6.840338706970215, "step": 1207 }, { "epoch": 1.58, "learning_rate": 4.738290461218502e-05, "logits/chosen": -2.4771368503570557, "logits/rejected": -2.456359386444092, "logps/chosen": -230.273681640625, "logps/rejected": -269.0933837890625, "loss": 0.1615, "rewards/accuracies": 0.875, "rewards/chosen": -1.9786639213562012, "rewards/margins": 3.434558629989624, "rewards/rejected": -5.413222312927246, "step": 1208 }, { "epoch": 1.58, "learning_rate": 4.737826287070874e-05, "logits/chosen": -2.2615694999694824, "logits/rejected": -2.326125383377075, "logps/chosen": -157.7958526611328, "logps/rejected": -257.5984191894531, "loss": 0.1683, "rewards/accuracies": 0.875, "rewards/chosen": -1.3305480480194092, "rewards/margins": 3.8238983154296875, "rewards/rejected": -5.154446125030518, "step": 1209 }, { "epoch": 1.58, "learning_rate": 4.737361724430048e-05, "logits/chosen": -2.5612540245056152, "logits/rejected": -2.6601011753082275, "logps/chosen": -240.48211669921875, "logps/rejected": -257.5409851074219, "loss": 0.2828, "rewards/accuracies": 0.75, "rewards/chosen": -1.7493575811386108, "rewards/margins": 2.682534694671631, "rewards/rejected": -4.431892395019531, "step": 1210 }, { "epoch": 1.58, "learning_rate": 4.7368967733766756e-05, "logits/chosen": -2.0791330337524414, "logits/rejected": -2.0077402591705322, "logps/chosen": -228.59085083007812, "logps/rejected": -240.2413787841797, "loss": 0.3355, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6128778457641602, "rewards/margins": 3.4234275817871094, "rewards/rejected": -5.036304950714111, "step": 1211 }, { "epoch": 1.59, "learning_rate": 4.7364314339914716e-05, "logits/chosen": -2.223177671432495, "logits/rejected": -2.280726671218872, "logps/chosen": -187.680419921875, "logps/rejected": -279.1700134277344, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -0.6807892322540283, "rewards/margins": 6.459383010864258, "rewards/rejected": -7.140172004699707, "step": 1212 }, { "epoch": 1.59, "learning_rate": 4.735965706355221e-05, "logits/chosen": -2.3972856998443604, "logits/rejected": -2.450835704803467, "logps/chosen": -180.9072723388672, "logps/rejected": -224.91830444335938, "loss": 0.2092, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4030086994171143, "rewards/margins": 3.2653090953826904, "rewards/rejected": -4.668317794799805, "step": 1213 }, { "epoch": 1.59, "learning_rate": 4.735499590548775e-05, "logits/chosen": -2.483901023864746, "logits/rejected": -2.579958915710449, "logps/chosen": -165.31178283691406, "logps/rejected": -212.93484497070312, "loss": 0.3618, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7332736253738403, "rewards/margins": 2.2384800910949707, "rewards/rejected": -3.9717535972595215, "step": 1214 }, { "epoch": 1.59, "learning_rate": 4.7350330866530536e-05, "logits/chosen": -2.4746031761169434, "logits/rejected": -2.5858569145202637, "logps/chosen": -234.17544555664062, "logps/rejected": -243.3843231201172, "loss": 0.1585, "rewards/accuracies": 0.875, "rewards/chosen": -1.7856451272964478, "rewards/margins": 3.430792808532715, "rewards/rejected": -5.216437816619873, "step": 1215 }, { "epoch": 1.59, "learning_rate": 4.734566194749043e-05, "logits/chosen": -2.1635468006134033, "logits/rejected": -2.184338092803955, "logps/chosen": -167.8646240234375, "logps/rejected": -198.2296142578125, "loss": 0.136, "rewards/accuracies": 1.0, "rewards/chosen": -1.4915691614151, "rewards/margins": 3.2404394149780273, "rewards/rejected": -4.732008934020996, "step": 1216 }, { "epoch": 1.59, "learning_rate": 4.7340989149177955e-05, "logits/chosen": -2.4515089988708496, "logits/rejected": -2.4606142044067383, "logps/chosen": -184.1483154296875, "logps/rejected": -231.7574462890625, "loss": 0.1365, "rewards/accuracies": 0.875, "rewards/chosen": -1.9022963047027588, "rewards/margins": 4.141842842102051, "rewards/rejected": -6.0441389083862305, "step": 1217 }, { "epoch": 1.59, "learning_rate": 4.7336312472404345e-05, "logits/chosen": -2.495224714279175, "logits/rejected": -2.6189403533935547, "logps/chosen": -187.76014709472656, "logps/rejected": -287.38653564453125, "loss": 0.1484, "rewards/accuracies": 1.0, "rewards/chosen": -1.5860087871551514, "rewards/margins": 4.361400604248047, "rewards/rejected": -5.947409629821777, "step": 1218 }, { "epoch": 1.6, "learning_rate": 4.733163191798147e-05, "logits/chosen": -2.3041515350341797, "logits/rejected": -2.3410122394561768, "logps/chosen": -182.3394775390625, "logps/rejected": -240.0974884033203, "loss": 0.2967, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4822897911071777, "rewards/margins": 3.956850290298462, "rewards/rejected": -5.4391398429870605, "step": 1219 }, { "epoch": 1.6, "learning_rate": 4.7326947486721894e-05, "logits/chosen": -2.5003819465637207, "logits/rejected": -2.486321449279785, "logps/chosen": -175.6728057861328, "logps/rejected": -249.07659912109375, "loss": 0.1647, "rewards/accuracies": 0.875, "rewards/chosen": -1.0928356647491455, "rewards/margins": 4.23591423034668, "rewards/rejected": -5.328750133514404, "step": 1220 }, { "epoch": 1.6, "learning_rate": 4.732225917943884e-05, "logits/chosen": -2.34039306640625, "logits/rejected": -2.2606441974639893, "logps/chosen": -209.9175262451172, "logps/rejected": -277.741455078125, "loss": 0.167, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1915336847305298, "rewards/margins": 4.357934474945068, "rewards/rejected": -5.54946756362915, "step": 1221 }, { "epoch": 1.6, "learning_rate": 4.7317566996946235e-05, "logits/chosen": -2.5158519744873047, "logits/rejected": -2.391768455505371, "logps/chosen": -282.4315490722656, "logps/rejected": -324.89337158203125, "loss": 0.0819, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4069547653198242, "rewards/margins": 4.744081497192383, "rewards/rejected": -6.151035785675049, "step": 1222 }, { "epoch": 1.6, "learning_rate": 4.731287094005863e-05, "logits/chosen": -2.2263362407684326, "logits/rejected": -2.276297092437744, "logps/chosen": -216.80142211914062, "logps/rejected": -220.71160888671875, "loss": 0.2046, "rewards/accuracies": 0.875, "rewards/chosen": -2.122260093688965, "rewards/margins": 3.1513662338256836, "rewards/rejected": -5.27362585067749, "step": 1223 }, { "epoch": 1.6, "learning_rate": 4.730817100959128e-05, "logits/chosen": -2.3766252994537354, "logits/rejected": -2.406181812286377, "logps/chosen": -199.9800567626953, "logps/rejected": -254.28375244140625, "loss": 0.2671, "rewards/accuracies": 0.75, "rewards/chosen": -1.7991282939910889, "rewards/margins": 3.4454705715179443, "rewards/rejected": -5.244598388671875, "step": 1224 }, { "epoch": 1.6, "learning_rate": 4.730346720636011e-05, "logits/chosen": -2.5629076957702637, "logits/rejected": -2.5792136192321777, "logps/chosen": -177.7575225830078, "logps/rejected": -288.53857421875, "loss": 0.0896, "rewards/accuracies": 1.0, "rewards/chosen": -1.251127004623413, "rewards/margins": 4.980374813079834, "rewards/rejected": -6.231501579284668, "step": 1225 }, { "epoch": 1.6, "learning_rate": 4.7298759531181717e-05, "logits/chosen": -2.423495054244995, "logits/rejected": -2.53666353225708, "logps/chosen": -225.38986206054688, "logps/rejected": -255.51467895507812, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -2.030442476272583, "rewards/margins": 4.617352485656738, "rewards/rejected": -6.647795677185059, "step": 1226 }, { "epoch": 1.61, "learning_rate": 4.729404798487337e-05, "logits/chosen": -2.4218320846557617, "logits/rejected": -2.541698455810547, "logps/chosen": -189.56942749023438, "logps/rejected": -243.72561645507812, "loss": 0.0691, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4716410636901855, "rewards/margins": 4.515095233917236, "rewards/rejected": -5.98673677444458, "step": 1227 }, { "epoch": 1.61, "learning_rate": 4.7289332568252994e-05, "logits/chosen": -2.4882519245147705, "logits/rejected": -2.520144462585449, "logps/chosen": -194.36102294921875, "logps/rejected": -236.93948364257812, "loss": 0.2329, "rewards/accuracies": 0.875, "rewards/chosen": -1.6610629558563232, "rewards/margins": 3.4544930458068848, "rewards/rejected": -5.115556716918945, "step": 1228 }, { "epoch": 1.61, "learning_rate": 4.728461328213921e-05, "logits/chosen": -2.3431355953216553, "logits/rejected": -2.4315452575683594, "logps/chosen": -184.54681396484375, "logps/rejected": -237.89874267578125, "loss": 0.1567, "rewards/accuracies": 0.9375, "rewards/chosen": -1.615098237991333, "rewards/margins": 4.014577388763428, "rewards/rejected": -5.629674911499023, "step": 1229 }, { "epoch": 1.61, "learning_rate": 4.727989012735129e-05, "logits/chosen": -2.3083677291870117, "logits/rejected": -2.3745315074920654, "logps/chosen": -230.08078002929688, "logps/rejected": -273.61346435546875, "loss": 0.1073, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6126986742019653, "rewards/margins": 4.383516311645508, "rewards/rejected": -5.9962158203125, "step": 1230 }, { "epoch": 1.61, "learning_rate": 4.72751631047092e-05, "logits/chosen": -2.5195953845977783, "logits/rejected": -2.4491376876831055, "logps/chosen": -195.26785278320312, "logps/rejected": -225.06417846679688, "loss": 0.2724, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1910969018936157, "rewards/margins": 2.997663736343384, "rewards/rejected": -4.188760757446289, "step": 1231 }, { "epoch": 1.61, "learning_rate": 4.727043221503355e-05, "logits/chosen": -2.375887632369995, "logits/rejected": -2.3649067878723145, "logps/chosen": -190.8800811767578, "logps/rejected": -264.25982666015625, "loss": 0.1837, "rewards/accuracies": 0.875, "rewards/chosen": -0.8358839750289917, "rewards/margins": 4.462348461151123, "rewards/rejected": -5.298232078552246, "step": 1232 }, { "epoch": 1.61, "learning_rate": 4.7265697459145656e-05, "logits/chosen": -2.2352352142333984, "logits/rejected": -2.3413479328155518, "logps/chosen": -167.3771514892578, "logps/rejected": -217.39353942871094, "loss": 0.3485, "rewards/accuracies": 0.75, "rewards/chosen": -1.4793394804000854, "rewards/margins": 2.465003490447998, "rewards/rejected": -3.944342851638794, "step": 1233 }, { "epoch": 1.61, "learning_rate": 4.7260958837867474e-05, "logits/chosen": -2.3950436115264893, "logits/rejected": -2.3509271144866943, "logps/chosen": -221.51156616210938, "logps/rejected": -274.8773193359375, "loss": 0.237, "rewards/accuracies": 0.875, "rewards/chosen": -1.2772961854934692, "rewards/margins": 3.8298304080963135, "rewards/rejected": -5.107126712799072, "step": 1234 }, { "epoch": 1.62, "learning_rate": 4.725621635202164e-05, "logits/chosen": -2.217377185821533, "logits/rejected": -2.2314724922180176, "logps/chosen": -218.805419921875, "logps/rejected": -250.1678466796875, "loss": 0.274, "rewards/accuracies": 0.875, "rewards/chosen": -1.7175061702728271, "rewards/margins": 3.0603437423706055, "rewards/rejected": -4.777850151062012, "step": 1235 }, { "epoch": 1.62, "learning_rate": 4.7251470002431463e-05, "logits/chosen": -2.3143558502197266, "logits/rejected": -2.3055665493011475, "logps/chosen": -209.69020080566406, "logps/rejected": -267.2593994140625, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -1.4336559772491455, "rewards/margins": 4.277906894683838, "rewards/rejected": -5.711562633514404, "step": 1236 }, { "epoch": 1.62, "learning_rate": 4.724671978992093e-05, "logits/chosen": -2.5497217178344727, "logits/rejected": -2.357327699661255, "logps/chosen": -242.28077697753906, "logps/rejected": -254.4888916015625, "loss": 0.287, "rewards/accuracies": 0.8125, "rewards/chosen": -1.279671311378479, "rewards/margins": 4.20197057723999, "rewards/rejected": -5.48164176940918, "step": 1237 }, { "epoch": 1.62, "learning_rate": 4.724196571531469e-05, "logits/chosen": -2.209345579147339, "logits/rejected": -2.303694725036621, "logps/chosen": -213.60232543945312, "logps/rejected": -218.8167724609375, "loss": 0.3236, "rewards/accuracies": 0.75, "rewards/chosen": -1.613269329071045, "rewards/margins": 2.6522085666656494, "rewards/rejected": -4.265478134155273, "step": 1238 }, { "epoch": 1.62, "learning_rate": 4.7237207779438066e-05, "logits/chosen": -2.289630889892578, "logits/rejected": -2.408109188079834, "logps/chosen": -220.02053833007812, "logps/rejected": -287.1589660644531, "loss": 0.1712, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9712376594543457, "rewards/margins": 5.0107197761535645, "rewards/rejected": -6.98195743560791, "step": 1239 }, { "epoch": 1.62, "learning_rate": 4.7232445983117045e-05, "logits/chosen": -2.221944808959961, "logits/rejected": -2.331416606903076, "logps/chosen": -204.5431365966797, "logps/rejected": -249.51193237304688, "loss": 0.2558, "rewards/accuracies": 0.875, "rewards/chosen": -1.8107631206512451, "rewards/margins": 2.60237455368042, "rewards/rejected": -4.413137435913086, "step": 1240 }, { "epoch": 1.62, "learning_rate": 4.7227680327178294e-05, "logits/chosen": -2.1551241874694824, "logits/rejected": -2.1501760482788086, "logps/chosen": -209.62078857421875, "logps/rejected": -237.73171997070312, "loss": 0.1494, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4279321432113647, "rewards/margins": 3.582655429840088, "rewards/rejected": -5.010587692260742, "step": 1241 }, { "epoch": 1.63, "learning_rate": 4.7222910812449126e-05, "logits/chosen": -2.5197300910949707, "logits/rejected": -2.501941442489624, "logps/chosen": -224.577880859375, "logps/rejected": -275.0819091796875, "loss": 0.1024, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8110631704330444, "rewards/margins": 4.743729591369629, "rewards/rejected": -5.554792404174805, "step": 1242 }, { "epoch": 1.63, "learning_rate": 4.7218137439757574e-05, "logits/chosen": -2.5621564388275146, "logits/rejected": -2.604523181915283, "logps/chosen": -160.7113037109375, "logps/rejected": -195.0877227783203, "loss": 0.2681, "rewards/accuracies": 0.75, "rewards/chosen": -0.9945410490036011, "rewards/margins": 2.9356656074523926, "rewards/rejected": -3.930206298828125, "step": 1243 }, { "epoch": 1.63, "learning_rate": 4.7213360209932286e-05, "logits/chosen": -2.444904088973999, "logits/rejected": -2.7083749771118164, "logps/chosen": -293.1217041015625, "logps/rejected": -356.0822448730469, "loss": 0.1383, "rewards/accuracies": 0.9375, "rewards/chosen": -1.486177921295166, "rewards/margins": 3.4033234119415283, "rewards/rejected": -4.889501571655273, "step": 1244 }, { "epoch": 1.63, "learning_rate": 4.720857912380261e-05, "logits/chosen": -2.3962252140045166, "logits/rejected": -2.4494524002075195, "logps/chosen": -245.69485473632812, "logps/rejected": -278.7045593261719, "loss": 0.1499, "rewards/accuracies": 0.875, "rewards/chosen": -1.5282708406448364, "rewards/margins": 3.444108486175537, "rewards/rejected": -4.972379207611084, "step": 1245 }, { "epoch": 1.63, "learning_rate": 4.7203794182198556e-05, "logits/chosen": -2.1511898040771484, "logits/rejected": -1.9930884838104248, "logps/chosen": -233.6335906982422, "logps/rejected": -218.43870544433594, "loss": 0.2441, "rewards/accuracies": 0.875, "rewards/chosen": -1.1958537101745605, "rewards/margins": 3.0618109703063965, "rewards/rejected": -4.257665157318115, "step": 1246 }, { "epoch": 1.63, "learning_rate": 4.7199005385950813e-05, "logits/chosen": -2.366896629333496, "logits/rejected": -2.437415361404419, "logps/chosen": -172.52511596679688, "logps/rejected": -218.31173706054688, "loss": 0.2482, "rewards/accuracies": 0.75, "rewards/chosen": -1.1920199394226074, "rewards/margins": 3.408511161804199, "rewards/rejected": -4.600531101226807, "step": 1247 }, { "epoch": 1.63, "learning_rate": 4.719421273589071e-05, "logits/chosen": -2.3530538082122803, "logits/rejected": -2.39542555809021, "logps/chosen": -214.77020263671875, "logps/rejected": -258.3968811035156, "loss": 0.2237, "rewards/accuracies": 0.875, "rewards/chosen": -0.9770315289497375, "rewards/margins": 3.545395851135254, "rewards/rejected": -4.522427558898926, "step": 1248 }, { "epoch": 1.63, "learning_rate": 4.718941623285028e-05, "logits/chosen": -2.5741071701049805, "logits/rejected": -2.619910478591919, "logps/chosen": -190.63352966308594, "logps/rejected": -251.1927947998047, "loss": 0.135, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2246308326721191, "rewards/margins": 3.43021559715271, "rewards/rejected": -4.654846668243408, "step": 1249 }, { "epoch": 1.64, "learning_rate": 4.718461587766221e-05, "logits/chosen": -2.167393922805786, "logits/rejected": -2.1985065937042236, "logps/chosen": -184.55860900878906, "logps/rejected": -208.03286743164062, "loss": 0.2788, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9492772221565247, "rewards/margins": 3.1372551918029785, "rewards/rejected": -4.086532115936279, "step": 1250 }, { "epoch": 1.64, "learning_rate": 4.717981167115986e-05, "logits/chosen": -2.3610846996307373, "logits/rejected": -2.4981765747070312, "logps/chosen": -181.03359985351562, "logps/rejected": -254.43211364746094, "loss": 0.1609, "rewards/accuracies": 0.875, "rewards/chosen": -1.5473884344100952, "rewards/margins": 3.8110976219177246, "rewards/rejected": -5.358486175537109, "step": 1251 }, { "epoch": 1.64, "learning_rate": 4.717500361417724e-05, "logits/chosen": -2.58233904838562, "logits/rejected": -2.4361298084259033, "logps/chosen": -227.0497589111328, "logps/rejected": -225.55099487304688, "loss": 0.2847, "rewards/accuracies": 0.875, "rewards/chosen": -1.3530316352844238, "rewards/margins": 3.4566879272460938, "rewards/rejected": -4.809719562530518, "step": 1252 }, { "epoch": 1.64, "learning_rate": 4.717019170754905e-05, "logits/chosen": -2.4212703704833984, "logits/rejected": -2.353156805038452, "logps/chosen": -244.638671875, "logps/rejected": -234.03988647460938, "loss": 0.1988, "rewards/accuracies": 1.0, "rewards/chosen": -0.9548088908195496, "rewards/margins": 3.1238605976104736, "rewards/rejected": -4.07866907119751, "step": 1253 }, { "epoch": 1.64, "learning_rate": 4.716537595211065e-05, "logits/chosen": -1.9767769575119019, "logits/rejected": -2.0549771785736084, "logps/chosen": -250.92616271972656, "logps/rejected": -285.457763671875, "loss": 0.1845, "rewards/accuracies": 0.875, "rewards/chosen": -1.187235951423645, "rewards/margins": 3.0077717304229736, "rewards/rejected": -4.195007801055908, "step": 1254 }, { "epoch": 1.64, "learning_rate": 4.716055634869807e-05, "logits/chosen": -2.5653319358825684, "logits/rejected": -2.4723970890045166, "logps/chosen": -192.06153869628906, "logps/rejected": -212.7266387939453, "loss": 0.2448, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9501335024833679, "rewards/margins": 3.1784751415252686, "rewards/rejected": -4.128608703613281, "step": 1255 }, { "epoch": 1.64, "learning_rate": 4.715573289814801e-05, "logits/chosen": -2.546515941619873, "logits/rejected": -2.493617057800293, "logps/chosen": -171.47616577148438, "logps/rejected": -203.3109130859375, "loss": 0.2335, "rewards/accuracies": 0.875, "rewards/chosen": -0.9509350061416626, "rewards/margins": 2.794473648071289, "rewards/rejected": -3.745408535003662, "step": 1256 }, { "epoch": 1.65, "learning_rate": 4.715090560129782e-05, "logits/chosen": -1.9311320781707764, "logits/rejected": -1.997302770614624, "logps/chosen": -173.90911865234375, "logps/rejected": -195.12417602539062, "loss": 0.2128, "rewards/accuracies": 0.9375, "rewards/chosen": -0.22751878201961517, "rewards/margins": 3.4993607997894287, "rewards/rejected": -3.726879835128784, "step": 1257 }, { "epoch": 1.65, "learning_rate": 4.714607445898556e-05, "logits/chosen": -2.3790111541748047, "logits/rejected": -2.4061365127563477, "logps/chosen": -193.8656463623047, "logps/rejected": -248.78428649902344, "loss": 0.1781, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7878875732421875, "rewards/margins": 3.8841519355773926, "rewards/rejected": -4.67203950881958, "step": 1258 }, { "epoch": 1.65, "learning_rate": 4.714123947204991e-05, "logits/chosen": -2.5352847576141357, "logits/rejected": -2.5170440673828125, "logps/chosen": -207.12197875976562, "logps/rejected": -240.71539306640625, "loss": 0.1627, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1929060220718384, "rewards/margins": 3.0479984283447266, "rewards/rejected": -4.240904331207275, "step": 1259 }, { "epoch": 1.65, "learning_rate": 4.713640064133025e-05, "logits/chosen": -2.321063756942749, "logits/rejected": -2.323094367980957, "logps/chosen": -178.62144470214844, "logps/rejected": -220.08468627929688, "loss": 0.1978, "rewards/accuracies": 0.875, "rewards/chosen": -0.8668509721755981, "rewards/margins": 3.2052500247955322, "rewards/rejected": -4.07210111618042, "step": 1260 }, { "epoch": 1.65, "learning_rate": 4.7131557967666604e-05, "logits/chosen": -2.3726282119750977, "logits/rejected": -2.4751479625701904, "logps/chosen": -178.8557891845703, "logps/rejected": -251.33251953125, "loss": 0.1386, "rewards/accuracies": 1.0, "rewards/chosen": -0.6575889587402344, "rewards/margins": 3.7119858264923096, "rewards/rejected": -4.369575023651123, "step": 1261 }, { "epoch": 1.65, "learning_rate": 4.712671145189968e-05, "logits/chosen": -2.479706048965454, "logits/rejected": -2.519768238067627, "logps/chosen": -240.58731079101562, "logps/rejected": -287.4944763183594, "loss": 0.1786, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6783007979393005, "rewards/margins": 3.953369140625, "rewards/rejected": -4.631669998168945, "step": 1262 }, { "epoch": 1.65, "learning_rate": 4.712186109487085e-05, "logits/chosen": -2.2034120559692383, "logits/rejected": -2.2748920917510986, "logps/chosen": -176.4800262451172, "logps/rejected": -242.2554931640625, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -0.833525538444519, "rewards/margins": 4.219998836517334, "rewards/rejected": -5.053524494171143, "step": 1263 }, { "epoch": 1.65, "learning_rate": 4.7117006897422144e-05, "logits/chosen": -2.3310108184814453, "logits/rejected": -2.3973095417022705, "logps/chosen": -159.26260375976562, "logps/rejected": -178.69801330566406, "loss": 0.2582, "rewards/accuracies": 0.75, "rewards/chosen": -0.5862424969673157, "rewards/margins": 3.4215569496154785, "rewards/rejected": -4.00779914855957, "step": 1264 }, { "epoch": 1.66, "learning_rate": 4.7112148860396265e-05, "logits/chosen": -2.289217710494995, "logits/rejected": -2.3254284858703613, "logps/chosen": -198.0190887451172, "logps/rejected": -251.11077880859375, "loss": 0.1246, "rewards/accuracies": 0.875, "rewards/chosen": -1.1426129341125488, "rewards/margins": 4.011805534362793, "rewards/rejected": -5.154417991638184, "step": 1265 }, { "epoch": 1.66, "learning_rate": 4.7107286984636584e-05, "logits/chosen": -2.2878618240356445, "logits/rejected": -2.4422900676727295, "logps/chosen": -184.45816040039062, "logps/rejected": -227.22039794921875, "loss": 0.2988, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1406644582748413, "rewards/margins": 2.195034980773926, "rewards/rejected": -3.3356995582580566, "step": 1266 }, { "epoch": 1.66, "learning_rate": 4.710242127098714e-05, "logits/chosen": -2.411558151245117, "logits/rejected": -2.433676242828369, "logps/chosen": -276.55462646484375, "logps/rejected": -287.45538330078125, "loss": 0.1436, "rewards/accuracies": 0.875, "rewards/chosen": -1.3652961254119873, "rewards/margins": 2.717362403869629, "rewards/rejected": -4.082658290863037, "step": 1267 }, { "epoch": 1.66, "learning_rate": 4.7097551720292634e-05, "logits/chosen": -2.2663257122039795, "logits/rejected": -2.3046867847442627, "logps/chosen": -229.05191040039062, "logps/rejected": -234.79849243164062, "loss": 0.1054, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1178110837936401, "rewards/margins": 3.994016408920288, "rewards/rejected": -5.1118268966674805, "step": 1268 }, { "epoch": 1.66, "learning_rate": 4.709267833339843e-05, "logits/chosen": -2.4231812953948975, "logits/rejected": -2.46999192237854, "logps/chosen": -243.0467529296875, "logps/rejected": -288.1591796875, "loss": 0.0957, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6723252534866333, "rewards/margins": 4.546743869781494, "rewards/rejected": -5.219069480895996, "step": 1269 }, { "epoch": 1.66, "learning_rate": 4.708780111115057e-05, "logits/chosen": -2.2736947536468506, "logits/rejected": -2.318422794342041, "logps/chosen": -240.78549194335938, "logps/rejected": -262.1117858886719, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -1.0388414859771729, "rewards/margins": 3.992526054382324, "rewards/rejected": -5.031367301940918, "step": 1270 }, { "epoch": 1.66, "learning_rate": 4.708292005439575e-05, "logits/chosen": -2.3022494316101074, "logits/rejected": -2.331955671310425, "logps/chosen": -235.0003204345703, "logps/rejected": -286.7228698730469, "loss": 0.0884, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9852278232574463, "rewards/margins": 4.658226013183594, "rewards/rejected": -5.643453598022461, "step": 1271 }, { "epoch": 1.66, "learning_rate": 4.707803516398134e-05, "logits/chosen": -2.302123546600342, "logits/rejected": -2.2593071460723877, "logps/chosen": -219.12557983398438, "logps/rejected": -229.34735107421875, "loss": 0.2727, "rewards/accuracies": 0.75, "rewards/chosen": -1.6084837913513184, "rewards/margins": 2.6438004970550537, "rewards/rejected": -4.252284049987793, "step": 1272 }, { "epoch": 1.67, "learning_rate": 4.707314644075536e-05, "logits/chosen": -2.2804455757141113, "logits/rejected": -2.313739538192749, "logps/chosen": -177.1968994140625, "logps/rejected": -253.0790557861328, "loss": 0.1938, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1521004438400269, "rewards/margins": 3.550403118133545, "rewards/rejected": -4.702503204345703, "step": 1273 }, { "epoch": 1.67, "learning_rate": 4.706825388556652e-05, "logits/chosen": -2.3808209896087646, "logits/rejected": -2.509162187576294, "logps/chosen": -252.71810913085938, "logps/rejected": -296.3233642578125, "loss": 0.1607, "rewards/accuracies": 0.875, "rewards/chosen": -1.4979945421218872, "rewards/margins": 3.630544424057007, "rewards/rejected": -5.128539085388184, "step": 1274 }, { "epoch": 1.67, "learning_rate": 4.706335749926417e-05, "logits/chosen": -2.3603124618530273, "logits/rejected": -2.442885637283325, "logps/chosen": -187.13800048828125, "logps/rejected": -226.97061157226562, "loss": 0.1575, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0760753154754639, "rewards/margins": 3.3448987007141113, "rewards/rejected": -4.420973777770996, "step": 1275 }, { "epoch": 1.67, "learning_rate": 4.7058457282698336e-05, "logits/chosen": -2.178051233291626, "logits/rejected": -2.302097797393799, "logps/chosen": -181.60252380371094, "logps/rejected": -298.5541687011719, "loss": 0.0962, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3809150457382202, "rewards/margins": 3.7876832485198975, "rewards/rejected": -5.168598175048828, "step": 1276 }, { "epoch": 1.67, "learning_rate": 4.7053553236719726e-05, "logits/chosen": -2.2287707328796387, "logits/rejected": -2.3011856079101562, "logps/chosen": -193.0646514892578, "logps/rejected": -230.15853881835938, "loss": 0.1986, "rewards/accuracies": 0.8125, "rewards/chosen": -1.612187385559082, "rewards/margins": 2.7522740364074707, "rewards/rejected": -4.3644609451293945, "step": 1277 }, { "epoch": 1.67, "learning_rate": 4.7048645362179686e-05, "logits/chosen": -2.3724539279937744, "logits/rejected": -2.3160030841827393, "logps/chosen": -199.37181091308594, "logps/rejected": -222.05191040039062, "loss": 0.1415, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6102732419967651, "rewards/margins": 3.988173723220825, "rewards/rejected": -4.598446369171143, "step": 1278 }, { "epoch": 1.67, "learning_rate": 4.7043733659930235e-05, "logits/chosen": -1.9961585998535156, "logits/rejected": -2.158682346343994, "logps/chosen": -148.25836181640625, "logps/rejected": -226.0478515625, "loss": 0.1392, "rewards/accuracies": 0.875, "rewards/chosen": -1.005221962928772, "rewards/margins": 3.8446872234344482, "rewards/rejected": -4.84990930557251, "step": 1279 }, { "epoch": 1.68, "learning_rate": 4.703881813082406e-05, "logits/chosen": -2.171950101852417, "logits/rejected": -2.174027442932129, "logps/chosen": -188.34934997558594, "logps/rejected": -226.32675170898438, "loss": 0.2559, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7046524286270142, "rewards/margins": 3.046712875366211, "rewards/rejected": -4.751364707946777, "step": 1280 }, { "epoch": 1.68, "learning_rate": 4.703389877571451e-05, "logits/chosen": -2.3705697059631348, "logits/rejected": -2.3761351108551025, "logps/chosen": -262.95513916015625, "logps/rejected": -301.3341369628906, "loss": 0.3501, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5746833086013794, "rewards/margins": 3.5883994102478027, "rewards/rejected": -5.163082599639893, "step": 1281 }, { "epoch": 1.68, "learning_rate": 4.7028975595455615e-05, "logits/chosen": -2.2629284858703613, "logits/rejected": -2.3212735652923584, "logps/chosen": -148.93194580078125, "logps/rejected": -209.88656616210938, "loss": 0.2053, "rewards/accuracies": 0.875, "rewards/chosen": -1.2451274394989014, "rewards/margins": 3.617295026779175, "rewards/rejected": -4.862421989440918, "step": 1282 }, { "epoch": 1.68, "learning_rate": 4.7024048590902036e-05, "logits/chosen": -2.1855812072753906, "logits/rejected": -2.266613721847534, "logps/chosen": -208.891845703125, "logps/rejected": -263.1065979003906, "loss": 0.2646, "rewards/accuracies": 0.75, "rewards/chosen": -1.979274868965149, "rewards/margins": 2.8052868843078613, "rewards/rejected": -4.784561634063721, "step": 1283 }, { "epoch": 1.68, "learning_rate": 4.701911776290912e-05, "logits/chosen": -2.373356342315674, "logits/rejected": -2.4201443195343018, "logps/chosen": -213.91403198242188, "logps/rejected": -248.12171936035156, "loss": 0.1366, "rewards/accuracies": 0.9375, "rewards/chosen": -1.634600281715393, "rewards/margins": 3.5006790161132812, "rewards/rejected": -5.135279655456543, "step": 1284 }, { "epoch": 1.68, "learning_rate": 4.7014183112332874e-05, "logits/chosen": -2.0015125274658203, "logits/rejected": -2.0453169345855713, "logps/chosen": -200.25445556640625, "logps/rejected": -244.46153259277344, "loss": 0.1716, "rewards/accuracies": 0.875, "rewards/chosen": -1.2220110893249512, "rewards/margins": 4.0147528648376465, "rewards/rejected": -5.236764430999756, "step": 1285 }, { "epoch": 1.68, "learning_rate": 4.700924464002998e-05, "logits/chosen": -2.3927388191223145, "logits/rejected": -2.5242536067962646, "logps/chosen": -209.62437438964844, "logps/rejected": -284.5310974121094, "loss": 0.1046, "rewards/accuracies": 1.0, "rewards/chosen": -1.037900447845459, "rewards/margins": 4.573543071746826, "rewards/rejected": -5.611443996429443, "step": 1286 }, { "epoch": 1.68, "learning_rate": 4.700430234685774e-05, "logits/chosen": -1.9511386156082153, "logits/rejected": -1.9067704677581787, "logps/chosen": -237.9818115234375, "logps/rejected": -313.5265197753906, "loss": 0.2084, "rewards/accuracies": 0.875, "rewards/chosen": -1.6042085886001587, "rewards/margins": 3.7971808910369873, "rewards/rejected": -5.401390075683594, "step": 1287 }, { "epoch": 1.69, "learning_rate": 4.699935623367419e-05, "logits/chosen": -2.0798680782318115, "logits/rejected": -2.097235679626465, "logps/chosen": -164.66754150390625, "logps/rejected": -223.6951904296875, "loss": 0.1752, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4378409385681152, "rewards/margins": 3.772213935852051, "rewards/rejected": -5.210054874420166, "step": 1288 }, { "epoch": 1.69, "learning_rate": 4.699440630133794e-05, "logits/chosen": -1.9853588342666626, "logits/rejected": -2.1096606254577637, "logps/chosen": -170.85031127929688, "logps/rejected": -239.245361328125, "loss": 0.153, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4343587160110474, "rewards/margins": 4.021684646606445, "rewards/rejected": -5.456043720245361, "step": 1289 }, { "epoch": 1.69, "learning_rate": 4.698945255070837e-05, "logits/chosen": -2.266202449798584, "logits/rejected": -2.3734381198883057, "logps/chosen": -241.38861083984375, "logps/rejected": -287.3223876953125, "loss": 0.1812, "rewards/accuracies": 0.9375, "rewards/chosen": -1.899042010307312, "rewards/margins": 4.466583728790283, "rewards/rejected": -6.365625381469727, "step": 1290 }, { "epoch": 1.69, "learning_rate": 4.698449498264543e-05, "logits/chosen": -2.0678675174713135, "logits/rejected": -2.1580851078033447, "logps/chosen": -177.24830627441406, "logps/rejected": -204.01907348632812, "loss": 0.2167, "rewards/accuracies": 0.875, "rewards/chosen": -1.0937800407409668, "rewards/margins": 3.3943605422973633, "rewards/rejected": -4.48814058303833, "step": 1291 }, { "epoch": 1.69, "learning_rate": 4.697953359800977e-05, "logits/chosen": -2.3612008094787598, "logits/rejected": -2.470273017883301, "logps/chosen": -190.984375, "logps/rejected": -306.0478515625, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -1.2556102275848389, "rewards/margins": 5.612930774688721, "rewards/rejected": -6.868541240692139, "step": 1292 }, { "epoch": 1.69, "learning_rate": 4.697456839766271e-05, "logits/chosen": -2.3923254013061523, "logits/rejected": -2.461078643798828, "logps/chosen": -164.07113647460938, "logps/rejected": -214.48974609375, "loss": 0.1813, "rewards/accuracies": 0.875, "rewards/chosen": -1.171428918838501, "rewards/margins": 2.888441562652588, "rewards/rejected": -4.059870719909668, "step": 1293 }, { "epoch": 1.69, "learning_rate": 4.6969599382466224e-05, "logits/chosen": -1.8895044326782227, "logits/rejected": -1.9640676975250244, "logps/chosen": -289.2847595214844, "logps/rejected": -292.143310546875, "loss": 0.2552, "rewards/accuracies": 0.75, "rewards/chosen": -1.2626152038574219, "rewards/margins": 3.0650792121887207, "rewards/rejected": -4.327694416046143, "step": 1294 }, { "epoch": 1.69, "learning_rate": 4.696462655328294e-05, "logits/chosen": -2.2127761840820312, "logits/rejected": -2.1274161338806152, "logps/chosen": -225.32937622070312, "logps/rejected": -221.54954528808594, "loss": 0.2618, "rewards/accuracies": 0.8125, "rewards/chosen": -1.374513864517212, "rewards/margins": 2.8716228008270264, "rewards/rejected": -4.246136665344238, "step": 1295 }, { "epoch": 1.7, "learning_rate": 4.695964991097616e-05, "logits/chosen": -2.295297861099243, "logits/rejected": -2.401890754699707, "logps/chosen": -183.36146545410156, "logps/rejected": -258.7024230957031, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": -1.309377670288086, "rewards/margins": 4.095472812652588, "rewards/rejected": -5.404850959777832, "step": 1296 }, { "epoch": 1.7, "learning_rate": 4.695466945640985e-05, "logits/chosen": -2.3360753059387207, "logits/rejected": -2.359584093093872, "logps/chosen": -300.7992858886719, "logps/rejected": -348.9504089355469, "loss": 0.1097, "rewards/accuracies": 0.875, "rewards/chosen": -1.1483001708984375, "rewards/margins": 5.027990818023682, "rewards/rejected": -6.176291465759277, "step": 1297 }, { "epoch": 1.7, "learning_rate": 4.694968519044862e-05, "logits/chosen": -2.1906495094299316, "logits/rejected": -2.2294445037841797, "logps/chosen": -199.27716064453125, "logps/rejected": -267.9615173339844, "loss": 0.1167, "rewards/accuracies": 0.875, "rewards/chosen": -0.9273327589035034, "rewards/margins": 4.393675804138184, "rewards/rejected": -5.321008682250977, "step": 1298 }, { "epoch": 1.7, "learning_rate": 4.6944697113957756e-05, "logits/chosen": -2.2078731060028076, "logits/rejected": -2.256173849105835, "logps/chosen": -191.87767028808594, "logps/rejected": -235.9379425048828, "loss": 0.229, "rewards/accuracies": 0.8125, "rewards/chosen": -1.396527886390686, "rewards/margins": 2.8328018188476562, "rewards/rejected": -4.229329586029053, "step": 1299 }, { "epoch": 1.7, "learning_rate": 4.69397052278032e-05, "logits/chosen": -2.1057164669036865, "logits/rejected": -2.2075157165527344, "logps/chosen": -159.22825622558594, "logps/rejected": -249.6995849609375, "loss": 0.2449, "rewards/accuracies": 0.8125, "rewards/chosen": -1.563463568687439, "rewards/margins": 3.6448473930358887, "rewards/rejected": -5.208311080932617, "step": 1300 }, { "epoch": 1.7, "learning_rate": 4.6934709532851576e-05, "logits/chosen": -1.976876974105835, "logits/rejected": -2.1168007850646973, "logps/chosen": -190.38943481445312, "logps/rejected": -240.00660705566406, "loss": 0.1593, "rewards/accuracies": 0.875, "rewards/chosen": -1.6499911546707153, "rewards/margins": 4.014796257019043, "rewards/rejected": -5.664787769317627, "step": 1301 }, { "epoch": 1.7, "learning_rate": 4.692971002997013e-05, "logits/chosen": -2.2851758003234863, "logits/rejected": -2.3062691688537598, "logps/chosen": -213.82559204101562, "logps/rejected": -243.2498779296875, "loss": 0.2007, "rewards/accuracies": 0.875, "rewards/chosen": -1.8377220630645752, "rewards/margins": 3.4279751777648926, "rewards/rejected": -5.265697002410889, "step": 1302 }, { "epoch": 1.71, "learning_rate": 4.69247067200268e-05, "logits/chosen": -2.130112886428833, "logits/rejected": -2.21236515045166, "logps/chosen": -232.77862548828125, "logps/rejected": -242.76791381835938, "loss": 0.1511, "rewards/accuracies": 0.875, "rewards/chosen": -2.000922679901123, "rewards/margins": 3.678673028945923, "rewards/rejected": -5.679595947265625, "step": 1303 }, { "epoch": 1.71, "learning_rate": 4.691969960389017e-05, "logits/chosen": -2.2007036209106445, "logits/rejected": -2.316322088241577, "logps/chosen": -195.4522247314453, "logps/rejected": -319.2470397949219, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -1.5212204456329346, "rewards/margins": 5.476848602294922, "rewards/rejected": -6.998068809509277, "step": 1304 }, { "epoch": 1.71, "learning_rate": 4.6914688682429496e-05, "logits/chosen": -2.1540913581848145, "logits/rejected": -2.2840263843536377, "logps/chosen": -164.23712158203125, "logps/rejected": -267.3701477050781, "loss": 0.154, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7769471406936646, "rewards/margins": 4.149184226989746, "rewards/rejected": -5.926130771636963, "step": 1305 }, { "epoch": 1.71, "learning_rate": 4.690967395651469e-05, "logits/chosen": -2.2483348846435547, "logits/rejected": -2.29646372795105, "logps/chosen": -198.18312072753906, "logps/rejected": -250.00401306152344, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -2.107490301132202, "rewards/margins": 4.931715965270996, "rewards/rejected": -7.039206027984619, "step": 1306 }, { "epoch": 1.71, "learning_rate": 4.6904655427016314e-05, "logits/chosen": -2.344970703125, "logits/rejected": -2.3008577823638916, "logps/chosen": -242.4059600830078, "logps/rejected": -261.99755859375, "loss": 0.211, "rewards/accuracies": 0.9375, "rewards/chosen": -1.909515380859375, "rewards/margins": 4.080948829650879, "rewards/rejected": -5.990464687347412, "step": 1307 }, { "epoch": 1.71, "learning_rate": 4.6899633094805604e-05, "logits/chosen": -2.0280845165252686, "logits/rejected": -2.0966038703918457, "logps/chosen": -218.1485595703125, "logps/rejected": -247.9756317138672, "loss": 0.1563, "rewards/accuracies": 0.875, "rewards/chosen": -1.803059697151184, "rewards/margins": 3.7059831619262695, "rewards/rejected": -5.509043216705322, "step": 1308 }, { "epoch": 1.71, "learning_rate": 4.689460696075446e-05, "logits/chosen": -2.481982707977295, "logits/rejected": -2.4297902584075928, "logps/chosen": -226.631103515625, "logps/rejected": -258.3005676269531, "loss": 0.1634, "rewards/accuracies": 0.875, "rewards/chosen": -1.8385543823242188, "rewards/margins": 3.6741104125976562, "rewards/rejected": -5.512665271759033, "step": 1309 }, { "epoch": 1.71, "learning_rate": 4.688957702573542e-05, "logits/chosen": -2.086306095123291, "logits/rejected": -2.0635809898376465, "logps/chosen": -227.90188598632812, "logps/rejected": -282.12567138671875, "loss": 0.2416, "rewards/accuracies": 0.8125, "rewards/chosen": -1.798666000366211, "rewards/margins": 3.5320932865142822, "rewards/rejected": -5.330759048461914, "step": 1310 }, { "epoch": 1.72, "learning_rate": 4.6884543290621706e-05, "logits/chosen": -2.2293214797973633, "logits/rejected": -2.256197452545166, "logps/chosen": -173.7129669189453, "logps/rejected": -233.62586975097656, "loss": 0.4036, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9006210565567017, "rewards/margins": 2.86419677734375, "rewards/rejected": -4.764817714691162, "step": 1311 }, { "epoch": 1.72, "learning_rate": 4.687950575628718e-05, "logits/chosen": -2.299515962600708, "logits/rejected": -2.3766961097717285, "logps/chosen": -179.56732177734375, "logps/rejected": -205.10609436035156, "loss": 0.2016, "rewards/accuracies": 0.875, "rewards/chosen": -1.3757731914520264, "rewards/margins": 2.9517035484313965, "rewards/rejected": -4.327476501464844, "step": 1312 }, { "epoch": 1.72, "learning_rate": 4.6874464423606376e-05, "logits/chosen": -2.057006597518921, "logits/rejected": -2.160207748413086, "logps/chosen": -161.31712341308594, "logps/rejected": -270.31756591796875, "loss": 0.2467, "rewards/accuracies": 0.8125, "rewards/chosen": -1.198062539100647, "rewards/margins": 3.796602725982666, "rewards/rejected": -4.994665145874023, "step": 1313 }, { "epoch": 1.72, "learning_rate": 4.68694192934545e-05, "logits/chosen": -2.2372817993164062, "logits/rejected": -2.2993078231811523, "logps/chosen": -181.70199584960938, "logps/rejected": -230.60086059570312, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": -1.5110223293304443, "rewards/margins": 3.5902843475341797, "rewards/rejected": -5.101306438446045, "step": 1314 }, { "epoch": 1.72, "learning_rate": 4.6864370366707366e-05, "logits/chosen": -2.2221570014953613, "logits/rejected": -2.290891170501709, "logps/chosen": -180.98440551757812, "logps/rejected": -241.5121612548828, "loss": 0.208, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7390170097351074, "rewards/margins": 3.494724988937378, "rewards/rejected": -5.2337422370910645, "step": 1315 }, { "epoch": 1.72, "learning_rate": 4.685931764424152e-05, "logits/chosen": -2.3075151443481445, "logits/rejected": -2.3091671466827393, "logps/chosen": -141.8212890625, "logps/rejected": -196.77894592285156, "loss": 0.161, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0228283405303955, "rewards/margins": 3.695592164993286, "rewards/rejected": -4.718420505523682, "step": 1316 }, { "epoch": 1.72, "learning_rate": 4.6854261126934104e-05, "logits/chosen": -2.3127944469451904, "logits/rejected": -2.3393845558166504, "logps/chosen": -210.4659423828125, "logps/rejected": -244.98114013671875, "loss": 0.1695, "rewards/accuracies": 0.875, "rewards/chosen": -1.8345630168914795, "rewards/margins": 3.397213935852051, "rewards/rejected": -5.231777191162109, "step": 1317 }, { "epoch": 1.72, "learning_rate": 4.684920081566295e-05, "logits/chosen": -2.4269180297851562, "logits/rejected": -2.3492565155029297, "logps/chosen": -233.5777130126953, "logps/rejected": -288.9328918457031, "loss": 0.2221, "rewards/accuracies": 0.875, "rewards/chosen": -1.7820842266082764, "rewards/margins": 4.016000270843506, "rewards/rejected": -5.7980852127075195, "step": 1318 }, { "epoch": 1.73, "learning_rate": 4.684413671130655e-05, "logits/chosen": -2.320209503173828, "logits/rejected": -2.377079486846924, "logps/chosen": -207.27413940429688, "logps/rejected": -261.8699645996094, "loss": 0.1259, "rewards/accuracies": 1.0, "rewards/chosen": -1.6501234769821167, "rewards/margins": 4.435462474822998, "rewards/rejected": -6.085585594177246, "step": 1319 }, { "epoch": 1.73, "learning_rate": 4.683906881474405e-05, "logits/chosen": -2.298753499984741, "logits/rejected": -2.3262522220611572, "logps/chosen": -184.51959228515625, "logps/rejected": -243.18218994140625, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -1.8446322679519653, "rewards/margins": 3.7827131748199463, "rewards/rejected": -5.627345085144043, "step": 1320 }, { "epoch": 1.73, "learning_rate": 4.6833997126855236e-05, "logits/chosen": -2.3990066051483154, "logits/rejected": -2.450573444366455, "logps/chosen": -223.92630004882812, "logps/rejected": -278.1924133300781, "loss": 0.2113, "rewards/accuracies": 0.75, "rewards/chosen": -1.5321887731552124, "rewards/margins": 3.711320638656616, "rewards/rejected": -5.243509292602539, "step": 1321 }, { "epoch": 1.73, "learning_rate": 4.682892164852058e-05, "logits/chosen": -2.2027530670166016, "logits/rejected": -2.214686632156372, "logps/chosen": -239.34274291992188, "logps/rejected": -282.7929992675781, "loss": 0.2324, "rewards/accuracies": 0.875, "rewards/chosen": -1.4780960083007812, "rewards/margins": 3.398163318634033, "rewards/rejected": -4.8762593269348145, "step": 1322 }, { "epoch": 1.73, "learning_rate": 4.68238423806212e-05, "logits/chosen": -2.098435640335083, "logits/rejected": -2.160342216491699, "logps/chosen": -175.86923217773438, "logps/rejected": -210.9672393798828, "loss": 0.2131, "rewards/accuracies": 0.875, "rewards/chosen": -1.5760440826416016, "rewards/margins": 2.946066379547119, "rewards/rejected": -4.522110462188721, "step": 1323 }, { "epoch": 1.73, "learning_rate": 4.681875932403885e-05, "logits/chosen": -2.0780820846557617, "logits/rejected": -2.2287166118621826, "logps/chosen": -220.93051147460938, "logps/rejected": -261.7267761230469, "loss": 0.1903, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6689494848251343, "rewards/margins": 3.8627724647521973, "rewards/rejected": -5.531722068786621, "step": 1324 }, { "epoch": 1.73, "learning_rate": 4.6813672479656e-05, "logits/chosen": -2.196786642074585, "logits/rejected": -2.3375864028930664, "logps/chosen": -191.28599548339844, "logps/rejected": -281.8190612792969, "loss": 0.1382, "rewards/accuracies": 0.9375, "rewards/chosen": -1.868288278579712, "rewards/margins": 4.053330898284912, "rewards/rejected": -5.921619415283203, "step": 1325 }, { "epoch": 1.74, "learning_rate": 4.680858184835572e-05, "logits/chosen": -2.0055956840515137, "logits/rejected": -2.090541124343872, "logps/chosen": -181.52813720703125, "logps/rejected": -213.05496215820312, "loss": 0.171, "rewards/accuracies": 0.875, "rewards/chosen": -1.124434232711792, "rewards/margins": 3.510894536972046, "rewards/rejected": -4.635329246520996, "step": 1326 }, { "epoch": 1.74, "learning_rate": 4.6803487431021766e-05, "logits/chosen": -2.1428966522216797, "logits/rejected": -2.2327733039855957, "logps/chosen": -174.168701171875, "logps/rejected": -196.87881469726562, "loss": 0.222, "rewards/accuracies": 0.875, "rewards/chosen": -1.8680024147033691, "rewards/margins": 3.048375129699707, "rewards/rejected": -4.916378021240234, "step": 1327 }, { "epoch": 1.74, "learning_rate": 4.679838922853853e-05, "logits/chosen": -2.3784139156341553, "logits/rejected": -2.481027126312256, "logps/chosen": -230.0206298828125, "logps/rejected": -294.965087890625, "loss": 0.168, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9025079011917114, "rewards/margins": 4.027909278869629, "rewards/rejected": -5.930417537689209, "step": 1328 }, { "epoch": 1.74, "learning_rate": 4.679328724179109e-05, "logits/chosen": -2.269122362136841, "logits/rejected": -2.279000997543335, "logps/chosen": -191.70877075195312, "logps/rejected": -215.52420043945312, "loss": 0.1554, "rewards/accuracies": 0.875, "rewards/chosen": -2.063866138458252, "rewards/margins": 3.1374106407165527, "rewards/rejected": -5.201276779174805, "step": 1329 }, { "epoch": 1.74, "learning_rate": 4.6788181471665155e-05, "logits/chosen": -1.8508931398391724, "logits/rejected": -1.8643546104431152, "logps/chosen": -205.4655303955078, "logps/rejected": -203.88031005859375, "loss": 0.1438, "rewards/accuracies": 0.9375, "rewards/chosen": -1.508162021636963, "rewards/margins": 3.4267380237579346, "rewards/rejected": -4.934900283813477, "step": 1330 }, { "epoch": 1.74, "learning_rate": 4.67830719190471e-05, "logits/chosen": -2.4449105262756348, "logits/rejected": -2.512434720993042, "logps/chosen": -239.13856506347656, "logps/rejected": -255.4873046875, "loss": 0.1123, "rewards/accuracies": 1.0, "rewards/chosen": -1.7588633298873901, "rewards/margins": 4.075693607330322, "rewards/rejected": -5.83455753326416, "step": 1331 }, { "epoch": 1.74, "learning_rate": 4.677795858482398e-05, "logits/chosen": -2.2655084133148193, "logits/rejected": -2.2468154430389404, "logps/chosen": -166.35696411132812, "logps/rejected": -241.24954223632812, "loss": 0.1517, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7497742176055908, "rewards/margins": 3.5795230865478516, "rewards/rejected": -5.3292975425720215, "step": 1332 }, { "epoch": 1.74, "learning_rate": 4.677284146988346e-05, "logits/chosen": -2.377413749694824, "logits/rejected": -2.4802167415618896, "logps/chosen": -257.7947998046875, "logps/rejected": -343.67376708984375, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -2.074228048324585, "rewards/margins": 5.048233985900879, "rewards/rejected": -7.122462272644043, "step": 1333 }, { "epoch": 1.75, "learning_rate": 4.67677205751139e-05, "logits/chosen": -2.3548061847686768, "logits/rejected": -2.4688026905059814, "logps/chosen": -202.11593627929688, "logps/rejected": -286.65057373046875, "loss": 0.1916, "rewards/accuracies": 0.8125, "rewards/chosen": -1.193711519241333, "rewards/margins": 5.112841606140137, "rewards/rejected": -6.306553840637207, "step": 1334 }, { "epoch": 1.75, "learning_rate": 4.67625959014043e-05, "logits/chosen": -2.31046199798584, "logits/rejected": -2.319591760635376, "logps/chosen": -231.41473388671875, "logps/rejected": -231.21859741210938, "loss": 0.1506, "rewards/accuracies": 0.875, "rewards/chosen": -1.5231584310531616, "rewards/margins": 3.534045934677124, "rewards/rejected": -5.057204723358154, "step": 1335 }, { "epoch": 1.75, "learning_rate": 4.675746744964432e-05, "logits/chosen": -2.524745464324951, "logits/rejected": -2.5354037284851074, "logps/chosen": -280.01513671875, "logps/rejected": -341.6401672363281, "loss": 0.1042, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0203945636749268, "rewards/margins": 4.112698554992676, "rewards/rejected": -6.133092880249023, "step": 1336 }, { "epoch": 1.75, "learning_rate": 4.675233522072426e-05, "logits/chosen": -2.4641785621643066, "logits/rejected": -2.4546477794647217, "logps/chosen": -204.9468536376953, "logps/rejected": -250.145263671875, "loss": 0.1739, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4312738180160522, "rewards/margins": 3.9612386226654053, "rewards/rejected": -5.392512798309326, "step": 1337 }, { "epoch": 1.75, "learning_rate": 4.674719921553511e-05, "logits/chosen": -2.3155698776245117, "logits/rejected": -2.3693156242370605, "logps/chosen": -194.86477661132812, "logps/rejected": -220.62965393066406, "loss": 0.1797, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0034408569335938, "rewards/margins": 3.511406898498535, "rewards/rejected": -5.514848709106445, "step": 1338 }, { "epoch": 1.75, "learning_rate": 4.6742059434968476e-05, "logits/chosen": -1.9812606573104858, "logits/rejected": -2.0393075942993164, "logps/chosen": -197.7154541015625, "logps/rejected": -251.43502807617188, "loss": 0.2036, "rewards/accuracies": 0.8125, "rewards/chosen": -1.391548991203308, "rewards/margins": 3.891415596008301, "rewards/rejected": -5.282964706420898, "step": 1339 }, { "epoch": 1.75, "learning_rate": 4.673691587991667e-05, "logits/chosen": -2.2613768577575684, "logits/rejected": -2.2857589721679688, "logps/chosen": -211.1699981689453, "logps/rejected": -271.5691833496094, "loss": 0.3857, "rewards/accuracies": 0.8125, "rewards/chosen": -2.122715950012207, "rewards/margins": 3.556459903717041, "rewards/rejected": -5.679176330566406, "step": 1340 }, { "epoch": 1.75, "learning_rate": 4.673176855127258e-05, "logits/chosen": -2.3039798736572266, "logits/rejected": -2.286431312561035, "logps/chosen": -209.6785430908203, "logps/rejected": -263.2655334472656, "loss": 0.182, "rewards/accuracies": 0.875, "rewards/chosen": -0.8972321152687073, "rewards/margins": 4.346026420593262, "rewards/rejected": -5.243258953094482, "step": 1341 }, { "epoch": 1.76, "learning_rate": 4.672661744992984e-05, "logits/chosen": -2.2901010513305664, "logits/rejected": -2.3650054931640625, "logps/chosen": -195.44326782226562, "logps/rejected": -268.2812805175781, "loss": 0.1694, "rewards/accuracies": 0.8125, "rewards/chosen": -1.497087001800537, "rewards/margins": 4.011203289031982, "rewards/rejected": -5.5082902908325195, "step": 1342 }, { "epoch": 1.76, "learning_rate": 4.672146257678268e-05, "logits/chosen": -2.4022762775421143, "logits/rejected": -2.3658864498138428, "logps/chosen": -255.15890502929688, "logps/rejected": -328.7245788574219, "loss": 0.1034, "rewards/accuracies": 1.0, "rewards/chosen": -1.900804877281189, "rewards/margins": 4.233809471130371, "rewards/rejected": -6.13461446762085, "step": 1343 }, { "epoch": 1.76, "learning_rate": 4.6716303932726004e-05, "logits/chosen": -2.4683516025543213, "logits/rejected": -2.4238386154174805, "logps/chosen": -181.5071563720703, "logps/rejected": -225.39805603027344, "loss": 0.2542, "rewards/accuracies": 0.875, "rewards/chosen": -1.721781849861145, "rewards/margins": 3.4932799339294434, "rewards/rejected": -5.215061664581299, "step": 1344 }, { "epoch": 1.76, "learning_rate": 4.671114151865536e-05, "logits/chosen": -2.38026762008667, "logits/rejected": -2.38236927986145, "logps/chosen": -227.99789428710938, "logps/rejected": -277.05975341796875, "loss": 0.1147, "rewards/accuracies": 0.875, "rewards/chosen": -1.6720448732376099, "rewards/margins": 4.081615447998047, "rewards/rejected": -5.753660202026367, "step": 1345 }, { "epoch": 1.76, "learning_rate": 4.6705975335466966e-05, "logits/chosen": -2.504228115081787, "logits/rejected": -2.5582098960876465, "logps/chosen": -272.26788330078125, "logps/rejected": -299.64056396484375, "loss": 0.144, "rewards/accuracies": 0.9375, "rewards/chosen": -2.06329083442688, "rewards/margins": 3.614590644836426, "rewards/rejected": -5.677881240844727, "step": 1346 }, { "epoch": 1.76, "learning_rate": 4.6700805384057687e-05, "logits/chosen": -2.207796096801758, "logits/rejected": -2.2440619468688965, "logps/chosen": -251.6385040283203, "logps/rejected": -267.8268127441406, "loss": 0.0849, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2752389907836914, "rewards/margins": 3.857135772705078, "rewards/rejected": -5.1323747634887695, "step": 1347 }, { "epoch": 1.76, "learning_rate": 4.669563166532503e-05, "logits/chosen": -2.1402127742767334, "logits/rejected": -2.2065505981445312, "logps/chosen": -188.67706298828125, "logps/rejected": -221.7375946044922, "loss": 0.1975, "rewards/accuracies": 0.875, "rewards/chosen": -1.9871059656143188, "rewards/margins": 2.8594772815704346, "rewards/rejected": -4.846583366394043, "step": 1348 }, { "epoch": 1.77, "learning_rate": 4.669045418016719e-05, "logits/chosen": -2.2698256969451904, "logits/rejected": -2.274237871170044, "logps/chosen": -197.71026611328125, "logps/rejected": -252.61614990234375, "loss": 0.1191, "rewards/accuracies": 1.0, "rewards/chosen": -1.6768132448196411, "rewards/margins": 3.3656845092773438, "rewards/rejected": -5.042497634887695, "step": 1349 }, { "epoch": 1.77, "learning_rate": 4.668527292948298e-05, "logits/chosen": -2.256514072418213, "logits/rejected": -2.34165358543396, "logps/chosen": -176.2838592529297, "logps/rejected": -236.6180877685547, "loss": 0.1925, "rewards/accuracies": 0.875, "rewards/chosen": -1.650253176689148, "rewards/margins": 3.62697172164917, "rewards/rejected": -5.277225017547607, "step": 1350 }, { "epoch": 1.77, "learning_rate": 4.668008791417188e-05, "logits/chosen": -2.31141996383667, "logits/rejected": -2.3515350818634033, "logps/chosen": -238.41036987304688, "logps/rejected": -284.0400390625, "loss": 0.0897, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6227517127990723, "rewards/margins": 4.4259443283081055, "rewards/rejected": -6.048696041107178, "step": 1351 }, { "epoch": 1.77, "learning_rate": 4.6674899135134024e-05, "logits/chosen": -2.0742459297180176, "logits/rejected": -2.1106269359588623, "logps/chosen": -172.02809143066406, "logps/rejected": -193.6383514404297, "loss": 0.237, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9648047089576721, "rewards/margins": 2.872191905975342, "rewards/rejected": -3.836996555328369, "step": 1352 }, { "epoch": 1.77, "learning_rate": 4.666970659327019e-05, "logits/chosen": -1.8485875129699707, "logits/rejected": -1.9281960725784302, "logps/chosen": -219.42877197265625, "logps/rejected": -291.0449523925781, "loss": 0.1646, "rewards/accuracies": 0.875, "rewards/chosen": -1.602189540863037, "rewards/margins": 3.566793441772461, "rewards/rejected": -5.168982982635498, "step": 1353 }, { "epoch": 1.77, "learning_rate": 4.666451028948183e-05, "logits/chosen": -2.3288824558258057, "logits/rejected": -2.427269458770752, "logps/chosen": -238.8002471923828, "logps/rejected": -297.3604431152344, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -1.7097375392913818, "rewards/margins": 4.595492362976074, "rewards/rejected": -6.305230140686035, "step": 1354 }, { "epoch": 1.77, "learning_rate": 4.665931022467105e-05, "logits/chosen": -2.096285104751587, "logits/rejected": -2.1877405643463135, "logps/chosen": -181.7442169189453, "logps/rejected": -265.5242614746094, "loss": 0.1665, "rewards/accuracies": 0.9375, "rewards/chosen": -1.873305320739746, "rewards/margins": 3.2641849517822266, "rewards/rejected": -5.137490272521973, "step": 1355 }, { "epoch": 1.77, "learning_rate": 4.665410639974057e-05, "logits/chosen": -2.1947834491729736, "logits/rejected": -2.359546422958374, "logps/chosen": -203.13490295410156, "logps/rejected": -254.12127685546875, "loss": 0.189, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1950178146362305, "rewards/margins": 4.417459487915039, "rewards/rejected": -5.612477779388428, "step": 1356 }, { "epoch": 1.78, "learning_rate": 4.664889881559381e-05, "logits/chosen": -2.353022336959839, "logits/rejected": -2.4439656734466553, "logps/chosen": -230.41729736328125, "logps/rejected": -242.9822998046875, "loss": 0.0859, "rewards/accuracies": 1.0, "rewards/chosen": -1.2911330461502075, "rewards/margins": 4.007154941558838, "rewards/rejected": -5.298287391662598, "step": 1357 }, { "epoch": 1.78, "learning_rate": 4.66436874731348e-05, "logits/chosen": -2.055748462677002, "logits/rejected": -2.069406509399414, "logps/chosen": -215.91287231445312, "logps/rejected": -224.975341796875, "loss": 0.1346, "rewards/accuracies": 0.9375, "rewards/chosen": -1.132076382637024, "rewards/margins": 3.4015748500823975, "rewards/rejected": -4.533651351928711, "step": 1358 }, { "epoch": 1.78, "learning_rate": 4.663847237326827e-05, "logits/chosen": -2.2017364501953125, "logits/rejected": -2.1791625022888184, "logps/chosen": -195.00912475585938, "logps/rejected": -187.93539428710938, "loss": 0.2966, "rewards/accuracies": 0.75, "rewards/chosen": -1.539417028427124, "rewards/margins": 2.387049674987793, "rewards/rejected": -3.926466703414917, "step": 1359 }, { "epoch": 1.78, "learning_rate": 4.663325351689956e-05, "logits/chosen": -2.2987170219421387, "logits/rejected": -2.302344560623169, "logps/chosen": -186.64981079101562, "logps/rejected": -223.81967163085938, "loss": 0.182, "rewards/accuracies": 0.875, "rewards/chosen": -1.5449962615966797, "rewards/margins": 3.964876413345337, "rewards/rejected": -5.509873390197754, "step": 1360 }, { "epoch": 1.78, "learning_rate": 4.662803090493469e-05, "logits/chosen": -2.1506714820861816, "logits/rejected": -2.18817400932312, "logps/chosen": -257.8224792480469, "logps/rejected": -328.1229248046875, "loss": 0.1701, "rewards/accuracies": 0.875, "rewards/chosen": -2.0192809104919434, "rewards/margins": 3.9228391647338867, "rewards/rejected": -5.94212007522583, "step": 1361 }, { "epoch": 1.78, "learning_rate": 4.6622804538280305e-05, "logits/chosen": -2.2823166847229004, "logits/rejected": -2.34694242477417, "logps/chosen": -195.22393798828125, "logps/rejected": -255.1166534423828, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": -2.038851261138916, "rewards/margins": 3.4105467796325684, "rewards/rejected": -5.449398040771484, "step": 1362 }, { "epoch": 1.78, "learning_rate": 4.661757441784373e-05, "logits/chosen": -2.2843942642211914, "logits/rejected": -2.220759630203247, "logps/chosen": -219.33355712890625, "logps/rejected": -290.8352966308594, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": -1.6054083108901978, "rewards/margins": 4.078475475311279, "rewards/rejected": -5.6838836669921875, "step": 1363 }, { "epoch": 1.79, "learning_rate": 4.6612340544532935e-05, "logits/chosen": -2.2316360473632812, "logits/rejected": -2.3092989921569824, "logps/chosen": -200.7064971923828, "logps/rejected": -254.74070739746094, "loss": 0.1406, "rewards/accuracies": 0.875, "rewards/chosen": -1.5896083116531372, "rewards/margins": 3.663377285003662, "rewards/rejected": -5.252985954284668, "step": 1364 }, { "epoch": 1.79, "learning_rate": 4.660710291925652e-05, "logits/chosen": -2.365083694458008, "logits/rejected": -2.388625144958496, "logps/chosen": -230.10348510742188, "logps/rejected": -289.7381286621094, "loss": 0.1506, "rewards/accuracies": 0.875, "rewards/chosen": -1.6961638927459717, "rewards/margins": 3.8952834606170654, "rewards/rejected": -5.591447830200195, "step": 1365 }, { "epoch": 1.79, "learning_rate": 4.660186154292375e-05, "logits/chosen": -2.2262802124023438, "logits/rejected": -2.279958486557007, "logps/chosen": -163.28163146972656, "logps/rejected": -202.38223266601562, "loss": 0.2373, "rewards/accuracies": 0.875, "rewards/chosen": -1.6737639904022217, "rewards/margins": 3.0033376216888428, "rewards/rejected": -4.6771016120910645, "step": 1366 }, { "epoch": 1.79, "learning_rate": 4.659661641644456e-05, "logits/chosen": -2.447139263153076, "logits/rejected": -2.4368622303009033, "logps/chosen": -242.7307891845703, "logps/rejected": -283.7666320800781, "loss": 0.1133, "rewards/accuracies": 0.875, "rewards/chosen": -1.5830082893371582, "rewards/margins": 5.0759172439575195, "rewards/rejected": -6.6589250564575195, "step": 1367 }, { "epoch": 1.79, "learning_rate": 4.6591367540729515e-05, "logits/chosen": -2.524048328399658, "logits/rejected": -2.555225133895874, "logps/chosen": -243.4622039794922, "logps/rejected": -252.66217041015625, "loss": 0.1873, "rewards/accuracies": 0.875, "rewards/chosen": -2.028730869293213, "rewards/margins": 3.0651321411132812, "rewards/rejected": -5.093863487243652, "step": 1368 }, { "epoch": 1.79, "learning_rate": 4.658611491668983e-05, "logits/chosen": -2.3555378913879395, "logits/rejected": -2.4342939853668213, "logps/chosen": -194.3604736328125, "logps/rejected": -248.84271240234375, "loss": 0.1389, "rewards/accuracies": 1.0, "rewards/chosen": -1.7463260889053345, "rewards/margins": 3.3878800868988037, "rewards/rejected": -5.1342058181762695, "step": 1369 }, { "epoch": 1.79, "learning_rate": 4.658085854523737e-05, "logits/chosen": -2.1884450912475586, "logits/rejected": -2.365642786026001, "logps/chosen": -185.33209228515625, "logps/rejected": -265.621337890625, "loss": 0.1218, "rewards/accuracies": 0.9375, "rewards/chosen": -2.218601942062378, "rewards/margins": 3.699212074279785, "rewards/rejected": -5.917815208435059, "step": 1370 }, { "epoch": 1.79, "learning_rate": 4.657559842728467e-05, "logits/chosen": -2.103135347366333, "logits/rejected": -2.149179697036743, "logps/chosen": -214.1287841796875, "logps/rejected": -276.39312744140625, "loss": 0.1939, "rewards/accuracies": 0.75, "rewards/chosen": -1.805924654006958, "rewards/margins": 4.1208319664001465, "rewards/rejected": -5.926756381988525, "step": 1371 }, { "epoch": 1.8, "learning_rate": 4.657033456374489e-05, "logits/chosen": -1.8944284915924072, "logits/rejected": -1.9346532821655273, "logps/chosen": -216.1498565673828, "logps/rejected": -308.89453125, "loss": 0.109, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7429003715515137, "rewards/margins": 4.299877643585205, "rewards/rejected": -6.0427775382995605, "step": 1372 }, { "epoch": 1.8, "learning_rate": 4.6565066955531863e-05, "logits/chosen": -1.657898187637329, "logits/rejected": -1.6330841779708862, "logps/chosen": -213.9786834716797, "logps/rejected": -256.35113525390625, "loss": 0.3878, "rewards/accuracies": 0.6875, "rewards/chosen": -2.641648054122925, "rewards/margins": 2.252370834350586, "rewards/rejected": -4.89401912689209, "step": 1373 }, { "epoch": 1.8, "learning_rate": 4.655979560356005e-05, "logits/chosen": -2.202993154525757, "logits/rejected": -2.2177088260650635, "logps/chosen": -236.91598510742188, "logps/rejected": -295.3562927246094, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -2.0502257347106934, "rewards/margins": 5.525638103485107, "rewards/rejected": -7.575864315032959, "step": 1374 }, { "epoch": 1.8, "learning_rate": 4.655452050874459e-05, "logits/chosen": -2.14239501953125, "logits/rejected": -2.1285078525543213, "logps/chosen": -187.77191162109375, "logps/rejected": -228.86451721191406, "loss": 0.214, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5740537643432617, "rewards/margins": 3.127923011779785, "rewards/rejected": -5.701977252960205, "step": 1375 }, { "epoch": 1.8, "learning_rate": 4.654924167200123e-05, "logits/chosen": -2.3964757919311523, "logits/rejected": -2.476527214050293, "logps/chosen": -214.957763671875, "logps/rejected": -286.00726318359375, "loss": 0.1093, "rewards/accuracies": 0.875, "rewards/chosen": -1.0560764074325562, "rewards/margins": 5.730381965637207, "rewards/rejected": -6.786458969116211, "step": 1376 }, { "epoch": 1.8, "learning_rate": 4.6543959094246417e-05, "logits/chosen": -2.488255739212036, "logits/rejected": -2.6213529109954834, "logps/chosen": -249.02215576171875, "logps/rejected": -291.32379150390625, "loss": 0.1211, "rewards/accuracies": 0.875, "rewards/chosen": -1.5869184732437134, "rewards/margins": 4.215513706207275, "rewards/rejected": -5.802432060241699, "step": 1377 }, { "epoch": 1.8, "learning_rate": 4.653867277639721e-05, "logits/chosen": -2.311802387237549, "logits/rejected": -2.3098201751708984, "logps/chosen": -172.78297424316406, "logps/rejected": -222.96755981445312, "loss": 0.1179, "rewards/accuracies": 1.0, "rewards/chosen": -2.1166205406188965, "rewards/margins": 3.5452730655670166, "rewards/rejected": -5.661893367767334, "step": 1378 }, { "epoch": 1.8, "learning_rate": 4.653338271937132e-05, "logits/chosen": -2.316065788269043, "logits/rejected": -2.3509740829467773, "logps/chosen": -202.41773986816406, "logps/rejected": -270.28985595703125, "loss": 0.1453, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8998184204101562, "rewards/margins": 3.892240524291992, "rewards/rejected": -5.79205846786499, "step": 1379 }, { "epoch": 1.81, "learning_rate": 4.6528088924087134e-05, "logits/chosen": -2.3619024753570557, "logits/rejected": -2.308654308319092, "logps/chosen": -263.7723083496094, "logps/rejected": -302.5215759277344, "loss": 0.2607, "rewards/accuracies": 0.75, "rewards/chosen": -2.3372600078582764, "rewards/margins": 3.7634713649749756, "rewards/rejected": -6.100731372833252, "step": 1380 }, { "epoch": 1.81, "learning_rate": 4.652279139146366e-05, "logits/chosen": -2.067979097366333, "logits/rejected": -2.1202504634857178, "logps/chosen": -210.7864532470703, "logps/rejected": -255.50363159179688, "loss": 0.1086, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8815836906433105, "rewards/margins": 4.555849552154541, "rewards/rejected": -6.437433242797852, "step": 1381 }, { "epoch": 1.81, "learning_rate": 4.651749012242057e-05, "logits/chosen": -2.3216161727905273, "logits/rejected": -2.276050329208374, "logps/chosen": -210.85401916503906, "logps/rejected": -233.73196411132812, "loss": 0.311, "rewards/accuracies": 0.6875, "rewards/chosen": -2.473651885986328, "rewards/margins": 2.5943374633789062, "rewards/rejected": -5.067989349365234, "step": 1382 }, { "epoch": 1.81, "learning_rate": 4.6512185117878184e-05, "logits/chosen": -2.3193976879119873, "logits/rejected": -2.3813366889953613, "logps/chosen": -205.80455017089844, "logps/rejected": -243.294921875, "loss": 0.2155, "rewards/accuracies": 0.8125, "rewards/chosen": -1.865469217300415, "rewards/margins": 4.035120487213135, "rewards/rejected": -5.900589466094971, "step": 1383 }, { "epoch": 1.81, "learning_rate": 4.6506876378757445e-05, "logits/chosen": -2.170536994934082, "logits/rejected": -2.177908182144165, "logps/chosen": -169.406494140625, "logps/rejected": -262.20025634765625, "loss": 0.1339, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5915058851242065, "rewards/margins": 4.764673233032227, "rewards/rejected": -6.356178283691406, "step": 1384 }, { "epoch": 1.81, "learning_rate": 4.650156390598e-05, "logits/chosen": -1.956441879272461, "logits/rejected": -1.9708319902420044, "logps/chosen": -194.17105102539062, "logps/rejected": -242.59666442871094, "loss": 0.2926, "rewards/accuracies": 0.875, "rewards/chosen": -2.0661697387695312, "rewards/margins": 3.1960580348968506, "rewards/rejected": -5.262228012084961, "step": 1385 }, { "epoch": 1.81, "learning_rate": 4.6496247700468064e-05, "logits/chosen": -2.279972553253174, "logits/rejected": -2.320099115371704, "logps/chosen": -180.3889617919922, "logps/rejected": -240.9708709716797, "loss": 0.1561, "rewards/accuracies": 0.875, "rewards/chosen": -1.7037296295166016, "rewards/margins": 3.612116575241089, "rewards/rejected": -5.3158464431762695, "step": 1386 }, { "epoch": 1.82, "learning_rate": 4.6490927763144586e-05, "logits/chosen": -2.3013417720794678, "logits/rejected": -2.2452809810638428, "logps/chosen": -198.49270629882812, "logps/rejected": -250.34814453125, "loss": 0.2868, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7538549900054932, "rewards/margins": 3.3283233642578125, "rewards/rejected": -5.082179069519043, "step": 1387 }, { "epoch": 1.82, "learning_rate": 4.6485604094933113e-05, "logits/chosen": -2.6034908294677734, "logits/rejected": -2.501837730407715, "logps/chosen": -241.82763671875, "logps/rejected": -259.59930419921875, "loss": 0.226, "rewards/accuracies": 0.875, "rewards/chosen": -1.8224003314971924, "rewards/margins": 4.394843101501465, "rewards/rejected": -6.217243671417236, "step": 1388 }, { "epoch": 1.82, "learning_rate": 4.648027669675784e-05, "logits/chosen": -1.8462001085281372, "logits/rejected": -1.890073537826538, "logps/chosen": -166.96856689453125, "logps/rejected": -255.78610229492188, "loss": 0.2828, "rewards/accuracies": 0.75, "rewards/chosen": -1.7682936191558838, "rewards/margins": 3.327087640762329, "rewards/rejected": -5.095381259918213, "step": 1389 }, { "epoch": 1.82, "learning_rate": 4.647494556954363e-05, "logits/chosen": -2.199425458908081, "logits/rejected": -2.260338306427002, "logps/chosen": -194.01919555664062, "logps/rejected": -243.82003784179688, "loss": 0.2729, "rewards/accuracies": 0.75, "rewards/chosen": -2.3266735076904297, "rewards/margins": 3.0939698219299316, "rewards/rejected": -5.420642852783203, "step": 1390 }, { "epoch": 1.82, "learning_rate": 4.646961071421597e-05, "logits/chosen": -2.479255199432373, "logits/rejected": -2.396368980407715, "logps/chosen": -178.45083618164062, "logps/rejected": -193.24459838867188, "loss": 0.1679, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2833914756774902, "rewards/margins": 2.904581308364868, "rewards/rejected": -4.187972545623779, "step": 1391 }, { "epoch": 1.82, "learning_rate": 4.646427213170102e-05, "logits/chosen": -2.217855453491211, "logits/rejected": -2.2055749893188477, "logps/chosen": -197.3143310546875, "logps/rejected": -273.6307067871094, "loss": 0.2277, "rewards/accuracies": 1.0, "rewards/chosen": -0.942773699760437, "rewards/margins": 3.781911849975586, "rewards/rejected": -4.7246856689453125, "step": 1392 }, { "epoch": 1.82, "learning_rate": 4.6458929822925566e-05, "logits/chosen": -2.133974552154541, "logits/rejected": -2.2947840690612793, "logps/chosen": -196.52053833007812, "logps/rejected": -273.97747802734375, "loss": 0.2357, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2982733249664307, "rewards/margins": 3.5180678367614746, "rewards/rejected": -4.816340923309326, "step": 1393 }, { "epoch": 1.82, "learning_rate": 4.645358378881704e-05, "logits/chosen": -2.507310152053833, "logits/rejected": -2.5766844749450684, "logps/chosen": -179.09832763671875, "logps/rejected": -204.78404235839844, "loss": 0.3219, "rewards/accuracies": 0.875, "rewards/chosen": -1.1929552555084229, "rewards/margins": 2.7457828521728516, "rewards/rejected": -3.9387383460998535, "step": 1394 }, { "epoch": 1.83, "learning_rate": 4.644823403030355e-05, "logits/chosen": -2.13411545753479, "logits/rejected": -2.2497849464416504, "logps/chosen": -193.55628967285156, "logps/rejected": -234.6313934326172, "loss": 0.1959, "rewards/accuracies": 0.875, "rewards/chosen": -1.2598943710327148, "rewards/margins": 3.595004081726074, "rewards/rejected": -4.854898452758789, "step": 1395 }, { "epoch": 1.83, "learning_rate": 4.644288054831381e-05, "logits/chosen": -2.157371997833252, "logits/rejected": -2.2184062004089355, "logps/chosen": -171.0150146484375, "logps/rejected": -226.55062866210938, "loss": 0.1302, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0181159973144531, "rewards/margins": 3.4827423095703125, "rewards/rejected": -4.500858306884766, "step": 1396 }, { "epoch": 1.83, "learning_rate": 4.643752334377721e-05, "logits/chosen": -2.3214855194091797, "logits/rejected": -2.367702007293701, "logps/chosen": -180.56222534179688, "logps/rejected": -226.59286499023438, "loss": 0.1739, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2419052124023438, "rewards/margins": 3.2632222175598145, "rewards/rejected": -4.505127429962158, "step": 1397 }, { "epoch": 1.83, "learning_rate": 4.643216241762377e-05, "logits/chosen": -2.4462032318115234, "logits/rejected": -2.4511361122131348, "logps/chosen": -193.83444213867188, "logps/rejected": -230.96469116210938, "loss": 0.2973, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9721289873123169, "rewards/margins": 2.934811592102051, "rewards/rejected": -3.9069406986236572, "step": 1398 }, { "epoch": 1.83, "learning_rate": 4.642679777078417e-05, "logits/chosen": -2.4158241748809814, "logits/rejected": -2.453239679336548, "logps/chosen": -198.2194061279297, "logps/rejected": -246.531005859375, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.5831554532051086, "rewards/margins": 4.099462985992432, "rewards/rejected": -4.682618141174316, "step": 1399 }, { "epoch": 1.83, "learning_rate": 4.642142940418973e-05, "logits/chosen": -2.476083517074585, "logits/rejected": -2.496399402618408, "logps/chosen": -254.40286254882812, "logps/rejected": -267.8065490722656, "loss": 0.1294, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5694355964660645, "rewards/margins": 4.015815734863281, "rewards/rejected": -4.585251808166504, "step": 1400 }, { "epoch": 1.83, "learning_rate": 4.641605731877241e-05, "logits/chosen": -2.420027256011963, "logits/rejected": -2.445709466934204, "logps/chosen": -193.45823669433594, "logps/rejected": -229.07589721679688, "loss": 0.1181, "rewards/accuracies": 1.0, "rewards/chosen": -0.91693115234375, "rewards/margins": 3.285738706588745, "rewards/rejected": -4.202670097351074, "step": 1401 }, { "epoch": 1.83, "learning_rate": 4.6410681515464815e-05, "logits/chosen": -2.453171491622925, "logits/rejected": -2.4817469120025635, "logps/chosen": -167.9488525390625, "logps/rejected": -211.94888305664062, "loss": 0.2742, "rewards/accuracies": 0.75, "rewards/chosen": -0.7758989334106445, "rewards/margins": 2.3340702056884766, "rewards/rejected": -3.109968900680542, "step": 1402 }, { "epoch": 1.84, "learning_rate": 4.640530199520021e-05, "logits/chosen": -2.478593587875366, "logits/rejected": -2.4444589614868164, "logps/chosen": -274.9038391113281, "logps/rejected": -317.6105041503906, "loss": 0.1598, "rewards/accuracies": 0.875, "rewards/chosen": -1.0752410888671875, "rewards/margins": 3.3734335899353027, "rewards/rejected": -4.44867467880249, "step": 1403 }, { "epoch": 1.84, "learning_rate": 4.639991875891248e-05, "logits/chosen": -1.9536941051483154, "logits/rejected": -2.0602893829345703, "logps/chosen": -164.46133422851562, "logps/rejected": -239.60308837890625, "loss": 0.1324, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1708040237426758, "rewards/margins": 3.239039659500122, "rewards/rejected": -4.409843444824219, "step": 1404 }, { "epoch": 1.84, "learning_rate": 4.639453180753619e-05, "logits/chosen": -2.305426597595215, "logits/rejected": -2.310289144515991, "logps/chosen": -179.86114501953125, "logps/rejected": -254.47314453125, "loss": 0.1235, "rewards/accuracies": 0.875, "rewards/chosen": -0.4751981496810913, "rewards/margins": 4.430361270904541, "rewards/rejected": -4.905559539794922, "step": 1405 }, { "epoch": 1.84, "learning_rate": 4.638914114200652e-05, "logits/chosen": -1.8774938583374023, "logits/rejected": -1.9185031652450562, "logps/chosen": -173.3539276123047, "logps/rejected": -220.61851501464844, "loss": 0.2646, "rewards/accuracies": 0.875, "rewards/chosen": -1.2160310745239258, "rewards/margins": 2.826627016067505, "rewards/rejected": -4.042657852172852, "step": 1406 }, { "epoch": 1.84, "learning_rate": 4.638374676325931e-05, "logits/chosen": -2.312690496444702, "logits/rejected": -2.472609519958496, "logps/chosen": -178.3687744140625, "logps/rejected": -245.68804931640625, "loss": 0.1414, "rewards/accuracies": 0.875, "rewards/chosen": -1.2376123666763306, "rewards/margins": 3.6323118209838867, "rewards/rejected": -4.8699235916137695, "step": 1407 }, { "epoch": 1.84, "learning_rate": 4.637834867223102e-05, "logits/chosen": -2.4253156185150146, "logits/rejected": -2.5353803634643555, "logps/chosen": -192.8672332763672, "logps/rejected": -227.572265625, "loss": 0.1833, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8559720516204834, "rewards/margins": 3.2029502391815186, "rewards/rejected": -4.05892276763916, "step": 1408 }, { "epoch": 1.84, "learning_rate": 4.63729468698588e-05, "logits/chosen": -2.4336678981781006, "logits/rejected": -2.5701303482055664, "logps/chosen": -169.47442626953125, "logps/rejected": -245.16151428222656, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": -0.9117591381072998, "rewards/margins": 3.5345542430877686, "rewards/rejected": -4.446313381195068, "step": 1409 }, { "epoch": 1.85, "learning_rate": 4.636754135708041e-05, "logits/chosen": -2.405302047729492, "logits/rejected": -2.4277071952819824, "logps/chosen": -194.17935180664062, "logps/rejected": -215.50399780273438, "loss": 0.2359, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0027927160263062, "rewards/margins": 3.0121240615844727, "rewards/rejected": -4.014916896820068, "step": 1410 }, { "epoch": 1.85, "learning_rate": 4.636213213483427e-05, "logits/chosen": -2.4292078018188477, "logits/rejected": -2.4207870960235596, "logps/chosen": -178.75831604003906, "logps/rejected": -193.9530029296875, "loss": 0.2236, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8789994716644287, "rewards/margins": 2.7491774559020996, "rewards/rejected": -3.62817645072937, "step": 1411 }, { "epoch": 1.85, "learning_rate": 4.635671920405942e-05, "logits/chosen": -2.354250192642212, "logits/rejected": -2.4390838146209717, "logps/chosen": -170.90542602539062, "logps/rejected": -211.04017639160156, "loss": 0.1892, "rewards/accuracies": 0.875, "rewards/chosen": -0.9057228565216064, "rewards/margins": 2.9422061443328857, "rewards/rejected": -3.847928762435913, "step": 1412 }, { "epoch": 1.85, "learning_rate": 4.635130256569558e-05, "logits/chosen": -2.297285795211792, "logits/rejected": -2.392590045928955, "logps/chosen": -168.90338134765625, "logps/rejected": -240.53675842285156, "loss": 0.1883, "rewards/accuracies": 0.875, "rewards/chosen": -1.4901635646820068, "rewards/margins": 3.915353775024414, "rewards/rejected": -5.405517578125, "step": 1413 }, { "epoch": 1.85, "learning_rate": 4.634588222068307e-05, "logits/chosen": -2.1840977668762207, "logits/rejected": -2.1191656589508057, "logps/chosen": -253.16091918945312, "logps/rejected": -288.4150695800781, "loss": 0.2244, "rewards/accuracies": 0.8125, "rewards/chosen": -0.961954653263092, "rewards/margins": 3.244297504425049, "rewards/rejected": -4.206252098083496, "step": 1414 }, { "epoch": 1.85, "learning_rate": 4.63404581699629e-05, "logits/chosen": -2.4387269020080566, "logits/rejected": -2.4661765098571777, "logps/chosen": -205.93373107910156, "logps/rejected": -220.48907470703125, "loss": 0.3181, "rewards/accuracies": 0.75, "rewards/chosen": -1.2564948797225952, "rewards/margins": 2.7682459354400635, "rewards/rejected": -4.024740695953369, "step": 1415 }, { "epoch": 1.85, "learning_rate": 4.633503041447669e-05, "logits/chosen": -2.3245558738708496, "logits/rejected": -2.4340667724609375, "logps/chosen": -189.07936096191406, "logps/rejected": -247.6138916015625, "loss": 0.1197, "rewards/accuracies": 0.875, "rewards/chosen": -0.9286269545555115, "rewards/margins": 4.146144390106201, "rewards/rejected": -5.074770927429199, "step": 1416 }, { "epoch": 1.85, "learning_rate": 4.632959895516672e-05, "logits/chosen": -2.2829172611236572, "logits/rejected": -2.447025775909424, "logps/chosen": -224.01718139648438, "logps/rejected": -285.6311950683594, "loss": 0.1772, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3857183456420898, "rewards/margins": 3.0930581092834473, "rewards/rejected": -4.478775978088379, "step": 1417 }, { "epoch": 1.86, "learning_rate": 4.6324163792975906e-05, "logits/chosen": -2.3150711059570312, "logits/rejected": -2.3638498783111572, "logps/chosen": -239.34747314453125, "logps/rejected": -272.7017822265625, "loss": 0.124, "rewards/accuracies": 0.9375, "rewards/chosen": -1.116297721862793, "rewards/margins": 4.121063232421875, "rewards/rejected": -5.237360954284668, "step": 1418 }, { "epoch": 1.86, "learning_rate": 4.63187249288478e-05, "logits/chosen": -2.2839043140411377, "logits/rejected": -2.2470085620880127, "logps/chosen": -180.84278869628906, "logps/rejected": -192.83535766601562, "loss": 0.2197, "rewards/accuracies": 0.875, "rewards/chosen": -1.1662708520889282, "rewards/margins": 2.785388946533203, "rewards/rejected": -3.951659917831421, "step": 1419 }, { "epoch": 1.86, "learning_rate": 4.631328236372662e-05, "logits/chosen": -2.356921672821045, "logits/rejected": -2.308830738067627, "logps/chosen": -184.10110473632812, "logps/rejected": -209.7080078125, "loss": 0.2305, "rewards/accuracies": 0.8125, "rewards/chosen": -1.795234203338623, "rewards/margins": 2.5254967212677, "rewards/rejected": -4.320731163024902, "step": 1420 }, { "epoch": 1.86, "learning_rate": 4.630783609855719e-05, "logits/chosen": -1.8619661331176758, "logits/rejected": -1.8003392219543457, "logps/chosen": -173.55296325683594, "logps/rejected": -202.5664520263672, "loss": 0.2976, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6667727828025818, "rewards/margins": 3.061650037765503, "rewards/rejected": -3.7284226417541504, "step": 1421 }, { "epoch": 1.86, "learning_rate": 4.6302386134285026e-05, "logits/chosen": -2.434025287628174, "logits/rejected": -2.4339070320129395, "logps/chosen": -221.09487915039062, "logps/rejected": -241.01637268066406, "loss": 0.2679, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9217256307601929, "rewards/margins": 2.6306025981903076, "rewards/rejected": -4.552328109741211, "step": 1422 }, { "epoch": 1.86, "learning_rate": 4.629693247185624e-05, "logits/chosen": -2.406184673309326, "logits/rejected": -2.3965976238250732, "logps/chosen": -233.80032348632812, "logps/rejected": -279.02288818359375, "loss": 0.2578, "rewards/accuracies": 0.75, "rewards/chosen": -1.3796286582946777, "rewards/margins": 3.7771825790405273, "rewards/rejected": -5.156811237335205, "step": 1423 }, { "epoch": 1.86, "learning_rate": 4.629147511221759e-05, "logits/chosen": -2.2653684616088867, "logits/rejected": -2.3729100227355957, "logps/chosen": -148.75238037109375, "logps/rejected": -179.93528747558594, "loss": 0.1934, "rewards/accuracies": 1.0, "rewards/chosen": -1.1495842933654785, "rewards/margins": 2.708228349685669, "rewards/rejected": -3.8578124046325684, "step": 1424 }, { "epoch": 1.86, "learning_rate": 4.628601405631652e-05, "logits/chosen": -2.2800092697143555, "logits/rejected": -2.31841778755188, "logps/chosen": -181.82470703125, "logps/rejected": -225.3800506591797, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": -1.0458605289459229, "rewards/margins": 3.472771406173706, "rewards/rejected": -4.518631458282471, "step": 1425 }, { "epoch": 1.87, "learning_rate": 4.6280549305101063e-05, "logits/chosen": -2.3783209323883057, "logits/rejected": -2.4350523948669434, "logps/chosen": -259.91650390625, "logps/rejected": -312.69049072265625, "loss": 0.1717, "rewards/accuracies": 1.0, "rewards/chosen": -1.517119288444519, "rewards/margins": 3.184443950653076, "rewards/rejected": -4.701562881469727, "step": 1426 }, { "epoch": 1.87, "learning_rate": 4.6275080859519926e-05, "logits/chosen": -2.379199981689453, "logits/rejected": -2.4917795658111572, "logps/chosen": -213.16268920898438, "logps/rejected": -263.35296630859375, "loss": 0.1122, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0016521215438843, "rewards/margins": 4.081844806671143, "rewards/rejected": -5.08349609375, "step": 1427 }, { "epoch": 1.87, "learning_rate": 4.626960872052245e-05, "logits/chosen": -2.497894525527954, "logits/rejected": -2.427412509918213, "logps/chosen": -236.83901977539062, "logps/rejected": -263.41766357421875, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -1.0873581171035767, "rewards/margins": 3.7583518028259277, "rewards/rejected": -4.845709323883057, "step": 1428 }, { "epoch": 1.87, "learning_rate": 4.62641328890586e-05, "logits/chosen": -1.8372935056686401, "logits/rejected": -1.892336130142212, "logps/chosen": -197.54855346679688, "logps/rejected": -207.12667846679688, "loss": 0.2948, "rewards/accuracies": 0.75, "rewards/chosen": -1.3495253324508667, "rewards/margins": 2.6124796867370605, "rewards/rejected": -3.9620046615600586, "step": 1429 }, { "epoch": 1.87, "learning_rate": 4.625865336607901e-05, "logits/chosen": -2.2670607566833496, "logits/rejected": -2.344111680984497, "logps/chosen": -214.10784912109375, "logps/rejected": -266.7064208984375, "loss": 0.1006, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1706020832061768, "rewards/margins": 4.015186786651611, "rewards/rejected": -5.185789108276367, "step": 1430 }, { "epoch": 1.87, "learning_rate": 4.625317015253493e-05, "logits/chosen": -2.4320297241210938, "logits/rejected": -2.542137622833252, "logps/chosen": -179.33941650390625, "logps/rejected": -225.77798461914062, "loss": 0.1293, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0320031642913818, "rewards/margins": 3.152787685394287, "rewards/rejected": -4.184791088104248, "step": 1431 }, { "epoch": 1.87, "learning_rate": 4.624768324937827e-05, "logits/chosen": -1.8749370574951172, "logits/rejected": -1.8897876739501953, "logps/chosen": -208.49624633789062, "logps/rejected": -241.69107055664062, "loss": 0.258, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2621411085128784, "rewards/margins": 2.4016261100769043, "rewards/rejected": -3.663767099380493, "step": 1432 }, { "epoch": 1.88, "learning_rate": 4.624219265756158e-05, "logits/chosen": -2.201465606689453, "logits/rejected": -2.145068883895874, "logps/chosen": -205.04013061523438, "logps/rejected": -254.39373779296875, "loss": 0.113, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8891902565956116, "rewards/margins": 4.0929059982299805, "rewards/rejected": -4.9820966720581055, "step": 1433 }, { "epoch": 1.88, "learning_rate": 4.6236698378038026e-05, "logits/chosen": -2.4089853763580322, "logits/rejected": -2.4648375511169434, "logps/chosen": -199.67010498046875, "logps/rejected": -249.1591796875, "loss": 0.1373, "rewards/accuracies": 0.875, "rewards/chosen": -1.287391185760498, "rewards/margins": 4.044575214385986, "rewards/rejected": -5.331966400146484, "step": 1434 }, { "epoch": 1.88, "learning_rate": 4.6231200411761444e-05, "logits/chosen": -2.464130401611328, "logits/rejected": -2.5502843856811523, "logps/chosen": -243.13787841796875, "logps/rejected": -283.830322265625, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -1.265555500984192, "rewards/margins": 5.3822526931762695, "rewards/rejected": -6.647809028625488, "step": 1435 }, { "epoch": 1.88, "learning_rate": 4.622569875968629e-05, "logits/chosen": -2.321964979171753, "logits/rejected": -2.423978805541992, "logps/chosen": -205.4765625, "logps/rejected": -242.30911254882812, "loss": 0.1069, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7557665705680847, "rewards/margins": 4.173945903778076, "rewards/rejected": -4.929712772369385, "step": 1436 }, { "epoch": 1.88, "learning_rate": 4.6220193422767665e-05, "logits/chosen": -2.3185129165649414, "logits/rejected": -2.246967077255249, "logps/chosen": -190.0450439453125, "logps/rejected": -195.37098693847656, "loss": 0.1683, "rewards/accuracies": 1.0, "rewards/chosen": -1.310142159461975, "rewards/margins": 3.645517587661743, "rewards/rejected": -4.955659866333008, "step": 1437 }, { "epoch": 1.88, "learning_rate": 4.6214684401961314e-05, "logits/chosen": -2.327990770339966, "logits/rejected": -2.4045562744140625, "logps/chosen": -195.2279052734375, "logps/rejected": -307.27642822265625, "loss": 0.1309, "rewards/accuracies": 0.9375, "rewards/chosen": -1.452347993850708, "rewards/margins": 4.286505222320557, "rewards/rejected": -5.738852500915527, "step": 1438 }, { "epoch": 1.88, "learning_rate": 4.620917169822363e-05, "logits/chosen": -2.33237361907959, "logits/rejected": -2.329893112182617, "logps/chosen": -197.7900390625, "logps/rejected": -215.33221435546875, "loss": 0.3258, "rewards/accuracies": 0.875, "rewards/chosen": -1.448103666305542, "rewards/margins": 3.1846346855163574, "rewards/rejected": -4.63273811340332, "step": 1439 }, { "epoch": 1.88, "learning_rate": 4.6203655312511616e-05, "logits/chosen": -2.4939942359924316, "logits/rejected": -2.5082345008850098, "logps/chosen": -257.9974060058594, "logps/rejected": -305.877197265625, "loss": 0.2161, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5948214530944824, "rewards/margins": 3.7099242210388184, "rewards/rejected": -5.304745197296143, "step": 1440 }, { "epoch": 1.89, "learning_rate": 4.619813524578295e-05, "logits/chosen": -2.354602813720703, "logits/rejected": -2.3939921855926514, "logps/chosen": -215.87257385253906, "logps/rejected": -281.49603271484375, "loss": 0.0665, "rewards/accuracies": 0.9375, "rewards/chosen": -1.500991940498352, "rewards/margins": 4.147805690765381, "rewards/rejected": -5.648797512054443, "step": 1441 }, { "epoch": 1.89, "learning_rate": 4.619261149899592e-05, "logits/chosen": -2.2306058406829834, "logits/rejected": -2.329472780227661, "logps/chosen": -195.59654235839844, "logps/rejected": -269.08892822265625, "loss": 0.1455, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8193372488021851, "rewards/margins": 4.419328212738037, "rewards/rejected": -5.23866605758667, "step": 1442 }, { "epoch": 1.89, "learning_rate": 4.618708407310947e-05, "logits/chosen": -2.4757118225097656, "logits/rejected": -2.430291175842285, "logps/chosen": -287.3864440917969, "logps/rejected": -302.011962890625, "loss": 0.1739, "rewards/accuracies": 0.875, "rewards/chosen": -1.6464817523956299, "rewards/margins": 3.5614006519317627, "rewards/rejected": -5.207882881164551, "step": 1443 }, { "epoch": 1.89, "learning_rate": 4.6181552969083165e-05, "logits/chosen": -2.157444715499878, "logits/rejected": -2.1985301971435547, "logps/chosen": -194.09764099121094, "logps/rejected": -267.64715576171875, "loss": 0.1754, "rewards/accuracies": 0.875, "rewards/chosen": -1.1710000038146973, "rewards/margins": 3.7776384353637695, "rewards/rejected": -4.948638439178467, "step": 1444 }, { "epoch": 1.89, "learning_rate": 4.617601818787724e-05, "logits/chosen": -1.9728646278381348, "logits/rejected": -2.009904623031616, "logps/chosen": -160.30093383789062, "logps/rejected": -225.08453369140625, "loss": 0.1576, "rewards/accuracies": 0.875, "rewards/chosen": -1.134428858757019, "rewards/margins": 4.066022872924805, "rewards/rejected": -5.200451374053955, "step": 1445 }, { "epoch": 1.89, "learning_rate": 4.617047973045254e-05, "logits/chosen": -2.374601125717163, "logits/rejected": -2.431764841079712, "logps/chosen": -255.8111114501953, "logps/rejected": -308.56298828125, "loss": 0.0817, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9789934158325195, "rewards/margins": 5.199550628662109, "rewards/rejected": -6.178544044494629, "step": 1446 }, { "epoch": 1.89, "learning_rate": 4.6164937597770555e-05, "logits/chosen": -2.4467074871063232, "logits/rejected": -2.5327441692352295, "logps/chosen": -227.62490844726562, "logps/rejected": -285.66241455078125, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -1.1844345331192017, "rewards/margins": 4.270139217376709, "rewards/rejected": -5.454573631286621, "step": 1447 }, { "epoch": 1.89, "learning_rate": 4.615939179079342e-05, "logits/chosen": -1.8311939239501953, "logits/rejected": -1.9591543674468994, "logps/chosen": -181.0715789794922, "logps/rejected": -255.28375244140625, "loss": 0.2963, "rewards/accuracies": 0.875, "rewards/chosen": -1.9211238622665405, "rewards/margins": 4.1576642990112305, "rewards/rejected": -6.078787803649902, "step": 1448 }, { "epoch": 1.9, "learning_rate": 4.615384231048391e-05, "logits/chosen": -2.24050235748291, "logits/rejected": -2.2615864276885986, "logps/chosen": -227.12237548828125, "logps/rejected": -235.6379852294922, "loss": 0.2308, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8242486715316772, "rewards/margins": 3.2859408855438232, "rewards/rejected": -5.110189437866211, "step": 1449 }, { "epoch": 1.9, "learning_rate": 4.6148289157805406e-05, "logits/chosen": -2.395170211791992, "logits/rejected": -2.547234535217285, "logps/chosen": -170.02056884765625, "logps/rejected": -283.0645751953125, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -1.996381402015686, "rewards/margins": 3.851898670196533, "rewards/rejected": -5.848280429840088, "step": 1450 }, { "epoch": 1.9, "learning_rate": 4.614273233372198e-05, "logits/chosen": -2.4565505981445312, "logits/rejected": -2.4085004329681396, "logps/chosen": -275.6915283203125, "logps/rejected": -321.6641845703125, "loss": 0.219, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4337880611419678, "rewards/margins": 4.953248023986816, "rewards/rejected": -6.387036323547363, "step": 1451 }, { "epoch": 1.9, "learning_rate": 4.61371718391983e-05, "logits/chosen": -2.3917980194091797, "logits/rejected": -2.414000988006592, "logps/chosen": -201.67922973632812, "logps/rejected": -291.4822998046875, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -1.536219596862793, "rewards/margins": 5.365948677062988, "rewards/rejected": -6.902168273925781, "step": 1452 }, { "epoch": 1.9, "learning_rate": 4.6131607675199686e-05, "logits/chosen": -2.4047164916992188, "logits/rejected": -2.3522493839263916, "logps/chosen": -228.041748046875, "logps/rejected": -243.53179931640625, "loss": 0.1546, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3639187812805176, "rewards/margins": 3.640273332595825, "rewards/rejected": -6.004191875457764, "step": 1453 }, { "epoch": 1.9, "learning_rate": 4.612603984269208e-05, "logits/chosen": -2.2534050941467285, "logits/rejected": -2.179338216781616, "logps/chosen": -238.51016235351562, "logps/rejected": -278.68157958984375, "loss": 0.2575, "rewards/accuracies": 0.875, "rewards/chosen": -2.542654037475586, "rewards/margins": 4.30103874206543, "rewards/rejected": -6.843692779541016, "step": 1454 }, { "epoch": 1.9, "learning_rate": 4.61204683426421e-05, "logits/chosen": -2.132852077484131, "logits/rejected": -2.1885266304016113, "logps/chosen": -198.23362731933594, "logps/rejected": -253.08970642089844, "loss": 0.2702, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3060522079467773, "rewards/margins": 3.5543110370635986, "rewards/rejected": -5.860363006591797, "step": 1455 }, { "epoch": 1.91, "learning_rate": 4.611489317601696e-05, "logits/chosen": -2.1477696895599365, "logits/rejected": -2.235487937927246, "logps/chosen": -261.10101318359375, "logps/rejected": -321.97259521484375, "loss": 0.0589, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0646214485168457, "rewards/margins": 4.98212194442749, "rewards/rejected": -7.046743392944336, "step": 1456 }, { "epoch": 1.91, "learning_rate": 4.6109314343784524e-05, "logits/chosen": -2.1931657791137695, "logits/rejected": -2.2348809242248535, "logps/chosen": -168.6276397705078, "logps/rejected": -263.44561767578125, "loss": 0.1359, "rewards/accuracies": 0.9375, "rewards/chosen": -1.65928053855896, "rewards/margins": 3.9209232330322266, "rewards/rejected": -5.580203533172607, "step": 1457 }, { "epoch": 1.91, "learning_rate": 4.61037318469133e-05, "logits/chosen": -2.286311149597168, "logits/rejected": -2.3383705615997314, "logps/chosen": -178.47718811035156, "logps/rejected": -235.9649200439453, "loss": 0.2152, "rewards/accuracies": 0.875, "rewards/chosen": -1.8744279146194458, "rewards/margins": 3.764946937561035, "rewards/rejected": -5.63937520980835, "step": 1458 }, { "epoch": 1.91, "learning_rate": 4.6098145686372415e-05, "logits/chosen": -1.8387361764907837, "logits/rejected": -1.8523186445236206, "logps/chosen": -215.5361328125, "logps/rejected": -265.8430480957031, "loss": 0.1596, "rewards/accuracies": 0.9375, "rewards/chosen": -2.201512575149536, "rewards/margins": 3.9642109870910645, "rewards/rejected": -6.16572380065918, "step": 1459 }, { "epoch": 1.91, "learning_rate": 4.609255586313166e-05, "logits/chosen": -2.1031899452209473, "logits/rejected": -2.1518192291259766, "logps/chosen": -231.7281494140625, "logps/rejected": -219.5949249267578, "loss": 0.1425, "rewards/accuracies": 1.0, "rewards/chosen": -2.150951385498047, "rewards/margins": 3.822639226913452, "rewards/rejected": -5.973590850830078, "step": 1460 }, { "epoch": 1.91, "learning_rate": 4.608696237816143e-05, "logits/chosen": -2.3749542236328125, "logits/rejected": -2.404801845550537, "logps/chosen": -190.76507568359375, "logps/rejected": -212.51536560058594, "loss": 0.2917, "rewards/accuracies": 0.875, "rewards/chosen": -2.243725061416626, "rewards/margins": 3.3376617431640625, "rewards/rejected": -5.581386566162109, "step": 1461 }, { "epoch": 1.91, "learning_rate": 4.6081365232432766e-05, "logits/chosen": -2.2417476177215576, "logits/rejected": -2.29201602935791, "logps/chosen": -309.34808349609375, "logps/rejected": -365.32110595703125, "loss": 0.1465, "rewards/accuracies": 0.875, "rewards/chosen": -3.222254753112793, "rewards/margins": 4.027994155883789, "rewards/rejected": -7.250248908996582, "step": 1462 }, { "epoch": 1.91, "learning_rate": 4.607576442691737e-05, "logits/chosen": -2.15925931930542, "logits/rejected": -2.148500919342041, "logps/chosen": -222.72410583496094, "logps/rejected": -263.0013427734375, "loss": 0.2245, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0160765647888184, "rewards/margins": 4.10772705078125, "rewards/rejected": -6.12380313873291, "step": 1463 }, { "epoch": 1.92, "learning_rate": 4.607015996258753e-05, "logits/chosen": -2.2752912044525146, "logits/rejected": -2.4492502212524414, "logps/chosen": -156.12005615234375, "logps/rejected": -226.24057006835938, "loss": 0.15, "rewards/accuracies": 0.9375, "rewards/chosen": -1.519330382347107, "rewards/margins": 4.664186477661133, "rewards/rejected": -6.183516979217529, "step": 1464 }, { "epoch": 1.92, "learning_rate": 4.606455184041622e-05, "logits/chosen": -2.2248048782348633, "logits/rejected": -2.3291237354278564, "logps/chosen": -167.07228088378906, "logps/rejected": -236.86578369140625, "loss": 0.2488, "rewards/accuracies": 0.8125, "rewards/chosen": -2.011113166809082, "rewards/margins": 3.412134885787964, "rewards/rejected": -5.423248767852783, "step": 1465 }, { "epoch": 1.92, "learning_rate": 4.6058940061377034e-05, "logits/chosen": -2.4281818866729736, "logits/rejected": -2.378067970275879, "logps/chosen": -217.27587890625, "logps/rejected": -243.82916259765625, "loss": 0.3441, "rewards/accuracies": 0.6875, "rewards/chosen": -1.679200291633606, "rewards/margins": 3.3170528411865234, "rewards/rejected": -4.996253490447998, "step": 1466 }, { "epoch": 1.92, "learning_rate": 4.605332462644417e-05, "logits/chosen": -2.0087335109710693, "logits/rejected": -2.0646257400512695, "logps/chosen": -196.29052734375, "logps/rejected": -237.742431640625, "loss": 0.3309, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7196638584136963, "rewards/margins": 3.8741507530212402, "rewards/rejected": -5.593814373016357, "step": 1467 }, { "epoch": 1.92, "learning_rate": 4.604770553659249e-05, "logits/chosen": -2.3972527980804443, "logits/rejected": -2.4377150535583496, "logps/chosen": -185.716796875, "logps/rejected": -259.7029113769531, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -1.643090844154358, "rewards/margins": 3.906405448913574, "rewards/rejected": -5.549496173858643, "step": 1468 }, { "epoch": 1.92, "learning_rate": 4.604208279279749e-05, "logits/chosen": -2.144949436187744, "logits/rejected": -2.169041156768799, "logps/chosen": -160.50648498535156, "logps/rejected": -240.07061767578125, "loss": 0.2066, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6182748079299927, "rewards/margins": 3.4854378700256348, "rewards/rejected": -5.103713035583496, "step": 1469 }, { "epoch": 1.92, "learning_rate": 4.6036456396035294e-05, "logits/chosen": -2.12722110748291, "logits/rejected": -2.192887783050537, "logps/chosen": -230.61842346191406, "logps/rejected": -281.3009338378906, "loss": 0.1416, "rewards/accuracies": 0.9375, "rewards/chosen": -2.241001605987549, "rewards/margins": 4.063772678375244, "rewards/rejected": -6.304775238037109, "step": 1470 }, { "epoch": 1.93, "learning_rate": 4.603082634728266e-05, "logits/chosen": -2.2297396659851074, "logits/rejected": -2.20890736579895, "logps/chosen": -216.92306518554688, "logps/rejected": -302.16015625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.4535167217254639, "rewards/margins": 5.6094746589660645, "rewards/rejected": -7.062991619110107, "step": 1471 }, { "epoch": 1.93, "learning_rate": 4.602519264751697e-05, "logits/chosen": -2.5697968006134033, "logits/rejected": -2.4660468101501465, "logps/chosen": -272.8087158203125, "logps/rejected": -276.1620788574219, "loss": 0.2615, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8285648822784424, "rewards/margins": 3.2634503841400146, "rewards/rejected": -5.092015266418457, "step": 1472 }, { "epoch": 1.93, "learning_rate": 4.601955529771628e-05, "logits/chosen": -2.385674476623535, "logits/rejected": -2.5492734909057617, "logps/chosen": -209.6891632080078, "logps/rejected": -247.408935546875, "loss": 0.175, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8343769311904907, "rewards/margins": 3.4775278568267822, "rewards/rejected": -5.311904430389404, "step": 1473 }, { "epoch": 1.93, "learning_rate": 4.601391429885922e-05, "logits/chosen": -2.5167958736419678, "logits/rejected": -2.5103375911712646, "logps/chosen": -239.463623046875, "logps/rejected": -271.7610778808594, "loss": 0.1614, "rewards/accuracies": 0.875, "rewards/chosen": -1.7672418355941772, "rewards/margins": 4.362864971160889, "rewards/rejected": -6.130106449127197, "step": 1474 }, { "epoch": 1.93, "learning_rate": 4.600826965192509e-05, "logits/chosen": -2.1353819370269775, "logits/rejected": -2.2745490074157715, "logps/chosen": -144.68832397460938, "logps/rejected": -287.36920166015625, "loss": 0.0856, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7524394989013672, "rewards/margins": 4.990352630615234, "rewards/rejected": -6.742793083190918, "step": 1475 }, { "epoch": 1.93, "learning_rate": 4.6002621357893826e-05, "logits/chosen": -2.486030340194702, "logits/rejected": -2.4928431510925293, "logps/chosen": -223.55197143554688, "logps/rejected": -286.7210693359375, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -1.6188063621520996, "rewards/margins": 5.330704212188721, "rewards/rejected": -6.9495110511779785, "step": 1476 }, { "epoch": 1.93, "learning_rate": 4.5996969417745986e-05, "logits/chosen": -2.2136309146881104, "logits/rejected": -2.255204200744629, "logps/chosen": -209.3388214111328, "logps/rejected": -260.0314025878906, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": -1.289798378944397, "rewards/margins": 4.679617404937744, "rewards/rejected": -5.96941614151001, "step": 1477 }, { "epoch": 1.93, "learning_rate": 4.599131383246277e-05, "logits/chosen": -2.519814968109131, "logits/rejected": -2.547168254852295, "logps/chosen": -209.82684326171875, "logps/rejected": -275.3058166503906, "loss": 0.1756, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6560988426208496, "rewards/margins": 4.22465705871582, "rewards/rejected": -5.880756378173828, "step": 1478 }, { "epoch": 1.94, "learning_rate": 4.598565460302599e-05, "logits/chosen": -2.0395500659942627, "logits/rejected": -2.038343906402588, "logps/chosen": -179.4903564453125, "logps/rejected": -207.68734741210938, "loss": 0.2468, "rewards/accuracies": 0.875, "rewards/chosen": -1.0823073387145996, "rewards/margins": 4.204888820648193, "rewards/rejected": -5.287196159362793, "step": 1479 }, { "epoch": 1.94, "learning_rate": 4.5979991730418105e-05, "logits/chosen": -2.3252034187316895, "logits/rejected": -2.3550727367401123, "logps/chosen": -188.61257934570312, "logps/rejected": -251.78277587890625, "loss": 0.1684, "rewards/accuracies": 0.9375, "rewards/chosen": -1.603855848312378, "rewards/margins": 4.1132965087890625, "rewards/rejected": -5.7171525955200195, "step": 1480 }, { "epoch": 1.94, "learning_rate": 4.5974325215622225e-05, "logits/chosen": -2.103459119796753, "logits/rejected": -2.1562860012054443, "logps/chosen": -180.7230987548828, "logps/rejected": -237.24388122558594, "loss": 0.2603, "rewards/accuracies": 0.875, "rewards/chosen": -1.9204591512680054, "rewards/margins": 2.8535118103027344, "rewards/rejected": -4.773970603942871, "step": 1481 }, { "epoch": 1.94, "learning_rate": 4.596865505962205e-05, "logits/chosen": -2.130312919616699, "logits/rejected": -2.2995500564575195, "logps/chosen": -201.30421447753906, "logps/rejected": -257.9309387207031, "loss": 0.212, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0479345321655273, "rewards/margins": 3.392012596130371, "rewards/rejected": -5.439947128295898, "step": 1482 }, { "epoch": 1.94, "learning_rate": 4.596298126340195e-05, "logits/chosen": -2.275320291519165, "logits/rejected": -2.270385980606079, "logps/chosen": -256.5345458984375, "logps/rejected": -315.8306884765625, "loss": 0.0989, "rewards/accuracies": 1.0, "rewards/chosen": -1.094556450843811, "rewards/margins": 4.61052131652832, "rewards/rejected": -5.705077648162842, "step": 1483 }, { "epoch": 1.94, "learning_rate": 4.595730382794691e-05, "logits/chosen": -1.9736368656158447, "logits/rejected": -2.069291353225708, "logps/chosen": -214.57188415527344, "logps/rejected": -224.57400512695312, "loss": 0.2066, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7135539054870605, "rewards/margins": 3.323864459991455, "rewards/rejected": -5.037418842315674, "step": 1484 }, { "epoch": 1.94, "learning_rate": 4.595162275424255e-05, "logits/chosen": -2.2768514156341553, "logits/rejected": -2.439985513687134, "logps/chosen": -168.1714630126953, "logps/rejected": -264.5210876464844, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/chosen": -1.3404206037521362, "rewards/margins": 4.816141128540039, "rewards/rejected": -6.156561374664307, "step": 1485 }, { "epoch": 1.94, "learning_rate": 4.594593804327513e-05, "logits/chosen": -2.3617050647735596, "logits/rejected": -2.4006364345550537, "logps/chosen": -223.51766967773438, "logps/rejected": -264.58575439453125, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -1.4779276847839355, "rewards/margins": 3.821845054626465, "rewards/rejected": -5.299773216247559, "step": 1486 }, { "epoch": 1.95, "learning_rate": 4.594024969603151e-05, "logits/chosen": -2.4014294147491455, "logits/rejected": -2.345073938369751, "logps/chosen": -219.4610137939453, "logps/rejected": -262.7455749511719, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/chosen": -1.476982831954956, "rewards/margins": 3.7781622409820557, "rewards/rejected": -5.2551445960998535, "step": 1487 }, { "epoch": 1.95, "learning_rate": 4.5934557713499214e-05, "logits/chosen": -2.1576740741729736, "logits/rejected": -2.2681870460510254, "logps/chosen": -248.980712890625, "logps/rejected": -353.2499694824219, "loss": 0.1602, "rewards/accuracies": 0.8125, "rewards/chosen": -1.409653902053833, "rewards/margins": 5.4672088623046875, "rewards/rejected": -6.8768630027771, "step": 1488 }, { "epoch": 1.95, "learning_rate": 4.592886209666639e-05, "logits/chosen": -2.3278369903564453, "logits/rejected": -2.4039418697357178, "logps/chosen": -175.85903930664062, "logps/rejected": -243.25254821777344, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -1.090004563331604, "rewards/margins": 4.7786030769348145, "rewards/rejected": -5.868607997894287, "step": 1489 }, { "epoch": 1.95, "learning_rate": 4.5923162846521824e-05, "logits/chosen": -2.2458479404449463, "logits/rejected": -2.3095836639404297, "logps/chosen": -252.07955932617188, "logps/rejected": -280.0485534667969, "loss": 0.2264, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0053060054779053, "rewards/margins": 3.449532985687256, "rewards/rejected": -5.454838752746582, "step": 1490 }, { "epoch": 1.95, "learning_rate": 4.5917459964054896e-05, "logits/chosen": -2.519906997680664, "logits/rejected": -2.511167049407959, "logps/chosen": -198.07257080078125, "logps/rejected": -263.76898193359375, "loss": 0.1654, "rewards/accuracies": 0.875, "rewards/chosen": -0.930348813533783, "rewards/margins": 4.16477632522583, "rewards/rejected": -5.095125675201416, "step": 1491 }, { "epoch": 1.95, "learning_rate": 4.5911753450255665e-05, "logits/chosen": -2.2251136302948, "logits/rejected": -2.2730202674865723, "logps/chosen": -236.90675354003906, "logps/rejected": -275.7073059082031, "loss": 0.1027, "rewards/accuracies": 0.9375, "rewards/chosen": -1.889454960823059, "rewards/margins": 3.976149082183838, "rewards/rejected": -5.865603446960449, "step": 1492 }, { "epoch": 1.95, "learning_rate": 4.59060433061148e-05, "logits/chosen": -2.395418643951416, "logits/rejected": -2.4963815212249756, "logps/chosen": -207.62921142578125, "logps/rejected": -221.30831909179688, "loss": 0.2778, "rewards/accuracies": 0.875, "rewards/chosen": -1.6155648231506348, "rewards/margins": 3.7796285152435303, "rewards/rejected": -5.395193099975586, "step": 1493 }, { "epoch": 1.96, "learning_rate": 4.5900329532623585e-05, "logits/chosen": -2.3171846866607666, "logits/rejected": -2.2144782543182373, "logps/chosen": -192.01412963867188, "logps/rejected": -202.37173461914062, "loss": 0.4487, "rewards/accuracies": 0.875, "rewards/chosen": -1.814784288406372, "rewards/margins": 2.9413533210754395, "rewards/rejected": -4.756137847900391, "step": 1494 }, { "epoch": 1.96, "learning_rate": 4.589461213077395e-05, "logits/chosen": -1.8302745819091797, "logits/rejected": -1.8340033292770386, "logps/chosen": -185.82254028320312, "logps/rejected": -226.58468627929688, "loss": 0.2119, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3006807565689087, "rewards/margins": 3.979428291320801, "rewards/rejected": -5.280109882354736, "step": 1495 }, { "epoch": 1.96, "learning_rate": 4.588889110155845e-05, "logits/chosen": -2.277804374694824, "logits/rejected": -2.3033041954040527, "logps/chosen": -245.10751342773438, "logps/rejected": -303.5085754394531, "loss": 0.1516, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6710920333862305, "rewards/margins": 4.150707244873047, "rewards/rejected": -5.821799278259277, "step": 1496 }, { "epoch": 1.96, "learning_rate": 4.5883166445970296e-05, "logits/chosen": -2.103105068206787, "logits/rejected": -2.114781618118286, "logps/chosen": -219.78988647460938, "logps/rejected": -306.8294982910156, "loss": 0.2936, "rewards/accuracies": 0.75, "rewards/chosen": -1.6369211673736572, "rewards/margins": 2.928783893585205, "rewards/rejected": -4.565704822540283, "step": 1497 }, { "epoch": 1.96, "learning_rate": 4.587743816500328e-05, "logits/chosen": -2.03912091255188, "logits/rejected": -2.0565059185028076, "logps/chosen": -232.88314819335938, "logps/rejected": -227.30206298828125, "loss": 0.1022, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0165839195251465, "rewards/margins": 3.8692049980163574, "rewards/rejected": -4.885788917541504, "step": 1498 }, { "epoch": 1.96, "learning_rate": 4.587170625965185e-05, "logits/chosen": -1.9884145259857178, "logits/rejected": -1.9488219022750854, "logps/chosen": -204.64010620117188, "logps/rejected": -283.6075134277344, "loss": 0.15, "rewards/accuracies": 0.875, "rewards/chosen": -1.870671033859253, "rewards/margins": 4.004744529724121, "rewards/rejected": -5.875415802001953, "step": 1499 }, { "epoch": 1.96, "learning_rate": 4.586597073091109e-05, "logits/chosen": -2.0228824615478516, "logits/rejected": -2.1101438999176025, "logps/chosen": -197.7698211669922, "logps/rejected": -271.5038757324219, "loss": 0.1237, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7399284243583679, "rewards/margins": 3.924260139465332, "rewards/rejected": -4.664188861846924, "step": 1500 } ], "logging_steps": 1, "max_steps": 7640, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }