{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3333333333333334e-08, "logits/chosen": -1.9468104839324951, "logits/rejected": -1.3551281690597534, "logps/chosen": -418.5311279296875, "logps/rejected": -228.03335571289062, "loss": 0.5309, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3333333333333336e-07, "logits/chosen": -2.1891419887542725, "logits/rejected": -1.312096118927002, "logps/chosen": -321.9786682128906, "logps/rejected": -262.5549011230469, "loss": 0.3989, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": -0.00040232870378531516, "rewards/margins": 0.00023253644758369774, "rewards/rejected": -0.0006348651950247586, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-07, "logits/chosen": -1.7885940074920654, "logits/rejected": -1.4744794368743896, "logps/chosen": -270.1197204589844, "logps/rejected": -280.1589050292969, "loss": 0.3157, "rewards/accuracies": 0.5, "rewards/chosen": 0.0014780608471482992, "rewards/margins": 0.002728077583014965, "rewards/rejected": -0.0012500169686973095, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -1.9892597198486328, "logits/rejected": -1.2171941995620728, "logps/chosen": -319.71331787109375, "logps/rejected": -257.2882995605469, "loss": 0.3278, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.00036752174491994083, "rewards/margins": 0.0073011466301977634, "rewards/rejected": -0.006933624390512705, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.333333333333335e-07, "logits/chosen": -1.955718994140625, "logits/rejected": -1.1262027025222778, "logps/chosen": -356.73663330078125, "logps/rejected": -266.71392822265625, "loss": 0.2937, "rewards/accuracies": 0.875, "rewards/chosen": 0.003951634746044874, "rewards/margins": 0.028395619243383408, "rewards/rejected": -0.024443982169032097, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.9514992237091064, "logits/rejected": -1.382391333580017, "logps/chosen": -284.41790771484375, "logps/rejected": -230.9414825439453, "loss": 0.2014, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0021220450289547443, "rewards/margins": 0.07282118499279022, "rewards/rejected": -0.07069914042949677, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.9106193780899048, "logits/rejected": -1.3348861932754517, "logps/chosen": -272.3318786621094, "logps/rejected": -241.1548309326172, "loss": 0.2992, "rewards/accuracies": 0.625, "rewards/chosen": -0.0524127297103405, "rewards/margins": 0.1212296113371849, "rewards/rejected": -0.1736423522233963, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-07, "logits/chosen": -1.9291963577270508, "logits/rejected": -1.4450151920318604, "logps/chosen": -287.9643249511719, "logps/rejected": -281.98760986328125, "loss": 0.2153, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07112433016300201, "rewards/margins": 0.18519330024719238, "rewards/rejected": -0.256317675113678, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.066666666666667e-06, "logits/chosen": -1.8260694742202759, "logits/rejected": -1.2290842533111572, "logps/chosen": -311.75787353515625, "logps/rejected": -303.3421325683594, "loss": 0.1991, "rewards/accuracies": 0.875, "rewards/chosen": -0.050730206072330475, "rewards/margins": 0.30313217639923096, "rewards/rejected": -0.35386237502098083, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -2.1392667293548584, "logits/rejected": -1.5895378589630127, "logps/chosen": -264.8464050292969, "logps/rejected": -284.0433349609375, "loss": 0.1789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06132856756448746, "rewards/margins": 0.34737664461135864, "rewards/rejected": -0.4087051749229431, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -2.010281801223755, "logits/rejected": -1.4457181692123413, "logps/chosen": -280.8337707519531, "logps/rejected": -301.07421875, "loss": 0.1651, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.003755016950890422, "rewards/margins": 0.3854002356529236, "rewards/rejected": -0.38164520263671875, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4666666666666669e-06, "logits/chosen": -1.8176605701446533, "logits/rejected": -1.4668805599212646, "logps/chosen": -213.2742919921875, "logps/rejected": -196.9753875732422, "loss": 0.2645, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08552353084087372, "rewards/margins": 0.1824944019317627, "rewards/rejected": -0.2680179476737976, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.8461990356445312, "logits/rejected": -1.6458969116210938, "logps/chosen": -206.00204467773438, "logps/rejected": -255.66989135742188, "loss": 0.2099, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07237216085195541, "rewards/margins": 0.22069768607616425, "rewards/rejected": -0.29306983947753906, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-06, "logits/chosen": -2.1012439727783203, "logits/rejected": -1.28009831905365, "logps/chosen": -324.0368957519531, "logps/rejected": -254.3141632080078, "loss": 0.0919, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.01445714384317398, "rewards/margins": 0.3454706370830536, "rewards/rejected": -0.3310135304927826, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.8666666666666669e-06, "logits/chosen": -1.9437087774276733, "logits/rejected": -1.5214288234710693, "logps/chosen": -294.4078063964844, "logps/rejected": -301.8307800292969, "loss": 0.1193, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.07305511832237244, "rewards/margins": 0.38559332489967346, "rewards/rejected": -0.4586483836174011, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.9032337665557861, "logits/rejected": -1.50299870967865, "logps/chosen": -223.5890655517578, "logps/rejected": -276.28955078125, "loss": 0.1492, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.035106100142002106, "rewards/margins": 0.4305610656738281, "rewards/rejected": -0.46566715836524963, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.133333333333334e-06, "logits/chosen": -1.9716815948486328, "logits/rejected": -1.577401876449585, "logps/chosen": -264.44256591796875, "logps/rejected": -326.4450988769531, "loss": 0.1267, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05759359151124954, "rewards/margins": 0.33429816365242004, "rewards/rejected": -0.3918917775154114, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.266666666666667e-06, "logits/chosen": -1.8987305164337158, "logits/rejected": -1.3819479942321777, "logps/chosen": -205.10104370117188, "logps/rejected": -223.96218872070312, "loss": 0.1927, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09939566999673843, "rewards/margins": 0.24186280369758606, "rewards/rejected": -0.3412584662437439, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -2.106255292892456, "logits/rejected": -1.655613660812378, "logps/chosen": -264.42547607421875, "logps/rejected": -308.27130126953125, "loss": 0.2318, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09074126183986664, "rewards/margins": 0.29490557312965393, "rewards/rejected": -0.385646790266037, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.5333333333333338e-06, "logits/chosen": -2.1222376823425293, "logits/rejected": -1.7176287174224854, "logps/chosen": -266.733154296875, "logps/rejected": -274.0609436035156, "loss": 0.1424, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11357426643371582, "rewards/margins": 0.35620003938674927, "rewards/rejected": -0.4697743356227875, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.969417929649353, "logits/rejected": -1.3014360666275024, "logps/chosen": -277.68536376953125, "logps/rejected": -318.40496826171875, "loss": 0.1694, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08816908299922943, "rewards/margins": 0.3808407485485077, "rewards/rejected": -0.4690098166465759, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -2.0031707286834717, "logits/rejected": -1.5582258701324463, "logps/chosen": -235.62319946289062, "logps/rejected": -280.3308410644531, "loss": 0.1577, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06687523424625397, "rewards/margins": 0.30328303575515747, "rewards/rejected": -0.37015828490257263, "step": 210 }, { "epoch": 0.06, "learning_rate": 2.9333333333333338e-06, "logits/chosen": -2.026477336883545, "logits/rejected": -1.461743950843811, "logps/chosen": -259.7619934082031, "logps/rejected": -246.75588989257812, "loss": 0.1748, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11929011344909668, "rewards/margins": 0.2787408232688904, "rewards/rejected": -0.39803093671798706, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.066666666666667e-06, "logits/chosen": -2.0350136756896973, "logits/rejected": -1.6047760248184204, "logps/chosen": -291.62371826171875, "logps/rejected": -326.29083251953125, "loss": 0.1776, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.035884954035282135, "rewards/margins": 0.35993653535842896, "rewards/rejected": -0.3958215117454529, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.63616144657135, "logits/rejected": -1.2675909996032715, "logps/chosen": -295.3503723144531, "logps/rejected": -272.33544921875, "loss": 0.1512, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.04819333180785179, "rewards/margins": 0.3286735713481903, "rewards/rejected": -0.3768669664859772, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.153151035308838, "logits/rejected": -1.5773793458938599, "logps/chosen": -258.7474670410156, "logps/rejected": -249.0330047607422, "loss": 0.2077, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07630246877670288, "rewards/margins": 0.2940545678138733, "rewards/rejected": -0.37035703659057617, "step": 250 }, { "epoch": 0.07, "learning_rate": 3.4666666666666672e-06, "logits/chosen": -1.8532987833023071, "logits/rejected": -1.365553855895996, "logps/chosen": -308.9462585449219, "logps/rejected": -293.080810546875, "loss": 0.0988, "rewards/accuracies": 0.875, "rewards/chosen": -0.003472552401944995, "rewards/margins": 0.37654823064804077, "rewards/rejected": -0.38002076745033264, "step": 260 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.8927972316741943, "logits/rejected": -1.3772236108779907, "logps/chosen": -268.60845947265625, "logps/rejected": -279.3491516113281, "loss": 0.1617, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05976073071360588, "rewards/margins": 0.3105442523956299, "rewards/rejected": -0.37030500173568726, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.7333333333333337e-06, "logits/chosen": -2.0276782512664795, "logits/rejected": -1.21445894241333, "logps/chosen": -353.6800842285156, "logps/rejected": -307.1755676269531, "loss": 0.1506, "rewards/accuracies": 0.875, "rewards/chosen": -0.0015403844881802797, "rewards/margins": 0.5047177672386169, "rewards/rejected": -0.5062581300735474, "step": 280 }, { "epoch": 0.08, "learning_rate": 3.866666666666667e-06, "logits/chosen": -2.00919771194458, "logits/rejected": -1.3203434944152832, "logps/chosen": -269.7672119140625, "logps/rejected": -210.7847900390625, "loss": 0.1969, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.009751735255122185, "rewards/margins": 0.2548676133155823, "rewards/rejected": -0.24511587619781494, "step": 290 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.9286314249038696, "logits/rejected": -1.4645249843597412, "logps/chosen": -261.30047607421875, "logps/rejected": -284.4623718261719, "loss": 0.2013, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03132876753807068, "rewards/margins": 0.20891091227531433, "rewards/rejected": -0.24023966491222382, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.133333333333333e-06, "logits/chosen": -1.958105444908142, "logits/rejected": -1.4698688983917236, "logps/chosen": -237.3049774169922, "logps/rejected": -291.87469482421875, "loss": 0.1677, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07907866686582565, "rewards/margins": 0.34895187616348267, "rewards/rejected": -0.4280305802822113, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.266666666666668e-06, "logits/chosen": -1.717402696609497, "logits/rejected": -1.14840567111969, "logps/chosen": -294.26641845703125, "logps/rejected": -323.1462707519531, "loss": 0.1992, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0834791511297226, "rewards/margins": 0.3541187047958374, "rewards/rejected": -0.4375979006290436, "step": 320 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.8336519002914429, "logits/rejected": -1.480302333831787, "logps/chosen": -237.1054229736328, "logps/rejected": -276.50689697265625, "loss": 0.0847, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.024688202887773514, "rewards/margins": 0.40132126212120056, "rewards/rejected": -0.3766331076622009, "step": 330 }, { "epoch": 0.09, "learning_rate": 4.533333333333334e-06, "logits/chosen": -1.9516515731811523, "logits/rejected": -1.399285912513733, "logps/chosen": -326.23236083984375, "logps/rejected": -318.9416198730469, "loss": 0.1551, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04323751479387283, "rewards/margins": 0.38205739855766296, "rewards/rejected": -0.3388199210166931, "step": 340 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.6592636108398438, "logits/rejected": -1.313191533088684, "logps/chosen": -246.4650421142578, "logps/rejected": -316.64849853515625, "loss": 0.226, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.002355779055505991, "rewards/margins": 0.34276098012924194, "rewards/rejected": -0.3404052257537842, "step": 350 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.8236520290374756, "logits/rejected": -1.3795961141586304, "logps/chosen": -311.75341796875, "logps/rejected": -295.3544921875, "loss": 0.0932, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.029393743723630905, "rewards/margins": 0.3503008484840393, "rewards/rejected": -0.3209070861339569, "step": 360 }, { "epoch": 0.1, "learning_rate": 4.933333333333334e-06, "logits/chosen": -1.7931430339813232, "logits/rejected": -1.2451846599578857, "logps/chosen": -292.35650634765625, "logps/rejected": -278.6131286621094, "loss": 0.1793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10223956406116486, "rewards/margins": 0.31418344378471375, "rewards/rejected": -0.41642293334007263, "step": 370 }, { "epoch": 0.1, "learning_rate": 4.999972922944898e-06, "logits/chosen": -1.858270287513733, "logits/rejected": -1.498663306236267, "logps/chosen": -246.0679168701172, "logps/rejected": -283.5242614746094, "loss": 0.1688, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23864665627479553, "rewards/margins": 0.2593352198600769, "rewards/rejected": -0.49798187613487244, "step": 380 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -2.070420265197754, "logits/rejected": -1.164734959602356, "logps/chosen": -382.75543212890625, "logps/rejected": -294.0628662109375, "loss": 0.112, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1453527957201004, "rewards/margins": 0.3971993327140808, "rewards/rejected": -0.54255211353302, "step": 390 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.7827831506729126, "logits/rejected": -1.5060867071151733, "logps/chosen": -227.31982421875, "logps/rejected": -290.09149169921875, "loss": 0.1737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17244981229305267, "rewards/margins": 0.3016803562641144, "rewards/rejected": -0.47413015365600586, "step": 400 }, { "epoch": 0.11, "learning_rate": 4.998673339256785e-06, "logits/chosen": -1.8005807399749756, "logits/rejected": -1.5056277513504028, "logps/chosen": -285.5323791503906, "logps/rejected": -313.2432556152344, "loss": 0.1204, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14575102925300598, "rewards/margins": 0.3333708941936493, "rewards/rejected": -0.4791219234466553, "step": 410 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.7242355346679688, "logits/rejected": -1.4590495824813843, "logps/chosen": -228.1505126953125, "logps/rejected": -295.41241455078125, "loss": 0.1442, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12705926597118378, "rewards/margins": 0.3108959197998047, "rewards/rejected": -0.43795520067214966, "step": 420 }, { "epoch": 0.11, "learning_rate": 4.996724385978142e-06, "logits/chosen": -1.8692153692245483, "logits/rejected": -1.2414253950119019, "logps/chosen": -298.9205322265625, "logps/rejected": -313.76934814453125, "loss": 0.1191, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0782383382320404, "rewards/margins": 0.44491782784461975, "rewards/rejected": -0.5231561064720154, "step": 430 }, { "epoch": 0.12, "learning_rate": 4.995425365260585e-06, "logits/chosen": -2.0321779251098633, "logits/rejected": -1.462066411972046, "logps/chosen": -301.1550598144531, "logps/rejected": -312.3240966796875, "loss": 0.1602, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.031835995614528656, "rewards/margins": 0.3314369320869446, "rewards/rejected": -0.3632729649543762, "step": 440 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.8992176055908203, "logits/rejected": -1.2387077808380127, "logps/chosen": -250.6095428466797, "logps/rejected": -202.1986083984375, "loss": 0.174, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06963483989238739, "rewards/margins": 0.25463372468948364, "rewards/rejected": -0.32426854968070984, "step": 450 }, { "epoch": 0.12, "learning_rate": 4.992178798434684e-06, "logits/chosen": -1.8955596685409546, "logits/rejected": -1.1744760274887085, "logps/chosen": -389.3876647949219, "logps/rejected": -292.69659423828125, "loss": 0.1316, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06589002162218094, "rewards/margins": 0.41696634888648987, "rewards/rejected": -0.4828563630580902, "step": 460 }, { "epoch": 0.13, "learning_rate": 4.990231533628719e-06, "logits/chosen": -1.8016172647476196, "logits/rejected": -1.3248649835586548, "logps/chosen": -238.6211700439453, "logps/rejected": -275.3337707519531, "loss": 0.1501, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05240337923169136, "rewards/margins": 0.3930678069591522, "rewards/rejected": -0.4454711973667145, "step": 470 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -1.6587965488433838, "logits/rejected": -1.3655837774276733, "logps/chosen": -190.276611328125, "logps/rejected": -268.18341064453125, "loss": 0.1726, "rewards/accuracies": 0.75, "rewards/chosen": -0.02386285737156868, "rewards/margins": 0.3009086549282074, "rewards/rejected": -0.3247714936733246, "step": 480 }, { "epoch": 0.13, "learning_rate": 4.985689884830711e-06, "logits/chosen": -2.0086019039154053, "logits/rejected": -1.4022401571273804, "logps/chosen": -269.7921447753906, "logps/rejected": -315.36761474609375, "loss": 0.1211, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0037451249081641436, "rewards/margins": 0.34425827860832214, "rewards/rejected": -0.3480033874511719, "step": 490 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -2.0869174003601074, "logits/rejected": -1.3568693399429321, "logps/chosen": -354.6680603027344, "logps/rejected": -353.8328552246094, "loss": 0.1258, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07166734337806702, "rewards/margins": 0.38118380308151245, "rewards/rejected": -0.45285120606422424, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.717799186706543, "logits/rejected": -1.27217698097229, "logps/chosen": -256.9685974121094, "logps/rejected": -279.153076171875, "loss": 0.1799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14614014327526093, "rewards/margins": 0.37342625856399536, "rewards/rejected": -0.5195664167404175, "step": 510 }, { "epoch": 0.14, "learning_rate": 4.97726270502586e-06, "logits/chosen": -1.9094120264053345, "logits/rejected": -1.4152483940124512, "logps/chosen": -214.5406951904297, "logps/rejected": -217.8648223876953, "loss": 0.1649, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2031155824661255, "rewards/margins": 0.3454675078392029, "rewards/rejected": -0.5485831499099731, "step": 520 }, { "epoch": 0.14, "learning_rate": 4.974024011595864e-06, "logits/chosen": -1.9784595966339111, "logits/rejected": -1.203086495399475, "logps/chosen": -336.6568908691406, "logps/rejected": -286.4167785644531, "loss": 0.1357, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10868784040212631, "rewards/margins": 0.566789984703064, "rewards/rejected": -0.6754778623580933, "step": 530 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.8285566568374634, "logits/rejected": -1.075903296470642, "logps/chosen": -302.0743103027344, "logps/rejected": -253.8324432373047, "loss": 0.124, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14791302382946014, "rewards/margins": 0.370217502117157, "rewards/rejected": -0.5181306004524231, "step": 540 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -1.774610161781311, "logits/rejected": -1.4740030765533447, "logps/chosen": -263.2217712402344, "logps/rejected": -301.24090576171875, "loss": 0.1898, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15221451222896576, "rewards/margins": 0.2885417342185974, "rewards/rejected": -0.440756231546402, "step": 550 }, { "epoch": 0.15, "learning_rate": 4.9630229593330226e-06, "logits/chosen": -1.808828592300415, "logits/rejected": -1.260229468345642, "logps/chosen": -251.52474975585938, "logps/rejected": -294.1851806640625, "loss": 0.1529, "rewards/accuracies": 0.75, "rewards/chosen": -0.11935459077358246, "rewards/margins": 0.34634923934936523, "rewards/rejected": -0.4657038748264313, "step": 560 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.7420295476913452, "logits/rejected": -1.058304786682129, "logps/chosen": -264.7672424316406, "logps/rejected": -247.16189575195312, "loss": 0.164, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18925343453884125, "rewards/margins": 0.3564419448375702, "rewards/rejected": -0.545695424079895, "step": 570 }, { "epoch": 0.15, "learning_rate": 4.954621338136399e-06, "logits/chosen": -1.4567300081253052, "logits/rejected": -0.9836466908454895, "logps/chosen": -324.0819396972656, "logps/rejected": -318.97796630859375, "loss": 0.1473, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11459589004516602, "rewards/margins": 0.3253883421421051, "rewards/rejected": -0.4399842321872711, "step": 580 }, { "epoch": 0.16, "learning_rate": 4.95010131585597e-06, "logits/chosen": -1.8161170482635498, "logits/rejected": -1.4203059673309326, "logps/chosen": -230.9877166748047, "logps/rejected": -267.8335266113281, "loss": 0.1543, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07126758992671967, "rewards/margins": 0.3637334704399109, "rewards/rejected": -0.43500104546546936, "step": 590 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.8937923908233643, "logits/rejected": -1.3107613325119019, "logps/chosen": -273.46002197265625, "logps/rejected": -197.23764038085938, "loss": 0.175, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.032990988343954086, "rewards/margins": 0.2917155623435974, "rewards/rejected": -0.324706494808197, "step": 600 }, { "epoch": 0.16, "learning_rate": 4.940424806108619e-06, "logits/chosen": -1.6622874736785889, "logits/rejected": -1.2222545146942139, "logps/chosen": -258.8514404296875, "logps/rejected": -311.0167236328125, "loss": 0.1296, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07337610423564911, "rewards/margins": 0.39577198028564453, "rewards/rejected": -0.46914809942245483, "step": 610 }, { "epoch": 0.17, "learning_rate": 4.935269157073597e-06, "logits/chosen": -1.6483392715454102, "logits/rejected": -1.3003222942352295, "logps/chosen": -242.5626983642578, "logps/rejected": -345.08624267578125, "loss": 0.1396, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.01702979765832424, "rewards/margins": 0.4016871452331543, "rewards/rejected": -0.4187169671058655, "step": 620 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.6613142490386963, "logits/rejected": -1.1542552709579468, "logps/chosen": -288.81915283203125, "logps/rejected": -262.18365478515625, "loss": 0.123, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.008388234302401543, "rewards/margins": 0.43170255422592163, "rewards/rejected": -0.4400908350944519, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.924325304226745e-06, "logits/chosen": -1.544345736503601, "logits/rejected": -1.0032846927642822, "logps/chosen": -304.28668212890625, "logps/rejected": -205.69210815429688, "loss": 0.1941, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11076553165912628, "rewards/margins": 0.2719104588031769, "rewards/rejected": -0.38267600536346436, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.6050885915756226, "logits/rejected": -1.3113044500350952, "logps/chosen": -191.19522094726562, "logps/rejected": -226.1024932861328, "loss": 0.1883, "rewards/accuracies": 0.75, "rewards/chosen": -0.07253624498844147, "rewards/margins": 0.2440672218799591, "rewards/rejected": -0.31660348176956177, "step": 650 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.586315393447876, "logits/rejected": -1.435160756111145, "logps/chosen": -202.68026733398438, "logps/rejected": -291.9640197753906, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/chosen": 0.04099782556295395, "rewards/margins": 0.39952850341796875, "rewards/rejected": -0.3585307002067566, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.9063353863980565e-06, "logits/chosen": -1.5614166259765625, "logits/rejected": -1.4164257049560547, "logps/chosen": -227.6790008544922, "logps/rejected": -290.1402893066406, "loss": 0.1119, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13384926319122314, "rewards/margins": 0.3537839353084564, "rewards/rejected": -0.48763322830200195, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.899921037021719e-06, "logits/chosen": -1.6380398273468018, "logits/rejected": -1.1617525815963745, "logps/chosen": -242.23193359375, "logps/rejected": -266.6896057128906, "loss": 0.1565, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10104341804981232, "rewards/margins": 0.41885095834732056, "rewards/rejected": -0.5198943614959717, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.5904242992401123, "logits/rejected": -1.138726830482483, "logps/chosen": -317.62701416015625, "logps/rejected": -297.424072265625, "loss": 0.163, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16763003170490265, "rewards/margins": 0.35251811146736145, "rewards/rejected": -0.5201481580734253, "step": 690 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.7436052560806274, "logits/rejected": -1.1463674306869507, "logps/chosen": -322.777587890625, "logps/rejected": -294.9422912597656, "loss": 0.1452, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07538704574108124, "rewards/margins": 0.40292349457740784, "rewards/rejected": -0.4783105254173279, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.879432639152935e-06, "logits/chosen": -1.7265838384628296, "logits/rejected": -1.3548028469085693, "logps/chosen": -256.05902099609375, "logps/rejected": -303.35430908203125, "loss": 0.1221, "rewards/accuracies": 0.875, "rewards/chosen": -0.07280706614255905, "rewards/margins": 0.35696297883987427, "rewards/rejected": -0.4297700524330139, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.704933524131775, "logits/rejected": -1.3638499975204468, "logps/chosen": -240.88784790039062, "logps/rejected": -307.12860107421875, "loss": 0.1792, "rewards/accuracies": 0.875, "rewards/chosen": -0.04838447645306587, "rewards/margins": 0.36189574003219604, "rewards/rejected": -0.41028016805648804, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.864741878038218e-06, "logits/chosen": -1.490235686302185, "logits/rejected": -1.2437798976898193, "logps/chosen": -210.82626342773438, "logps/rejected": -235.6994171142578, "loss": 0.1351, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02063031867146492, "rewards/margins": 0.4025086462497711, "rewards/rejected": -0.42313894629478455, "step": 730 }, { "epoch": 0.2, "learning_rate": 4.857088831287158e-06, "logits/chosen": -1.521240234375, "logits/rejected": -1.1565624475479126, "logps/chosen": -254.61605834960938, "logps/rejected": -277.0546569824219, "loss": 0.167, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.011497171595692635, "rewards/margins": 0.3846563696861267, "rewards/rejected": -0.3731592297554016, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.518618106842041, "logits/rejected": -0.9731992483139038, "logps/chosen": -308.2921142578125, "logps/rejected": -279.6849365234375, "loss": 0.1304, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.006476040929555893, "rewards/margins": 0.32605165243148804, "rewards/rejected": -0.31957560777664185, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.841170720873723e-06, "logits/chosen": -1.4644962549209595, "logits/rejected": -0.6850159764289856, "logps/chosen": -292.0091247558594, "logps/rejected": -256.04095458984375, "loss": 0.171, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06350520253181458, "rewards/margins": 0.3185378611087799, "rewards/rejected": -0.3820430338382721, "step": 760 }, { "epoch": 0.21, "learning_rate": 4.832907036453647e-06, "logits/chosen": -1.5311912298202515, "logits/rejected": -1.4509427547454834, "logps/chosen": -149.7740478515625, "logps/rejected": -253.37808227539062, "loss": 0.1809, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.014500129036605358, "rewards/margins": 0.3270939886569977, "rewards/rejected": -0.3415941298007965, "step": 770 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.3183726072311401, "logits/rejected": -1.2068135738372803, "logps/chosen": -227.69140625, "logps/rejected": -288.60577392578125, "loss": 0.1621, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.009285451844334602, "rewards/margins": 0.39103344082832336, "rewards/rejected": -0.3817480206489563, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.815773989205165e-06, "logits/chosen": -1.6014219522476196, "logits/rejected": -1.1616547107696533, "logps/chosen": -275.4478454589844, "logps/rejected": -282.85992431640625, "loss": 0.1306, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.057247720658779144, "rewards/margins": 0.3137260377407074, "rewards/rejected": -0.3709737956523895, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.7149804830551147, "logits/rejected": -1.0609508752822876, "logps/chosen": -247.8963165283203, "logps/rejected": -251.2998504638672, "loss": 0.1311, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.028542649000883102, "rewards/margins": 0.3868168890476227, "rewards/rejected": -0.4153594970703125, "step": 800 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.5295007228851318, "logits/rejected": -1.3165438175201416, "logps/chosen": -269.34539794921875, "logps/rejected": -316.3207702636719, "loss": 0.1328, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.05052419751882553, "rewards/margins": 0.3775237798690796, "rewards/rejected": -0.4280479848384857, "step": 810 }, { "epoch": 0.22, "learning_rate": 4.788571486639948e-06, "logits/chosen": -1.4131073951721191, "logits/rejected": -1.1108410358428955, "logps/chosen": -291.181640625, "logps/rejected": -362.20416259765625, "loss": 0.1235, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13415458798408508, "rewards/margins": 0.4179055094718933, "rewards/rejected": -0.5520601272583008, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.779106329331665e-06, "logits/chosen": -1.445796251296997, "logits/rejected": -1.2411746978759766, "logps/chosen": -247.7615203857422, "logps/rejected": -265.7200927734375, "loss": 0.1505, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.047227971255779266, "rewards/margins": 0.3476831614971161, "rewards/rejected": -0.39491117000579834, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.595717430114746, "logits/rejected": -1.014550805091858, "logps/chosen": -277.58428955078125, "logps/rejected": -247.3020477294922, "loss": 0.1934, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07608959078788757, "rewards/margins": 0.350595623254776, "rewards/rejected": -0.4266851842403412, "step": 840 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.7956939935684204, "logits/rejected": -1.1622785329818726, "logps/chosen": -268.0885009765625, "logps/rejected": -260.65069580078125, "loss": 0.1546, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11695736646652222, "rewards/margins": 0.3590616285800934, "rewards/rejected": -0.4760190546512604, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.749529369216246e-06, "logits/chosen": -1.682879090309143, "logits/rejected": -1.2164032459259033, "logps/chosen": -253.8575439453125, "logps/rejected": -316.49078369140625, "loss": 0.1558, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07984492182731628, "rewards/margins": 0.36469632387161255, "rewards/rejected": -0.44454121589660645, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.8441412448883057, "logits/rejected": -1.0908098220825195, "logps/chosen": -320.3082580566406, "logps/rejected": -322.8711853027344, "loss": 0.1675, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.027216767892241478, "rewards/margins": 0.3997232913970947, "rewards/rejected": -0.42694005370140076, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.7288354071380415e-06, "logits/chosen": -1.5839688777923584, "logits/rejected": -1.1818835735321045, "logps/chosen": -305.91607666015625, "logps/rejected": -304.3662109375, "loss": 0.165, "rewards/accuracies": 0.875, "rewards/chosen": -0.08447151631116867, "rewards/margins": 0.3237994313240051, "rewards/rejected": -0.4082708954811096, "step": 880 }, { "epoch": 0.24, "learning_rate": 4.7181982937661485e-06, "logits/chosen": -1.5419700145721436, "logits/rejected": -1.2130048274993896, "logps/chosen": -266.9795837402344, "logps/rejected": -272.76708984375, "loss": 0.1834, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0005973346414975822, "rewards/margins": 0.3700031042098999, "rewards/rejected": -0.3694057762622833, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.478826880455017, "logits/rejected": -1.1886264085769653, "logps/chosen": -249.00161743164062, "logps/rejected": -311.38360595703125, "loss": 0.1527, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.010296178050339222, "rewards/margins": 0.27028435468673706, "rewards/rejected": -0.2599882185459137, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.696348410599244e-06, "logits/chosen": -1.6099306344985962, "logits/rejected": -1.0822367668151855, "logps/chosen": -259.72216796875, "logps/rejected": -275.6427001953125, "loss": 0.1892, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.02974315918982029, "rewards/margins": 0.31014934182167053, "rewards/rejected": -0.2804061770439148, "step": 910 }, { "epoch": 0.25, "learning_rate": 4.685137534011549e-06, "logits/chosen": -1.7383455038070679, "logits/rejected": -1.079681158065796, "logps/chosen": -249.6421661376953, "logps/rejected": -230.7484893798828, "loss": 0.1554, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0027087554335594177, "rewards/margins": 0.32712188363075256, "rewards/rejected": -0.32441315054893494, "step": 920 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.6007206439971924, "logits/rejected": -1.1181820631027222, "logps/chosen": -323.9920349121094, "logps/rejected": -341.07568359375, "loss": 0.137, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.006621385924518108, "rewards/margins": 0.36024874448776245, "rewards/rejected": -0.36687013506889343, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.662148767637578e-06, "logits/chosen": -1.8025529384613037, "logits/rejected": -1.0332825183868408, "logps/chosen": -384.01666259765625, "logps/rejected": -335.40374755859375, "loss": 0.1161, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06002165004611015, "rewards/margins": 0.43708348274230957, "rewards/rejected": -0.37706178426742554, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.8271507024765015, "logits/rejected": -1.211963415145874, "logps/chosen": -328.3116760253906, "logps/rejected": -304.9596252441406, "loss": 0.1885, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.017533788457512856, "rewards/margins": 0.3359457552433014, "rewards/rejected": -0.3534795641899109, "step": 950 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.4427958726882935, "logits/rejected": -1.1035863161087036, "logps/chosen": -261.233154296875, "logps/rejected": -296.86358642578125, "loss": 0.1704, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11386547982692719, "rewards/margins": 0.3565741181373596, "rewards/rejected": -0.470439612865448, "step": 960 }, { "epoch": 0.26, "learning_rate": 4.626263146105875e-06, "logits/chosen": -1.6715164184570312, "logits/rejected": -1.2049211263656616, "logps/chosen": -267.9721374511719, "logps/rejected": -272.67218017578125, "loss": 0.1604, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14121413230895996, "rewards/margins": 0.35565823316574097, "rewards/rejected": -0.49687233567237854, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.613931409386196e-06, "logits/chosen": -1.614467978477478, "logits/rejected": -1.3309428691864014, "logps/chosen": -282.2423400878906, "logps/rejected": -315.1558532714844, "loss": 0.1396, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09741922467947006, "rewards/margins": 0.3464723527431488, "rewards/rejected": -0.44389158487319946, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.6913681030273438, "logits/rejected": -1.0562645196914673, "logps/chosen": -260.50457763671875, "logps/rejected": -236.1143798828125, "loss": 0.252, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.14730839431285858, "rewards/margins": 0.2597261965274811, "rewards/rejected": -0.40703457593917847, "step": 990 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.7020518779754639, "logits/rejected": -1.1313974857330322, "logps/chosen": -307.2850646972656, "logps/rejected": -269.5376892089844, "loss": 0.1474, "rewards/accuracies": 0.875, "rewards/chosen": -0.04724062234163284, "rewards/margins": 0.38787880539894104, "rewards/rejected": -0.4351194500923157, "step": 1000 }, { "epoch": 0.27, "learning_rate": 4.575841568909494e-06, "logits/chosen": -1.3848850727081299, "logits/rejected": -1.0428290367126465, "logps/chosen": -240.163330078125, "logps/rejected": -317.8233337402344, "loss": 0.2369, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.028999319300055504, "rewards/margins": 0.30743494629859924, "rewards/rejected": -0.3364342451095581, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.3958890438079834, "logits/rejected": -1.2130458354949951, "logps/chosen": -203.06283569335938, "logps/rejected": -265.59600830078125, "loss": 0.1827, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.020870503038167953, "rewards/margins": 0.3209065794944763, "rewards/rejected": -0.34177708625793457, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.549547190300622e-06, "logits/chosen": -1.5980967283248901, "logits/rejected": -1.1070952415466309, "logps/chosen": -262.9022216796875, "logps/rejected": -253.2421112060547, "loss": 0.1491, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.010922988876700401, "rewards/margins": 0.3387434184551239, "rewards/rejected": -0.34966641664505005, "step": 1030 }, { "epoch": 0.28, "learning_rate": 4.536133049620143e-06, "logits/chosen": -1.7377541065216064, "logits/rejected": -1.0692071914672852, "logps/chosen": -338.3387145996094, "logps/rejected": -277.5804443359375, "loss": 0.1656, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.01836409978568554, "rewards/margins": 0.3910614550113678, "rewards/rejected": -0.4094255566596985, "step": 1040 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.4216773509979248, "logits/rejected": -0.8998391032218933, "logps/chosen": -255.6497039794922, "logps/rejected": -270.85980224609375, "loss": 0.1628, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.049015216529369354, "rewards/margins": 0.348868191242218, "rewards/rejected": -0.3978833854198456, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.508776676821739e-06, "logits/chosen": -1.501990556716919, "logits/rejected": -1.2285462617874146, "logps/chosen": -260.5689392089844, "logps/rejected": -342.2594299316406, "loss": 0.1413, "rewards/accuracies": 0.875, "rewards/chosen": -0.14666099846363068, "rewards/margins": 0.32123270630836487, "rewards/rejected": -0.46789368987083435, "step": 1060 }, { "epoch": 0.29, "learning_rate": 4.494836815027022e-06, "logits/chosen": -1.86409592628479, "logits/rejected": -1.2688671350479126, "logps/chosen": -265.83343505859375, "logps/rejected": -272.33636474609375, "loss": 0.1657, "rewards/accuracies": 0.75, "rewards/chosen": -0.08181764930486679, "rewards/margins": 0.3258339464664459, "rewards/rejected": -0.4076516032218933, "step": 1070 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.7254140377044678, "logits/rejected": -1.1994249820709229, "logps/chosen": -274.38104248046875, "logps/rejected": -289.0826416015625, "loss": 0.1598, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08081559091806412, "rewards/margins": 0.4157637655735016, "rewards/rejected": -0.4965793192386627, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.466439779715696e-06, "logits/chosen": -1.5560534000396729, "logits/rejected": -1.0609302520751953, "logps/chosen": -320.48931884765625, "logps/rejected": -237.6038055419922, "loss": 0.1151, "rewards/accuracies": 0.875, "rewards/chosen": -0.13456735014915466, "rewards/margins": 0.3497200012207031, "rewards/rejected": -0.48428741097450256, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.5969184637069702, "logits/rejected": -1.100097417831421, "logps/chosen": -258.00811767578125, "logps/rejected": -307.08203125, "loss": 0.1526, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07336065918207169, "rewards/margins": 0.3625486493110657, "rewards/rejected": -0.43590933084487915, "step": 1100 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.6261990070343018, "logits/rejected": -0.9406700134277344, "logps/chosen": -304.49530029296875, "logps/rejected": -281.0793151855469, "loss": 0.1026, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03398927301168442, "rewards/margins": 0.4203387200832367, "rewards/rejected": -0.4543279707431793, "step": 1110 }, { "epoch": 0.3, "learning_rate": 4.422569512021332e-06, "logits/chosen": -1.5429813861846924, "logits/rejected": -0.9988912343978882, "logps/chosen": -248.56948852539062, "logps/rejected": -265.1681823730469, "loss": 0.0936, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.09713175892829895, "rewards/margins": 0.3802811801433563, "rewards/rejected": -0.4774129390716553, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.407611219118363e-06, "logits/chosen": -1.5700933933258057, "logits/rejected": -1.0617953538894653, "logps/chosen": -233.2037353515625, "logps/rejected": -200.68634033203125, "loss": 0.144, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11756577342748642, "rewards/margins": 0.31560632586479187, "rewards/rejected": -0.4331720769405365, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.8913711309432983, "logits/rejected": -1.039268970489502, "logps/chosen": -353.1129455566406, "logps/rejected": -297.39617919921875, "loss": 0.1334, "rewards/accuracies": 0.875, "rewards/chosen": -0.08230503648519516, "rewards/margins": 0.47759518027305603, "rewards/rejected": -0.5599002838134766, "step": 1140 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.538246512413025, "logits/rejected": -1.0102214813232422, "logps/chosen": -364.6609802246094, "logps/rejected": -325.3028869628906, "loss": 0.1223, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1402122676372528, "rewards/margins": 0.35043373703956604, "rewards/rejected": -0.49064597487449646, "step": 1150 }, { "epoch": 0.31, "learning_rate": 4.361749873698707e-06, "logits/chosen": -1.58868408203125, "logits/rejected": -1.048269271850586, "logps/chosen": -210.7605743408203, "logps/rejected": -217.05050659179688, "loss": 0.1285, "rewards/accuracies": 0.75, "rewards/chosen": -0.09829956293106079, "rewards/margins": 0.3576180338859558, "rewards/rejected": -0.4559175372123718, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.5400859117507935, "logits/rejected": -0.9626834988594055, "logps/chosen": -281.67779541015625, "logps/rejected": -280.89837646484375, "loss": 0.2163, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11693022400140762, "rewards/margins": 0.3672144412994385, "rewards/rejected": -0.4841446876525879, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.330366868729376e-06, "logits/chosen": -1.5415042638778687, "logits/rejected": -1.026490569114685, "logps/chosen": -233.54293823242188, "logps/rejected": -307.4654846191406, "loss": 0.1482, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.061702560633420944, "rewards/margins": 0.3937299847602844, "rewards/rejected": -0.45543256402015686, "step": 1180 }, { "epoch": 0.32, "learning_rate": 4.3144367917302964e-06, "logits/chosen": -1.8853957653045654, "logits/rejected": -1.1902307271957397, "logps/chosen": -313.8409118652344, "logps/rejected": -296.0729064941406, "loss": 0.1522, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.017522500827908516, "rewards/margins": 0.3799929618835449, "rewards/rejected": -0.39751550555229187, "step": 1190 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.3914777040481567, "logits/rejected": -0.9230860471725464, "logps/chosen": -266.98321533203125, "logps/rejected": -292.6563415527344, "loss": 0.1513, "rewards/accuracies": 0.875, "rewards/chosen": -0.04734548181295395, "rewards/margins": 0.3820473849773407, "rewards/rejected": -0.42939287424087524, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.2821063899795015e-06, "logits/chosen": -1.335268259048462, "logits/rejected": -1.4020699262619019, "logps/chosen": -224.03970336914062, "logps/rejected": -348.4554138183594, "loss": 0.1196, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0435425229370594, "rewards/margins": 0.4308520257472992, "rewards/rejected": -0.4743945598602295, "step": 1210 }, { "epoch": 0.33, "learning_rate": 4.265708866531238e-06, "logits/chosen": -1.5381969213485718, "logits/rejected": -1.2920863628387451, "logps/chosen": -273.07086181640625, "logps/rejected": -312.44000244140625, "loss": 0.1688, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03625233843922615, "rewards/margins": 0.26211121678352356, "rewards/rejected": -0.29836350679397583, "step": 1220 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.5409828424453735, "logits/rejected": -1.1099916696548462, "logps/chosen": -269.79833984375, "logps/rejected": -264.7692565917969, "loss": 0.1503, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07565125823020935, "rewards/margins": 0.32862424850463867, "rewards/rejected": -0.404275506734848, "step": 1230 }, { "epoch": 0.33, "learning_rate": 4.232456278273743e-06, "logits/chosen": -1.8242772817611694, "logits/rejected": -1.0602861642837524, "logps/chosen": -371.48248291015625, "logps/rejected": -312.5940856933594, "loss": 0.1571, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10061591863632202, "rewards/margins": 0.3603518605232239, "rewards/rejected": -0.4609677791595459, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.5043697357177734, "logits/rejected": -1.3470711708068848, "logps/chosen": -348.822021484375, "logps/rejected": -378.1570739746094, "loss": 0.1618, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17215891182422638, "rewards/margins": 0.3960806727409363, "rewards/rejected": -0.5682395696640015, "step": 1250 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.1872601509094238, "logits/rejected": -0.9705106616020203, "logps/chosen": -272.1139221191406, "logps/rejected": -295.6619873046875, "loss": 0.154, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19514642655849457, "rewards/margins": 0.3168686032295227, "rewards/rejected": -0.5120150446891785, "step": 1260 }, { "epoch": 0.34, "learning_rate": 4.181455249275701e-06, "logits/chosen": -1.668910264968872, "logits/rejected": -1.1042711734771729, "logps/chosen": -373.8921813964844, "logps/rejected": -264.4703369140625, "loss": 0.2755, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1737692654132843, "rewards/margins": 0.26853370666503906, "rewards/rejected": -0.44230300188064575, "step": 1270 }, { "epoch": 0.34, "learning_rate": 4.1641615463459926e-06, "logits/chosen": -1.7166597843170166, "logits/rejected": -0.9484345316886902, "logps/chosen": -329.9338684082031, "logps/rejected": -262.08343505859375, "loss": 0.138, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15403738617897034, "rewards/margins": 0.3067986071109772, "rewards/rejected": -0.4608360230922699, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.5266129970550537, "logits/rejected": -1.1601712703704834, "logps/chosen": -353.5336608886719, "logps/rejected": -334.5635681152344, "loss": 0.1007, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.12546080350875854, "rewards/margins": 0.4080546796321869, "rewards/rejected": -0.5335155129432678, "step": 1290 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.7788454294204712, "logits/rejected": -1.1070853471755981, "logps/chosen": -301.1758728027344, "logps/rejected": -272.003662109375, "loss": 0.1172, "rewards/accuracies": 0.875, "rewards/chosen": -0.1457098424434662, "rewards/margins": 0.41548848152160645, "rewards/rejected": -0.5611982345581055, "step": 1300 }, { "epoch": 0.35, "learning_rate": 4.111421334905468e-06, "logits/chosen": -1.445004940032959, "logits/rejected": -1.095983862876892, "logps/chosen": -219.327392578125, "logps/rejected": -277.88983154296875, "loss": 0.2043, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19351212680339813, "rewards/margins": 0.2906314730644226, "rewards/rejected": -0.48414358496665955, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.4865139722824097, "logits/rejected": -1.354561686515808, "logps/chosen": -248.8349151611328, "logps/rejected": -252.2455291748047, "loss": 0.1729, "rewards/accuracies": 0.75, "rewards/chosen": -0.1259056180715561, "rewards/margins": 0.31513845920562744, "rewards/rejected": -0.4410440921783447, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.075560538069767e-06, "logits/chosen": -1.8343995809555054, "logits/rejected": -1.280969500541687, "logps/chosen": -254.92056274414062, "logps/rejected": -305.6126708984375, "loss": 0.1306, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1363029032945633, "rewards/margins": 0.37759169936180115, "rewards/rejected": -0.5138946175575256, "step": 1330 }, { "epoch": 0.36, "learning_rate": 4.05742458558068e-06, "logits/chosen": -1.6347767114639282, "logits/rejected": -1.1042792797088623, "logps/chosen": -292.10552978515625, "logps/rejected": -341.2361145019531, "loss": 0.1214, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.08132176101207733, "rewards/margins": 0.5133122801780701, "rewards/rejected": -0.5946341156959534, "step": 1340 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.6722145080566406, "logits/rejected": -1.1385935544967651, "logps/chosen": -247.4423370361328, "logps/rejected": -231.4825439453125, "loss": 0.1096, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11472739279270172, "rewards/margins": 0.34514716267585754, "rewards/rejected": -0.45987454056739807, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.020749429372286e-06, "logits/chosen": -1.7172428369522095, "logits/rejected": -1.2585347890853882, "logps/chosen": -310.28179931640625, "logps/rejected": -296.54071044921875, "loss": 0.1346, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11566674709320068, "rewards/margins": 0.3406696021556854, "rewards/rejected": -0.4563364088535309, "step": 1360 }, { "epoch": 0.37, "learning_rate": 4.002213403412492e-06, "logits/chosen": -1.479115605354309, "logits/rejected": -1.075224757194519, "logps/chosen": -250.99026489257812, "logps/rejected": -262.92755126953125, "loss": 0.1859, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14899684488773346, "rewards/margins": 0.3160502314567566, "rewards/rejected": -0.46504706144332886, "step": 1370 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.6718261241912842, "logits/rejected": -1.1382570266723633, "logps/chosen": -298.2779235839844, "logps/rejected": -322.4492492675781, "loss": 0.1162, "rewards/accuracies": 0.875, "rewards/chosen": -0.099492147564888, "rewards/margins": 0.4109002947807312, "rewards/rejected": -0.510392427444458, "step": 1380 }, { "epoch": 0.37, "learning_rate": 3.964752486015001e-06, "logits/chosen": -1.741803765296936, "logits/rejected": -1.154526948928833, "logps/chosen": -322.1985778808594, "logps/rejected": -258.2569885253906, "loss": 0.1312, "rewards/accuracies": 0.875, "rewards/chosen": -0.0690280944108963, "rewards/margins": 0.4529304504394531, "rewards/rejected": -0.5219585299491882, "step": 1390 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.4246766567230225, "logits/rejected": -1.1284135580062866, "logps/chosen": -253.0252227783203, "logps/rejected": -245.19723510742188, "loss": 0.1713, "rewards/accuracies": 0.75, "rewards/chosen": -0.10711170732975006, "rewards/margins": 0.3032890558242798, "rewards/rejected": -0.41040077805519104, "step": 1400 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.580693006515503, "logits/rejected": -1.0569651126861572, "logps/chosen": -289.4805603027344, "logps/rejected": -289.3725280761719, "loss": 0.1539, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08762092143297195, "rewards/margins": 0.33070939779281616, "rewards/rejected": -0.4183303415775299, "step": 1410 }, { "epoch": 0.38, "learning_rate": 3.907613372729916e-06, "logits/chosen": -1.6741529703140259, "logits/rejected": -0.962120532989502, "logps/chosen": -332.26080322265625, "logps/rejected": -344.08209228515625, "loss": 0.1367, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15168026089668274, "rewards/margins": 0.4351193308830261, "rewards/rejected": -0.5867995619773865, "step": 1420 }, { "epoch": 0.38, "learning_rate": 3.888320862029699e-06, "logits/chosen": -1.477236032485962, "logits/rejected": -1.1961562633514404, "logps/chosen": -251.10671997070312, "logps/rejected": -272.6755676269531, "loss": 0.1711, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10620205104351044, "rewards/margins": 0.34451884031295776, "rewards/rejected": -0.450720876455307, "step": 1430 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.5320178270339966, "logits/rejected": -1.2863932847976685, "logps/chosen": -252.96298217773438, "logps/rejected": -267.1416320800781, "loss": 0.1757, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13874351978302002, "rewards/margins": 0.3073849678039551, "rewards/rejected": -0.4461284577846527, "step": 1440 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.695948839187622, "logits/rejected": -1.3465334177017212, "logps/chosen": -256.7879333496094, "logps/rejected": -291.6668701171875, "loss": 0.1414, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08056484907865524, "rewards/margins": 0.3060542345046997, "rewards/rejected": -0.38661906123161316, "step": 1450 }, { "epoch": 0.39, "learning_rate": 3.829728312792895e-06, "logits/chosen": -1.4604734182357788, "logits/rejected": -1.1766102313995361, "logps/chosen": -228.4718780517578, "logps/rejected": -230.71182250976562, "loss": 0.2127, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10572078078985214, "rewards/margins": 0.21217215061187744, "rewards/rejected": -0.3178929388523102, "step": 1460 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.9331858158111572, "logits/rejected": -1.301509976387024, "logps/chosen": -338.3063049316406, "logps/rejected": -301.18365478515625, "loss": 0.1575, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11515772342681885, "rewards/margins": 0.33505210280418396, "rewards/rejected": -0.4502098560333252, "step": 1470 }, { "epoch": 0.39, "learning_rate": 3.790087713710179e-06, "logits/chosen": -1.7642886638641357, "logits/rejected": -1.1480679512023926, "logps/chosen": -296.9232177734375, "logps/rejected": -282.36151123046875, "loss": 0.1711, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.017070520669221878, "rewards/margins": 0.4179867208003998, "rewards/rejected": -0.43505725264549255, "step": 1480 }, { "epoch": 0.4, "learning_rate": 3.770098881416945e-06, "logits/chosen": -1.6907027959823608, "logits/rejected": -1.1980526447296143, "logps/chosen": -303.41778564453125, "logps/rejected": -332.55352783203125, "loss": 0.2434, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10351963341236115, "rewards/margins": 0.2676793932914734, "rewards/rejected": -0.37119898200035095, "step": 1490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.7876999378204346, "logits/rejected": -1.3319778442382812, "logps/chosen": -264.2928466796875, "logps/rejected": -257.09503173828125, "loss": 0.1408, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0641285628080368, "rewards/margins": 0.3521239161491394, "rewards/rejected": -0.4162525236606598, "step": 1500 }, { "epoch": 0.4, "learning_rate": 3.7297928109491765e-06, "logits/chosen": -1.5949997901916504, "logits/rejected": -1.1277220249176025, "logps/chosen": -234.57373046875, "logps/rejected": -195.37899780273438, "loss": 0.2328, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13893568515777588, "rewards/margins": 0.26205331087112427, "rewards/rejected": -0.40098896622657776, "step": 1510 }, { "epoch": 0.41, "learning_rate": 3.7094790651387414e-06, "logits/chosen": -1.6848033666610718, "logits/rejected": -1.2798172235488892, "logps/chosen": -236.25436401367188, "logps/rejected": -269.56182861328125, "loss": 0.2051, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09305624663829803, "rewards/margins": 0.26189345121383667, "rewards/rejected": -0.3549497723579407, "step": 1520 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.6376606225967407, "logits/rejected": -1.0957934856414795, "logps/chosen": -259.7792663574219, "logps/rejected": -254.48489379882812, "loss": 0.1368, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.09541145712137222, "rewards/margins": 0.31637534499168396, "rewards/rejected": -0.4117867946624756, "step": 1530 }, { "epoch": 0.41, "learning_rate": 3.668538952747236e-06, "logits/chosen": -1.534891128540039, "logits/rejected": -1.1751198768615723, "logps/chosen": -208.9001007080078, "logps/rejected": -289.44873046875, "loss": 0.1984, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.031213950365781784, "rewards/margins": 0.39292722940444946, "rewards/rejected": -0.42414116859436035, "step": 1540 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.7336101531982422, "logits/rejected": -1.0565919876098633, "logps/chosen": -326.5076599121094, "logps/rejected": -337.8768005371094, "loss": 0.1143, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.03437976911664009, "rewards/margins": 0.49583953619003296, "rewards/rejected": -0.46145981550216675, "step": 1550 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.7617276906967163, "logits/rejected": -0.987285315990448, "logps/chosen": -256.71514892578125, "logps/rejected": -230.3134002685547, "loss": 0.1108, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02552700974047184, "rewards/margins": 0.3413180708885193, "rewards/rejected": -0.3668450117111206, "step": 1560 }, { "epoch": 0.42, "learning_rate": 3.6063739030204226e-06, "logits/chosen": -1.606143593788147, "logits/rejected": -1.3145328760147095, "logps/chosen": -222.5089874267578, "logps/rejected": -284.4768981933594, "loss": 0.1263, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04628473147749901, "rewards/margins": 0.3009468913078308, "rewards/rejected": -0.3472316563129425, "step": 1570 }, { "epoch": 0.42, "learning_rate": 3.5854580913255706e-06, "logits/chosen": -1.6054519414901733, "logits/rejected": -1.0964720249176025, "logps/chosen": -284.88446044921875, "logps/rejected": -234.81417846679688, "loss": 0.1716, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.049481119960546494, "rewards/margins": 0.32506829500198364, "rewards/rejected": -0.37454938888549805, "step": 1580 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.707132339477539, "logits/rejected": -1.1808102130889893, "logps/chosen": -215.00680541992188, "logps/rejected": -242.06338500976562, "loss": 0.1289, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.016758020967245102, "rewards/margins": 0.34254926443099976, "rewards/rejected": -0.35930731892585754, "step": 1590 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.423949122428894, "logits/rejected": -0.9146944284439087, "logps/chosen": -251.60986328125, "logps/rejected": -246.3702850341797, "loss": 0.1596, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.025834297761321068, "rewards/margins": 0.4158453941345215, "rewards/rejected": -0.39001110196113586, "step": 1600 }, { "epoch": 0.43, "learning_rate": 3.522153641615345e-06, "logits/chosen": -1.6994764804840088, "logits/rejected": -1.346040964126587, "logps/chosen": -256.00592041015625, "logps/rejected": -256.01300048828125, "loss": 0.1082, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.021267935633659363, "rewards/margins": 0.4258691370487213, "rewards/rejected": -0.40460118651390076, "step": 1610 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.6546297073364258, "logits/rejected": -1.3536561727523804, "logps/chosen": -195.32785034179688, "logps/rejected": -239.7685546875, "loss": 0.1337, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.07065526396036148, "rewards/margins": 0.36889463663101196, "rewards/rejected": -0.2982393801212311, "step": 1620 }, { "epoch": 0.43, "learning_rate": 3.4795047994562463e-06, "logits/chosen": -1.8300390243530273, "logits/rejected": -1.308199167251587, "logps/chosen": -241.9082489013672, "logps/rejected": -253.09469604492188, "loss": 0.1305, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.07730422914028168, "rewards/margins": 0.380765438079834, "rewards/rejected": -0.3034612536430359, "step": 1630 }, { "epoch": 0.44, "learning_rate": 3.458052147242494e-06, "logits/chosen": -1.8392903804779053, "logits/rejected": -1.4302622079849243, "logps/chosen": -275.21636962890625, "logps/rejected": -269.1678161621094, "loss": 0.1645, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.058570653200149536, "rewards/margins": 0.3049880862236023, "rewards/rejected": -0.24641743302345276, "step": 1640 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.8582258224487305, "logits/rejected": -1.2484705448150635, "logps/chosen": -289.6882019042969, "logps/rejected": -296.31927490234375, "loss": 0.1642, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04015364870429039, "rewards/margins": 0.35936877131462097, "rewards/rejected": -0.3192150890827179, "step": 1650 }, { "epoch": 0.44, "learning_rate": 3.4148996743295305e-06, "logits/chosen": -1.3779845237731934, "logits/rejected": -0.940921425819397, "logps/chosen": -277.53009033203125, "logps/rejected": -309.68157958984375, "loss": 0.1488, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04079504683613777, "rewards/margins": 0.37808963656425476, "rewards/rejected": -0.3372945785522461, "step": 1660 }, { "epoch": 0.45, "learning_rate": 3.3932035926241103e-06, "logits/chosen": -1.6403146982192993, "logits/rejected": -1.2490508556365967, "logps/chosen": -270.91436767578125, "logps/rejected": -305.6242370605469, "loss": 0.121, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.047386664897203445, "rewards/margins": 0.4330657422542572, "rewards/rejected": -0.38567906618118286, "step": 1670 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.4912313222885132, "logits/rejected": -1.1307358741760254, "logps/chosen": -237.8841552734375, "logps/rejected": -263.740966796875, "loss": 0.15, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.04139203205704689, "rewards/margins": 0.30756354331970215, "rewards/rejected": -0.26617151498794556, "step": 1680 }, { "epoch": 0.45, "learning_rate": 3.349581137957604e-06, "logits/chosen": -1.5840933322906494, "logits/rejected": -1.2103346586227417, "logps/chosen": -267.5755920410156, "logps/rejected": -250.2428741455078, "loss": 0.1769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.055591464042663574, "rewards/margins": 0.32637280225753784, "rewards/rejected": -0.27078136801719666, "step": 1690 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.5938819646835327, "logits/rejected": -1.0726745128631592, "logps/chosen": -224.1567840576172, "logps/rejected": -237.98696899414062, "loss": 0.1735, "rewards/accuracies": 0.875, "rewards/chosen": 0.08412063121795654, "rewards/margins": 0.37582629919052124, "rewards/rejected": -0.2917056381702423, "step": 1700 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.77345871925354, "logits/rejected": -1.088060736656189, "logps/chosen": -291.2659606933594, "logps/rejected": -249.79153442382812, "loss": 0.118, "rewards/accuracies": 0.875, "rewards/chosen": 0.06221754476428032, "rewards/margins": 0.37834566831588745, "rewards/rejected": -0.31612807512283325, "step": 1710 }, { "epoch": 0.46, "learning_rate": 3.2836001237702993e-06, "logits/chosen": -1.7594726085662842, "logits/rejected": -1.1787726879119873, "logps/chosen": -252.16256713867188, "logps/rejected": -276.36224365234375, "loss": 0.1346, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.03996184095740318, "rewards/margins": 0.4077722430229187, "rewards/rejected": -0.36781036853790283, "step": 1720 }, { "epoch": 0.46, "learning_rate": 3.2614681135640696e-06, "logits/chosen": -1.6873159408569336, "logits/rejected": -1.11794114112854, "logps/chosen": -331.9217529296875, "logps/rejected": -301.14129638671875, "loss": 0.1049, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.008476448245346546, "rewards/margins": 0.42262354493141174, "rewards/rejected": -0.4310999810695648, "step": 1730 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.977207899093628, "logits/rejected": -1.3080086708068848, "logps/chosen": -304.4135437011719, "logps/rejected": -268.8409729003906, "loss": 0.1299, "rewards/accuracies": 0.875, "rewards/chosen": 0.01740439608693123, "rewards/margins": 0.3875492513179779, "rewards/rejected": -0.3701448440551758, "step": 1740 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.6914336681365967, "logits/rejected": -1.2114366292953491, "logps/chosen": -291.5057067871094, "logps/rejected": -306.0625, "loss": 0.1202, "rewards/accuracies": 0.875, "rewards/chosen": 0.02021079882979393, "rewards/margins": 0.4694185256958008, "rewards/rejected": -0.44920778274536133, "step": 1750 }, { "epoch": 0.47, "learning_rate": 3.1946839124862873e-06, "logits/chosen": -1.695512056350708, "logits/rejected": -1.1958303451538086, "logps/chosen": -275.1326599121094, "logps/rejected": -306.881591796875, "loss": 0.1221, "rewards/accuracies": 0.875, "rewards/chosen": -0.05711999535560608, "rewards/margins": 0.4282289147377014, "rewards/rejected": -0.4853488802909851, "step": 1760 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.6805217266082764, "logits/rejected": -1.306983232498169, "logps/chosen": -227.75039672851562, "logps/rejected": -306.4040222167969, "loss": 0.139, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.028795797377824783, "rewards/margins": 0.3543476462364197, "rewards/rejected": -0.38314345479011536, "step": 1770 }, { "epoch": 0.47, "learning_rate": 3.149856938451094e-06, "logits/chosen": -1.7246391773223877, "logits/rejected": -1.169914960861206, "logps/chosen": -301.69305419921875, "logps/rejected": -275.41375732421875, "loss": 0.1121, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.026128137484192848, "rewards/margins": 0.3503456711769104, "rewards/rejected": -0.3242174983024597, "step": 1780 }, { "epoch": 0.48, "learning_rate": 3.127358017790132e-06, "logits/chosen": -1.6676828861236572, "logits/rejected": -1.0200117826461792, "logps/chosen": -245.4638214111328, "logps/rejected": -210.855224609375, "loss": 0.1885, "rewards/accuracies": 0.875, "rewards/chosen": 0.012035062536597252, "rewards/margins": 0.33619141578674316, "rewards/rejected": -0.32415634393692017, "step": 1790 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.7602665424346924, "logits/rejected": -1.1496754884719849, "logps/chosen": -310.8785095214844, "logps/rejected": -265.19781494140625, "loss": 0.1373, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.04427367448806763, "rewards/margins": 0.34181416034698486, "rewards/rejected": -0.29754048585891724, "step": 1800 }, { "epoch": 0.48, "learning_rate": 3.082199056232015e-06, "logits/chosen": -1.7544816732406616, "logits/rejected": -1.1616899967193604, "logps/chosen": -374.69097900390625, "logps/rejected": -291.6187744140625, "loss": 0.1528, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.029999136924743652, "rewards/margins": 0.37635478377342224, "rewards/rejected": -0.346355676651001, "step": 1810 }, { "epoch": 0.49, "learning_rate": 3.059542928183079e-06, "logits/chosen": -1.6221444606781006, "logits/rejected": -1.0722931623458862, "logps/chosen": -284.0110168457031, "logps/rejected": -225.7612762451172, "loss": 0.1678, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0024113513063639402, "rewards/margins": 0.3399294316768646, "rewards/rejected": -0.34234076738357544, "step": 1820 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.583720088005066, "logits/rejected": -0.8212550282478333, "logps/chosen": -322.15496826171875, "logps/rejected": -311.7652282714844, "loss": 0.1152, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.02223331294953823, "rewards/margins": 0.37121888995170593, "rewards/rejected": -0.34898558259010315, "step": 1830 }, { "epoch": 0.49, "learning_rate": 3.0140871927018466e-06, "logits/chosen": -1.6727044582366943, "logits/rejected": -1.3792378902435303, "logps/chosen": -273.42938232421875, "logps/rejected": -241.69912719726562, "loss": 0.1723, "rewards/accuracies": 0.75, "rewards/chosen": 0.006121881306171417, "rewards/margins": 0.2609279155731201, "rewards/rejected": -0.2548060119152069, "step": 1840 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.4808580875396729, "logits/rejected": -1.312792181968689, "logps/chosen": -254.2381134033203, "logps/rejected": -339.28192138671875, "loss": 0.1501, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029569197446107864, "rewards/margins": 0.403090238571167, "rewards/rejected": -0.43265944719314575, "step": 1850 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.459582805633545, "logits/rejected": -1.37647545337677, "logps/chosen": -223.28524780273438, "logps/rejected": -268.646484375, "loss": 0.2247, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.08847669512033463, "rewards/margins": 0.2632191777229309, "rewards/rejected": -0.3516958951950073, "step": 1860 }, { "epoch": 0.5, "learning_rate": 2.945574459442917e-06, "logits/chosen": -1.753136396408081, "logits/rejected": -1.0855618715286255, "logps/chosen": -294.66461181640625, "logps/rejected": -320.9414978027344, "loss": 0.1462, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02983088418841362, "rewards/margins": 0.34618809819221497, "rewards/rejected": -0.3760189414024353, "step": 1870 }, { "epoch": 0.5, "learning_rate": 2.922657025129185e-06, "logits/chosen": -1.909767508506775, "logits/rejected": -1.1421029567718506, "logps/chosen": -255.0342559814453, "logps/rejected": -262.11151123046875, "loss": 0.0831, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0237131267786026, "rewards/margins": 0.4205241799354553, "rewards/rejected": -0.3968110680580139, "step": 1880 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.6980245113372803, "logits/rejected": -1.4830033779144287, "logps/chosen": -216.7667694091797, "logps/rejected": -314.1409912109375, "loss": 0.1811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03470195457339287, "rewards/margins": 0.33328303694725037, "rewards/rejected": -0.3679850101470947, "step": 1890 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.5797358751296997, "logits/rejected": -1.1642903089523315, "logps/chosen": -249.17001342773438, "logps/rejected": -283.16326904296875, "loss": 0.1434, "rewards/accuracies": 0.75, "rewards/chosen": 0.01058603823184967, "rewards/margins": 0.3307330906391144, "rewards/rejected": -0.32014700770378113, "step": 1900 }, { "epoch": 0.51, "learning_rate": 2.8536929511919227e-06, "logits/chosen": -1.848733901977539, "logits/rejected": -1.0242502689361572, "logps/chosen": -324.2806091308594, "logps/rejected": -243.5651092529297, "loss": 0.118, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.039272844791412354, "rewards/margins": 0.3580833077430725, "rewards/rejected": -0.31881046295166016, "step": 1910 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.5901503562927246, "logits/rejected": -1.3664997816085815, "logps/chosen": -298.6604309082031, "logps/rejected": -299.3940734863281, "loss": 0.122, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.025997215881943703, "rewards/margins": 0.39433664083480835, "rewards/rejected": -0.368339478969574, "step": 1920 }, { "epoch": 0.51, "learning_rate": 2.807560351340302e-06, "logits/chosen": -1.6624196767807007, "logits/rejected": -1.1935114860534668, "logps/chosen": -271.36151123046875, "logps/rejected": -300.11187744140625, "loss": 0.134, "rewards/accuracies": 0.875, "rewards/chosen": 0.02754376269876957, "rewards/margins": 0.36226534843444824, "rewards/rejected": -0.33472156524658203, "step": 1930 }, { "epoch": 0.52, "learning_rate": 2.7844530781306544e-06, "logits/chosen": -1.7045570611953735, "logits/rejected": -0.918795108795166, "logps/chosen": -334.0121154785156, "logps/rejected": -271.56024169921875, "loss": 0.134, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.007435244508087635, "rewards/margins": 0.3523639440536499, "rewards/rejected": -0.3597991466522217, "step": 1940 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.7817418575286865, "logits/rejected": -1.3035436868667603, "logps/chosen": -290.12249755859375, "logps/rejected": -248.0795135498047, "loss": 0.1305, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0643903911113739, "rewards/margins": 0.3860814571380615, "rewards/rejected": -0.32169100642204285, "step": 1950 }, { "epoch": 0.52, "learning_rate": 2.738166595746554e-06, "logits/chosen": -1.5566781759262085, "logits/rejected": -1.3110841512680054, "logps/chosen": -178.944091796875, "logps/rejected": -215.97299194335938, "loss": 0.1811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07068820297718048, "rewards/margins": 0.3479788601398468, "rewards/rejected": -0.2772907018661499, "step": 1960 }, { "epoch": 0.53, "learning_rate": 2.7149913971156105e-06, "logits/chosen": -1.5236642360687256, "logits/rejected": -1.2001490592956543, "logps/chosen": -231.1402130126953, "logps/rejected": -246.59591674804688, "loss": 0.1994, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.03314649313688278, "rewards/margins": 0.3443582057952881, "rewards/rejected": -0.3112117052078247, "step": 1970 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.6045347452163696, "logits/rejected": -1.246459722518921, "logps/chosen": -213.5222625732422, "logps/rejected": -257.655029296875, "loss": 0.1179, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.06492890417575836, "rewards/margins": 0.34639525413513184, "rewards/rejected": -0.2814663350582123, "step": 1980 }, { "epoch": 0.53, "learning_rate": 2.668587125005663e-06, "logits/chosen": -1.5607236623764038, "logits/rejected": -1.0532560348510742, "logps/chosen": -249.9466094970703, "logps/rejected": -285.72845458984375, "loss": 0.1563, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.05708497762680054, "rewards/margins": 0.33061715960502625, "rewards/rejected": -0.2735321819782257, "step": 1990 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.512621521949768, "logits/rejected": -1.4036608934402466, "logps/chosen": -217.87484741210938, "logps/rejected": -261.9187316894531, "loss": 0.176, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.025976067408919334, "rewards/margins": 0.3155084252357483, "rewards/rejected": -0.3414844870567322, "step": 2000 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.547397494316101, "logits/rejected": -1.2165327072143555, "logps/chosen": -319.05810546875, "logps/rejected": -346.9829406738281, "loss": 0.1403, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.00498613715171814, "rewards/margins": 0.37501031160354614, "rewards/rejected": -0.370024174451828, "step": 2010 }, { "epoch": 0.54, "learning_rate": 2.5988761950959133e-06, "logits/chosen": -1.7019367218017578, "logits/rejected": -1.3284026384353638, "logps/chosen": -230.9080047607422, "logps/rejected": -242.2611846923828, "loss": 0.2393, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.054250530898571014, "rewards/margins": 0.22886402904987335, "rewards/rejected": -0.28311458230018616, "step": 2020 }, { "epoch": 0.54, "learning_rate": 2.575619398465402e-06, "logits/chosen": -1.6088831424713135, "logits/rejected": -1.3118395805358887, "logps/chosen": -209.9443817138672, "logps/rejected": -267.1958312988281, "loss": 0.2087, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.013560935854911804, "rewards/margins": 0.3352377414703369, "rewards/rejected": -0.3216767907142639, "step": 2030 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.6162147521972656, "logits/rejected": -1.2510854005813599, "logps/chosen": -231.2008819580078, "logps/rejected": -321.4208068847656, "loss": 0.1237, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.03159638121724129, "rewards/margins": 0.33668285608291626, "rewards/rejected": -0.30508649349212646, "step": 2040 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.6034374237060547, "logits/rejected": -1.2775676250457764, "logps/chosen": -293.4057312011719, "logps/rejected": -363.25360107421875, "loss": 0.1661, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.00653398921713233, "rewards/margins": 0.32143646478652954, "rewards/rejected": -0.3279704451560974, "step": 2050 }, { "epoch": 0.55, "learning_rate": 2.5058177589223766e-06, "logits/chosen": -1.5212050676345825, "logits/rejected": -1.210669755935669, "logps/chosen": -240.904296875, "logps/rejected": -274.26605224609375, "loss": 0.1595, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.03204118087887764, "rewards/margins": 0.3350418508052826, "rewards/rejected": -0.3030007481575012, "step": 2060 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.7197357416152954, "logits/rejected": -1.0405242443084717, "logps/chosen": -314.89984130859375, "logps/rejected": -287.49224853515625, "loss": 0.0899, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.03998409956693649, "rewards/margins": 0.43929505348205566, "rewards/rejected": -0.39931100606918335, "step": 2070 }, { "epoch": 0.55, "learning_rate": 2.4592774518353858e-06, "logits/chosen": -1.688256025314331, "logits/rejected": -1.1770254373550415, "logps/chosen": -238.84921264648438, "logps/rejected": -256.46063232421875, "loss": 0.142, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.046583134680986404, "rewards/margins": 0.42613476514816284, "rewards/rejected": -0.37955164909362793, "step": 2080 }, { "epoch": 0.56, "learning_rate": 2.436011582865945e-06, "logits/chosen": -1.4531993865966797, "logits/rejected": -0.9476866722106934, "logps/chosen": -272.5257568359375, "logps/rejected": -301.6541442871094, "loss": 0.107, "rewards/accuracies": 0.875, "rewards/chosen": 0.08881069719791412, "rewards/margins": 0.4413018226623535, "rewards/rejected": -0.352491170167923, "step": 2090 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.719260811805725, "logits/rejected": -1.2589495182037354, "logps/chosen": -293.53753662109375, "logps/rejected": -286.5969543457031, "loss": 0.1363, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.01633891463279724, "rewards/margins": 0.3913685381412506, "rewards/rejected": -0.37502965331077576, "step": 2100 }, { "epoch": 0.56, "learning_rate": 2.3894984933853734e-06, "logits/chosen": -1.6526820659637451, "logits/rejected": -1.3534865379333496, "logps/chosen": -206.388671875, "logps/rejected": -241.95187377929688, "loss": 0.1525, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.007084892597049475, "rewards/margins": 0.36700910329818726, "rewards/rejected": -0.35992416739463806, "step": 2110 }, { "epoch": 0.57, "learning_rate": 2.366255303052377e-06, "logits/chosen": -1.6136153936386108, "logits/rejected": -1.188907504081726, "logps/chosen": -286.76373291015625, "logps/rejected": -252.4059600830078, "loss": 0.1859, "rewards/accuracies": 0.75, "rewards/chosen": 0.005851163994520903, "rewards/margins": 0.316637247800827, "rewards/rejected": -0.31078606843948364, "step": 2120 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.654313325881958, "logits/rejected": -1.2479978799819946, "logps/chosen": -226.00390625, "logps/rejected": -232.9287109375, "loss": 0.1541, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08312289416790009, "rewards/margins": 0.3975786864757538, "rewards/rejected": -0.3144558072090149, "step": 2130 }, { "epoch": 0.57, "learning_rate": 2.319805700686257e-06, "logits/chosen": -1.6342413425445557, "logits/rejected": -1.109515905380249, "logps/chosen": -234.4524383544922, "logps/rejected": -233.2206573486328, "loss": 0.1168, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.08855343610048294, "rewards/margins": 0.38437074422836304, "rewards/rejected": -0.2958173155784607, "step": 2140 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.8358131647109985, "logits/rejected": -1.035390019416809, "logps/chosen": -394.54071044921875, "logps/rejected": -347.41680908203125, "loss": 0.2742, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.042184554040431976, "rewards/margins": 0.3534103333950043, "rewards/rejected": -0.3112257719039917, "step": 2150 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.5863606929779053, "logits/rejected": -1.3380165100097656, "logps/chosen": -271.6787414550781, "logps/rejected": -278.33502197265625, "loss": 0.1506, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.020747099071741104, "rewards/margins": 0.34211981296539307, "rewards/rejected": -0.32137271761894226, "step": 2160 }, { "epoch": 0.58, "learning_rate": 2.250253418081373e-06, "logits/chosen": -1.695081353187561, "logits/rejected": -1.1732103824615479, "logps/chosen": -280.98675537109375, "logps/rejected": -266.2809143066406, "loss": 0.1702, "rewards/accuracies": 0.75, "rewards/chosen": 0.008993232622742653, "rewards/margins": 0.3143147826194763, "rewards/rejected": -0.3053215444087982, "step": 2170 }, { "epoch": 0.58, "learning_rate": 2.22710992622628e-06, "logits/chosen": -1.6708568334579468, "logits/rejected": -1.061140775680542, "logps/chosen": -281.99456787109375, "logps/rejected": -249.1426544189453, "loss": 0.1246, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.04111826419830322, "rewards/margins": 0.35372304916381836, "rewards/rejected": -0.31260478496551514, "step": 2180 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.6424753665924072, "logits/rejected": -1.2045528888702393, "logps/chosen": -228.9128875732422, "logps/rejected": -266.5892028808594, "loss": 0.1208, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0656760185956955, "rewards/margins": 0.4023992121219635, "rewards/rejected": -0.3367232382297516, "step": 2190 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.7162901163101196, "logits/rejected": -1.1374019384384155, "logps/chosen": -264.79522705078125, "logps/rejected": -279.41595458984375, "loss": 0.1364, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.055478811264038086, "rewards/margins": 0.4204636216163635, "rewards/rejected": -0.36498481035232544, "step": 2200 }, { "epoch": 0.59, "learning_rate": 2.157829330593008e-06, "logits/chosen": -1.537788987159729, "logits/rejected": -1.129476547241211, "logps/chosen": -278.3310241699219, "logps/rejected": -257.82232666015625, "loss": 0.1514, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.022755134850740433, "rewards/margins": 0.32495397329330444, "rewards/rejected": -0.34770917892456055, "step": 2210 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.6505171060562134, "logits/rejected": -1.1392511129379272, "logps/chosen": -315.64190673828125, "logps/rejected": -261.23101806640625, "loss": 0.1454, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04185132309794426, "rewards/margins": 0.35440436005592346, "rewards/rejected": -0.3125530779361725, "step": 2220 }, { "epoch": 0.59, "learning_rate": 2.1117871704092818e-06, "logits/chosen": -1.822789192199707, "logits/rejected": -1.2018978595733643, "logps/chosen": -249.8803253173828, "logps/rejected": -278.4112243652344, "loss": 0.1295, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.07924628257751465, "rewards/margins": 0.4235721528530121, "rewards/rejected": -0.3443259298801422, "step": 2230 }, { "epoch": 0.6, "learning_rate": 2.0888155493550027e-06, "logits/chosen": -1.7266429662704468, "logits/rejected": -1.0663232803344727, "logps/chosen": -269.5586853027344, "logps/rejected": -230.1761474609375, "loss": 0.1131, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0925656110048294, "rewards/margins": 0.38963964581489563, "rewards/rejected": -0.2970740795135498, "step": 2240 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.8484570980072021, "logits/rejected": -1.2969257831573486, "logps/chosen": -248.79464721679688, "logps/rejected": -239.7518768310547, "loss": 0.1618, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.05543617531657219, "rewards/margins": 0.3949527144432068, "rewards/rejected": -0.3395165801048279, "step": 2250 }, { "epoch": 0.6, "learning_rate": 2.0429811771568468e-06, "logits/chosen": -1.5278081893920898, "logits/rejected": -1.0012762546539307, "logps/chosen": -280.8786315917969, "logps/rejected": -276.75689697265625, "loss": 0.0882, "rewards/accuracies": 0.875, "rewards/chosen": 0.07547347992658615, "rewards/margins": 0.4585336148738861, "rewards/rejected": -0.38306012749671936, "step": 2260 }, { "epoch": 0.61, "learning_rate": 2.0201223973828917e-06, "logits/chosen": -1.6746526956558228, "logits/rejected": -1.205840826034546, "logps/chosen": -274.74298095703125, "logps/rejected": -298.1522216796875, "loss": 0.1112, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.032690681517124176, "rewards/margins": 0.4341716170310974, "rewards/rejected": -0.4014809727668762, "step": 2270 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.420925259590149, "logits/rejected": -1.1142023801803589, "logps/chosen": -235.92758178710938, "logps/rejected": -251.2263641357422, "loss": 0.1505, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04943353682756424, "rewards/margins": 0.3853815197944641, "rewards/rejected": -0.33594799041748047, "step": 2280 }, { "epoch": 0.61, "learning_rate": 1.9745315534350157e-06, "logits/chosen": -1.5054067373275757, "logits/rejected": -1.0819201469421387, "logps/chosen": -313.4949951171875, "logps/rejected": -295.75164794921875, "loss": 0.1577, "rewards/accuracies": 0.875, "rewards/chosen": -3.1620264053344727e-05, "rewards/margins": 0.33664727210998535, "rewards/rejected": -0.3366788923740387, "step": 2290 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.6651229858398438, "logits/rejected": -1.2321889400482178, "logps/chosen": -296.817138671875, "logps/rejected": -304.33294677734375, "loss": 0.1157, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.07290974259376526, "rewards/margins": 0.38127079606056213, "rewards/rejected": -0.3083610236644745, "step": 2300 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.7819957733154297, "logits/rejected": -1.2075005769729614, "logps/chosen": -259.632080078125, "logps/rejected": -265.5983581542969, "loss": 0.1464, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.04883255437016487, "rewards/margins": 0.39770543575286865, "rewards/rejected": -0.3488728702068329, "step": 2310 }, { "epoch": 0.62, "learning_rate": 1.9064916742013515e-06, "logits/chosen": -1.7982776165008545, "logits/rejected": -1.0426654815673828, "logps/chosen": -284.5262756347656, "logps/rejected": -271.22930908203125, "loss": 0.1141, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.08063653856515884, "rewards/margins": 0.4655955731868744, "rewards/rejected": -0.38495901226997375, "step": 2320 }, { "epoch": 0.62, "learning_rate": 1.883911948865306e-06, "logits/chosen": -1.424214243888855, "logits/rejected": -1.0795605182647705, "logps/chosen": -344.02264404296875, "logps/rejected": -331.96990966796875, "loss": 0.3091, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.012640709057450294, "rewards/margins": 0.2547581195831299, "rewards/rejected": -0.24211737513542175, "step": 2330 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.6110725402832031, "logits/rejected": -1.2309287786483765, "logps/chosen": -276.41448974609375, "logps/rejected": -281.2205505371094, "loss": 0.1099, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.046147290617227554, "rewards/margins": 0.3609221577644348, "rewards/rejected": -0.31477484107017517, "step": 2340 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.9351005554199219, "logits/rejected": -1.308622121810913, "logps/chosen": -283.0077209472656, "logps/rejected": -286.6006164550781, "loss": 0.1261, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1078963652253151, "rewards/margins": 0.4045773446559906, "rewards/rejected": -0.2966809570789337, "step": 2350 }, { "epoch": 0.63, "learning_rate": 1.816500865130279e-06, "logits/chosen": -1.6536674499511719, "logits/rejected": -1.3896677494049072, "logps/chosen": -203.09043884277344, "logps/rejected": -169.11422729492188, "loss": 0.1889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.039239563047885895, "rewards/margins": 0.27253058552742004, "rewards/rejected": -0.23329100012779236, "step": 2360 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.620586633682251, "logits/rejected": -1.1168583631515503, "logps/chosen": -280.3619079589844, "logps/rejected": -252.0579071044922, "loss": 0.1894, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.06918063014745712, "rewards/margins": 0.3512100577354431, "rewards/rejected": -0.2820294499397278, "step": 2370 }, { "epoch": 0.63, "learning_rate": 1.7718530101256115e-06, "logits/chosen": -1.887487769126892, "logits/rejected": -1.241081714630127, "logps/chosen": -284.033935546875, "logps/rejected": -275.2795104980469, "loss": 0.1358, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.09624558687210083, "rewards/margins": 0.3296979069709778, "rewards/rejected": -0.23345234990119934, "step": 2380 }, { "epoch": 0.64, "learning_rate": 1.7496227534604859e-06, "logits/chosen": -1.785048246383667, "logits/rejected": -1.2693895101547241, "logps/chosen": -263.9631042480469, "logps/rejected": -324.4400634765625, "loss": 0.1713, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.08735568076372147, "rewards/margins": 0.380867063999176, "rewards/rejected": -0.29351136088371277, "step": 2390 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.5414637327194214, "logits/rejected": -0.9286526441574097, "logps/chosen": -300.5523986816406, "logps/rejected": -227.65713500976562, "loss": 0.1229, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.10164747387170792, "rewards/margins": 0.3701394498348236, "rewards/rejected": -0.2684919834136963, "step": 2400 }, { "epoch": 0.64, "learning_rate": 1.7053592124637557e-06, "logits/chosen": -1.7873646020889282, "logits/rejected": -1.1277539730072021, "logps/chosen": -322.7608337402344, "logps/rejected": -267.81787109375, "loss": 0.148, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.05880016088485718, "rewards/margins": 0.3506908118724823, "rewards/rejected": -0.2918906509876251, "step": 2410 }, { "epoch": 0.65, "learning_rate": 1.6833297633956647e-06, "logits/chosen": -1.532529592514038, "logits/rejected": -1.4235907793045044, "logps/chosen": -165.92489624023438, "logps/rejected": -237.23782348632812, "loss": 0.1796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.004235130734741688, "rewards/margins": 0.31066879630088806, "rewards/rejected": -0.30643370747566223, "step": 2420 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.508122444152832, "logits/rejected": -1.1396775245666504, "logps/chosen": -234.5397186279297, "logps/rejected": -293.2003479003906, "loss": 0.1418, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06259538978338242, "rewards/margins": 0.40436553955078125, "rewards/rejected": -0.3417701721191406, "step": 2430 }, { "epoch": 0.65, "learning_rate": 1.6394850517846621e-06, "logits/chosen": -1.6401771306991577, "logits/rejected": -0.8912142515182495, "logps/chosen": -283.13238525390625, "logps/rejected": -221.9040985107422, "loss": 0.1623, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08341696858406067, "rewards/margins": 0.4074879288673401, "rewards/rejected": -0.32407090067863464, "step": 2440 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.9544626474380493, "logits/rejected": -1.3714215755462646, "logps/chosen": -302.2862548828125, "logps/rejected": -252.0078582763672, "loss": 0.1017, "rewards/accuracies": 0.875, "rewards/chosen": 0.09628540277481079, "rewards/margins": 0.3641790747642517, "rewards/rejected": -0.2678936719894409, "step": 2450 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.4549901485443115, "logits/rejected": -1.019054651260376, "logps/chosen": -258.96661376953125, "logps/rejected": -268.4969177246094, "loss": 0.1197, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.13393627107143402, "rewards/margins": 0.4325905740261078, "rewards/rejected": -0.29865431785583496, "step": 2460 }, { "epoch": 0.66, "learning_rate": 1.5742818947772875e-06, "logits/chosen": -1.6369373798370361, "logits/rejected": -1.1893460750579834, "logps/chosen": -270.46185302734375, "logps/rejected": -253.77633666992188, "loss": 0.1302, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04096044600009918, "rewards/margins": 0.3416442275047302, "rewards/rejected": -0.30068379640579224, "step": 2470 }, { "epoch": 0.66, "learning_rate": 1.552705424629898e-06, "logits/chosen": -1.5808465480804443, "logits/rejected": -1.141884446144104, "logps/chosen": -285.2702941894531, "logps/rejected": -277.57745361328125, "loss": 0.1158, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.059014301747083664, "rewards/margins": 0.3862994611263275, "rewards/rejected": -0.32728514075279236, "step": 2480 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.755192518234253, "logits/rejected": -1.2506240606307983, "logps/chosen": -246.61972045898438, "logps/rejected": -237.236328125, "loss": 0.1112, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06155257299542427, "rewards/margins": 0.430799663066864, "rewards/rejected": -0.3692471385002136, "step": 2490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.532773733139038, "logits/rejected": -1.2026466131210327, "logps/chosen": -243.93820190429688, "logps/rejected": -262.2892150878906, "loss": 0.1839, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07341442257165909, "rewards/margins": 0.35419246554374695, "rewards/rejected": -0.28077805042266846, "step": 2500 }, { "epoch": 0.67, "learning_rate": 1.4884759328590476e-06, "logits/chosen": -1.8502527475357056, "logits/rejected": -1.1260955333709717, "logps/chosen": -276.49945068359375, "logps/rejected": -261.7967529296875, "loss": 0.1949, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.07880761474370956, "rewards/margins": 0.402778685092926, "rewards/rejected": -0.32397109270095825, "step": 2510 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.6124885082244873, "logits/rejected": -1.1764719486236572, "logps/chosen": -242.35806274414062, "logps/rejected": -281.0080261230469, "loss": 0.1401, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.05401073768734932, "rewards/margins": 0.40248528122901917, "rewards/rejected": -0.34847456216812134, "step": 2520 }, { "epoch": 0.67, "learning_rate": 1.446091402744923e-06, "logits/chosen": -1.5193445682525635, "logits/rejected": -0.925061821937561, "logps/chosen": -275.9288024902344, "logps/rejected": -256.40155029296875, "loss": 0.097, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.02620106376707554, "rewards/margins": 0.3423638343811035, "rewards/rejected": -0.31616276502609253, "step": 2530 }, { "epoch": 0.68, "learning_rate": 1.4250351971283937e-06, "logits/chosen": -1.7999191284179688, "logits/rejected": -1.2691766023635864, "logps/chosen": -227.58023071289062, "logps/rejected": -259.0445556640625, "loss": 0.1104, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.06122167035937309, "rewards/margins": 0.41422295570373535, "rewards/rejected": -0.35300129652023315, "step": 2540 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.9836969375610352, "logits/rejected": -1.2529933452606201, "logps/chosen": -243.0897216796875, "logps/rejected": -234.3355712890625, "loss": 0.1477, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0859227403998375, "rewards/margins": 0.4090031683444977, "rewards/rejected": -0.323080450296402, "step": 2550 }, { "epoch": 0.68, "learning_rate": 1.3832040268095589e-06, "logits/chosen": -1.5172902345657349, "logits/rejected": -1.0578333139419556, "logps/chosen": -260.90411376953125, "logps/rejected": -258.8797302246094, "loss": 0.1353, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.05479772016406059, "rewards/margins": 0.37679368257522583, "rewards/rejected": -0.32199597358703613, "step": 2560 }, { "epoch": 0.69, "learning_rate": 1.362432686615316e-06, "logits/chosen": -1.5748417377471924, "logits/rejected": -1.0296505689620972, "logps/chosen": -261.697021484375, "logps/rejected": -210.9502410888672, "loss": 0.1437, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08591978251934052, "rewards/margins": 0.34443560242652893, "rewards/rejected": -0.2585158050060272, "step": 2570 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.7466052770614624, "logits/rejected": -1.1247678995132446, "logps/chosen": -253.7733917236328, "logps/rejected": -230.14089965820312, "loss": 0.1104, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.10326331853866577, "rewards/margins": 0.384945809841156, "rewards/rejected": -0.28168249130249023, "step": 2580 }, { "epoch": 0.69, "learning_rate": 1.3211874947800747e-06, "logits/chosen": -1.6235640048980713, "logits/rejected": -1.2449982166290283, "logps/chosen": -286.81756591796875, "logps/rejected": -286.7332763671875, "loss": 0.2038, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.03231514245271683, "rewards/margins": 0.31858521699905396, "rewards/rejected": -0.2862700819969177, "step": 2590 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.9692258834838867, "logits/rejected": -1.3329085111618042, "logps/chosen": -256.65008544921875, "logps/rejected": -220.8751678466797, "loss": 0.1944, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.06459324806928635, "rewards/margins": 0.3410438299179077, "rewards/rejected": -0.27645057439804077, "step": 2600 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.6254962682724, "logits/rejected": -1.4488201141357422, "logps/chosen": -212.2681427001953, "logps/rejected": -273.9403076171875, "loss": 0.1321, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.09227583557367325, "rewards/margins": 0.3278459906578064, "rewards/rejected": -0.23557014763355255, "step": 2610 }, { "epoch": 0.7, "learning_rate": 1.260090165282645e-06, "logits/chosen": -1.6674009561538696, "logits/rejected": -1.145491361618042, "logps/chosen": -273.3908996582031, "logps/rejected": -231.9703369140625, "loss": 0.189, "rewards/accuracies": 0.75, "rewards/chosen": 0.08204850554466248, "rewards/margins": 0.3708895742893219, "rewards/rejected": -0.2888410687446594, "step": 2620 }, { "epoch": 0.7, "learning_rate": 1.2399369117724582e-06, "logits/chosen": -1.8269517421722412, "logits/rejected": -1.1365658044815063, "logps/chosen": -305.4709167480469, "logps/rejected": -273.973388671875, "loss": 0.1584, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08552964776754379, "rewards/margins": 0.3883412480354309, "rewards/rejected": -0.30281156301498413, "step": 2630 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.6987979412078857, "logits/rejected": -1.3719052076339722, "logps/chosen": -225.90505981445312, "logps/rejected": -248.9863739013672, "loss": 0.1964, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.09808424860239029, "rewards/margins": 0.2932734787464142, "rewards/rejected": -0.1951892375946045, "step": 2640 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.6622822284698486, "logits/rejected": -1.46449875831604, "logps/chosen": -194.4739227294922, "logps/rejected": -270.63922119140625, "loss": 0.16, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.08343388140201569, "rewards/margins": 0.3558526933193207, "rewards/rejected": -0.2724188268184662, "step": 2650 }, { "epoch": 0.71, "learning_rate": 1.1801391659631423e-06, "logits/chosen": -1.4197065830230713, "logits/rejected": -0.8729090690612793, "logps/chosen": -261.99517822265625, "logps/rejected": -240.2467041015625, "loss": 0.1424, "rewards/accuracies": 0.875, "rewards/chosen": 0.09511663019657135, "rewards/margins": 0.3631947338581085, "rewards/rejected": -0.2680780589580536, "step": 2660 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.6888864040374756, "logits/rejected": -1.1725207567214966, "logps/chosen": -265.87957763671875, "logps/rejected": -228.31982421875, "loss": 0.115, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.11969444900751114, "rewards/margins": 0.4062970280647278, "rewards/rejected": -0.28660255670547485, "step": 2670 }, { "epoch": 0.71, "learning_rate": 1.1408429274065418e-06, "logits/chosen": -1.6261924505233765, "logits/rejected": -1.3828227519989014, "logps/chosen": -223.134521484375, "logps/rejected": -277.44903564453125, "loss": 0.1311, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.11289075762033463, "rewards/margins": 0.34422147274017334, "rewards/rejected": -0.23133070766925812, "step": 2680 }, { "epoch": 0.72, "learning_rate": 1.1213706079298566e-06, "logits/chosen": -1.8407529592514038, "logits/rejected": -1.3841874599456787, "logps/chosen": -282.0005187988281, "logps/rejected": -308.8989562988281, "loss": 0.1637, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08910055458545685, "rewards/margins": 0.3223797082901001, "rewards/rejected": -0.23327915370464325, "step": 2690 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.6514075994491577, "logits/rejected": -1.1626676321029663, "logps/chosen": -206.6435546875, "logps/rejected": -248.57595825195312, "loss": 0.0849, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0976477712392807, "rewards/margins": 0.3547229468822479, "rewards/rejected": -0.25707516074180603, "step": 2700 }, { "epoch": 0.72, "learning_rate": 1.0827860044369226e-06, "logits/chosen": -1.8002774715423584, "logits/rejected": -1.2231605052947998, "logps/chosen": -263.72991943359375, "logps/rejected": -265.7281799316406, "loss": 0.0857, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.09039691835641861, "rewards/margins": 0.3749600052833557, "rewards/rejected": -0.2845631241798401, "step": 2710 }, { "epoch": 0.73, "learning_rate": 1.06367706362636e-06, "logits/chosen": -1.7300952672958374, "logits/rejected": -1.0147895812988281, "logps/chosen": -234.8489532470703, "logps/rejected": -245.09164428710938, "loss": 0.0854, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.09726179391145706, "rewards/margins": 0.37331247329711914, "rewards/rejected": -0.27605074644088745, "step": 2720 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.8114553689956665, "logits/rejected": -1.3846733570098877, "logps/chosen": -270.171142578125, "logps/rejected": -253.19253540039062, "loss": 0.1582, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.08086392283439636, "rewards/margins": 0.3226754665374756, "rewards/rejected": -0.24181151390075684, "step": 2730 }, { "epoch": 0.73, "learning_rate": 1.0258341823102418e-06, "logits/chosen": -1.5032013654708862, "logits/rejected": -1.2384767532348633, "logps/chosen": -202.64346313476562, "logps/rejected": -241.52383422851562, "loss": 0.1571, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.04403448849916458, "rewards/margins": 0.30034139752388, "rewards/rejected": -0.25630688667297363, "step": 2740 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.6960475444793701, "logits/rejected": -1.202266812324524, "logps/chosen": -228.84579467773438, "logps/rejected": -282.767333984375, "loss": 0.1184, "rewards/accuracies": 0.875, "rewards/chosen": 0.08044974505901337, "rewards/margins": 0.4057890474796295, "rewards/rejected": -0.32533928751945496, "step": 2750 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.7069820165634155, "logits/rejected": -1.3219921588897705, "logps/chosen": -232.90579223632812, "logps/rejected": -257.5270690917969, "loss": 0.1422, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.050495147705078125, "rewards/margins": 0.3361809551715851, "rewards/rejected": -0.2856857478618622, "step": 2760 }, { "epoch": 0.74, "learning_rate": 9.700318703442437e-07, "logits/chosen": -1.813307762145996, "logits/rejected": -1.2688970565795898, "logps/chosen": -252.4823760986328, "logps/rejected": -221.725830078125, "loss": 0.1892, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.03987887501716614, "rewards/margins": 0.2988082468509674, "rewards/rejected": -0.2589293420314789, "step": 2770 }, { "epoch": 0.74, "learning_rate": 9.516940936268504e-07, "logits/chosen": -1.8856050968170166, "logits/rejected": -1.4962949752807617, "logps/chosen": -211.00314331054688, "logps/rejected": -229.956298828125, "loss": 0.2038, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.026142999529838562, "rewards/margins": 0.2894168496131897, "rewards/rejected": -0.2632738947868347, "step": 2780 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.7890255451202393, "logits/rejected": -1.1871464252471924, "logps/chosen": -290.72113037109375, "logps/rejected": -280.96856689453125, "loss": 0.1023, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.048764027655124664, "rewards/margins": 0.323917955160141, "rewards/rejected": -0.2751539349555969, "step": 2790 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.4765609502792358, "logits/rejected": -1.052433729171753, "logps/chosen": -270.6768493652344, "logps/rejected": -251.9602508544922, "loss": 0.1566, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.08185350149869919, "rewards/margins": 0.3346666097640991, "rewards/rejected": -0.2528131604194641, "step": 2800 }, { "epoch": 0.75, "learning_rate": 8.974919888823164e-07, "logits/chosen": -1.5708179473876953, "logits/rejected": -1.1366405487060547, "logps/chosen": -260.322265625, "logps/rejected": -328.4196472167969, "loss": 0.1115, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.075380340218544, "rewards/margins": 0.4507225453853607, "rewards/rejected": -0.3753421902656555, "step": 2810 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.6871554851531982, "logits/rejected": -1.015560269355774, "logps/chosen": -312.00396728515625, "logps/rejected": -289.4263000488281, "loss": 0.0967, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.08846127986907959, "rewards/margins": 0.4753798544406891, "rewards/rejected": -0.3869186043739319, "step": 2820 }, { "epoch": 0.75, "learning_rate": 8.620488984679378e-07, "logits/chosen": -1.5905934572219849, "logits/rejected": -1.0190634727478027, "logps/chosen": -270.3796081542969, "logps/rejected": -255.5897216796875, "loss": 0.1175, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.06361018866300583, "rewards/margins": 0.41740432381629944, "rewards/rejected": -0.3537940979003906, "step": 2830 }, { "epoch": 0.76, "learning_rate": 8.445394716802754e-07, "logits/chosen": -1.716925859451294, "logits/rejected": -1.2719004154205322, "logps/chosen": -282.26434326171875, "logps/rejected": -315.83294677734375, "loss": 0.1543, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.10373795032501221, "rewards/margins": 0.4591018557548523, "rewards/rejected": -0.3553639352321625, "step": 2840 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.6560224294662476, "logits/rejected": -1.2010629177093506, "logps/chosen": -273.56549072265625, "logps/rejected": -295.66180419921875, "loss": 0.1, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.09685816615819931, "rewards/margins": 0.42575687170028687, "rewards/rejected": -0.32889872789382935, "step": 2850 }, { "epoch": 0.76, "learning_rate": 8.099524404308948e-07, "logits/chosen": -1.5945422649383545, "logits/rejected": -1.19219172000885, "logps/chosen": -182.81103515625, "logps/rejected": -238.70919799804688, "loss": 0.161, "rewards/accuracies": 0.75, "rewards/chosen": 0.06724058091640472, "rewards/margins": 0.31565195322036743, "rewards/rejected": -0.24841134250164032, "step": 2860 }, { "epoch": 0.77, "learning_rate": 7.928778328007918e-07, "logits/chosen": -1.9054105281829834, "logits/rejected": -1.2900282144546509, "logps/chosen": -260.6278991699219, "logps/rejected": -260.5713806152344, "loss": 0.1954, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.06147562339901924, "rewards/margins": 0.29727303981781006, "rewards/rejected": -0.23579740524291992, "step": 2870 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.8338701725006104, "logits/rejected": -1.3442243337631226, "logps/chosen": -289.9970703125, "logps/rejected": -252.2522735595703, "loss": 0.189, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.06250108033418655, "rewards/margins": 0.26397019624710083, "rewards/rejected": -0.20146910846233368, "step": 2880 }, { "epoch": 0.77, "learning_rate": 7.591738306429769e-07, "logits/chosen": -1.7106062173843384, "logits/rejected": -0.9574893712997437, "logps/chosen": -306.48663330078125, "logps/rejected": -267.07965087890625, "loss": 0.1403, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.03719034045934677, "rewards/margins": 0.3732340633869171, "rewards/rejected": -0.33604371547698975, "step": 2890 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.8127809762954712, "logits/rejected": -1.1028550863265991, "logps/chosen": -264.7284240722656, "logps/rejected": -247.4577178955078, "loss": 0.1306, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.077170729637146, "rewards/margins": 0.40461188554763794, "rewards/rejected": -0.32744115591049194, "step": 2900 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.756270408630371, "logits/rejected": -1.4134676456451416, "logps/chosen": -213.25234985351562, "logps/rejected": -228.63436889648438, "loss": 0.1606, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.0521668866276741, "rewards/margins": 0.32329946756362915, "rewards/rejected": -0.27113252878189087, "step": 2910 }, { "epoch": 0.78, "learning_rate": 7.097526647366379e-07, "logits/chosen": -1.8526099920272827, "logits/rejected": -1.2808058261871338, "logps/chosen": -266.97918701171875, "logps/rejected": -249.385986328125, "loss": 0.1041, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.08820641040802002, "rewards/margins": 0.35536572337150574, "rewards/rejected": -0.2671593129634857, "step": 2920 }, { "epoch": 0.78, "learning_rate": 6.935872887769299e-07, "logits/chosen": -1.659076452255249, "logits/rejected": -1.0968009233474731, "logps/chosen": -307.5328063964844, "logps/rejected": -245.99307250976562, "loss": 0.1618, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.07833143323659897, "rewards/margins": 0.386299729347229, "rewards/rejected": -0.3079683184623718, "step": 2930 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.5469852685928345, "logits/rejected": -1.411586046218872, "logps/chosen": -225.2750701904297, "logps/rejected": -242.4783935546875, "loss": 0.1542, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04528594762086868, "rewards/margins": 0.3220139145851135, "rewards/rejected": -0.27672794461250305, "step": 2940 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.6576216220855713, "logits/rejected": -1.1561254262924194, "logps/chosen": -238.072021484375, "logps/rejected": -259.99688720703125, "loss": 0.086, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.1014413982629776, "rewards/margins": 0.43401598930358887, "rewards/rejected": -0.33257460594177246, "step": 2950 }, { "epoch": 0.79, "learning_rate": 6.460358074120518e-07, "logits/chosen": -1.9168809652328491, "logits/rejected": -1.2557293176651, "logps/chosen": -242.58401489257812, "logps/rejected": -269.3041687011719, "loss": 0.152, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.09284786880016327, "rewards/margins": 0.39133232831954956, "rewards/rejected": -0.2984844744205475, "step": 2960 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.9695804119110107, "logits/rejected": -1.1151535511016846, "logps/chosen": -290.43756103515625, "logps/rejected": -263.0965881347656, "loss": 0.1432, "rewards/accuracies": 0.875, "rewards/chosen": 0.08852732926607132, "rewards/margins": 0.39591920375823975, "rewards/rejected": -0.307391881942749, "step": 2970 }, { "epoch": 0.79, "learning_rate": 6.151357245788917e-07, "logits/chosen": -1.6923097372055054, "logits/rejected": -0.981489360332489, "logps/chosen": -299.1329650878906, "logps/rejected": -261.29949951171875, "loss": 0.1661, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.09116804599761963, "rewards/margins": 0.45771628618240356, "rewards/rejected": -0.36654824018478394, "step": 2980 }, { "epoch": 0.8, "learning_rate": 5.999299915559956e-07, "logits/chosen": -1.7366340160369873, "logits/rejected": -1.307422399520874, "logps/chosen": -270.86468505859375, "logps/rejected": -257.4667663574219, "loss": 0.1394, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.14943814277648926, "rewards/margins": 0.4425061345100403, "rewards/rejected": -0.293067991733551, "step": 2990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.6904795169830322, "logits/rejected": -1.514775037765503, "logps/chosen": -245.5784454345703, "logps/rejected": -294.32110595703125, "loss": 0.1271, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1196504607796669, "rewards/margins": 0.3875979781150818, "rewards/rejected": -0.2679474949836731, "step": 3000 }, { "epoch": 0.8, "learning_rate": 5.700137297712749e-07, "logits/chosen": -1.6396605968475342, "logits/rejected": -1.020140290260315, "logps/chosen": -293.1066589355469, "logps/rejected": -233.6679229736328, "loss": 0.1135, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.13376173377037048, "rewards/margins": 0.3968765139579773, "rewards/rejected": -0.26311472058296204, "step": 3010 }, { "epoch": 0.81, "learning_rate": 5.553057931370729e-07, "logits/chosen": -1.6284799575805664, "logits/rejected": -1.0204269886016846, "logps/chosen": -281.06866455078125, "logps/rejected": -226.4606170654297, "loss": 0.1341, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.11780212819576263, "rewards/margins": 0.3741871416568756, "rewards/rejected": -0.25638502836227417, "step": 3020 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.796983003616333, "logits/rejected": -1.2661386728286743, "logps/chosen": -281.26593017578125, "logps/rejected": -282.1707458496094, "loss": 0.1166, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0918949544429779, "rewards/margins": 0.3467631936073303, "rewards/rejected": -0.25486817955970764, "step": 3030 }, { "epoch": 0.81, "learning_rate": 5.263966802018275e-07, "logits/chosen": -1.7147595882415771, "logits/rejected": -1.1799060106277466, "logps/chosen": -264.71673583984375, "logps/rejected": -275.89239501953125, "loss": 0.1189, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.08099232614040375, "rewards/margins": 0.3739253282546997, "rewards/rejected": -0.29293301701545715, "step": 3040 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.8341114521026611, "logits/rejected": -1.1735069751739502, "logps/chosen": -296.3938903808594, "logps/rejected": -258.4015808105469, "loss": 0.1205, "rewards/accuracies": 0.875, "rewards/chosen": 0.06104854494333267, "rewards/margins": 0.4131019711494446, "rewards/rejected": -0.35205337405204773, "step": 3050 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.6473156213760376, "logits/rejected": -1.303798794746399, "logps/chosen": -253.82205200195312, "logps/rejected": -311.9048767089844, "loss": 0.0928, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.08462455123662949, "rewards/margins": 0.483224093914032, "rewards/rejected": -0.3985995352268219, "step": 3060 }, { "epoch": 0.82, "learning_rate": 4.843185871337722e-07, "logits/chosen": -1.7317914962768555, "logits/rejected": -1.2134945392608643, "logps/chosen": -224.9326629638672, "logps/rejected": -257.9466857910156, "loss": 0.1239, "rewards/accuracies": 0.875, "rewards/chosen": 0.10292349010705948, "rewards/margins": 0.4141048789024353, "rewards/rejected": -0.3111814260482788, "step": 3070 }, { "epoch": 0.82, "learning_rate": 4.706402525869633e-07, "logits/chosen": -1.887621521949768, "logits/rejected": -1.0996310710906982, "logps/chosen": -326.8731384277344, "logps/rejected": -258.768798828125, "loss": 0.1689, "rewards/accuracies": 0.75, "rewards/chosen": 0.0996357649564743, "rewards/margins": 0.36071866750717163, "rewards/rejected": -0.2610829174518585, "step": 3080 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.3531527519226074, "logits/rejected": -1.1717339754104614, "logps/chosen": -224.5545654296875, "logps/rejected": -308.23358154296875, "loss": 0.109, "rewards/accuracies": 0.875, "rewards/chosen": 0.10629155486822128, "rewards/margins": 0.40465980768203735, "rewards/rejected": -0.29836827516555786, "step": 3090 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.9190078973770142, "logits/rejected": -1.3009909391403198, "logps/chosen": -285.29534912109375, "logps/rejected": -245.940673828125, "loss": 0.1582, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.12030161917209625, "rewards/margins": 0.36642009019851685, "rewards/rejected": -0.246118426322937, "step": 3100 }, { "epoch": 0.83, "learning_rate": 4.3066493009749853e-07, "logits/chosen": -1.6608394384384155, "logits/rejected": -1.2748819589614868, "logps/chosen": -249.3994140625, "logps/rejected": -303.565673828125, "loss": 0.0973, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.09208109974861145, "rewards/margins": 0.3958059251308441, "rewards/rejected": -0.3037247955799103, "step": 3110 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.7159957885742188, "logits/rejected": -1.2019530534744263, "logps/chosen": -243.9926300048828, "logps/rejected": -216.5254364013672, "loss": 0.1449, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0733565241098404, "rewards/margins": 0.32575708627700806, "rewards/rejected": -0.25240057706832886, "step": 3120 }, { "epoch": 0.83, "learning_rate": 4.049092898095816e-07, "logits/chosen": -1.695738434791565, "logits/rejected": -1.370802402496338, "logps/chosen": -242.7845458984375, "logps/rejected": -251.65859985351562, "loss": 0.1549, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.05762239173054695, "rewards/margins": 0.2988077998161316, "rewards/rejected": -0.24118542671203613, "step": 3130 }, { "epoch": 0.84, "learning_rate": 3.9230321284847856e-07, "logits/chosen": -1.5597546100616455, "logits/rejected": -1.1920053958892822, "logps/chosen": -202.78463745117188, "logps/rejected": -235.42611694335938, "loss": 0.1828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.08725061267614365, "rewards/margins": 0.3124812841415405, "rewards/rejected": -0.22523066401481628, "step": 3140 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.710565209388733, "logits/rejected": -1.3830753564834595, "logps/chosen": -191.14996337890625, "logps/rejected": -228.13143920898438, "loss": 0.1745, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.07240664213895798, "rewards/margins": 0.2955719232559204, "rewards/rejected": -0.22316527366638184, "step": 3150 }, { "epoch": 0.84, "learning_rate": 3.6764000653481263e-07, "logits/chosen": -1.6754013299942017, "logits/rejected": -1.4069788455963135, "logps/chosen": -212.1529541015625, "logps/rejected": -288.8616027832031, "loss": 0.1174, "rewards/accuracies": 0.75, "rewards/chosen": 0.01046234555542469, "rewards/margins": 0.29962268471717834, "rewards/rejected": -0.2891603410243988, "step": 3160 }, { "epoch": 0.85, "learning_rate": 3.555850141530659e-07, "logits/chosen": -1.9133336544036865, "logits/rejected": -1.308585286140442, "logps/chosen": -272.24310302734375, "logps/rejected": -284.73089599609375, "loss": 0.1114, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06770848482847214, "rewards/margins": 0.39665132761001587, "rewards/rejected": -0.3289428651332855, "step": 3170 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.67145574092865, "logits/rejected": -1.438444972038269, "logps/chosen": -208.4847412109375, "logps/rejected": -290.7266540527344, "loss": 0.1622, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.05329760164022446, "rewards/margins": 0.38010022044181824, "rewards/rejected": -0.3268026113510132, "step": 3180 }, { "epoch": 0.85, "learning_rate": 3.3203347344004737e-07, "logits/chosen": -1.6708052158355713, "logits/rejected": -1.0412744283676147, "logps/chosen": -257.42547607421875, "logps/rejected": -255.15597534179688, "loss": 0.099, "rewards/accuracies": 0.875, "rewards/chosen": 0.09596081078052521, "rewards/margins": 0.3767240345478058, "rewards/rejected": -0.2807632088661194, "step": 3190 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.5217217206954956, "logits/rejected": -1.2141330242156982, "logps/chosen": -183.3319091796875, "logps/rejected": -227.89114379882812, "loss": 0.1609, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0313902273774147, "rewards/margins": 0.31350037455558777, "rewards/rejected": -0.28211015462875366, "step": 3200 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.786128282546997, "logits/rejected": -1.0563210248947144, "logps/chosen": -275.27703857421875, "logps/rejected": -250.85952758789062, "loss": 0.1375, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.09343347698450089, "rewards/margins": 0.4091528058052063, "rewards/rejected": -0.3157193064689636, "step": 3210 }, { "epoch": 0.86, "learning_rate": 2.981174554287239e-07, "logits/chosen": -1.4629366397857666, "logits/rejected": -1.2211610078811646, "logps/chosen": -213.59805297851562, "logps/rejected": -242.63827514648438, "loss": 0.1274, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.09229601919651031, "rewards/margins": 0.42800164222717285, "rewards/rejected": -0.33570563793182373, "step": 3220 }, { "epoch": 0.86, "learning_rate": 2.871923955178918e-07, "logits/chosen": -1.6027485132217407, "logits/rejected": -1.358605146408081, "logps/chosen": -266.48590087890625, "logps/rejected": -279.26641845703125, "loss": 0.1804, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.019939150661230087, "rewards/margins": 0.34129849076271057, "rewards/rejected": -0.3213593065738678, "step": 3230 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.699567437171936, "logits/rejected": -1.2492908239364624, "logps/chosen": -185.8858184814453, "logps/rejected": -198.7812042236328, "loss": 0.1461, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0718788132071495, "rewards/margins": 0.35736599564552307, "rewards/rejected": -0.28548720479011536, "step": 3240 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.7908653020858765, "logits/rejected": -1.3732439279556274, "logps/chosen": -259.0335998535156, "logps/rejected": -245.4866485595703, "loss": 0.1154, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.03901948407292366, "rewards/margins": 0.35004353523254395, "rewards/rejected": -0.3110240697860718, "step": 3250 }, { "epoch": 0.87, "learning_rate": 2.555713060848433e-07, "logits/chosen": -1.8778269290924072, "logits/rejected": -1.3474853038787842, "logps/chosen": -365.189208984375, "logps/rejected": -387.7873229980469, "loss": 0.105, "rewards/accuracies": 0.875, "rewards/chosen": 0.05286934971809387, "rewards/margins": 0.4107195734977722, "rewards/rejected": -0.35785022377967834, "step": 3260 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.6174417734146118, "logits/rejected": -1.245863914489746, "logps/chosen": -199.4110107421875, "logps/rejected": -256.9593200683594, "loss": 0.0899, "rewards/accuracies": 0.875, "rewards/chosen": 0.06935293972492218, "rewards/margins": 0.3890882432460785, "rewards/rejected": -0.3197353482246399, "step": 3270 }, { "epoch": 0.87, "learning_rate": 2.3546141258376786e-07, "logits/chosen": -1.9288465976715088, "logits/rejected": -1.4281173944473267, "logps/chosen": -290.6533508300781, "logps/rejected": -269.7490539550781, "loss": 0.0973, "rewards/accuracies": 0.875, "rewards/chosen": 0.1082816869020462, "rewards/margins": 0.44768819212913513, "rewards/rejected": -0.3394065499305725, "step": 3280 }, { "epoch": 0.88, "learning_rate": 2.257003546333042e-07, "logits/chosen": -2.0008137226104736, "logits/rejected": -1.320913314819336, "logps/chosen": -320.672119140625, "logps/rejected": -271.7630310058594, "loss": 0.1208, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07430247217416763, "rewards/margins": 0.415428102016449, "rewards/rejected": -0.3411256670951843, "step": 3290 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.7060868740081787, "logits/rejected": -1.4465411901474, "logps/chosen": -220.87380981445312, "logps/rejected": -298.72625732421875, "loss": 0.1216, "rewards/accuracies": 0.875, "rewards/chosen": 0.09051541984081268, "rewards/margins": 0.4572044014930725, "rewards/rejected": -0.366688996553421, "step": 3300 }, { "epoch": 0.88, "learning_rate": 2.0677024504760752e-07, "logits/chosen": -1.638347864151001, "logits/rejected": -1.0680015087127686, "logps/chosen": -267.92608642578125, "logps/rejected": -270.8176574707031, "loss": 0.121, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.08300314098596573, "rewards/margins": 0.42494750022888184, "rewards/rejected": -0.3419443368911743, "step": 3310 }, { "epoch": 0.89, "learning_rate": 1.9760283363267684e-07, "logits/chosen": -1.5746757984161377, "logits/rejected": -1.1135761737823486, "logps/chosen": -221.92343139648438, "logps/rejected": -250.6765899658203, "loss": 0.1311, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.07789792120456696, "rewards/margins": 0.39919474720954895, "rewards/rejected": -0.3212968707084656, "step": 3320 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.5667272806167603, "logits/rejected": -0.9896243810653687, "logps/chosen": -296.76202392578125, "logps/rejected": -303.38665771484375, "loss": 0.1393, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0856994241476059, "rewards/margins": 0.44408607482910156, "rewards/rejected": -0.35838669538497925, "step": 3330 }, { "epoch": 0.89, "learning_rate": 1.798672690923828e-07, "logits/chosen": -1.6267836093902588, "logits/rejected": -1.1321130990982056, "logps/chosen": -253.453369140625, "logps/rejected": -270.5718078613281, "loss": 0.1355, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.03761683404445648, "rewards/margins": 0.36388128995895386, "rewards/rejected": -0.3262644410133362, "step": 3340 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.678847312927246, "logits/rejected": -1.140943169593811, "logps/chosen": -262.73638916015625, "logps/rejected": -221.0591583251953, "loss": 0.12, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.05530963093042374, "rewards/margins": 0.353360652923584, "rewards/rejected": -0.29805102944374084, "step": 3350 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.7336199283599854, "logits/rejected": -1.1567232608795166, "logps/chosen": -307.455078125, "logps/rejected": -310.3108825683594, "loss": 0.1219, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.049434565007686615, "rewards/margins": 0.42315396666526794, "rewards/rejected": -0.37371936440467834, "step": 3360 }, { "epoch": 0.9, "learning_rate": 1.5477346284948292e-07, "logits/chosen": -1.746050238609314, "logits/rejected": -1.4284813404083252, "logps/chosen": -237.4105682373047, "logps/rejected": -288.8939514160156, "loss": 0.1522, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.028758401051163673, "rewards/margins": 0.33460500836372375, "rewards/rejected": -0.30584657192230225, "step": 3370 }, { "epoch": 0.9, "learning_rate": 1.4681432143872133e-07, "logits/chosen": -1.6106250286102295, "logits/rejected": -1.148560881614685, "logps/chosen": -258.1077575683594, "logps/rejected": -318.8696594238281, "loss": 0.1528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04034858196973801, "rewards/margins": 0.350983202457428, "rewards/rejected": -0.3106346130371094, "step": 3380 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.8278043270111084, "logits/rejected": -1.0977909564971924, "logps/chosen": -309.5404357910156, "logps/rejected": -250.6240692138672, "loss": 0.1186, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.07560743391513824, "rewards/margins": 0.4015159606933594, "rewards/rejected": -0.3259085416793823, "step": 3390 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.6950404644012451, "logits/rejected": -1.275434136390686, "logps/chosen": -280.67218017578125, "logps/rejected": -243.7939910888672, "loss": 0.1406, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.06180018186569214, "rewards/margins": 0.3164129853248596, "rewards/rejected": -0.25461283326148987, "step": 3400 }, { "epoch": 0.91, "learning_rate": 1.241629335994471e-07, "logits/chosen": -1.6643226146697998, "logits/rejected": -0.9809878468513489, "logps/chosen": -246.31515502929688, "logps/rejected": -255.1239013671875, "loss": 0.1209, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06152065843343735, "rewards/margins": 0.4374101161956787, "rewards/rejected": -0.37588945031166077, "step": 3410 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.6431770324707031, "logits/rejected": -1.1887956857681274, "logps/chosen": -247.9853515625, "logps/rejected": -250.0074920654297, "loss": 0.1573, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.052826426923274994, "rewards/margins": 0.3623102903366089, "rewards/rejected": -0.3094838261604309, "step": 3420 }, { "epoch": 0.91, "learning_rate": 1.1009020308754587e-07, "logits/chosen": -1.7229608297348022, "logits/rejected": -1.225477933883667, "logps/chosen": -235.5467529296875, "logps/rejected": -204.58261108398438, "loss": 0.1929, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04221180081367493, "rewards/margins": 0.2941130995750427, "rewards/rejected": -0.2519012987613678, "step": 3430 }, { "epoch": 0.92, "learning_rate": 1.0336415203768962e-07, "logits/chosen": -1.4921534061431885, "logits/rejected": -1.1619257926940918, "logps/chosen": -285.34259033203125, "logps/rejected": -295.04864501953125, "loss": 0.1181, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.08926016837358475, "rewards/margins": 0.4191388189792633, "rewards/rejected": -0.32987862825393677, "step": 3440 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.4940868616104126, "logits/rejected": -0.863502025604248, "logps/chosen": -264.92169189453125, "logps/rejected": -247.3212890625, "loss": 0.1366, "rewards/accuracies": 0.875, "rewards/chosen": 0.027351949363946915, "rewards/margins": 0.341216504573822, "rewards/rejected": -0.3138645589351654, "step": 3450 }, { "epoch": 0.92, "learning_rate": 9.053559223036746e-08, "logits/chosen": -1.7528457641601562, "logits/rejected": -1.0926567316055298, "logps/chosen": -313.1197509765625, "logps/rejected": -255.8910675048828, "loss": 0.1122, "rewards/accuracies": 0.875, "rewards/chosen": 0.09485974907875061, "rewards/margins": 0.4161204397678375, "rewards/rejected": -0.3212606906890869, "step": 3460 }, { "epoch": 0.93, "learning_rate": 8.44341950176683e-08, "logits/chosen": -1.5928373336791992, "logits/rejected": -1.244089126586914, "logps/chosen": -265.8317565917969, "logps/rejected": -325.6632080078125, "loss": 0.1168, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.02779344841837883, "rewards/margins": 0.3878537714481354, "rewards/rejected": -0.36006033420562744, "step": 3470 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.4396581649780273, "logits/rejected": -1.2138065099716187, "logps/chosen": -240.5614776611328, "logps/rejected": -349.2239685058594, "loss": 0.1016, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06546439230442047, "rewards/margins": 0.43350672721862793, "rewards/rejected": -0.36804237961769104, "step": 3480 }, { "epoch": 0.93, "learning_rate": 7.285980923996989e-08, "logits/chosen": -1.752174735069275, "logits/rejected": -1.3645591735839844, "logps/chosen": -234.2113800048828, "logps/rejected": -227.34848022460938, "loss": 0.1145, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.06558214873075485, "rewards/margins": 0.37207797169685364, "rewards/rejected": -0.30649590492248535, "step": 3490 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.3806583881378174, "logits/rejected": -1.1515109539031982, "logps/chosen": -230.22073364257812, "logps/rejected": -265.16314697265625, "loss": 0.1637, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.03167334571480751, "rewards/margins": 0.4240112900733948, "rewards/rejected": -0.39233797788619995, "step": 3500 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.608327865600586, "logits/rejected": -0.9549871683120728, "logps/chosen": -225.1513214111328, "logps/rejected": -209.7755584716797, "loss": 0.134, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.05912930518388748, "rewards/margins": 0.37384232878685, "rewards/rejected": -0.3147130608558655, "step": 3510 }, { "epoch": 0.94, "learning_rate": 5.707663716023021e-08, "logits/chosen": -1.853945016860962, "logits/rejected": -1.118769884109497, "logps/chosen": -280.1803283691406, "logps/rejected": -295.4366760253906, "loss": 0.1538, "rewards/accuracies": 0.875, "rewards/chosen": 0.05890347808599472, "rewards/margins": 0.4038105010986328, "rewards/rejected": -0.3449070453643799, "step": 3520 }, { "epoch": 0.94, "learning_rate": 5.22383298837098e-08, "logits/chosen": -1.6122610569000244, "logits/rejected": -1.0327246189117432, "logps/chosen": -242.87600708007812, "logps/rejected": -208.82144165039062, "loss": 0.0935, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06353352963924408, "rewards/margins": 0.38507527112960815, "rewards/rejected": -0.3215416669845581, "step": 3530 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.680824875831604, "logits/rejected": -1.130185604095459, "logps/chosen": -244.988037109375, "logps/rejected": -263.0980529785156, "loss": 0.1587, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.06494718790054321, "rewards/margins": 0.3642052412033081, "rewards/rejected": -0.2992580533027649, "step": 3540 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.6353555917739868, "logits/rejected": -1.2781399488449097, "logps/chosen": -228.3214569091797, "logps/rejected": -280.24066162109375, "loss": 0.1688, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.013865587301552296, "rewards/margins": 0.3643207550048828, "rewards/rejected": -0.3781863749027252, "step": 3550 }, { "epoch": 0.95, "learning_rate": 3.8997527136930004e-08, "logits/chosen": -1.7263469696044922, "logits/rejected": -1.1389892101287842, "logps/chosen": -293.1500244140625, "logps/rejected": -265.1076354980469, "loss": 0.0911, "rewards/accuracies": 0.875, "rewards/chosen": 0.08173110336065292, "rewards/margins": 0.42599812150001526, "rewards/rejected": -0.3442670404911041, "step": 3560 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.561417579650879, "logits/rejected": -1.165183424949646, "logps/chosen": -252.6567840576172, "logps/rejected": -276.4109802246094, "loss": 0.0987, "rewards/accuracies": 0.875, "rewards/chosen": 0.07267307490110397, "rewards/margins": 0.37683752179145813, "rewards/rejected": -0.30416446924209595, "step": 3570 }, { "epoch": 0.95, "learning_rate": 3.1235869306123766e-08, "logits/chosen": -1.6449648141860962, "logits/rejected": -1.2687057256698608, "logps/chosen": -221.9775848388672, "logps/rejected": -275.42449951171875, "loss": 0.1551, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.01698826439678669, "rewards/margins": 0.35954660177230835, "rewards/rejected": -0.3425583243370056, "step": 3580 }, { "epoch": 0.96, "learning_rate": 2.767574008979007e-08, "logits/chosen": -1.640044927597046, "logits/rejected": -1.0068800449371338, "logps/chosen": -286.2217712402344, "logps/rejected": -278.906494140625, "loss": 0.0995, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.073039710521698, "rewards/margins": 0.4568893015384674, "rewards/rejected": -0.3838495910167694, "step": 3590 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.9251073598861694, "logits/rejected": -1.0933765172958374, "logps/chosen": -352.7594299316406, "logps/rejected": -274.07269287109375, "loss": 0.1353, "rewards/accuracies": 0.875, "rewards/chosen": 0.07194405049085617, "rewards/margins": 0.3892292380332947, "rewards/rejected": -0.3172852396965027, "step": 3600 }, { "epoch": 0.96, "learning_rate": 2.1198423385220822e-08, "logits/chosen": -1.6453202962875366, "logits/rejected": -1.2082674503326416, "logps/chosen": -257.38250732421875, "logps/rejected": -320.1260986328125, "loss": 0.0771, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.10315804183483124, "rewards/margins": 0.4763513505458832, "rewards/rejected": -0.37319326400756836, "step": 3610 }, { "epoch": 0.97, "learning_rate": 1.82817971312621e-08, "logits/chosen": -1.790305733680725, "logits/rejected": -1.1114693880081177, "logps/chosen": -260.489990234375, "logps/rejected": -238.48779296875, "loss": 0.1119, "rewards/accuracies": 0.875, "rewards/chosen": 0.09379683434963226, "rewards/margins": 0.4416617453098297, "rewards/rejected": -0.34786492586135864, "step": 3620 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.6579450368881226, "logits/rejected": -1.3401272296905518, "logps/chosen": -250.71633911132812, "logps/rejected": -241.178466796875, "loss": 0.211, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.027279715985059738, "rewards/margins": 0.31590917706489563, "rewards/rejected": -0.2886294722557068, "step": 3630 }, { "epoch": 0.97, "learning_rate": 1.3093872369654148e-08, "logits/chosen": -1.72607421875, "logits/rejected": -1.3506278991699219, "logps/chosen": -237.07730102539062, "logps/rejected": -328.91607666015625, "loss": 0.1135, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0632893294095993, "rewards/margins": 0.4014991819858551, "rewards/rejected": -0.3382098376750946, "step": 3640 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.8103011846542358, "logits/rejected": -1.331343412399292, "logps/chosen": -218.52322387695312, "logps/rejected": -215.2735595703125, "loss": 0.1715, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.05841076374053955, "rewards/margins": 0.369015634059906, "rewards/rejected": -0.31060490012168884, "step": 3650 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.3743187189102173, "logits/rejected": -1.1308249235153198, "logps/chosen": -215.2615966796875, "logps/rejected": -300.07080078125, "loss": 0.0964, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.0790005549788475, "rewards/margins": 0.41065654158592224, "rewards/rejected": -0.33165597915649414, "step": 3660 }, { "epoch": 0.98, "learning_rate": 6.9285359445145366e-09, "logits/chosen": -1.418792486190796, "logits/rejected": -1.1899298429489136, "logps/chosen": -232.254638671875, "logps/rejected": -266.04376220703125, "loss": 0.153, "rewards/accuracies": 0.875, "rewards/chosen": 0.1082618460059166, "rewards/margins": 0.42941126227378845, "rewards/rejected": -0.3211493492126465, "step": 3670 }, { "epoch": 0.98, "learning_rate": 5.305234949880001e-09, "logits/chosen": -1.5959584712982178, "logits/rejected": -1.2097722291946411, "logps/chosen": -276.74713134765625, "logps/rejected": -315.0145568847656, "loss": 0.1814, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.07109376043081284, "rewards/margins": 0.38560524582862854, "rewards/rejected": -0.3145114779472351, "step": 3680 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.7472782135009766, "logits/rejected": -1.2426128387451172, "logps/chosen": -275.06793212890625, "logps/rejected": -278.3852233886719, "loss": 0.1241, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.09598256647586823, "rewards/margins": 0.42074212431907654, "rewards/rejected": -0.3247596323490143, "step": 3690 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.481673002243042, "logits/rejected": -0.9478602409362793, "logps/chosen": -281.0965576171875, "logps/rejected": -267.8004150390625, "loss": 0.1391, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.02585453912615776, "rewards/margins": 0.3552091717720032, "rewards/rejected": -0.32935458421707153, "step": 3700 }, { "epoch": 0.99, "learning_rate": 1.7327344598702667e-09, "logits/chosen": -1.533521056175232, "logits/rejected": -1.2224196195602417, "logps/chosen": -232.5213623046875, "logps/rejected": -339.6204528808594, "loss": 0.1444, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.004062544088810682, "rewards/margins": 0.3145105242729187, "rewards/rejected": -0.3185730576515198, "step": 3710 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.80594801902771, "logits/rejected": -1.2266263961791992, "logps/chosen": -253.4310760498047, "logps/rejected": -281.5027160644531, "loss": 0.1023, "rewards/accuracies": 0.875, "rewards/chosen": 0.06998688727617264, "rewards/margins": 0.3998931050300598, "rewards/rejected": -0.3299062252044678, "step": 3720 }, { "epoch": 0.99, "learning_rate": 4.332211510807427e-10, "logits/chosen": -1.7258926630020142, "logits/rejected": -1.4480842351913452, "logps/chosen": -233.64529418945312, "logps/rejected": -240.29598999023438, "loss": 0.1289, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.031451232731342316, "rewards/margins": 0.3203813433647156, "rewards/rejected": -0.28893011808395386, "step": 3730 }, { "epoch": 1.0, "learning_rate": 1.0830763387897902e-10, "logits/chosen": -1.8174076080322266, "logits/rejected": -1.2164661884307861, "logps/chosen": -286.5456237792969, "logps/rejected": -223.0185546875, "loss": 0.1329, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.08067300915718079, "rewards/margins": 0.36825209856033325, "rewards/rejected": -0.2875790596008301, "step": 3740 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.574351191520691, "logits/rejected": -1.151698112487793, "logps/chosen": -267.740234375, "logps/rejected": -287.2723693847656, "loss": 0.1084, "rewards/accuracies": 0.875, "rewards/chosen": 0.0795169472694397, "rewards/margins": 0.420736163854599, "rewards/rejected": -0.3412191867828369, "step": 3750 }, { "epoch": 1.0, "step": 3750, "total_flos": 0.0, "train_loss": 0.15019961676597596, "train_runtime": 15622.399, "train_samples_per_second": 0.96, "train_steps_per_second": 0.24 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }